diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index d8c9d0a432ad8f..3df9e56db38a43 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -475,6 +475,10 @@ class TargetFrameLowering {
   /// debug info.
   virtual DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const;
 
+  /// If frame pointer or base pointer is clobbered by an instruction, we should
+  /// spill/restore it around that instruction.
+  virtual void spillFPBP(MachineFunction &MF) const {}
+
   /// This method is called at the end of prolog/epilog code insertion, so
   /// targets can emit remarks based on the final frame layout.
   virtual void emitRemarks(const MachineFunction &MF,
diff --git a/llvm/lib/CodeGen/CFIInstrInserter.cpp b/llvm/lib/CodeGen/CFIInstrInserter.cpp
index 06de92515c0444..f5bedc7b8ecdfc 100644
--- a/llvm/lib/CodeGen/CFIInstrInserter.cpp
+++ b/llvm/lib/CodeGen/CFIInstrInserter.cpp
@@ -25,6 +25,7 @@
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDwarf.h"
 using namespace llvm;
 
@@ -184,6 +185,10 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
   unsigned NumRegs = TRI.getNumSupportedRegs(*MF);
   BitVector CSRSaved(NumRegs), CSRRestored(NumRegs);
 
+#ifndef NDEBUG
+  int RememberState = 0;
+#endif
+
   // Determine cfa offset and register set by the block.
   for (MachineInstr &MI : *MBBInfo.MBB) {
     if (MI.isCFIInstruction()) {
@@ -228,17 +233,25 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
       case MCCFIInstruction::OpRememberState:
         // TODO: Add support for handling cfi_remember_state.
 #ifndef NDEBUG
-        report_fatal_error(
-            "Support for cfi_remember_state not implemented! Value of CFA "
-            "may be incorrect!\n");
+        // Currently we need cfi_remember_state and cfi_restore_state to be in
+        // the same BB, so it will not impact outgoing CFA.
+        ++RememberState;
+        if (RememberState != 1)
+          MF->getContext().reportError(
+              SMLoc(),
+              "Support for cfi_remember_state not implemented! Value of CFA "
+              "may be incorrect!\n");
 #endif
         break;
       case MCCFIInstruction::OpRestoreState:
         // TODO: Add support for handling cfi_restore_state.
 #ifndef NDEBUG
-        report_fatal_error(
-            "Support for cfi_restore_state not implemented! Value of CFA may "
-            "be incorrect!\n");
+        --RememberState;
+        if (RememberState != 0)
+          MF->getContext().reportError(
+              SMLoc(),
+              "Support for cfi_restore_state not implemented! Value of CFA may "
+              "be incorrect!\n");
 #endif
         break;
       // Other CFI directives do not affect CFA value.
@@ -264,6 +277,14 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
     }
   }
 
+#ifndef NDEBUG
+  if (RememberState != 0)
+    MF->getContext().reportError(
+        SMLoc(),
+        "Support for cfi_remember_state not implemented! Value of CFA may be "
+        "incorrect!\n");
+#endif
+
   MBBInfo.Processed = true;
 
   // Update outgoing CFA info.
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index f4490873cfdcdb..ee03eaa8ae527c 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -228,6 +228,11 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
   FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(MF);
   ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
 
+  // Spill frame pointer and/or base pointer registers if they are clobbered.
+  // It is placed before call frame instruction elimination so it will not mess
+  // with stack arguments.
+  TFI->spillFPBP(MF);
+
   // Calculate the MaxCallFrameSize value for the function's frame
   // information. Also eliminates call frame pseudo instructions.
   calculateCallFrameInfo(MF);
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index bdc9a0d29670a1..77dac1197f85e9 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -4227,3 +4227,323 @@ void X86FrameLowering::restoreWinEHStackPointersInParent(
                                   /*RestoreSP=*/IsSEH);
   }
 }
+
+// Compute the alignment gap between current SP after spilling FP/BP and the
+// next properly aligned stack offset.
+static int computeFPBPAlignmentGap(MachineFunction &MF,
+                                   const TargetRegisterClass *RC,
+                                   unsigned NumSpilledRegs) {
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  unsigned AllocSize = TRI->getSpillSize(*RC) * NumSpilledRegs;
+  Align StackAlign = MF.getSubtarget().getFrameLowering()->getStackAlign();
+  unsigned AlignedSize = alignTo(AllocSize, StackAlign);
+  return AlignedSize - AllocSize;
+}
+
+void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF,
+                                        MachineBasicBlock::iterator BeforeMI,
+                                        Register FP, Register BP,
+                                        int SPAdjust) const {
+  assert(FP.isValid() || BP.isValid());
+
+  MachineBasicBlock *MBB = BeforeMI->getParent();
+  DebugLoc DL = BeforeMI->getDebugLoc();
+
+  // Spill FP.
+  if (FP.isValid()) {
+    BuildMI(*MBB, BeforeMI, DL,
+            TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
+        .addReg(FP);
+  }
+
+  // Spill BP.
+  if (BP.isValid()) {
+    BuildMI(*MBB, BeforeMI, DL,
+            TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
+        .addReg(BP);
+  }
+
+  // Make sure SP is aligned.
+  if (SPAdjust)
+    emitSPUpdate(*MBB, BeforeMI, DL, -SPAdjust, false);
+
+  // Emit unwinding information.
+  if (FP.isValid() && needsDwarfCFI(MF)) {
+    // Emit .cfi_remember_state to remember old frame.
+    unsigned CFIIndex =
+        MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr));
+    BuildMI(*MBB, BeforeMI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex);
+
+    // Setup new CFA value with DW_CFA_def_cfa_expression:
+    //     DW_OP_breg7+offset, DW_OP_deref, DW_OP_consts 16, DW_OP_plus
+    SmallString<64> CfaExpr;
+    uint8_t buffer[16];
+    int Offset = SPAdjust;
+    if (BP.isValid())
+      Offset += TRI->getSpillSize(*TRI->getMinimalPhysRegClass(BP));
+    // If BeforeMI is a frame setup instruction, we need to adjust the position
+    // and offset of the new cfi instruction.
+    if (TII.isFrameSetup(*BeforeMI)) {
+      Offset += alignTo(TII.getFrameSize(*BeforeMI), getStackAlign());
+      BeforeMI = std::next(BeforeMI);
+    }
+    Register StackPtr = TRI->getStackRegister();
+    if (STI.isTarget64BitILP32())
+      StackPtr = Register(getX86SubSuperRegister(StackPtr, 64));
+    unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackPtr, true);
+    CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfStackPtr));
+    CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
+    CfaExpr.push_back(dwarf::DW_OP_deref);
+    CfaExpr.push_back(dwarf::DW_OP_consts);
+    CfaExpr.append(buffer, buffer + encodeSLEB128(SlotSize * 2, buffer));
+    CfaExpr.push_back((uint8_t)dwarf::DW_OP_plus);
+
+    SmallString<64> DefCfaExpr;
+    DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
+    DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
+    DefCfaExpr.append(CfaExpr.str());
+    BuildCFI(*MBB, BeforeMI, DL,
+             MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
+             MachineInstr::FrameSetup);
+  }
+}
+
+void X86FrameLowering::restoreFPBPUsingSP(MachineFunction &MF,
+                                          MachineBasicBlock::iterator AfterMI,
+                                          Register FP, Register BP,
+                                          int SPAdjust) const {
+  assert(FP.isValid() || BP.isValid());
+
+  // Adjust SP so it points to spilled FP or BP.
+  MachineBasicBlock *MBB = AfterMI->getParent();
+  MachineBasicBlock::iterator Pos = std::next(AfterMI);
+  DebugLoc DL = AfterMI->getDebugLoc();
+  if (SPAdjust)
+    emitSPUpdate(*MBB, Pos, DL, SPAdjust, false);
+
+  // Restore BP.
+  if (BP.isValid()) {
+    BuildMI(*MBB, Pos, DL,
+            TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), BP);
+  }
+
+  // Restore FP.
+  if (FP.isValid()) {
+    BuildMI(*MBB, Pos, DL,
+            TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), FP);
+
+    // Emit unwinding information.
+    if (needsDwarfCFI(MF)) {
+      // Restore original frame with .cfi_restore_state.
+      unsigned CFIIndex =
+          MF.addFrameInst(MCCFIInstruction::createRestoreState(nullptr));
+      BuildMI(*MBB, Pos, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+          .addCFIIndex(CFIIndex);
+    }
+  }
+}
+
+void X86FrameLowering::saveAndRestoreFPBPUsingSP(
+    MachineFunction &MF, MachineBasicBlock::iterator BeforeMI,
+    MachineBasicBlock::iterator AfterMI, bool SpillFP, bool SpillBP) const {
+  assert(SpillFP || SpillBP);
+
+  Register FP, BP;
+  const TargetRegisterClass *RC;
+  unsigned NumRegs = 0;
+
+  if (SpillFP) {
+    FP = TRI->getFrameRegister(MF);
+    if (STI.isTarget64BitILP32())
+      FP = Register(getX86SubSuperRegister(FP, 64));
+    RC = TRI->getMinimalPhysRegClass(FP);
+    ++NumRegs;
+  }
+  if (SpillBP) {
+    BP = TRI->getBaseRegister();
+    if (STI.isTarget64BitILP32())
+      BP = Register(getX86SubSuperRegister(BP, 64));
+    RC = TRI->getMinimalPhysRegClass(BP);
+    ++NumRegs;
+  }
+  int SPAdjust = computeFPBPAlignmentGap(MF, RC, NumRegs);
+
+  spillFPBPUsingSP(MF, BeforeMI, FP, BP, SPAdjust);
+  restoreFPBPUsingSP(MF, AfterMI, FP, BP, SPAdjust);
+}
+
+bool X86FrameLowering::skipSpillFPBP(
+    MachineFunction &MF, MachineBasicBlock::reverse_iterator &MI) const {
+  if (MI->getOpcode() == X86::LCMPXCHG16B_SAVE_RBX) {
+    // The pseudo instruction LCMPXCHG16B_SAVE_RBX is generated in the form
+    //     SaveRbx = COPY RBX
+    //     SaveRbx = LCMPXCHG16B_SAVE_RBX ..., SaveRbx, implicit-def rbx
+    // And later LCMPXCHG16B_SAVE_RBX is expanded to restore RBX from SaveRbx.
+    // We should skip this instruction sequence.
+    int FI;
+    unsigned Reg;
+    while (!(MI->getOpcode() == TargetOpcode::COPY &&
+             MI->getOperand(1).getReg() == X86::RBX) &&
+           !((Reg = TII.isStoreToStackSlot(*MI, FI)) && Reg == X86::RBX))
+      ++MI;
+    return true;
+  }
+  return false;
+}
+
+static bool isFPBPAccess(const MachineInstr &MI, Register FP, Register BP,
+                         const TargetRegisterInfo *TRI, bool &AccessFP,
+                         bool &AccessBP) {
+  AccessFP = AccessBP = false;
+  if (FP) {
+    if (MI.findRegisterUseOperandIdx(FP, TRI, false) != -1 ||
+        MI.findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
+      AccessFP = true;
+  }
+  if (BP) {
+    if (MI.findRegisterUseOperandIdx(BP, TRI, false) != -1 ||
+        MI.findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
+      AccessBP = true;
+  }
+  return AccessFP || AccessBP;
+}
+
+// Invoke instruction has been lowered to normal function call. We try to figure
+// out if MI comes from Invoke.
+// Do we have any better method?
+static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels) {
+  if (!MI.isCall())
+    return false;
+  if (InsideEHLabels)
+    return true;
+
+  const MachineBasicBlock *MBB = MI.getParent();
+  if (!MBB->hasEHPadSuccessor())
+    return false;
+
+  // Check if there is another call instruction from MI to the end of MBB.
+  MachineBasicBlock::const_iterator MBBI = MI, ME = MBB->end();
+  for (++MBBI; MBBI != ME; ++MBBI)
+    if (MBBI->isCall())
+      return false;
+  return true;
+}
+
+/// If a function uses base pointer and the base pointer is clobbered by inline
+/// asm, RA doesn't detect this case, and after the inline asm, the base pointer
+/// contains garbage value.
+/// For example if a 32b x86 function uses base pointer esi, and esi is
+/// clobbered by following inline asm
+///     asm("rep movsb" : "+D"(ptr), "+S"(x), "+c"(c)::"memory");
+/// We need to save esi before the asm and restore it after the asm.
+///
+/// The problem can also occur to frame pointer if there is a function call, and
+/// the callee uses a different calling convention and clobbers the fp.
+///
+/// Because normal frame objects (spill slots) are accessed through fp/bp
+/// register, so we can't spill fp/bp to normal spill slots.
+///
+/// FIXME: There are 2 possible enhancements:
+/// 1. In many cases there are different physical registers not clobbered by
+/// inline asm, we can use one of them as base pointer. Or use a virtual
+/// register as base pointer and let RA allocate a physical register to it.
+/// 2. If there is no other instructions access stack with fp/bp from the
+/// inline asm to the epilog, and no cfi requirement for a correct fp, we can
+/// skip the save and restore operations.
+void X86FrameLowering::spillFPBP(MachineFunction &MF) const {
+  Register FP, BP;
+  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
+  if (TFI.hasFP(MF))
+    FP = TRI->getFrameRegister(MF);
+  if (TRI->hasBasePointer(MF))
+    BP = TRI->getBaseRegister();
+  if (!FP && !BP)
+    return;
+
+  for (MachineBasicBlock &MBB : MF) {
+    bool InsideEHLabels = false;
+    auto MI = MBB.rbegin(), ME = MBB.rend();
+    auto TermMI = MBB.getFirstTerminator();
+    if (TermMI != MBB.begin())
+      MI = *(std::prev(TermMI));
+
+    while (MI != ME) {
+      // Skip frame setup/destroy instructions.
+      // Skip Invoke (call inside try block) instructions.
+      // Skip instructions handled by target.
+      if (MI->getFlag(MachineInstr::MIFlag::FrameSetup) ||
+          MI->getFlag(MachineInstr::MIFlag::FrameDestroy) ||
+          isInvoke(*MI, InsideEHLabels) || skipSpillFPBP(MF, MI)) {
+        ++MI;
+        continue;
+      }
+
+      if (MI->getOpcode() == TargetOpcode::EH_LABEL) {
+        InsideEHLabels = !InsideEHLabels;
+        ++MI;
+        continue;
+      }
+
+      bool AccessFP, AccessBP;
+      // Check if fp or bp is used in MI.
+      if (!isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)) {
+        ++MI;
+        continue;
+      }
+
+      // Look for the range [DefMI, KillMI] in which fp or bp is defined and
+      // used.
+      bool FPLive = false, BPLive = false;
+      bool SpillFP = false, SpillBP = false;
+      auto DefMI = MI, KillMI = MI;
+      do {
+        SpillFP |= AccessFP;
+        SpillBP |= AccessBP;
+
+        // Maintain FPLive and BPLive.
+        if (FPLive && MI->findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
+          FPLive = false;
+        if (FP && MI->findRegisterUseOperandIdx(FP, TRI, false) != -1)
+          FPLive = true;
+        if (BPLive && MI->findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
+          BPLive = false;
+        if (BP && MI->findRegisterUseOperandIdx(BP, TRI, false) != -1)
+          BPLive = true;
+
+        DefMI = MI++;
+      } while ((MI != ME) &&
+               (FPLive || BPLive ||
+                isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)));
+
+      // Don't need to save/restore if FP is accessed through llvm.frameaddress.
+      if (FPLive && !SpillBP)
+        continue;
+
+      // If the bp is clobbered by a call, we should save and restore outside of
+      // the frame setup instructions.
+      if (KillMI->isCall() && DefMI != ME) {
+        const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+        auto FrameSetup = std::next(DefMI);
+        // Look for frame setup instruction toward the start of the BB.
+        // If we reach another call instruction, it means no frame setup
+        // instruction for the current call instruction.
+        while (FrameSetup != ME && !TII.isFrameSetup(*FrameSetup) &&
+               !FrameSetup->isCall())
+          ++FrameSetup;
+        // If a frame setup instruction is found, we need to find out the
+        // corresponding frame destroy instruction.
+        if (FrameSetup != ME && TII.isFrameSetup(*FrameSetup)) {
+          while (!TII.isFrameInstr(*KillMI))
+            --KillMI;
+          DefMI = FrameSetup;
+          MI = DefMI;
+          ++MI;
+        }
+      }
+
+      // Call target function to spill and restore FP and BP registers.
+      saveAndRestoreFPBPUsingSP(MF, &(*DefMI), &(*KillMI), SpillFP, SpillBP);
+    }
+  }
+}
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
index 2dc9ecc6109d78..e21f6ab3d16d5f 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -103,6 +103,8 @@ class X86FrameLowering : public TargetFrameLowering {
                               MutableArrayRef<CalleeSavedInfo> CSI,
                               const TargetRegisterInfo *TRI) const override;
 
+  void spillFPBP(MachineFunction &MF) const override;
+
   bool hasFP(const MachineFunction &MF) const override;
   bool hasReservedCallFrame(const MachineFunction &MF) const override;
   bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override;
@@ -267,6 +269,29 @@ class X86FrameLowering : public TargetFrameLowering {
   void emitCatchRetReturnValue(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                MachineInstr *CatchRet) const;
+
+  /// Issue instructions to allocate stack space and spill frame pointer and/or
+  /// base pointer to stack using stack pointer register.
+  void spillFPBPUsingSP(MachineFunction &MF,
+                        const MachineBasicBlock::iterator BeforeMI, Register FP,
+                        Register BP, int SPAdjust) const;
+
+  /// Issue instructions to restore frame pointer and/or base pointer from stack
+  /// using stack pointer register, and free stack space.
+  void restoreFPBPUsingSP(MachineFunction &MF,
+                          const MachineBasicBlock::iterator AfterMI,
+                          Register FP, Register BP, int SPAdjust) const;
+
+  void saveAndRestoreFPBPUsingSP(MachineFunction &MF,
+                                 MachineBasicBlock::iterator BeforeMI,
+                                 MachineBasicBlock::iterator AfterMI,
+                                 bool SpillFP, bool SpillBP) const;
+
+  // If MI uses fp/bp, but target can handle it, and doesn't want to be spilled
+  // again, this function should return true, and update MI so we will not check
+  // any instructions from related sequence.
+  bool skipSpillFPBP(MachineFunction &MF,
+                     MachineBasicBlock::reverse_iterator &MI) const;
 };
 
 } // End llvm namespace
diff --git a/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll b/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll
index aa5c54d30e3bc4..f20c4c1ae27867 100644
--- a/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll
+++ b/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll
@@ -43,8 +43,12 @@ define void @widget(float %arg) nounwind {
 ; FRAME-NEXT:    xorl %r8d, %r8d
 ; FRAME-NEXT:    callq *%rsi
 ; FRAME-NEXT:    movss %xmm6, 0
+; FRAME-NEXT:    pushq %rbp
+; FRAME-NEXT:    pushq %rax
 ; FRAME-NEXT:    #APP
 ; FRAME-NEXT:    #NO_APP
+; FRAME-NEXT:    popq %rax
+; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
 ; FRAME-NEXT:    addq $48, %rsp
 ; FRAME-NEXT:    pop2 %r15, %rsi
diff --git a/llvm/test/CodeGen/X86/apx/push2-pop2.ll b/llvm/test/CodeGen/X86/apx/push2-pop2.ll
index 25139f1da8272c..6bd9f525090ee2 100644
--- a/llvm/test/CodeGen/X86/apx/push2-pop2.ll
+++ b/llvm/test/CodeGen/X86/apx/push2-pop2.ll
@@ -24,8 +24,12 @@ define void @csr1() nounwind {
 ; FRAME:       # %bb.0: # %entry
 ; FRAME-NEXT:    pushq %rbp
 ; FRAME-NEXT:    movq %rsp, %rbp
+; FRAME-NEXT:    pushq %rbp
+; FRAME-NEXT:    pushq %rax
 ; FRAME-NEXT:    #APP
 ; FRAME-NEXT:    #NO_APP
+; FRAME-NEXT:    popq %rax
+; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    retq
 entry:
@@ -59,8 +63,12 @@ define void @csr2() nounwind {
 ; FRAME-NEXT:    pushq %rbp
 ; FRAME-NEXT:    movq %rsp, %rbp
 ; FRAME-NEXT:    pushq %r15
+; FRAME-NEXT:    pushq %rbp
+; FRAME-NEXT:    pushq %rax
 ; FRAME-NEXT:    #APP
 ; FRAME-NEXT:    #NO_APP
+; FRAME-NEXT:    popq %rax
+; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    popq %r15
 ; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    retq
@@ -95,8 +103,12 @@ define void @csr3() nounwind {
 ; FRAME-NEXT:    pushq %rbp
 ; FRAME-NEXT:    movq %rsp, %rbp
 ; FRAME-NEXT:    push2 %r14, %r15
+; FRAME-NEXT:    pushq %rbp
+; FRAME-NEXT:    pushq %rax
 ; FRAME-NEXT:    #APP
 ; FRAME-NEXT:    #NO_APP
+; FRAME-NEXT:    popq %rax
+; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    pop2 %r15, %r14
 ; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    retq
@@ -136,8 +148,12 @@ define void @csr4() nounwind {
 ; FRAME-NEXT:    movq %rsp, %rbp
 ; FRAME-NEXT:    push2 %r14, %r15
 ; FRAME-NEXT:    pushq %r13
+; FRAME-NEXT:    pushq %rbp
+; FRAME-NEXT:    pushq %rax
 ; FRAME-NEXT:    #APP
 ; FRAME-NEXT:    #NO_APP
+; FRAME-NEXT:    popq %rax
+; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    popq %r13
 ; FRAME-NEXT:    pop2 %r15, %r14
 ; FRAME-NEXT:    popq %rbp
@@ -178,8 +194,12 @@ define void @csr5() nounwind {
 ; FRAME-NEXT:    movq %rsp, %rbp
 ; FRAME-NEXT:    push2 %r14, %r15
 ; FRAME-NEXT:    push2 %r12, %r13
+; FRAME-NEXT:    pushq %rbp
+; FRAME-NEXT:    pushq %rax
 ; FRAME-NEXT:    #APP
 ; FRAME-NEXT:    #NO_APP
+; FRAME-NEXT:    popq %rax
+; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    pop2 %r13, %r12
 ; FRAME-NEXT:    pop2 %r15, %r14
 ; FRAME-NEXT:    popq %rbp
@@ -225,8 +245,12 @@ define void @csr6() nounwind {
 ; FRAME-NEXT:    push2 %r14, %r15
 ; FRAME-NEXT:    push2 %r12, %r13
 ; FRAME-NEXT:    pushq %rbx
+; FRAME-NEXT:    pushq %rbp
+; FRAME-NEXT:    pushq %rax
 ; FRAME-NEXT:    #APP
 ; FRAME-NEXT:    #NO_APP
+; FRAME-NEXT:    popq %rax
+; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    popq %rbx
 ; FRAME-NEXT:    pop2 %r13, %r12
 ; FRAME-NEXT:    pop2 %r15, %r14
diff --git a/llvm/test/CodeGen/X86/apx/pushp-popp.ll b/llvm/test/CodeGen/X86/apx/pushp-popp.ll
index ad4306fccce669..625e70b07198e8 100644
--- a/llvm/test/CodeGen/X86/apx/pushp-popp.ll
+++ b/llvm/test/CodeGen/X86/apx/pushp-popp.ll
@@ -18,8 +18,12 @@ define void @csr2() nounwind {
 ; FRAME-NEXT:    pushp %rbp
 ; FRAME-NEXT:    movq %rsp, %rbp
 ; FRAME-NEXT:    pushp %r15
+; FRAME-NEXT:    pushp %rbp
+; FRAME-NEXT:    pushq %rax
 ; FRAME-NEXT:    #APP
 ; FRAME-NEXT:    #NO_APP
+; FRAME-NEXT:    popq %rax
+; FRAME-NEXT:    popp %rbp
 ; FRAME-NEXT:    popp %r15
 ; FRAME-NEXT:    popp %rbp
 ; FRAME-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
index 25d182afd66e71..78870278eeace9 100644
--- a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
+++ b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
@@ -69,8 +69,12 @@ define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
 ; X64-NEXT:    andq $-64, %rsp
 ; X64-NEXT:    subq $128, %rsp
 ; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    movq %rsp, %rdi
 ; X64-NEXT:    callq _func_float16_ptr
+; X64-NEXT:    addq $8, %rsp
+; X64-NEXT:    popq %rbp
 ; X64-NEXT:    vaddps (%rsp), %zmm0, %zmm0
 ; X64-NEXT:    leaq -16(%rbp), %rsp
 ; X64-NEXT:    popq %r12
@@ -149,8 +153,12 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
 ; X64-NEXT:    subq $128, %rsp
 ; X64-NEXT:    vmovaps %zmm1, %zmm16
 ; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    movq %rsp, %rdi
 ; X64-NEXT:    callq _func_float16_ptr
+; X64-NEXT:    addq $8, %rsp
+; X64-NEXT:    popq %rbp
 ; X64-NEXT:    vaddps %zmm16, %zmm0, %zmm0
 ; X64-NEXT:    vaddps (%rsp), %zmm0, %zmm0
 ; X64-NEXT:    leaq -16(%rbp), %rsp
diff --git a/llvm/test/CodeGen/X86/clobber_base_ptr.ll b/llvm/test/CodeGen/X86/clobber_base_ptr.ll
new file mode 100644
index 00000000000000..2c39560f02d160
--- /dev/null
+++ b/llvm/test/CodeGen/X86/clobber_base_ptr.ll
@@ -0,0 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i386-pc-windows-gnu"
+
+; This function uses esi as base pointer, the inline asm clobbers esi, so we
+; should save esi using esp before the inline asm, and restore esi after the
+; inline asm.
+
+define i32 @clober_bp() {
+; CHECK-LABEL: clober_bp:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    .cfi_offset %ebp, -8
+; CHECK-NEXT:    movl %esp, %ebp
+; CHECK-NEXT:    .cfi_def_cfa_register %ebp
+; CHECK-NEXT:    pushl %edi
+; CHECK-NEXT:    pushl %esi
+; CHECK-NEXT:    andl $-16, %esp
+; CHECK-NEXT:    subl $16, %esp
+; CHECK-NEXT:    movl %esp, %esi
+; CHECK-NEXT:    .cfi_offset %esi, -16
+; CHECK-NEXT:    .cfi_offset %edi, -12
+; CHECK-NEXT:    movl $4, 12(%esi)
+; CHECK-NEXT:    movl 12(%esi), %eax
+; CHECK-NEXT:    addl $3, %eax
+; CHECK-NEXT:    andl $-4, %eax
+; CHECK-NEXT:    calll __alloca
+; CHECK-NEXT:    movl %esp, %eax
+; CHECK-NEXT:    andl $-16, %eax
+; CHECK-NEXT:    movl %eax, %esp
+; CHECK-NEXT:    movl $1, (%eax)
+; CHECK-NEXT:    leal 8(%esi), %edi
+; CHECK-NEXT:    movl $4, %ecx
+; CHECK-NEXT:    pushl %esi
+; CHECK-NEXT:    movl %eax, %esi
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    rep movsb (%esi), %es:(%edi)
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    movl 8(%esi), %eax
+; CHECK-NEXT:    leal -8(%ebp), %esp
+; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    popl %edi
+; CHECK-NEXT:    popl %ebp
+; CHECK-NEXT:    retl
+entry:
+  %size = alloca i32, align 4
+  %g = alloca i32, align 4
+  store volatile i32 4, ptr %size, align 4
+  %len = load volatile i32, ptr %size, align 4
+  %var_array = alloca i8, i32 %len, align 16
+  store i32 1, ptr %var_array, align 16
+  %nil = call { ptr, ptr, i32 } asm "rep movsb", "={di},={si},={cx},0,1,2,~{memory},~{dirflag},~{fpsr},~{flags}"(ptr %g, ptr %var_array, i32 4)
+  %retval = load i32, ptr %g, align 4
+  ret i32 %retval
+}
+
+; This function has the same code except the inline asm also clobbers
+; frame pointer.
+
+define i32 @clobber_bpfp() {
+; CHECK-LABEL: clobber_bpfp:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    .cfi_offset %ebp, -8
+; CHECK-NEXT:    movl %esp, %ebp
+; CHECK-NEXT:    .cfi_def_cfa_register %ebp
+; CHECK-NEXT:    pushl %edi
+; CHECK-NEXT:    pushl %esi
+; CHECK-NEXT:    andl $-16, %esp
+; CHECK-NEXT:    subl $16, %esp
+; CHECK-NEXT:    movl %esp, %esi
+; CHECK-NEXT:    .cfi_offset %esi, -16
+; CHECK-NEXT:    .cfi_offset %edi, -12
+; CHECK-NEXT:    movl $4, 12(%esi)
+; CHECK-NEXT:    movl 12(%esi), %eax
+; CHECK-NEXT:    addl $3, %eax
+; CHECK-NEXT:    andl $-4, %eax
+; CHECK-NEXT:    calll __alloca
+; CHECK-NEXT:    movl %esp, %eax
+; CHECK-NEXT:    andl $-16, %eax
+; CHECK-NEXT:    movl %eax, %esp
+; CHECK-NEXT:    movl $1, (%eax)
+; CHECK-NEXT:    leal 8(%esi), %edi
+; CHECK-NEXT:    movl $4, %ecx
+; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    pushl %esi
+; CHECK-NEXT:    .cfi_remember_state
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x06, 0x74, 0x04, 0x06, 0x11, 0x08, 0x22 #
+; CHECK-NEXT:    movl %eax, %esi
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    rep movsb (%esi), %es:(%edi)
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    popl %ebp
+; CHECK-NEXT:    .cfi_restore_state
+; CHECK-NEXT:    movl 8(%esi), %eax
+; CHECK-NEXT:    leal -8(%ebp), %esp
+; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    popl %edi
+; CHECK-NEXT:    popl %ebp
+; CHECK-NEXT:    retl
+entry:
+  %size = alloca i32, align 4
+  %g = alloca i32, align 4
+  store volatile i32 4, ptr %size, align 4
+  %len = load volatile i32, ptr %size, align 4
+  %var_array = alloca i8, i32 %len, align 16
+  store i32 1, ptr %var_array, align 16
+  %nil = call { ptr, ptr, i32 } asm "rep movsb", "={di},={si},={cx},0,1,2,~{memory},~{dirflag},~{fpsr},~{flags},~{ebp}"(ptr %g, ptr %var_array, i32 4)
+  %retval = load i32, ptr %g, align 4
+  ret i32 %retval
+}
+
diff --git a/llvm/test/CodeGen/X86/clobber_frame_ptr.ll b/llvm/test/CodeGen/X86/clobber_frame_ptr.ll
new file mode 100644
index 00000000000000..6209e1a85e9e19
--- /dev/null
+++ b/llvm/test/CodeGen/X86/clobber_frame_ptr.ll
@@ -0,0 +1,159 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=x86_64-pc-linux -stackrealign -verify-machineinstrs < %s | FileCheck %s
+
+; Calling convention ghccc uses ebp to pass parameter, so calling a function
+; using ghccc clobbers ebp. We should save and restore ebp around such a call
+; if ebp is used as frame pointer.
+
+declare ghccc i32 @external(i32)
+
+; Basic test with ghccc calling convention.
+define i32 @test1(i32 %0, i32 %1) {
+; CHECK-LABEL: test1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbp, -16
+; CHECK-NEXT:    movq %rsp, %rbp
+; CHECK-NEXT:    .cfi_def_cfa_register %rbp
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %r13
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    andq $-16, %rsp
+; CHECK-NEXT:    subq $16, %rsp
+; CHECK-NEXT:    .cfi_offset %rbx, -56
+; CHECK-NEXT:    .cfi_offset %r12, -48
+; CHECK-NEXT:    .cfi_offset %r13, -40
+; CHECK-NEXT:    .cfi_offset %r14, -32
+; CHECK-NEXT:    .cfi_offset %r15, -24
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_remember_state
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x06, 0x77, 0x08, 0x06, 0x11, 0x10, 0x22 #
+; CHECK-NEXT:    movl %esi, %ebp
+; CHECK-NEXT:    movq %rdi, %r13
+; CHECK-NEXT:    callq external@PLT
+; CHECK-NEXT:    addq $8, %rsp
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_restore_state
+; CHECK-NEXT:    leaq -40(%rbp), %rsp
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    popq %r12
+; CHECK-NEXT:    popq %r13
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
+; CHECK-NEXT:    retq
+    %x = call ghccc i32 @external(i32 %0, i32 %1)
+    ret i32 %x
+}
+
+; Calling convention hipe has similar behavior. It clobbers rbp but not rbx.
+
+declare cc 11 i64 @hipe1(i64)
+declare cc 11 i64 @hipe2(i64, i64, i64, i64, i64, i64, i64)
+
+; Basic test with hipe calling convention.
+define i64 @test2(i64 %a0, i64 %a1) {
+; CHECK-LABEL: test2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbp, -16
+; CHECK-NEXT:    movq %rsp, %rbp
+; CHECK-NEXT:    .cfi_def_cfa_register %rbp
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %r13
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    andq $-16, %rsp
+; CHECK-NEXT:    subq $16, %rsp
+; CHECK-NEXT:    .cfi_offset %rbx, -56
+; CHECK-NEXT:    .cfi_offset %r12, -48
+; CHECK-NEXT:    .cfi_offset %r13, -40
+; CHECK-NEXT:    .cfi_offset %r14, -32
+; CHECK-NEXT:    .cfi_offset %r15, -24
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_remember_state
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x06, 0x77, 0x08, 0x06, 0x11, 0x10, 0x22 #
+; CHECK-NEXT:    movq %rsi, %rbp
+; CHECK-NEXT:    movq %rdi, %r15
+; CHECK-NEXT:    callq hipe1@PLT
+; CHECK-NEXT:    addq $8, %rsp
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_restore_state
+; CHECK-NEXT:    movq %r15, %rax
+; CHECK-NEXT:    leaq -40(%rbp), %rsp
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    popq %r12
+; CHECK-NEXT:    popq %r13
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
+; CHECK-NEXT:    retq
+  %x = call cc 11 i64 @hipe1(i64 %a0, i64 %a1)
+  ret i64 %x
+}
+
+; Test with more arguments, so some of them are passed from stack. The spilling
+; of rbp should not disturb stack arguments.
+; fixme: current generated code is wrong because rbp is used to load passed in
+;        argument after rbp is assigned argument for function call, it is caused
+;        by x86-cf-opt.
+define i64 @test3(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7) {
+; CHECK-LABEL: test3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbp, -16
+; CHECK-NEXT:    movq %rsp, %rbp
+; CHECK-NEXT:    .cfi_def_cfa_register %rbp
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %r13
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    andq $-16, %rsp
+; CHECK-NEXT:    subq $16, %rsp
+; CHECK-NEXT:    .cfi_offset %rbx, -56
+; CHECK-NEXT:    .cfi_offset %r12, -48
+; CHECK-NEXT:    .cfi_offset %r13, -40
+; CHECK-NEXT:    .cfi_offset %r14, -32
+; CHECK-NEXT:    .cfi_offset %r15, -24
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_remember_state
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x06, 0x77, 0x08, 0x06, 0x11, 0x10, 0x22 #
+; CHECK-NEXT:    movq %rsi, %rbp
+; CHECK-NEXT:    movq %rdi, %r15
+; CHECK-NEXT:    movq %rdx, %rsi
+; CHECK-NEXT:    movq %rcx, %rdx
+; CHECK-NEXT:    movq %r8, %rcx
+; CHECK-NEXT:    movq %r9, %r8
+; CHECK-NEXT:    pushq 24(%rbp)
+; CHECK-NEXT:    pushq 16(%rbp)
+; CHECK-NEXT:    callq hipe2@PLT
+; CHECK-NEXT:    addq $8, %rsp
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_restore_state
+; CHECK-NEXT:    addq $16, %rsp
+; CHECK-NEXT:    movq %r15, %rax
+; CHECK-NEXT:    leaq -40(%rbp), %rsp
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    popq %r12
+; CHECK-NEXT:    popq %r13
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
+; CHECK-NEXT:    retq
+  %x = call cc 11 i64 @hipe2(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7)
+  ret i64 %x
+}
diff --git a/llvm/test/CodeGen/X86/clobber_frame_ptr_x32.ll b/llvm/test/CodeGen/X86/clobber_frame_ptr_x32.ll
new file mode 100644
index 00000000000000..25c951d8b1a109
--- /dev/null
+++ b/llvm/test/CodeGen/X86/clobber_frame_ptr_x32.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s | FileCheck %s
+
+target triple = "x86_64-linux-gnux32"
+
+define i32 @foo() {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbp, -16
+; CHECK-NEXT:    movq %rsp, %rbp
+; CHECK-NEXT:    .cfi_def_cfa_register %rbp
+; CHECK-NEXT:    subq $16, %rsp
+; CHECK-NEXT:    movl $4, -8(%rbp)
+; CHECK-NEXT:    movl $5, -4(%rbp)
+; CHECK-NEXT:    movl -8(%rbp), %eax
+; CHECK-NEXT:    movq %rsp, %rcx
+; CHECK-NEXT:    addq $15, %rax
+; CHECK-NEXT:    andq $-16, %rax
+; CHECK-NEXT:    movq %rcx, %rdx
+; CHECK-NEXT:    subq %rax, %rdx
+; CHECK-NEXT:    movq %rdx, %rsp
+; CHECK-NEXT:    negq %rax
+; CHECK-NEXT:    movl $1, (%rcx,%rax)
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_remember_state
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x06, 0x77, 0x08, 0x06, 0x11, 0x10, 0x22 #
+; CHECK-NEXT:    movl $123, %ebp
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    addq $8, %rsp
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_restore_state
+; CHECK-NEXT:    movl -4(%rbp), %eax
+; CHECK-NEXT:    movq %rbp, %rsp
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
+; CHECK-NEXT:    retq
+entry:
+  %size = alloca i32, align 4
+  %g = alloca i32, align 4
+  store volatile i32 4, ptr %size, align 4
+  store volatile i32 5, ptr %g, align 4
+  %len = load volatile i32, ptr %size, align 4
+  %var_array = alloca i8, i32 %len, align 16
+  store i32 1, ptr %var_array, align 16
+  call void asm "nop", "{ebp},~{memory}"(i32 123)
+  %retval = load i32, ptr %g, align 4
+  ret i32 %retval
+}
diff --git a/llvm/test/CodeGen/X86/i386-baseptr.ll b/llvm/test/CodeGen/X86/i386-baseptr.ll
index 08e4bde7353a42..777eb838b84cc7 100644
--- a/llvm/test/CodeGen/X86/i386-baseptr.ll
+++ b/llvm/test/CodeGen/X86/i386-baseptr.ll
@@ -109,10 +109,14 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32
 ; CHECK-NEXT:    subl %eax, %edx
 ; CHECK-NEXT:    movl %edx, %esp
 ; CHECK-NEXT:    negl %eax
+; CHECK-NEXT:    pushl %esi
+; CHECK-NEXT:    subl $28, %esp
 ; CHECK-NEXT:    movl $405, %esi # imm = 0x195
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    addl $28, %esp
+; CHECK-NEXT:    popl %esi
 ; CHECK-NEXT:    movl $405, %ebx # imm = 0x195
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    nop
diff --git a/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll b/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll
index 3c98eead8d18f5..d3ca872509ad5a 100644
--- a/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll
@@ -37,6 +37,8 @@ define void @func() local_unnamed_addr #0 {
 ; CHECK-NEXT:  .Ltmp0:
 ; CHECK-NEXT:    addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %ebx
 ; CHECK-NEXT:    calll static_func
+; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    subl $12, %esp
 ; CHECK-NEXT:    #APP
 ; CHECK-EMPTY:
 ; CHECK-NEXT:    calll static_func
@@ -52,6 +54,8 @@ define void @func() local_unnamed_addr #0 {
 ; CHECK-NEXT:    shrl $0, %esp
 ; CHECK-EMPTY:
 ; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    addl $12, %esp
+; CHECK-NEXT:    popl %ebp
 entry:
   %call = tail call i32 @static_func()
 ;; We test call, CALL, and jmp.
diff --git a/llvm/test/CodeGen/X86/x86-32-intrcc.ll b/llvm/test/CodeGen/X86/x86-32-intrcc.ll
index 3c3944c2082bd5..a0f937e2c323b6 100644
--- a/llvm/test/CodeGen/X86/x86-32-intrcc.ll
+++ b/llvm/test/CodeGen/X86/x86-32-intrcc.ll
@@ -108,8 +108,10 @@ define x86_intrcc void @test_isr_clobbers(ptr byval(%struct.interrupt_frame) %fr
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    andl $-16, %esp
 ; CHECK-NEXT:    cld
+; CHECK-NEXT:    pushl %ebp
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    popl %ebp
 ; CHECK-NEXT:    leal -12(%ebp), %esp
 ; CHECK-NEXT:    popl %eax
 ; CHECK-NEXT:    popl %ebx
@@ -127,8 +129,10 @@ define x86_intrcc void @test_isr_clobbers(ptr byval(%struct.interrupt_frame) %fr
 ; CHECK0-NEXT:    pushl %eax
 ; CHECK0-NEXT:    andl $-16, %esp
 ; CHECK0-NEXT:    cld
+; CHECK0-NEXT:    pushl %ebp
 ; CHECK0-NEXT:    #APP
 ; CHECK0-NEXT:    #NO_APP
+; CHECK0-NEXT:    popl %ebp
 ; CHECK0-NEXT:    leal -12(%ebp), %esp
 ; CHECK0-NEXT:    popl %eax
 ; CHECK0-NEXT:    popl %ebx
diff --git a/llvm/test/CodeGen/X86/x86-64-baseptr.ll b/llvm/test/CodeGen/X86/x86-64-baseptr.ll
index 8cda4ba2814bba..020004def6e7ad 100644
--- a/llvm/test/CodeGen/X86/x86-64-baseptr.ll
+++ b/llvm/test/CodeGen/X86/x86-64-baseptr.ll
@@ -136,10 +136,14 @@ define void @clobber_base() #0 {
 ; X32ABI-NEXT:    subl %eax, %edx
 ; X32ABI-NEXT:    negl %eax
 ; X32ABI-NEXT:    movl %edx, %esp
+; X32ABI-NEXT:    pushq %rbx
+; X32ABI-NEXT:    subl $24, %esp
 ; X32ABI-NEXT:    movl $405, %ebx # imm = 0x195
 ; X32ABI-NEXT:    #APP
 ; X32ABI-NEXT:    nop
 ; X32ABI-NEXT:    #NO_APP
+; X32ABI-NEXT:    addl $24, %esp
+; X32ABI-NEXT:    popq %rbx
 ; X32ABI-NEXT:    movl $8, %edx
 ; X32ABI-NEXT:    #APP
 ; X32ABI-NEXT:    movl %edx, (%ebx)
@@ -268,6 +272,8 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32
 ; X32ABI-NEXT:    subl %eax, %edx
 ; X32ABI-NEXT:    negl %eax
 ; X32ABI-NEXT:    movl %edx, %esp
+; X32ABI-NEXT:    pushq %rbx
+; X32ABI-NEXT:    subl $24, %esp
 ; X32ABI-NEXT:    movl $405, %ebx # imm = 0x195
 ; X32ABI-NEXT:    #APP
 ; X32ABI-NEXT:    nop
@@ -275,6 +281,8 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32
 ; X32ABI-NEXT:    #APP
 ; X32ABI-NEXT:    nop
 ; X32ABI-NEXT:    #NO_APP
+; X32ABI-NEXT:    addl $24, %esp
+; X32ABI-NEXT:    popq %rbx
 ; X32ABI-NEXT:    movl $8, %edx
 ; X32ABI-NEXT:    #APP
 ; X32ABI-NEXT:    movl %edx, (%ebx)
@@ -385,10 +393,14 @@ define void @vmw_host_printf(ptr %fmt, ...) nounwind {
 ; X32ABI-NEXT:    movl $48, (%eax)
 ; X32ABI-NEXT:    movl $8, (%eax)
 ; X32ABI-NEXT:    xorl %eax, %eax
+; X32ABI-NEXT:    pushq %rbx
+; X32ABI-NEXT:    subl $24, %esp
 ; X32ABI-NEXT:    xorl %ebx, %ebx
 ; X32ABI-NEXT:    xorl %ecx, %ecx
 ; X32ABI-NEXT:    #APP
 ; X32ABI-NEXT:    #NO_APP
+; X32ABI-NEXT:    addl $24, %esp
+; X32ABI-NEXT:    popq %rbx
 ; X32ABI-NEXT:    leal -8(%ebp), %esp
 ; X32ABI-NEXT:    popq %rbx
 ; X32ABI-NEXT:    popq %rbp
diff --git a/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll b/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll
index 47aefdbf0e466e..b4c18dd7f4573e 100644
--- a/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll
@@ -94,6 +94,8 @@ define i64 @read_flags_reg_pressure() nounwind {
 ; WIN64-NEXT:    pushq %rbx
 ; WIN64-NEXT:    subq $16, %rsp
 ; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rbp
+; WIN64-NEXT:    pushq %rbp
+; WIN64-NEXT:    pushq %rax
 ; WIN64-NEXT:    #APP
 ; WIN64-NEXT:    #NO_APP
 ; WIN64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -103,6 +105,8 @@ define i64 @read_flags_reg_pressure() nounwind {
 ; WIN64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
 ; WIN64-NEXT:    #APP
 ; WIN64-NEXT:    #NO_APP
+; WIN64-NEXT:    addq $8, %rsp
+; WIN64-NEXT:    popq %rbp
 ; WIN64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
 ; WIN64-NEXT:    addq $16, %rsp
 ; WIN64-NEXT:    popq %rbx
@@ -177,6 +181,8 @@ define void @write_flags_reg_pressure(i64 noundef %0) nounwind {
 ; WIN64-NEXT:    subq $16, %rsp
 ; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rbp
 ; WIN64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; WIN64-NEXT:    pushq %rbp
+; WIN64-NEXT:    pushq %rax
 ; WIN64-NEXT:    #APP
 ; WIN64-NEXT:    #NO_APP
 ; WIN64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -186,6 +192,8 @@ define void @write_flags_reg_pressure(i64 noundef %0) nounwind {
 ; WIN64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
 ; WIN64-NEXT:    #APP
 ; WIN64-NEXT:    #NO_APP
+; WIN64-NEXT:    popq %rax
+; WIN64-NEXT:    popq %rbp
 ; WIN64-NEXT:    addq $16, %rsp
 ; WIN64-NEXT:    popq %rbx
 ; WIN64-NEXT:    popq %rdi