From dc06306ba4ddbd0f14ec850217bde99468de5e86 Mon Sep 17 00:00:00 2001 From: Rose <83477269+AtariDreams@users.noreply.github.com> Date: Tue, 20 Feb 2024 14:29:42 -0500 Subject: [PATCH] [X86] Resolve FIXME: Copy kill flag for eflags We now mark the last eflags usage as kill if the def was also kill. --- llvm/lib/CodeGen/MachineCombiner.cpp | 15 ++++-- llvm/lib/CodeGen/ModuloSchedule.cpp | 7 ++- llvm/lib/CodeGen/PeepholeOptimizer.cpp | 2 +- llvm/lib/CodeGen/RegisterCoalescer.cpp | 4 +- .../lib/CodeGen/TwoAddressInstructionPass.cpp | 6 ++- llvm/lib/Target/X86/X86FlagsCopyLowering.cpp | 49 +++++++++++++++---- llvm/lib/Target/X86/X86InstrInfo.cpp | 39 +++++++++------ .../X86/X86SpeculativeLoadHardening.cpp | 10 ++-- 8 files changed, 93 insertions(+), 39 deletions(-) diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp index c65937935ed820..4e31d036c25150 100644 --- a/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/llvm/lib/CodeGen/MachineCombiner.cpp @@ -232,8 +232,10 @@ MachineCombiner::getDepth(SmallVectorImpl &InsInstrs, assert(DefInstr && "There must be a definition for a new virtual register"); DepthOp = InstrDepth[II->second]; - int DefIdx = DefInstr->findRegisterDefOperandIdx(MO.getReg()); - int UseIdx = InstrPtr->findRegisterUseOperandIdx(MO.getReg()); + int DefIdx = + DefInstr->findRegisterDefOperandIdx(MO.getReg(), false, false, TRI); + int UseIdx = + InstrPtr->findRegisterUseOperandIdx(MO.getReg(), false, TRI); LatencyOp = TSchedModel.computeOperandLatency(DefInstr, DefIdx, InstrPtr, UseIdx); } else { @@ -244,8 +246,11 @@ MachineCombiner::getDepth(SmallVectorImpl &InsInstrs, DepthOp = BlockTrace.getInstrCycles(*DefInstr).Depth; if (!isTransientMI(DefInstr)) LatencyOp = TSchedModel.computeOperandLatency( - DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()), - InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg())); + DefInstr, + DefInstr->findRegisterDefOperandIdx(MO.getReg(), false, false, + TRI), + InstrPtr, + InstrPtr->findRegisterUseOperandIdx(MO.getReg(), false, TRI)); } } IDepth = std::max(IDepth, DepthOp + LatencyOp); @@ -284,7 +289,7 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, if (UseMO && BlockTrace.isDepInTrace(*Root, *UseMO)) { LatencyOp = TSchedModel.computeOperandLatency( NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO, - UseMO->findRegisterUseOperandIdx(MO.getReg())); + UseMO->findRegisterUseOperandIdx(MO.getReg(), false, TRI)); } else { LatencyOp = TSchedModel.computeInstrLatency(NewRoot); } diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp index 0bef513342ff12..20f4bcd9ab93a9 100644 --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -1673,7 +1673,9 @@ void PeelingModuloScheduleExpander::moveStageBetweenBlocks( // we don't need the phi anymore. if (getStage(Def) == Stage) { Register PhiReg = MI.getOperand(0).getReg(); - assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg()) != -1); + assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg(), false, + false, + MRI.getTargetRegisterInfo()) != -1); MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); MI.getOperand(0).setReg(PhiReg); PhiToDelete.push_back(&MI); @@ -1899,7 +1901,8 @@ Register PeelingModuloScheduleExpander::getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB) { MachineInstr *MI = MRI.getUniqueVRegDef(Reg); - unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg); + unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg, false, false, + MRI.getTargetRegisterInfo()); return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg(); } diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp index 1b1f22e827cb1e..c2e3df99e78921 100644 --- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -1577,7 +1577,7 @@ bool PeepholeOptimizer::findTargetRecurrence( return false; MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg)); - unsigned Idx = MI.findRegisterUseOperandIdx(Reg); + unsigned Idx = MI.findRegisterUseOperandIdx(Reg, false, TRI); // Only interested in recurrences whose instructions have only one def, which // is a virtual register. diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 7e9c992031f8d3..bbb3560b5f5556 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -723,7 +723,7 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, // If the source instruction was killing the source register before the // merge, unset the isKill marker given the live range has been extended. - int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg(), true); + int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg(), true, TRI); if (UIdx != -1) { ValSEndInst->getOperand(UIdx).setIsKill(false); } @@ -848,7 +848,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, return { false, false }; // If DefMI is a two-address instruction then commuting it will change the // destination register. - int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg()); + int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg(), false, TRI); assert(DefIdx != -1); unsigned UseOpIdx; if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index ebacbc420f8580..863871488032c7 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1355,8 +1355,10 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, << "2addr: NEW INST: " << *NewMIs[1]); // Transform the instruction, now that it no longer has a load. - unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA); - unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB); + unsigned NewDstIdx = + NewMIs[1]->findRegisterDefOperandIdx(regA, false, TRI); + unsigned NewSrcIdx = + NewMIs[1]->findRegisterUseOperandIdx(regB, false, TRI); MachineBasicBlock::iterator NewMI = NewMIs[1]; bool TransformResult = tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist, true); diff --git a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp index af25b34fbab995..7debca3b0a1f7f 100644 --- a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp +++ b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp @@ -347,6 +347,31 @@ static X86::CondCode getCondFromFCMOV(unsigned Opcode) { } } +static bool checkEFLAGSLive(MachineInstr *MI) { + if (MI->killsRegister(X86::EFLAGS)) + return false; + + // The EFLAGS operand of MI might be missing a kill marker. + // Figure out whether EFLAGS operand should LIVE after MI instruction. + MachineBasicBlock *BB = MI->getParent(); + MachineBasicBlock::iterator ItrMI = MI; + + // Scan forward through BB for a use/def of EFLAGS. + for (auto I = std::next(ItrMI), E = BB->end(); I != E; ++I) { + if (I->readsRegister(X86::EFLAGS)) + return true; + if (I->definesRegister(X86::EFLAGS)) + return false; + } + + // We hit the end of the block, check whether EFLAGS is live into a successor. + for (MachineBasicBlock *Succ : BB->successors()) + if (Succ->isLiveIn(X86::EFLAGS)) + return true; + + return false; +} + bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName() << " **********\n"); @@ -442,7 +467,8 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) { llvm::reverse(llvm::make_range(Begin, End)), [&](MachineInstr &MI) { // Flag any instruction (other than the copy we are // currently rewriting) that defs EFLAGS. - return &MI != CopyI && MI.findRegisterDefOperand(X86::EFLAGS); + return &MI != CopyI && + MI.findRegisterDefOperand(X86::EFLAGS, false, false, TRI); }); }; auto HasEFLAGSClobberPath = [&](MachineBasicBlock *BeginMBB, @@ -500,7 +526,7 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) { auto DefIt = llvm::find_if( llvm::reverse(llvm::make_range(TestMBB->instr_begin(), TestPos)), [&](MachineInstr &MI) { - return MI.findRegisterDefOperand(X86::EFLAGS); + return MI.findRegisterDefOperand(X86::EFLAGS, false, false, TRI); }); if (DefIt.base() != TestMBB->instr_begin()) { dbgs() << " Using EFLAGS defined by: "; @@ -562,9 +588,10 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) { break; } - MachineOperand *FlagUse = MI.findRegisterUseOperand(X86::EFLAGS); + MachineOperand *FlagUse = + MI.findRegisterUseOperand(X86::EFLAGS, false, TRI); if (!FlagUse) { - if (MI.findRegisterDefOperand(X86::EFLAGS)) { + if (MI.findRegisterDefOperand(X86::EFLAGS, false, false, TRI)) { // If EFLAGS are defined, it's as-if they were killed. We can stop // scanning here. // @@ -614,7 +641,7 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) { rewriteCopy(MI, *FlagUse, CopyDefI); } else { // We assume all other instructions that use flags also def them. - assert(MI.findRegisterDefOperand(X86::EFLAGS) && + assert(MI.findRegisterDefOperand(X86::EFLAGS, false, false, TRI) && "Expected a def of EFLAGS for this instruction!"); // NB!!! Several arithmetic instructions only *partially* update @@ -683,6 +710,8 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) { // Now rewrite the jumps that use the flags. These we handle specially // because if there are multiple jumps in a single basic block we'll have // to do surgery on the CFG. + bool CopyDefIsKill = !checkEFLAGSLive(&CopyDefI); + MachineOperand *LastEflagsUse = nullptr; MachineBasicBlock *LastJmpMBB = nullptr; for (MachineInstr *JmpI : JmpIs) { // Past the first jump within a basic block we need to split the blocks @@ -693,10 +722,12 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) { LastJmpMBB = JmpI->getParent(); rewriteCondJmp(*TestMBB, TestPos, TestLoc, *JmpI, CondRegs); + if (CopyDefIsKill && JmpI->readsRegister(X86::EFLAGS)) + LastEflagsUse = JmpI->findRegisterUseOperand(X86::EFLAGS, false, TRI); } - // FIXME: Mark the last use of EFLAGS before the copy's def as a kill if - // the copy's def operand is itself a kill. + if (LastEflagsUse && CopyDefIsKill) + LastEflagsUse->setIsKill(true); } #ifndef NDEBUG @@ -733,7 +764,7 @@ CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs( // Stop scanning when we see the first definition of the EFLAGS as prior to // this we would potentially capture the wrong flag state. - if (MI.findRegisterDefOperand(X86::EFLAGS)) + if (MI.findRegisterDefOperand(X86::EFLAGS, false, false, TRI)) break; } return CondRegs; @@ -911,7 +942,7 @@ void X86FlagsCopyLoweringPass::rewriteCondJmp( // Rewrite the jump to use the !ZF flag from the test, and kill its use of // flags afterward. JmpI.getOperand(1).setImm(Inverted ? X86::COND_E : X86::COND_NE); - JmpI.findRegisterUseOperand(X86::EFLAGS)->setIsKill(true); + JmpI.findRegisterUseOperand(X86::EFLAGS, false, TRI)->setIsKill(true); LLVM_DEBUG(dbgs() << " fixed jCC: "; JmpI.dump()); } diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 0f21880f6df90c..4ea922a9747cc9 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -3711,7 +3711,8 @@ bool X86InstrInfo::analyzeBranchImpl( // In practice we should never have an undef eflags operand, if we do // abort here as we are not prepared to preserve the flag. - if (I->findRegisterUseOperand(X86::EFLAGS)->isUndef()) + if (I->findRegisterUseOperand(X86::EFLAGS, false, &getRegisterInfo()) + ->isUndef()) return true; // Working from the bottom, handle the first conditional branch. @@ -5455,7 +5456,8 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, } // Make sure Sub instruction defines EFLAGS and mark the def live. - MachineOperand *FlagDef = Sub->findRegisterDefOperand(X86::EFLAGS); + MachineOperand *FlagDef = Sub->findRegisterDefOperand( + X86::EFLAGS, false, false, &getRegisterInfo()); assert(FlagDef && "Unable to locate a def EFLAGS operand"); FlagDef->setIsDead(false); @@ -5608,7 +5610,8 @@ bool X86InstrInfo::foldImmediateImpl(MachineInstr &UseMI, MachineInstr *DefMI, return false; } - if (UseMI.findRegisterUseOperand(Reg)->getSubReg()) + if (UseMI.findRegisterUseOperand(Reg, false, MRI->getTargetRegisterInfo()) + ->getSubReg()) return false; // Immediate has larger code size than register. So avoid folding the // immediate if it has more than 1 use and we are optimizing for size. @@ -5655,7 +5658,7 @@ bool X86InstrInfo::foldImmediateImpl(MachineInstr &UseMI, MachineInstr *DefMI, if (!MakeChange) return true; UseMI.setDesc(get(X86::MOV32r0)); - UseMI.removeOperand(UseMI.findRegisterUseOperandIdx(Reg)); + UseMI.removeOperand(UseMI.findRegisterUseOperandIdx(Reg, false, TRI)); UseMI.addOperand(MachineOperand::CreateReg(X86::EFLAGS, /*isDef=*/true, /*isImp=*/true, /*isKill=*/false, @@ -5677,17 +5680,18 @@ bool X86InstrInfo::foldImmediateImpl(MachineInstr &UseMI, MachineInstr *DefMI, NewOpc == X86::SBB64ri32 || NewOpc == X86::SBB32ri || NewOpc == X86::SUB64ri32_ND || NewOpc == X86::SUB32ri_ND || NewOpc == X86::SBB64ri32_ND || NewOpc == X86::SBB32ri_ND) && - UseMI.findRegisterUseOperandIdx(Reg) != 2) + UseMI.findRegisterUseOperandIdx(Reg, false, &getRegisterInfo()) != 2) return false; // For CMP instructions the immediate can only be at index 1. if ((NewOpc == X86::CMP64ri32 || NewOpc == X86::CMP32ri) && - UseMI.findRegisterUseOperandIdx(Reg) != 1) + UseMI.findRegisterUseOperandIdx(Reg, false, &getRegisterInfo()) != 1) return false; using namespace X86; if (isSHL(Opc) || isSHR(Opc) || isSAR(Opc) || isROL(Opc) || isROR(Opc) || isRCL(Opc) || isRCR(Opc)) { - unsigned RegIdx = UseMI.findRegisterUseOperandIdx(Reg); + unsigned RegIdx = + UseMI.findRegisterUseOperandIdx(Reg, false, &getRegisterInfo()); if (RegIdx < 2) return false; if (!isInt<8>(ImmVal)) @@ -5716,8 +5720,10 @@ bool X86InstrInfo::foldImmediateImpl(MachineInstr &UseMI, MachineInstr *DefMI, // ==> // %100 = COPY %101 UseMI.setDesc(get(TargetOpcode::COPY)); - UseMI.removeOperand(UseMI.findRegisterUseOperandIdx(Reg)); - UseMI.removeOperand(UseMI.findRegisterDefOperandIdx(X86::EFLAGS)); + UseMI.removeOperand( + UseMI.findRegisterUseOperandIdx(Reg, false, &getRegisterInfo())); + UseMI.removeOperand(UseMI.findRegisterDefOperandIdx( + X86::EFLAGS, false, false, &getRegisterInfo())); UseMI.untieRegOperand(0); UseMI.clearFlag(MachineInstr::MIFlag::NoSWrap); UseMI.clearFlag(MachineInstr::MIFlag::NoUWrap); @@ -9516,7 +9522,8 @@ bool X86InstrInfo::hasReassociableOperands(const MachineInstr &Inst, // not change anything because rearranging the operands could affect other // instructions that depend on the exact status flags (zero, sign, etc.) // that are set by using these particular operands with this operation. - const MachineOperand *FlagDef = Inst.findRegisterDefOperand(X86::EFLAGS); + const MachineOperand *FlagDef = Inst.findRegisterDefOperand( + X86::EFLAGS, false, false, &getRegisterInfo()); assert((Inst.getNumDefs() == 1 || FlagDef) && "Implicit def isn't flags?"); if (FlagDef && !FlagDef->isDead()) return false; @@ -10038,8 +10045,10 @@ void X86InstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &NewMI1, MachineInstr &NewMI2) const { // Integer instructions may define an implicit EFLAGS dest register operand. - MachineOperand *OldFlagDef1 = OldMI1.findRegisterDefOperand(X86::EFLAGS); - MachineOperand *OldFlagDef2 = OldMI2.findRegisterDefOperand(X86::EFLAGS); + MachineOperand *OldFlagDef1 = OldMI1.findRegisterDefOperand( + X86::EFLAGS, false, false, &getRegisterInfo()); + MachineOperand *OldFlagDef2 = OldMI2.findRegisterDefOperand( + X86::EFLAGS, false, false, &getRegisterInfo()); assert(!OldFlagDef1 == !OldFlagDef2 && "Unexpected instruction type for reassociation"); @@ -10050,8 +10059,10 @@ void X86InstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1, assert(OldFlagDef1->isDead() && OldFlagDef2->isDead() && "Must have dead EFLAGS operand in reassociable instruction"); - MachineOperand *NewFlagDef1 = NewMI1.findRegisterDefOperand(X86::EFLAGS); - MachineOperand *NewFlagDef2 = NewMI2.findRegisterDefOperand(X86::EFLAGS); + MachineOperand *NewFlagDef1 = NewMI1.findRegisterDefOperand( + X86::EFLAGS, false, false, &getRegisterInfo()); + MachineOperand *NewFlagDef2 = NewMI2.findRegisterDefOperand( + X86::EFLAGS, false, false, &getRegisterInfo()); assert(NewFlagDef1 && NewFlagDef2 && "Unexpected operand in reassociable instruction"); diff --git a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp index 69a54e7667b553..3f0381f5398315 100644 --- a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp +++ b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp @@ -483,7 +483,7 @@ bool X86SpeculativeLoadHardeningPass::runOnMachineFunction( PredStateSubReg); ++NumInstsInserted; MachineOperand *ZeroEFLAGSDefOp = - ZeroI->findRegisterDefOperand(X86::EFLAGS); + ZeroI->findRegisterDefOperand(X86::EFLAGS, false, false, TRI); assert(ZeroEFLAGSDefOp && ZeroEFLAGSDefOp->isImplicit() && "Must have an implicit def of EFLAGS!"); ZeroEFLAGSDefOp->setIsDead(true); @@ -762,7 +762,8 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG( // If this is the last cmov and the EFLAGS weren't originally // live-in, mark them as killed. if (!LiveEFLAGS && Cond == Conds.back()) - CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true); + CMovI->findRegisterUseOperand(X86::EFLAGS, false, TRI) + ->setIsKill(true); ++NumInstsInserted; LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); @@ -1185,7 +1186,7 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches( .addReg(PS->InitialReg) .addReg(PS->PoisonReg) .addImm(X86::COND_NE); - CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true); + CMovI->findRegisterUseOperand(X86::EFLAGS, false, TRI)->setIsKill(true); ++NumInstsInserted; LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n"); CMovs.push_back(&*CMovI); @@ -1213,7 +1214,8 @@ static bool isEFLAGSLive(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // Check if EFLAGS are alive by seeing if there is a def of them or they // live-in, and then seeing if that def is in turn used. for (MachineInstr &MI : llvm::reverse(llvm::make_range(MBB.begin(), I))) { - if (MachineOperand *DefOp = MI.findRegisterDefOperand(X86::EFLAGS)) { + if (MachineOperand *DefOp = + MI.findRegisterDefOperand(X86::EFLAGS, false, false, &TRI)) { // If the def is dead, then EFLAGS is not live. if (DefOp->isDead()) return false;