From 3bcb1f2bdd5c70b2ac4aff3290996486d9ae0236 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 3 Feb 2024 23:33:37 -0800 Subject: [PATCH] [RISCV] Rework isSignExtendingOpW to store Register in the worklist. Previously we stored MachineInstr which restricted the implementation to only handle operand 0. The TH_LWD instruction has two sign extended destinations. --- llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp | 50 +++++++++++------------ llvm/test/CodeGen/RISCV/opt-w-instrs.mir | 25 +++++++++++- 2 files changed, 48 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp index b7d185ab9d925c..dcf70e8cad6408 100644 --- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp +++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp @@ -338,7 +338,7 @@ static bool hasAllWUsers(const MachineInstr &OrigMI, const RISCVSubtarget &ST, // This function returns true if the machine instruction always outputs a value // where bits 63:32 match bit 31. static bool isSignExtendingOpW(const MachineInstr &MI, - const MachineRegisterInfo &MRI) { + const MachineRegisterInfo &MRI, unsigned OpNo) { uint64_t TSFlags = MI.getDesc().TSFlags; // Instructions that can be determined from opcode are marked in tablegen. @@ -368,8 +368,9 @@ static bool isSignExtendingOpW(const MachineInstr &MI, // Copying from X0 produces zero. case RISCV::COPY: return MI.getOperand(1).getReg() == RISCV::X0; + // Ignore the scratch register destination. case RISCV::PseudoAtomicLoadNand32: - return true; + return OpNo == 0; case RISCV::PseudoVMV_X_S: { // vmv.x.s has at least 33 sign bits if log2(sew) <= 5. int64_t Log2SEW = MI.getOperand(2).getImm(); @@ -384,38 +385,35 @@ static bool isSignExtendingOpW(const MachineInstr &MI, static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST, const MachineRegisterInfo &MRI, SmallPtrSetImpl &FixableDef) { + SmallSet Visited; + SmallVector Worklist; - SmallPtrSet Visited; - SmallVector Worklist; - - auto AddRegDefToWorkList = [&](Register SrcReg) { + auto AddRegToWorkList = [&](Register SrcReg) { if (!SrcReg.isVirtual()) return false; - MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); - if (!SrcMI) - return false; - // Code assumes the register is operand 0. - // TODO: Maybe the worklist should store register? - if (!SrcMI->getOperand(0).isReg() || - SrcMI->getOperand(0).getReg() != SrcReg) - return false; - // Add SrcMI to the worklist. - Worklist.push_back(SrcMI); + Worklist.push_back(SrcReg); return true; }; - if (!AddRegDefToWorkList(SrcReg)) + if (!AddRegToWorkList(SrcReg)) return false; while (!Worklist.empty()) { - MachineInstr *MI = Worklist.pop_back_val(); + Register Reg = Worklist.pop_back_val(); - // If we already visited this instruction, we don't need to check it again. - if (!Visited.insert(MI).second) + // If we already visited this register, we don't need to check it again. + if (!Visited.insert(Reg).second) continue; + MachineInstr *MI = MRI.getVRegDef(Reg); + if (!MI) + continue; + + int OpNo = MI->findRegisterDefOperandIdx(Reg); + assert(OpNo != -1 && "Couldn't find register"); + // If this is a sign extending operation we don't need to look any further. - if (isSignExtendingOpW(*MI, MRI)) + if (isSignExtendingOpW(*MI, MRI, OpNo)) continue; // Is this an instruction that propagates sign extend? @@ -472,7 +470,7 @@ static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST, continue; } - if (!AddRegDefToWorkList(CopySrcReg)) + if (!AddRegToWorkList(CopySrcReg)) return false; break; @@ -492,7 +490,7 @@ static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST, // |Remainder| is always <= |Dividend|. If D is 32-bit, then so is R. // DIV doesn't work because of the edge case 0xf..f 8000 0000 / (long)-1 // Logical operations use a sign extended 12-bit immediate. - if (!AddRegDefToWorkList(MI->getOperand(1).getReg())) + if (!AddRegToWorkList(MI->getOperand(1).getReg())) return false; break; @@ -507,7 +505,7 @@ static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST, case RISCV::PseudoCCSRAIW: // Returns operand 4 or an ADDW/SUBW/etc. of operands 5 and 6. We only // need to check if operand 4 is sign extended. - if (!AddRegDefToWorkList(MI->getOperand(4).getReg())) + if (!AddRegToWorkList(MI->getOperand(4).getReg())) return false; break; case RISCV::REMU: @@ -555,7 +553,7 @@ static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST, if (!MI->getOperand(I).isReg()) return false; - if (!AddRegDefToWorkList(MI->getOperand(I).getReg())) + if (!AddRegToWorkList(MI->getOperand(I).getReg())) return false; } @@ -568,7 +566,7 @@ static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST, case RISCV::VT_MASKCN: // Instructions return zero or operand 1. Result is sign extended if // operand 1 is sign extended. - if (!AddRegDefToWorkList(MI->getOperand(1).getReg())) + if (!AddRegToWorkList(MI->getOperand(1).getReg())) return false; break; diff --git a/llvm/test/CodeGen/RISCV/opt-w-instrs.mir b/llvm/test/CodeGen/RISCV/opt-w-instrs.mir index 8c22eaf917e873..03b4e330a09680 100644 --- a/llvm/test/CodeGen/RISCV/opt-w-instrs.mir +++ b/llvm/test/CodeGen/RISCV/opt-w-instrs.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 -# RUN: llc -mtriple=riscv64 -mattr='+d,+zfa,+v' -verify-machineinstrs -run-pass=riscv-opt-w-instrs %s -o - | FileCheck %s +# RUN: llc -mtriple=riscv64 -mattr='+d,+zfa,+v,+xtheadmempair' -verify-machineinstrs -run-pass=riscv-opt-w-instrs %s -o - | FileCheck %s --- name: fcvtmod_w_d @@ -88,3 +88,26 @@ body: | $x11 = COPY %3 PseudoRET ... +--- + name: th_lwd + tracksRegLiveness: true + body: | + bb.0.entry: + liveins: $x10 + ; CHECK-LABEL: name: th_lwd + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: early-clobber %1:gpr, early-clobber %2:gpr = TH_LWD [[COPY]], 2, 3 + ; CHECK-NEXT: $x10 = COPY %1 + ; CHECK-NEXT: $x11 = COPY %2 + ; CHECK-NEXT: PseudoRET + %0:gpr = COPY $x10 + early-clobber %1:gpr, early-clobber %2:gpr = TH_LWD %0, 2, 3 + %3:gpr = ADDIW %1, 0 + %4:gpr = ADDIW %2, 0 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... +