diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt index 124bc239451aed..46ef94b4dd2931 100644 --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -59,6 +59,7 @@ add_llvm_target(RISCVCodeGen RISCVTargetObjectFile.cpp RISCVTargetTransformInfo.cpp RISCVVectorPeephole.cpp + RISCVVLOptimizer.cpp GISel/RISCVCallLowering.cpp GISel/RISCVInstructionSelector.cpp GISel/RISCVLegalizerInfo.cpp diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h index 5a94ada8f8dd46..651119d10d1119 100644 --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -99,6 +99,10 @@ void initializeRISCVO0PreLegalizerCombinerPass(PassRegistry &); FunctionPass *createRISCVPreLegalizerCombiner(); void initializeRISCVPreLegalizerCombinerPass(PassRegistry &); + +FunctionPass *createRISCVVLOptimizerPass(); +void initializeRISCVVLOptimizerPass(PassRegistry &); + } // namespace llvm #endif diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 794df2212dfa53..bfedc6b177332e 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -103,6 +103,10 @@ static cl::opt EnableVSETVLIAfterRVVRegAlloc( cl::desc("Insert vsetvls after vector register allocation"), cl::init(true)); +static cl::opt EnableVLOptimizer("riscv-enable-vloptimizer", + cl::desc("Enable the VL Optimizer pass"), + cl::init(true), cl::Hidden); + extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() { RegisterTargetMachine X(getTheRISCV32Target()); RegisterTargetMachine Y(getTheRISCV64Target()); @@ -550,8 +554,11 @@ void RISCVPassConfig::addMachineSSAOptimization() { void RISCVPassConfig::addPreRegAlloc() { addPass(createRISCVPreRAExpandPseudoPass()); - if (TM->getOptLevel() != CodeGenOptLevel::None) + if (TM->getOptLevel() != CodeGenOptLevel::None) { addPass(createRISCVMergeBaseOffsetOptPass()); + if (EnableVLOptimizer) + addPass(createRISCVVLOptimizerPass()); + } addPass(createRISCVInsertReadWriteCSRPass()); addPass(createRISCVInsertWriteVXRMPass()); diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp new file mode 100644 index 00000000000000..ae23d5dda5eb04 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -0,0 +1,1569 @@ +//===-------------- RISCVVLOptimizer.cpp - VL Optimizer -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// +// +// This pass reduces the VL where possible at the MI level, before VSETVLI +// instructions are inserted. +// +// The purpose of this optimization is to make the VL argument, for instructions +// that have a VL argument, as small as possible. This is implemented by +// visiting each instruction in reverse order and checking that if it has a VL +// argument, whether the VL can be reduced. 
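+//
+// As an illustrative sketch (register names here are hypothetical, not taken
+// from any test below): a VADD_VV emitted with the VLMAX sentinel whose only
+// users execute with a VL held in a virtual register %vl, and whose EEW/EMUL
+// match on the connecting operands, has its VL operand rewritten to %vl. Its
+// own vector sources are then pushed onto a worklist so the reduction can
+// propagate further up the use-def chain.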
+// +//===---------------------------------------------------------------------===// + +#include "RISCV.h" +#include "RISCVMachineFunctionInfo.h" +#include "RISCVSubtarget.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/InitializePasses.h" + +#include + +using namespace llvm; + +#define DEBUG_TYPE "riscv-vl-optimizer" + +namespace { + +class RISCVVLOptimizer : public MachineFunctionPass { + const MachineRegisterInfo *MRI; + const MachineDominatorTree *MDT; + +public: + static char ID; + + RISCVVLOptimizer() : MachineFunctionPass(ID) { } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + StringRef getPassName() const override { return "RISC-V VL Optimizer"; } + +private: + bool tryReduceVL(MachineInstr &MI); + bool isCandidate(const MachineInstr &MI) const; +}; + +} // end anonymous namespace + +char RISCVVLOptimizer::ID = 0; +INITIALIZE_PASS_BEGIN(RISCVVLOptimizer, DEBUG_TYPE, "RISC-V VL Optimizer", + false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) +INITIALIZE_PASS_END(RISCVVLOptimizer, DEBUG_TYPE, "RISC-V VL Optimizer", false, + false) + +FunctionPass *llvm::createRISCVVLOptimizerPass() { + return new RISCVVLOptimizer(); +} + +/// Return true if R is a physical or virtual vector register, false otherwise. +static bool isVectorRegClass(Register R, const MachineRegisterInfo *MRI) { + if (R.isPhysical()) + return RISCV::VRRegClass.contains(R); + const TargetRegisterClass *RC = MRI->getRegClass(R); + return RISCV::VRRegClass.hasSubClassEq(RC) || + RISCV::VRM2RegClass.hasSubClassEq(RC) || + RISCV::VRM4RegClass.hasSubClassEq(RC) || + RISCV::VRM8RegClass.hasSubClassEq(RC); +} + +/// Represents the EMUL and EEW of a MachineOperand. +struct OperandInfo { + enum class State { + Unknown, + Known, + } S; + + // Represent as 1,2,4,8, ... and fractional indicator. This is because + // EMUL can take on values that don't map to RISCVII::VLMUL values exactly. + // For example, a mask operand can have an EMUL less than MF8. + std::pair EMUL; + + unsigned Log2EEW; + + OperandInfo(RISCVII::VLMUL EMUL, unsigned Log2EEW) + : S(State::Known), EMUL(RISCVVType::decodeVLMUL(EMUL)), Log2EEW(Log2EEW) { + } + + OperandInfo(std::pair EMUL, unsigned Log2EEW) + : S(State::Known), EMUL(EMUL), Log2EEW(Log2EEW) {} + + OperandInfo(State S) : S(S) { + assert(S != State::Known && + "This constructor may only be used to construct " + "an Unknown OperandInfo"); + } + + bool isUnknown() const { return S == State::Unknown; } + bool isKnown() const { return S == State::Known; } + + static bool EMULAndEEWAreEqual(const OperandInfo &A, const OperandInfo &B) { + assert(A.isKnown() && B.isKnown() && "Both operands must be known"); + return A.Log2EEW == B.Log2EEW && A.EMUL.first == B.EMUL.first && + A.EMUL.second == B.EMUL.second; + } + + void print(raw_ostream &OS) const { + if (isUnknown()) { + OS << "Unknown"; + return; + } + OS << "EMUL: "; + if (EMUL.second) + OS << "m"; + OS << "f" << EMUL.first; + OS << ", EEW: " << (1 << Log2EEW); + } +}; + +static raw_ostream &operator<<(raw_ostream &OS, const OperandInfo &OI) { + OI.print(OS); + return OS; +} + +/// Return the RISCVII::VLMUL that is two times VLMul. +/// Precondition: VLMul is not LMUL_RESERVED or LMUL_8. 
+static RISCVII::VLMUL twoTimesVLMUL(RISCVII::VLMUL VLMul) { + switch (VLMul) { + case RISCVII::VLMUL::LMUL_F8: + return RISCVII::VLMUL::LMUL_F4; + case RISCVII::VLMUL::LMUL_F4: + return RISCVII::VLMUL::LMUL_F2; + case RISCVII::VLMUL::LMUL_F2: + return RISCVII::VLMUL::LMUL_1; + case RISCVII::VLMUL::LMUL_1: + return RISCVII::VLMUL::LMUL_2; + case RISCVII::VLMUL::LMUL_2: + return RISCVII::VLMUL::LMUL_4; + case RISCVII::VLMUL::LMUL_4: + return RISCVII::VLMUL::LMUL_8; + case RISCVII::VLMUL::LMUL_8: + default: + llvm_unreachable("Could not multiply VLMul by 2"); + } +} + +/// Return the RISCVII::VLMUL that is VLMul / 2. +/// Precondition: VLMul is not LMUL_RESERVED or LMUL_MF8. +static RISCVII::VLMUL halfVLMUL(RISCVII::VLMUL VLMul) { + switch (VLMul) { + case RISCVII::VLMUL::LMUL_F4: + return RISCVII::VLMUL::LMUL_F8; + case RISCVII::VLMUL::LMUL_F2: + return RISCVII::VLMUL::LMUL_F4; + case RISCVII::VLMUL::LMUL_1: + return RISCVII::VLMUL::LMUL_F2; + case RISCVII::VLMUL::LMUL_2: + return RISCVII::VLMUL::LMUL_1; + case RISCVII::VLMUL::LMUL_4: + return RISCVII::VLMUL::LMUL_2; + case RISCVII::VLMUL::LMUL_8: + return RISCVII::VLMUL::LMUL_4; + case RISCVII::VLMUL::LMUL_F8: + default: + llvm_unreachable("Could not divide VLMul by 2"); + } +} + +/// Return EMUL = (EEW / SEW) * LMUL where EEW comes from Log2EEW and LMUL and +/// SEW are from the TSFlags of MI. +static std::pair +getEMULEqualsEEWDivSEWTimesLMUL(unsigned Log2EEW, const MachineInstr &MI) { + RISCVII::VLMUL MIVLMUL = RISCVII::getLMul(MI.getDesc().TSFlags); + auto [MILMUL, MILMULIsFractional] = RISCVVType::decodeVLMUL(MIVLMUL); + unsigned MILog2SEW = + MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm(); + unsigned MISEW = 1 << MILog2SEW; + + unsigned EEW = 1 << Log2EEW; + // Calculate (EEW/SEW)*LMUL preserving fractions less than 1. Use GCD + // to put fraction in simplest form. + unsigned Num = EEW, Denom = MISEW; + int GCD = MILMULIsFractional ? std::gcd(Num, Denom * MILMUL) + : std::gcd(Num * MILMUL, Denom); + Num = MILMULIsFractional ? Num / GCD : Num * MILMUL / GCD; + Denom = MILMULIsFractional ? Denom * MILMUL / GCD : Denom / GCD; + return std::make_pair(Num > Denom ? Num : Denom, Denom > Num); +} + +static bool isOpN(const MachineOperand &MO, unsigned OpN) { + const MachineInstr &MI = *MO.getParent(); + bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MI.getDesc()); + + if (HasPassthru) + return MO.getOperandNo() == OpN + 1; + + return MO.getOperandNo() == OpN; +} + +/// An index segment load or store operand has the form v.*segei.v. +/// Data has EEW=SEW, EMUL=LMUL. Index has EEW=, EMUL=(EEW/SEW)*LMUL. LMUL +/// and SEW comes from TSFlags of MI. +static OperandInfo getIndexSegmentLoadStoreOperandInfo(unsigned Log2EEW, + const MachineInstr &MI, + const MachineOperand &MO, + bool IsLoad) { + // Operand 0 is data register + // Data vector register group has EEW=SEW, EMUL=LMUL. + if (MO.getOperandNo() == 0) { + RISCVII::VLMUL MIVLMul = RISCVII::getLMul(MI.getDesc().TSFlags); + unsigned MILog2SEW = + MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm(); + return OperandInfo(MIVLMul, MILog2SEW); + } + + // Operand 2 is index vector register + // v.*segei.v + // Index vector register group has EEW=, EMUL=(EEW/SEW)*LMUL. + if (isOpN(MO, 2)) + return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(Log2EEW, MI), Log2EEW); + + llvm_unreachable("Could not get OperandInfo for non-vector register of an " + "indexed segment load or store instruction"); +} + +/// Dest has EEW=SEW and EMUL=LMUL. Source EEW=SEW/Factor (i.e. F2 => EEW/2). 
+/// Source has EMUL=(EEW/SEW)*LMUL. LMUL and SEW comes from TSFlags of MI. +static OperandInfo getIntegerExtensionOperandInfo(unsigned Factor, + const MachineInstr &MI, + const MachineOperand &MO) { + RISCVII::VLMUL MIVLMul = RISCVII::getLMul(MI.getDesc().TSFlags); + unsigned MILog2SEW = + MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm(); + + if (MO.getOperandNo() == 0) + return OperandInfo(MIVLMul, MILog2SEW); + + unsigned MISEW = 1 << MILog2SEW; + unsigned EEW = MISEW / Factor; + unsigned Log2EEW = Log2_32(EEW); + + return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(Log2EEW, MI), Log2EEW); +} + +/// Check whether MO is a mask operand of MI. +static bool isMaskOperand(const MachineInstr &MI, const MachineOperand &MO, + const MachineRegisterInfo *MRI) { + + if (!MO.isReg() || !isVectorRegClass(MO.getReg(), MRI)) + return false; + + const MCInstrDesc &Desc = MI.getDesc(); + return Desc.operands()[MO.getOperandNo()].RegClass == RISCV::VMV0RegClassID; +} + +/// Return the OperandInfo for MO, which is an operand of MI. +static OperandInfo getOperandInfo(const MachineInstr &MI, + const MachineOperand &MO, + const MachineRegisterInfo *MRI) { + const RISCVVPseudosTable::PseudoInfo *RVV = + RISCVVPseudosTable::getPseudoInfo(MI.getOpcode()); + assert(RVV && "Could not find MI in PseudoTable"); + + // MI has a VLMUL and SEW associated with it. The RVV specification defines + // the LMUL and SEW of each operand and definition in relation to MI.VLMUL and + // MI.SEW. + RISCVII::VLMUL MIVLMul = RISCVII::getLMul(MI.getDesc().TSFlags); + unsigned MILog2SEW = + MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm(); + + const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MI.getDesc()); + + // We bail out early for instructions that have passthru with non NoRegister, + // which means they are using TU policy. We are not interested in these + // since they must preserve the entire register content. + if (HasPassthru && MO.getOperandNo() == MI.getNumExplicitDefs() && + (MO.getReg() != RISCV::NoRegister)) + return OperandInfo(OperandInfo::State::Unknown); + + bool IsMODef = MO.getOperandNo() == 0; + bool IsOp1 = isOpN(MO, 1); + bool IsOp2 = isOpN(MO, 2); + bool IsOp3 = isOpN(MO, 3); + + // All mask operands have EEW=1, EMUL=(EEW/SEW)*LMUL + if (isMaskOperand(MI, MO, MRI)) + return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0); + + // switch against BaseInstr to reduce number of cases that need to be + // considered. + switch (RVV->BaseInstr) { + + // 6. Configuration-Setting Instructions + // Configuration setting instructions do not read or write vector registers + case RISCV::VSETIVLI: + case RISCV::VSETVL: + case RISCV::VSETVLI: + llvm_unreachable("Configuration setting instructions do not read or write " + "vector registers"); + + // 7. Vector Loads and Stores + // 7.4. Vector Unit-Stride Instructions + // 7.5. Vector Strided Instructions + // 7.7. 
Unit-stride Fault-Only-First Loads + /// Dest EEW encoded in the instruction and EMUL=(EEW/SEW)*LMUL + case RISCV::VLE8_V: + case RISCV::VSE8_V: + case RISCV::VLM_V: + case RISCV::VSM_V: + case RISCV::VLSE8_V: + case RISCV::VSSE8_V: + case RISCV::VLE8FF_V: + return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(3, MI), 3); + case RISCV::VLE16_V: + case RISCV::VSE16_V: + case RISCV::VLSE16_V: + case RISCV::VSSE16_V: + case RISCV::VLE16FF_V: + return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(4, MI), 4); + case RISCV::VLE32_V: + case RISCV::VSE32_V: + case RISCV::VLSE32_V: + case RISCV::VSSE32_V: + case RISCV::VLE32FF_V: + return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(5, MI), 5); + case RISCV::VLE64_V: + case RISCV::VSE64_V: + case RISCV::VLSE64_V: + case RISCV::VSSE64_V: + case RISCV::VLE64FF_V: + return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(6, MI), 6); + + // 7.6. Vector Indexed Instructions + // Data EEW=SEW, EMUL=LMUL. Index EEW= and EMUL=(EEW/SEW)*LMUL + case RISCV::VLUXEI8_V: + case RISCV::VLOXEI8_V: + case RISCV::VSUXEI8_V: + case RISCV::VSOXEI8_V: + if (MO.getOperandNo() == 0) + return OperandInfo(MIVLMul, MILog2SEW); + return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(3, MI), 3); + case RISCV::VLUXEI16_V: + case RISCV::VLOXEI16_V: + case RISCV::VSUXEI16_V: + case RISCV::VSOXEI16_V: + if (MO.getOperandNo() == 0) + return OperandInfo(MIVLMul, MILog2SEW); + return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(4, MI), 4); + case RISCV::VLUXEI32_V: + case RISCV::VLOXEI32_V: + case RISCV::VSUXEI32_V: + case RISCV::VSOXEI32_V: + if (MO.getOperandNo() == 0) + return OperandInfo(MIVLMul, MILog2SEW); + return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(5, MI), 5); + case RISCV::VLUXEI64_V: + case RISCV::VLOXEI64_V: + case RISCV::VSUXEI64_V: + case RISCV::VSOXEI64_V: + if (MO.getOperandNo() == 0) + return OperandInfo(MIVLMul, MILog2SEW); + return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(6, MI), 6); + + // 7.8. Vector Load/Store Segment Instructions + // 7.8.1. 
Vector Unit-Stride Segment Loads and Stores + // v.*sege.* + // EEW=eew, EMUL=LMUL + case RISCV::VLSEG2E8_V: + case RISCV::VLSEG2E8FF_V: + case RISCV::VLSEG3E8_V: + case RISCV::VLSEG3E8FF_V: + case RISCV::VLSEG4E8_V: + case RISCV::VLSEG4E8FF_V: + case RISCV::VLSEG5E8_V: + case RISCV::VLSEG5E8FF_V: + case RISCV::VLSEG6E8_V: + case RISCV::VLSEG6E8FF_V: + case RISCV::VLSEG7E8_V: + case RISCV::VLSEG7E8FF_V: + case RISCV::VLSEG8E8_V: + case RISCV::VLSEG8E8FF_V: + case RISCV::VSSEG2E8_V: + case RISCV::VSSEG3E8_V: + case RISCV::VSSEG4E8_V: + case RISCV::VSSEG5E8_V: + case RISCV::VSSEG6E8_V: + case RISCV::VSSEG7E8_V: + case RISCV::VSSEG8E8_V: + return OperandInfo(MIVLMul, 3); + case RISCV::VLSEG2E16_V: + case RISCV::VLSEG2E16FF_V: + case RISCV::VLSEG3E16_V: + case RISCV::VLSEG3E16FF_V: + case RISCV::VLSEG4E16_V: + case RISCV::VLSEG4E16FF_V: + case RISCV::VLSEG5E16_V: + case RISCV::VLSEG5E16FF_V: + case RISCV::VLSEG6E16_V: + case RISCV::VLSEG6E16FF_V: + case RISCV::VLSEG7E16_V: + case RISCV::VLSEG7E16FF_V: + case RISCV::VLSEG8E16_V: + case RISCV::VLSEG8E16FF_V: + case RISCV::VSSEG2E16_V: + case RISCV::VSSEG3E16_V: + case RISCV::VSSEG4E16_V: + case RISCV::VSSEG5E16_V: + case RISCV::VSSEG6E16_V: + case RISCV::VSSEG7E16_V: + case RISCV::VSSEG8E16_V: + return OperandInfo(MIVLMul, 4); + case RISCV::VLSEG2E32_V: + case RISCV::VLSEG2E32FF_V: + case RISCV::VLSEG3E32_V: + case RISCV::VLSEG3E32FF_V: + case RISCV::VLSEG4E32_V: + case RISCV::VLSEG4E32FF_V: + case RISCV::VLSEG5E32_V: + case RISCV::VLSEG5E32FF_V: + case RISCV::VLSEG6E32_V: + case RISCV::VLSEG6E32FF_V: + case RISCV::VLSEG7E32_V: + case RISCV::VLSEG7E32FF_V: + case RISCV::VLSEG8E32_V: + case RISCV::VLSEG8E32FF_V: + case RISCV::VSSEG2E32_V: + case RISCV::VSSEG3E32_V: + case RISCV::VSSEG4E32_V: + case RISCV::VSSEG5E32_V: + case RISCV::VSSEG6E32_V: + case RISCV::VSSEG7E32_V: + case RISCV::VSSEG8E32_V: + return OperandInfo(MIVLMul, 5); + case RISCV::VLSEG2E64_V: + case RISCV::VLSEG2E64FF_V: + case RISCV::VLSEG3E64_V: + case RISCV::VLSEG3E64FF_V: + case RISCV::VLSEG4E64_V: + case RISCV::VLSEG4E64FF_V: + case RISCV::VLSEG5E64_V: + case RISCV::VLSEG5E64FF_V: + case RISCV::VLSEG6E64_V: + case RISCV::VLSEG6E64FF_V: + case RISCV::VLSEG7E64_V: + case RISCV::VLSEG7E64FF_V: + case RISCV::VLSEG8E64_V: + case RISCV::VLSEG8E64FF_V: + case RISCV::VSSEG2E64_V: + case RISCV::VSSEG3E64_V: + case RISCV::VSSEG4E64_V: + case RISCV::VSSEG5E64_V: + case RISCV::VSSEG6E64_V: + case RISCV::VSSEG7E64_V: + case RISCV::VSSEG8E64_V: + return OperandInfo(MIVLMul, 6); + + // 7.8.2. 
Vector Strided Segment Loads and Stores + case RISCV::VLSSEG2E8_V: + case RISCV::VLSSEG3E8_V: + case RISCV::VLSSEG4E8_V: + case RISCV::VLSSEG5E8_V: + case RISCV::VLSSEG6E8_V: + case RISCV::VLSSEG7E8_V: + case RISCV::VLSSEG8E8_V: + case RISCV::VSSSEG2E8_V: + case RISCV::VSSSEG3E8_V: + case RISCV::VSSSEG4E8_V: + case RISCV::VSSSEG5E8_V: + case RISCV::VSSSEG6E8_V: + case RISCV::VSSSEG7E8_V: + case RISCV::VSSSEG8E8_V: + return OperandInfo(MIVLMul, 3); + case RISCV::VLSSEG2E16_V: + case RISCV::VLSSEG3E16_V: + case RISCV::VLSSEG4E16_V: + case RISCV::VLSSEG5E16_V: + case RISCV::VLSSEG6E16_V: + case RISCV::VLSSEG7E16_V: + case RISCV::VLSSEG8E16_V: + case RISCV::VSSSEG2E16_V: + case RISCV::VSSSEG3E16_V: + case RISCV::VSSSEG4E16_V: + case RISCV::VSSSEG5E16_V: + case RISCV::VSSSEG6E16_V: + case RISCV::VSSSEG7E16_V: + case RISCV::VSSSEG8E16_V: + return OperandInfo(MIVLMul, 4); + case RISCV::VLSSEG2E32_V: + case RISCV::VLSSEG3E32_V: + case RISCV::VLSSEG4E32_V: + case RISCV::VLSSEG5E32_V: + case RISCV::VLSSEG6E32_V: + case RISCV::VLSSEG7E32_V: + case RISCV::VLSSEG8E32_V: + case RISCV::VSSSEG2E32_V: + case RISCV::VSSSEG3E32_V: + case RISCV::VSSSEG4E32_V: + case RISCV::VSSSEG5E32_V: + case RISCV::VSSSEG6E32_V: + case RISCV::VSSSEG7E32_V: + case RISCV::VSSSEG8E32_V: + return OperandInfo(MIVLMul, 5); + case RISCV::VLSSEG2E64_V: + case RISCV::VLSSEG3E64_V: + case RISCV::VLSSEG4E64_V: + case RISCV::VLSSEG5E64_V: + case RISCV::VLSSEG6E64_V: + case RISCV::VLSSEG7E64_V: + case RISCV::VLSSEG8E64_V: + case RISCV::VSSSEG2E64_V: + case RISCV::VSSSEG3E64_V: + case RISCV::VSSSEG4E64_V: + case RISCV::VSSSEG5E64_V: + case RISCV::VSSSEG6E64_V: + case RISCV::VSSSEG7E64_V: + case RISCV::VSSSEG8E64_V: + return OperandInfo(MIVLMul, 6); + + // 7.8.3. Vector Indexed Segment Loads and Stores + case RISCV::VLUXSEG2EI8_V: + case RISCV::VLUXSEG3EI8_V: + case RISCV::VLUXSEG4EI8_V: + case RISCV::VLUXSEG5EI8_V: + case RISCV::VLUXSEG6EI8_V: + case RISCV::VLUXSEG7EI8_V: + case RISCV::VLUXSEG8EI8_V: + case RISCV::VLOXSEG2EI8_V: + case RISCV::VLOXSEG3EI8_V: + case RISCV::VLOXSEG4EI8_V: + case RISCV::VLOXSEG5EI8_V: + case RISCV::VLOXSEG6EI8_V: + case RISCV::VLOXSEG7EI8_V: + case RISCV::VLOXSEG8EI8_V: + return getIndexSegmentLoadStoreOperandInfo(3, MI, MO, /* IsLoad */ true); + case RISCV::VSUXSEG2EI8_V: + case RISCV::VSUXSEG3EI8_V: + case RISCV::VSUXSEG4EI8_V: + case RISCV::VSUXSEG5EI8_V: + case RISCV::VSUXSEG6EI8_V: + case RISCV::VSUXSEG7EI8_V: + case RISCV::VSUXSEG8EI8_V: + case RISCV::VSOXSEG2EI8_V: + case RISCV::VSOXSEG3EI8_V: + case RISCV::VSOXSEG4EI8_V: + case RISCV::VSOXSEG5EI8_V: + case RISCV::VSOXSEG6EI8_V: + case RISCV::VSOXSEG7EI8_V: + case RISCV::VSOXSEG8EI8_V: + return getIndexSegmentLoadStoreOperandInfo(3, MI, MO, /* IsLoad */ false); + case RISCV::VLUXSEG2EI16_V: + case RISCV::VLUXSEG3EI16_V: + case RISCV::VLUXSEG4EI16_V: + case RISCV::VLUXSEG5EI16_V: + case RISCV::VLUXSEG6EI16_V: + case RISCV::VLUXSEG7EI16_V: + case RISCV::VLUXSEG8EI16_V: + case RISCV::VLOXSEG2EI16_V: + case RISCV::VLOXSEG3EI16_V: + case RISCV::VLOXSEG4EI16_V: + case RISCV::VLOXSEG5EI16_V: + case RISCV::VLOXSEG6EI16_V: + case RISCV::VLOXSEG7EI16_V: + case RISCV::VLOXSEG8EI16_V: + return getIndexSegmentLoadStoreOperandInfo(4, MI, MO, /* IsLoad */ true); + case RISCV::VSUXSEG2EI16_V: + case RISCV::VSUXSEG3EI16_V: + case RISCV::VSUXSEG4EI16_V: + case RISCV::VSUXSEG5EI16_V: + case RISCV::VSUXSEG6EI16_V: + case RISCV::VSUXSEG7EI16_V: + case RISCV::VSUXSEG8EI16_V: + case RISCV::VSOXSEG2EI16_V: + case RISCV::VSOXSEG3EI16_V: + case RISCV::VSOXSEG4EI16_V: + case 
RISCV::VSOXSEG5EI16_V: + case RISCV::VSOXSEG6EI16_V: + case RISCV::VSOXSEG7EI16_V: + case RISCV::VSOXSEG8EI16_V: + return getIndexSegmentLoadStoreOperandInfo(4, MI, MO, /* IsLoad */ false); + case RISCV::VLUXSEG2EI32_V: + case RISCV::VLUXSEG3EI32_V: + case RISCV::VLUXSEG4EI32_V: + case RISCV::VLUXSEG5EI32_V: + case RISCV::VLUXSEG6EI32_V: + case RISCV::VLUXSEG7EI32_V: + case RISCV::VLUXSEG8EI32_V: + case RISCV::VLOXSEG2EI32_V: + case RISCV::VLOXSEG3EI32_V: + case RISCV::VLOXSEG4EI32_V: + case RISCV::VLOXSEG5EI32_V: + case RISCV::VLOXSEG6EI32_V: + case RISCV::VLOXSEG7EI32_V: + case RISCV::VLOXSEG8EI32_V: + return getIndexSegmentLoadStoreOperandInfo(5, MI, MO, /* IsLoad */ true); + case RISCV::VSUXSEG2EI32_V: + case RISCV::VSUXSEG3EI32_V: + case RISCV::VSUXSEG4EI32_V: + case RISCV::VSUXSEG5EI32_V: + case RISCV::VSUXSEG6EI32_V: + case RISCV::VSUXSEG7EI32_V: + case RISCV::VSUXSEG8EI32_V: + case RISCV::VSOXSEG2EI32_V: + case RISCV::VSOXSEG3EI32_V: + case RISCV::VSOXSEG4EI32_V: + case RISCV::VSOXSEG5EI32_V: + case RISCV::VSOXSEG6EI32_V: + case RISCV::VSOXSEG7EI32_V: + case RISCV::VSOXSEG8EI32_V: + return getIndexSegmentLoadStoreOperandInfo(5, MI, MO, /* IsLoad */ false); + case RISCV::VLUXSEG2EI64_V: + case RISCV::VLUXSEG3EI64_V: + case RISCV::VLUXSEG4EI64_V: + case RISCV::VLUXSEG5EI64_V: + case RISCV::VLUXSEG6EI64_V: + case RISCV::VLUXSEG7EI64_V: + case RISCV::VLUXSEG8EI64_V: + case RISCV::VLOXSEG2EI64_V: + case RISCV::VLOXSEG3EI64_V: + case RISCV::VLOXSEG4EI64_V: + case RISCV::VLOXSEG5EI64_V: + case RISCV::VLOXSEG6EI64_V: + case RISCV::VLOXSEG7EI64_V: + case RISCV::VLOXSEG8EI64_V: + return getIndexSegmentLoadStoreOperandInfo(6, MI, MO, /* IsLoad */ true); + case RISCV::VSUXSEG2EI64_V: + case RISCV::VSUXSEG3EI64_V: + case RISCV::VSUXSEG4EI64_V: + case RISCV::VSUXSEG5EI64_V: + case RISCV::VSUXSEG6EI64_V: + case RISCV::VSUXSEG7EI64_V: + case RISCV::VSUXSEG8EI64_V: + case RISCV::VSOXSEG2EI64_V: + case RISCV::VSOXSEG3EI64_V: + case RISCV::VSOXSEG4EI64_V: + case RISCV::VSOXSEG5EI64_V: + case RISCV::VSOXSEG6EI64_V: + case RISCV::VSOXSEG7EI64_V: + case RISCV::VSOXSEG8EI64_V: + return getIndexSegmentLoadStoreOperandInfo(6, MI, MO, /* IsLoad */ false); + + // 7.9. Vector Load/Store Whole Register Instructions + // EMUL=nr. EEW=eew. Since in-register byte layouts are idential to in-memory + // byte layouts, the same data is writen to destination register regardless + // of EEW. eew is just a hint to the hardware and has not functional impact. + // Therefore, it is be okay if we ignore eew and always use the same EEW to + // create more optimization opportunities. + // FIXME: Instead of using any SEW, we really ought to return the SEW in the + // instruction and add a field to OperandInfo that says the SEW is just a hint + // so that this optimization can use any sew to construct a ratio. + case RISCV::VL1RE8_V: + case RISCV::VL1RE16_V: + case RISCV::VL1RE32_V: + case RISCV::VL1RE64_V: + case RISCV::VS1R_V: + return OperandInfo(RISCVII::VLMUL::LMUL_1, 0); + case RISCV::VL2RE8_V: + case RISCV::VL2RE16_V: + case RISCV::VL2RE32_V: + case RISCV::VL2RE64_V: + case RISCV::VS2R_V: + return OperandInfo(RISCVII::VLMUL::LMUL_2, 0); + case RISCV::VL4RE8_V: + case RISCV::VL4RE16_V: + case RISCV::VL4RE32_V: + case RISCV::VL4RE64_V: + case RISCV::VS4R_V: + return OperandInfo(RISCVII::VLMUL::LMUL_4, 0); + case RISCV::VL8RE8_V: + case RISCV::VL8RE16_V: + case RISCV::VL8RE32_V: + case RISCV::VL8RE64_V: + case RISCV::VS8R_V: + return OperandInfo(RISCVII::VLMUL::LMUL_8, 0); + + // 11. 
Vector Integer Arithmetic Instructions + // 11.1. Vector Single-Width Integer Add and Subtract + case RISCV::VADD_VI: + case RISCV::VADD_VV: + case RISCV::VADD_VX: + case RISCV::VSUB_VV: + case RISCV::VSUB_VX: + case RISCV::VRSUB_VI: + case RISCV::VRSUB_VX: + return OperandInfo(MIVLMul, MILog2SEW); + + // 11.2. Vector Widening Integer Add/Subtract + // Def uses EEW=2*SEW and EMUL=2*LMUL. Operands use EEW=SEW and EMUL=LMUL. + case RISCV::VWADDU_VV: + case RISCV::VWADDU_VX: + case RISCV::VWSUBU_VV: + case RISCV::VWSUBU_VX: + case RISCV::VWADD_VV: + case RISCV::VWADD_VX: + case RISCV::VWSUB_VV: + case RISCV::VWSUB_VX: + case RISCV::VWSLL_VI: { + unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW; + RISCVII::VLMUL EMUL = IsMODef ? twoTimesVLMUL(MIVLMul) : MIVLMul; + return OperandInfo(EMUL, Log2EEW); + } + // Def and Op1 uses EEW=2*SEW and EMUL=2*LMUL. Op2 uses EEW=SEW and EMUL=LMUL + case RISCV::VWADDU_WV: + case RISCV::VWADDU_WX: + case RISCV::VWSUBU_WV: + case RISCV::VWSUBU_WX: + case RISCV::VWADD_WV: + case RISCV::VWADD_WX: + case RISCV::VWSUB_WV: + case RISCV::VWSUB_WX: { + bool TwoTimes = IsMODef || IsOp1; + unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW; + RISCVII::VLMUL EMUL = TwoTimes ? twoTimesVLMUL(MIVLMul) : MIVLMul; + return OperandInfo(EMUL, Log2EEW); + } + // 11.3. Vector Integer Extension + case RISCV::VZEXT_VF2: + case RISCV::VSEXT_VF2: + return getIntegerExtensionOperandInfo(2, MI, MO); + case RISCV::VZEXT_VF4: + case RISCV::VSEXT_VF4: + return getIntegerExtensionOperandInfo(4, MI, MO); + case RISCV::VZEXT_VF8: + case RISCV::VSEXT_VF8: + return getIntegerExtensionOperandInfo(8, MI, MO); + + // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions + // EEW=SEW and EMUL=LMUL. Mask Operand EEW=1 and EMUL=(EEW/SEW)*LMUL + case RISCV::VADC_VIM: + case RISCV::VADC_VVM: + case RISCV::VADC_VXM: + case RISCV::VSBC_VVM: + case RISCV::VSBC_VXM: + return MO.getOperandNo() == 3 + ? OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0) + : OperandInfo(MIVLMul, MILog2SEW); + // Dest EEW=1 and EMUL=(EEW/SEW)*LMUL. Source EEW=SEW and EMUL=LMUL. Mask + // operand EEW=1 and EMUL=(EEW/SEW)*LMUL + case RISCV::VMADC_VIM: + case RISCV::VMADC_VVM: + case RISCV::VMADC_VXM: + case RISCV::VMSBC_VVM: + case RISCV::VMSBC_VXM: + return IsMODef || MO.getOperandNo() == 3 + ? OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0) + : OperandInfo(MIVLMul, MILog2SEW); + // Dest EEW=1 and EMUL=(EEW/SEW)*LMUL. Source EEW=SEW and EMUL=LMUL. + case RISCV::VMADC_VV: + case RISCV::VMADC_VI: + case RISCV::VMADC_VX: + case RISCV::VMSBC_VV: + case RISCV::VMSBC_VX: + return IsMODef ? OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0) + : OperandInfo(MIVLMul, MILog2SEW); + + // 11.5. Vector Bitwise Logical Instructions + // 11.6. Vector Single-Width Shift Instructions + // EEW=SEW. EMUL=LMUL. + case RISCV::VAND_VI: + case RISCV::VAND_VV: + case RISCV::VAND_VX: + case RISCV::VOR_VI: + case RISCV::VOR_VV: + case RISCV::VOR_VX: + case RISCV::VXOR_VI: + case RISCV::VXOR_VV: + case RISCV::VXOR_VX: + case RISCV::VSLL_VI: + case RISCV::VSLL_VV: + case RISCV::VSLL_VX: + case RISCV::VSRL_VI: + case RISCV::VSRL_VV: + case RISCV::VSRL_VX: + case RISCV::VSRA_VI: + case RISCV::VSRA_VV: + case RISCV::VSRA_VX: + return OperandInfo(MIVLMul, MILog2SEW); + + // 11.7. Vector Narrowing Integer Right Shift Instructions + // Destination EEW=SEW and EMUL=LMUL, Op 1 has EEW=2*SEW EMUL=2*LMUL. Op2 has + // EEW=SEW EMUL=LMUL. 
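+ // For example, at SEW=16 and LMUL=1 the wide source (Op 1) is read with
+ // EEW=32 and EMUL=2, while the destination and Op 2 use EEW=16 and EMUL=1.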
+ case RISCV::VNSRL_WX: + case RISCV::VNSRL_WI: + case RISCV::VNSRL_WV: + case RISCV::VNSRA_WI: + case RISCV::VNSRA_WV: + case RISCV::VNSRA_WX: { + bool TwoTimes = IsOp1; + unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW; + RISCVII::VLMUL EMUL = TwoTimes ? twoTimesVLMUL(MIVLMul) : MIVLMul; + return OperandInfo(EMUL, Log2EEW); + } + // 11.8. Vector Integer Compare Instructions + // Dest EEW=1 and EMUL=(EEW/SEW)*LMUL. Source EEW=SEW and EMUL=LMUL. + case RISCV::VMSEQ_VI: + case RISCV::VMSEQ_VV: + case RISCV::VMSEQ_VX: + case RISCV::VMSNE_VI: + case RISCV::VMSNE_VV: + case RISCV::VMSNE_VX: + case RISCV::VMSLTU_VV: + case RISCV::VMSLTU_VX: + case RISCV::VMSLT_VV: + case RISCV::VMSLT_VX: + case RISCV::VMSLEU_VV: + case RISCV::VMSLEU_VI: + case RISCV::VMSLEU_VX: + case RISCV::VMSLE_VV: + case RISCV::VMSLE_VI: + case RISCV::VMSLE_VX: + case RISCV::VMSGTU_VI: + case RISCV::VMSGTU_VX: + case RISCV::VMSGT_VI: + case RISCV::VMSGT_VX: + if (IsMODef) + return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0); + return OperandInfo(MIVLMul, MILog2SEW); + + // 11.9. Vector Integer Min/Max Instructions + // EEW=SEW. EMUL=LMUL. + case RISCV::VMINU_VV: + case RISCV::VMINU_VX: + case RISCV::VMIN_VV: + case RISCV::VMIN_VX: + case RISCV::VMAXU_VV: + case RISCV::VMAXU_VX: + case RISCV::VMAX_VV: + case RISCV::VMAX_VX: + return OperandInfo(MIVLMul, MILog2SEW); + + // 11.10. Vector Single-Width Integer Multiply Instructions + // Source and Dest EEW=SEW and EMUL=LMUL. + case RISCV::VMUL_VV: + case RISCV::VMUL_VX: + case RISCV::VMULH_VV: + case RISCV::VMULH_VX: + case RISCV::VMULHU_VV: + case RISCV::VMULHU_VX: + case RISCV::VMULHSU_VV: + case RISCV::VMULHSU_VX: { + return OperandInfo(MIVLMul, MILog2SEW); + } + // 11.11. Vector Integer Divide Instructions + // EEW=SEW. EMUL=LMUL. + case RISCV::VDIVU_VV: + case RISCV::VDIVU_VX: + case RISCV::VDIV_VV: + case RISCV::VDIV_VX: + case RISCV::VREMU_VV: + case RISCV::VREMU_VX: + case RISCV::VREM_VV: + case RISCV::VREM_VX: + return OperandInfo(MIVLMul, MILog2SEW); + + // 11.12. Vector Widening Integer Multiply Instructions + // Source and Destination EMUL=LMUL. Destination EEW=2*SEW. Source EEW=SEW. + case RISCV::VWMUL_VV: + case RISCV::VWMUL_VX: + case RISCV::VWMULSU_VV: + case RISCV::VWMULSU_VX: + case RISCV::VWMULU_VV: + case RISCV::VWMULU_VX: { + unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW; + RISCVII::VLMUL EMUL = IsMODef ? twoTimesVLMUL(MIVLMul) : MIVLMul; + return OperandInfo(EMUL, Log2EEW); + } + // 11.13. Vector Single-Width Integer Multiply-Add Instructions + // EEW=SEW. EMUL=LMUL. + case RISCV::VMACC_VV: + case RISCV::VMACC_VX: + case RISCV::VNMSAC_VV: + case RISCV::VNMSAC_VX: + case RISCV::VMADD_VV: + case RISCV::VMADD_VX: + case RISCV::VNMSUB_VV: + case RISCV::VNMSUB_VX: + return OperandInfo(MIVLMul, MILog2SEW); + + // 11.14. Vector Widening Integer Multiply-Add Instructions + // Destination EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW and EMUL=LMUL. + // Even though the add is a 2*SEW addition, the operands of the add are the + // Dest which is 2*SEW and the result of the multiply which is 2*SEW. + case RISCV::VWMACCU_VV: + case RISCV::VWMACCU_VX: + case RISCV::VWMACC_VV: + case RISCV::VWMACC_VX: + case RISCV::VWMACCSU_VV: + case RISCV::VWMACCSU_VX: + case RISCV::VWMACCUS_VX: { + // Operand 0 is destination as a def and Operand 1 is destination as a use + // due to SSA. + bool TwoTimes = IsMODef || IsOp1; + unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW; + RISCVII::VLMUL EMUL = TwoTimes ? 
twoTimesVLMUL(MIVLMul) : MIVLMul; + return OperandInfo(EMUL, Log2EEW); + } + // 11.15. Vector Integer Merge Instructions + // EEW=SEW and EMUL=LMUL, except the mask operand has EEW=1 and EMUL= + // (EEW/SEW)*LMUL. + case RISCV::VMERGE_VIM: + case RISCV::VMERGE_VVM: + case RISCV::VMERGE_VXM: + if (MO.getOperandNo() == 3) + return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0); + return OperandInfo(MIVLMul, MILog2SEW); + + // 11.16. Vector Integer Move Instructions + // 12. Vector Fixed-Point Arithmetic Instructions + // 12.1. Vector Single-Width Saturating Add and Subtract + // 12.2. Vector Single-Width Averaging Add and Subtract + // EEW=SEW. EMUL=LMUL. + case RISCV::VMV_V_I: + case RISCV::VMV_V_V: + case RISCV::VMV_V_X: + case RISCV::VSADDU_VI: + case RISCV::VSADDU_VV: + case RISCV::VSADDU_VX: + case RISCV::VSADD_VI: + case RISCV::VSADD_VV: + case RISCV::VSADD_VX: + case RISCV::VSSUBU_VV: + case RISCV::VSSUBU_VX: + case RISCV::VSSUB_VV: + case RISCV::VSSUB_VX: + case RISCV::VAADDU_VV: + case RISCV::VAADDU_VX: + case RISCV::VAADD_VV: + case RISCV::VAADD_VX: + case RISCV::VASUBU_VV: + case RISCV::VASUBU_VX: + case RISCV::VASUB_VV: + case RISCV::VASUB_VX: + return OperandInfo(MIVLMul, MILog2SEW); + + // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation + // Destination EEW=2*SEW and EMUL=2*EMUL. Source EEW=SEW and EMUL=LMUL. + case RISCV::VSMUL_VV: + case RISCV::VSMUL_VX: { + unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW; + RISCVII::VLMUL EMUL = IsMODef ? twoTimesVLMUL(MIVLMul) : MIVLMul; + return OperandInfo(EMUL, Log2EEW); + } + // 12.4. Vector Single-Width Scaling Shift Instructions + // EEW=SEW. EMUL=LMUL. + case RISCV::VSSRL_VI: + case RISCV::VSSRL_VV: + case RISCV::VSSRL_VX: + case RISCV::VSSRA_VI: + case RISCV::VSSRA_VV: + case RISCV::VSSRA_VX: + return OperandInfo(MIVLMul, MILog2SEW); + + // 12.5. Vector Narrowing Fixed-Point Clip Instructions + // Destination and Op1 EEW=SEW and EMUL=LMUL. Op2 EEW=2*SEW and EMUL=2*LMUL + case RISCV::VNCLIPU_WI: + case RISCV::VNCLIPU_WV: + case RISCV::VNCLIPU_WX: + case RISCV::VNCLIP_WI: + case RISCV::VNCLIP_WV: + case RISCV::VNCLIP_WX: { + bool TwoTimes = !IsMODef && IsOp1; + unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW; + RISCVII::VLMUL EMUL = TwoTimes ? twoTimesVLMUL(MIVLMul) : MIVLMul; + return OperandInfo(EMUL, Log2EEW); + } + // 13. Vector Floating-Point Instructions + // 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions + // EEW=SEW. EMUL=LMUL. + case RISCV::VFADD_VF: + case RISCV::VFADD_VV: + case RISCV::VFSUB_VF: + case RISCV::VFSUB_VV: + case RISCV::VFRSUB_VF: + return OperandInfo(MIVLMul, MILog2SEW); + + // 13.3. Vector Widening Floating-Point Add/Subtract Instructions + // Dest EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW and EMUL=LMUL. + case RISCV::VFWADD_VV: + case RISCV::VFWADD_VF: + case RISCV::VFWSUB_VV: + case RISCV::VFWSUB_VF: { + unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW; + RISCVII::VLMUL EMUL = IsMODef ? twoTimesVLMUL(MIVLMul) : MIVLMul; + return OperandInfo(EMUL, Log2EEW); + } + // Dest and Op1 EEW=2*SEW and EMUL=2*LMUL. Op2 EEW=SEW and EMUL=LMUL. + case RISCV::VFWADD_WF: + case RISCV::VFWADD_WV: + case RISCV::VFWSUB_WF: + case RISCV::VFWSUB_WV: { + bool TwoTimes = IsMODef || IsOp1; + unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW; + RISCVII::VLMUL EMUL = TwoTimes ? twoTimesVLMUL(MIVLMul) : MIVLMul; + return OperandInfo(EMUL, Log2EEW); + } + // 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions + // EEW=SEW. EMUL=LMUL. 
+ case RISCV::VFMUL_VF: + case RISCV::VFMUL_VV: + case RISCV::VFDIV_VF: + case RISCV::VFDIV_VV: + case RISCV::VFRDIV_VF: + return OperandInfo(MIVLMul, MILog2SEW); + + // 13.5. Vector Widening Floating-Point Multiply + case RISCV::VFWMUL_VF: + case RISCV::VFWMUL_VV: + return OperandInfo(MIVLMul, MILog2SEW); + + // 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions + // EEW=SEW. EMUL=LMUL. + // TODO: FMA instructions reads 3 registers but MC layer only reads 2 + // registers since its missing that the output operand should be part of the + // input operand list. + case RISCV::VFMACC_VF: + case RISCV::VFMACC_VV: + case RISCV::VFNMACC_VF: + case RISCV::VFNMACC_VV: + case RISCV::VFMSAC_VF: + case RISCV::VFMSAC_VV: + case RISCV::VFNMSAC_VF: + case RISCV::VFNMSAC_VV: + case RISCV::VFMADD_VF: + case RISCV::VFMADD_VV: + case RISCV::VFNMADD_VF: + case RISCV::VFNMADD_VV: + case RISCV::VFMSUB_VF: + case RISCV::VFMSUB_VV: + case RISCV::VFNMSUB_VF: + case RISCV::VFNMSUB_VV: + return OperandInfo(OperandInfo::State::Unknown); + + // 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions + // Dest EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW EMUL=LMUL. + case RISCV::VFWMACC_VF: + case RISCV::VFWMACC_VV: + case RISCV::VFWNMACC_VF: + case RISCV::VFWNMACC_VV: + case RISCV::VFWMSAC_VF: + case RISCV::VFWMSAC_VV: + case RISCV::VFWNMSAC_VF: + case RISCV::VFWNMSAC_VV: { + // Operand 0 is destination as a def and Operand 1 is destination as a use + // due to SSA. + bool TwoTimes = IsMODef || IsOp1; + unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW; + RISCVII::VLMUL EMUL = TwoTimes ? twoTimesVLMUL(MIVLMul) : MIVLMul; + return OperandInfo(EMUL, Log2EEW); + } + // 13.8. Vector Floating-Point Square-Root Instruction + // 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction + // 13.10. Vector Floating-Point Reciprocal Estimate Instruction + // 13.11. Vector Floating-Point MIN/MAX Instructions + // 13.12. Vector Floating-Point Sign-Injection Instructions + // 13.14. Vector Floating-Point Classify Instruction + // 13.16. Vector Floating-Point Move Instruction + // 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions + // EEW=SEW. EMUL=LMUL. + case RISCV::VFSQRT_V: + case RISCV::VFRSQRT7_V: + case RISCV::VFREC7_V: + case RISCV::VFMIN_VF: + case RISCV::VFMIN_VV: + case RISCV::VFMAX_VF: + case RISCV::VFMAX_VV: + case RISCV::VFSGNJ_VF: + case RISCV::VFSGNJ_VV: + case RISCV::VFSGNJN_VV: + case RISCV::VFSGNJN_VF: + case RISCV::VFSGNJX_VF: + case RISCV::VFSGNJX_VV: + case RISCV::VFCLASS_V: + case RISCV::VFMV_V_F: + case RISCV::VFCVT_XU_F_V: + case RISCV::VFCVT_X_F_V: + case RISCV::VFCVT_RTZ_XU_F_V: + case RISCV::VFCVT_RTZ_X_F_V: + case RISCV::VFCVT_F_XU_V: + case RISCV::VFCVT_F_X_V: + return OperandInfo(MIVLMul, MILog2SEW); + + // 13.13. Vector Floating-Point Compare Instructions + // Dest EEW=1 and EMUL=(EEW/SEW)*LMUL. Source EEW=SEW EMUL=LMUL. + case RISCV::VMFEQ_VF: + case RISCV::VMFEQ_VV: + case RISCV::VMFNE_VF: + case RISCV::VMFNE_VV: + case RISCV::VMFLT_VF: + case RISCV::VMFLT_VV: + case RISCV::VMFLE_VF: + case RISCV::VMFLE_VV: + case RISCV::VMFGT_VF: + case RISCV::VMFGE_VF: + if (IsMODef) + return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0); + return OperandInfo(MIVLMul, MILog2SEW); + + // 13.15. Vector Floating-Point Merge Instruction + // EEW=SEW and EMUL=LMUL, except the mask operand has EEW=1 and EMUL= + // (EEW/SEW)*LMUL. 
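+ // For example, at SEW=32 and LMUL=2 the mask operand is read with EEW=1 and
+ // an effective EMUL of (1/32)*2 = 1/16.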
+ case RISCV::VFMERGE_VFM: + if (IsOp3) + return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0); + return OperandInfo(MIVLMul, MILog2SEW); + + // 13.18. Widening Floating-Point/Integer Type-Convert Instructions + // Dest EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW and EMUL=LMUL. + case RISCV::VFWCVT_XU_F_V: + case RISCV::VFWCVT_X_F_V: + case RISCV::VFWCVT_RTZ_XU_F_V: + case RISCV::VFWCVT_RTZ_X_F_V: + case RISCV::VFWCVT_F_XU_V: + case RISCV::VFWCVT_F_X_V: + case RISCV::VFWCVT_F_F_V: { + unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW; + RISCVII::VLMUL EMUL = IsMODef ? twoTimesVLMUL(MIVLMul) : MIVLMul; + return OperandInfo(EMUL, Log2EEW); + } + // 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions + // EMUL=LMUL. Dest EEW=SEW/2. Source EEW=SEW EMUL=LMUL. + case RISCV::VFNCVT_XU_F_W: + case RISCV::VFNCVT_X_F_W: + case RISCV::VFNCVT_RTZ_XU_F_W: + case RISCV::VFNCVT_RTZ_X_F_W: + case RISCV::VFNCVT_F_XU_W: + case RISCV::VFNCVT_F_X_W: + case RISCV::VFNCVT_F_F_W: + case RISCV::VFNCVT_ROD_F_F_W: { + unsigned Log2EEW = IsMODef ? MILog2SEW - 1 : MILog2SEW; + RISCVII::VLMUL EMUL = IsMODef ? halfVLMUL(MIVLMul) : MIVLMul; + return OperandInfo(EMUL, Log2EEW); + } + // 14. Vector Reduction Operations + // 14.1. Vector Single-Width Integer Reduction Instructions + // We need to return Unknown since only element 0 of reduction is valid but it + // was generated by reducing over all of the input elements. There are 3 + // vector sources for reductions. One for scalar, one for tail value, and one + // for the elements to reduce over. Only the one with the elements to reduce + // over obeys VL. The other two only read element 0 from the register. + case RISCV::VREDAND_VS: + case RISCV::VREDMAX_VS: + case RISCV::VREDMAXU_VS: + case RISCV::VREDMIN_VS: + case RISCV::VREDMINU_VS: + case RISCV::VREDOR_VS: + case RISCV::VREDSUM_VS: + case RISCV::VREDXOR_VS: + return OperandInfo(OperandInfo::State::Unknown); + + // 14.2. Vector Widening Integer Reduction Instructions + // Dest EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW EMUL=LMUL. Source is zero + // extended to 2*SEW in order to generate 2*SEW Dest. + case RISCV::VWREDSUM_VS: + case RISCV::VWREDSUMU_VS: { + unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW; + RISCVII::VLMUL EMUL = IsMODef ? twoTimesVLMUL(MIVLMul) : MIVLMul; + return OperandInfo(EMUL, Log2EEW); + } + // 14.3. Vector Single-Width Floating-Point Reduction Instructions + // EMUL=LMUL. EEW=SEW. + case RISCV::VFREDMAX_VS: + case RISCV::VFREDMIN_VS: + case RISCV::VFREDOSUM_VS: + case RISCV::VFREDUSUM_VS: + return OperandInfo(MIVLMul, MILog2SEW); + + // 14.4. Vector Widening Floating-Point Reduction Instructions + // Source EEW=SEW and EMUL=LMUL. Dest EEW=2*SEW and EMUL=2*LMUL. + case RISCV::VFWREDOSUM_VS: + case RISCV::VFWREDUSUM_VS: { + unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW; + RISCVII::VLMUL EMUL = IsMODef ? twoTimesVLMUL(MIVLMul) : MIVLMul; + return OperandInfo(EMUL, Log2EEW); + } + + // 15. Vector Mask Instructions + // 15.2. Vector count population in mask vcpop.m + // 15.3. vfirst find-first-set mask bit + // 15.4. vmsbf.m set-before-first mask bit + // 15.6. 
vmsof.m set-only-first mask bit + // EEW=1 and EMUL= (EEW/SEW)*LMUL + case RISCV::VMAND_MM: + case RISCV::VMNAND_MM: + case RISCV::VMANDN_MM: + case RISCV::VMXOR_MM: + case RISCV::VMOR_MM: + case RISCV::VMNOR_MM: + case RISCV::VMORN_MM: + case RISCV::VMXNOR_MM: + case RISCV::VCPOP_M: + case RISCV::VFIRST_M: + case RISCV::VMSBF_M: + case RISCV::VMSIF_M: + case RISCV::VMSOF_M: { + return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0); + } + // 15.8. Vector Iota Instruction + // Dest and Op1 EEW=SEW and EMUL=LMUL. Op2 EEW=1 and EMUL(EEW/SEW)*LMUL. + case RISCV::VIOTA_M: { + bool IsDefOrOp1 = IsMODef || IsOp1; + unsigned Log2EEW = IsDefOrOp1 ? 0 : MILog2SEW; + if (IsDefOrOp1) + return OperandInfo(MIVLMul, Log2EEW); + return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(MILog2SEW, MI), Log2EEW); + } + // 15.9. Vector Element Index Instruction + // Dest EEW=SEW EMUL=LMUL. Mask Operand EEW=1 and EMUL(EEW/SEW)*LMUL. + case RISCV::VID_V: { + unsigned Log2EEW = IsMODef ? MILog2SEW : 0; + if (IsMODef) + return OperandInfo(MIVLMul, Log2EEW); + return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(Log2EEW, MI), Log2EEW); + } + // 16. Vector Permutation Instructions + // 16.1. Integer Scalar Move Instructions + // 16.2. Floating-Point Scalar Move Instructions + // EMUL=LMUL. EEW=SEW. + case RISCV::VMV_X_S: + case RISCV::VMV_S_X: + case RISCV::VFMV_F_S: + case RISCV::VFMV_S_F: + return OperandInfo(MIVLMul, MILog2SEW); + + // 16.3. Vector Slide Instructions + // EMUL=LMUL. EEW=SEW. + case RISCV::VSLIDEUP_VI: + case RISCV::VSLIDEUP_VX: + case RISCV::VSLIDEDOWN_VI: + case RISCV::VSLIDEDOWN_VX: + case RISCV::VSLIDE1UP_VX: + case RISCV::VFSLIDE1UP_VF: + case RISCV::VSLIDE1DOWN_VX: + case RISCV::VFSLIDE1DOWN_VF: + return OperandInfo(MIVLMul, MILog2SEW); + + // 16.4. Vector Register Gather Instructions + // EMUL=LMUL. EEW=SEW. For mask operand, EMUL=1 and EEW=1. + case RISCV::VRGATHER_VI: + case RISCV::VRGATHER_VV: + case RISCV::VRGATHER_VX: + return OperandInfo(MIVLMul, MILog2SEW); + // Destination EMUL=LMUL and EEW=SEW. Op2 EEW=SEW and EMUL=LMUL. Op1 EEW=16 + // and EMUL=(16/SEW)*LMUL. + case RISCV::VRGATHEREI16_VV: { + if (IsMODef || IsOp2) + return OperandInfo(MIVLMul, MILog2SEW); + return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(4, MI), 4); + } + // 16.5. Vector Compress Instruction + // EMUL=LMUL. EEW=SEW. + case RISCV::VCOMPRESS_VM: + return OperandInfo(MIVLMul, MILog2SEW); + + // 16.6. Whole Vector Register Move + case RISCV::VMV1R_V: + case RISCV::VMV2R_V: + case RISCV::VMV4R_V: + case RISCV::VMV8R_V: + llvm_unreachable("These instructions don't have pseudo versions so they " + "don't have an SEW operand."); + + default: + return OperandInfo(OperandInfo::State::Unknown); + } +} + +/// Return true if this optimization should consider MI for VL reduction. This +/// white-list approach simplifies this optimization for instructions that may +/// have more complex semantics with relation to how it uses VL. 
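+/// For example, reductions are not listed: they read all VL elements of the
+/// source vector while only element 0 of the result is defined.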
+static bool isSupportedInstr(const MachineInstr &MI) { + const RISCVVPseudosTable::PseudoInfo *RVV = + RISCVVPseudosTable::getPseudoInfo(MI.getOpcode()); + + if (!RVV) + return false; + + switch (RVV->BaseInstr) { + case RISCV::VADD_VI: + case RISCV::VADD_VV: + case RISCV::VADD_VX: + case RISCV::VMUL_VV: + case RISCV::VMUL_VX: + case RISCV::VSLL_VI: + case RISCV::VSEXT_VF2: + case RISCV::VSEXT_VF4: + case RISCV::VSEXT_VF8: + case RISCV::VZEXT_VF2: + case RISCV::VZEXT_VF4: + case RISCV::VZEXT_VF8: + case RISCV::VMV_V_I: + case RISCV::VMV_V_X: + case RISCV::VNSRL_WI: + case RISCV::VWADD_VV: + case RISCV::VWADDU_VV: + case RISCV::VWMACC_VX: + case RISCV::VWMACCU_VX: + case RISCV::VWSLL_VI: + return true; + } + + return false; +} + +/// Return true if MO is a vector operand but is used as a scalar operand. +static bool isVectorOpUsedAsScalarOp(MachineOperand &MO) { + MachineInstr *MI = MO.getParent(); + const RISCVVPseudosTable::PseudoInfo *RVV = + RISCVVPseudosTable::getPseudoInfo(MI->getOpcode()); + + if (!RVV) + return false; + + switch (RVV->BaseInstr) { + // Reductions only use vs1[0] of vs1 + case RISCV::VREDAND_VS: + case RISCV::VREDMAX_VS: + case RISCV::VREDMAXU_VS: + case RISCV::VREDMIN_VS: + case RISCV::VREDMINU_VS: + case RISCV::VREDOR_VS: + case RISCV::VREDSUM_VS: + case RISCV::VREDXOR_VS: + case RISCV::VWREDSUM_VS: + case RISCV::VWREDSUMU_VS: + case RISCV::VFREDMAX_VS: + case RISCV::VFREDMIN_VS: + case RISCV::VFREDOSUM_VS: + case RISCV::VFREDUSUM_VS: + case RISCV::VFWREDOSUM_VS: + case RISCV::VFWREDUSUM_VS: { + return isOpN(MO, 2); + } + default: + return false; + } +} + +static bool safeToPropgateVL(const MachineInstr &MI) { + const RISCVVPseudosTable::PseudoInfo *RVV = + RISCVVPseudosTable::getPseudoInfo(MI.getOpcode()); + if (!RVV) + return false; + + switch (RVV->BaseInstr) { + // vslidedown instructions may use the higher part of the input operand beyond + // the VL. + case RISCV::VSLIDEDOWN_VI: + case RISCV::VSLIDEDOWN_VX: + case RISCV::VSLIDE1DOWN_VX: + case RISCV::VFSLIDE1DOWN_VF: + + // vrgather instructions may index beyond the VL. + case RISCV::VRGATHER_VI: + case RISCV::VRGATHER_VV: + case RISCV::VRGATHER_VX: + case RISCV::VRGATHEREI16_VV: + return false; + + default: + return true; + } +} + +bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const { + + LLVM_DEBUG( + dbgs() << "Check whether the instruction is a candidate for reducing VL:" + << MI << "\n"); + + const MCInstrDesc &Desc = MI.getDesc(); + if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) { + LLVM_DEBUG(dbgs() << " Not a candidate due to lack of vl op or sew op\n"); + return false; + } + + if (MI.getNumDefs() != 1) { + LLVM_DEBUG(dbgs() << " Not a candidate due to it def more than one\n"); + return false; + } + unsigned VLOpNum = RISCVII::getVLOpNum(Desc); + const MachineOperand &VLOp = MI.getOperand(VLOpNum); + if (!VLOp.isImm() || VLOp.getImm() != RISCV::VLMaxSentinel) { + LLVM_DEBUG(dbgs() << " Not a candidate due to VL is not VLMAX\n"); + return false; + } + + // Some instructions that produce vectors have semantics that make it more + // difficult to determine whether the VL can be reduced. For example, some + // instructions, such as reductions, may write lanes past VL to a scalar + // register. Other instructions, such as some loads or stores, may write + // lower lanes using data from higher lanes. There may be other complex + // semantics not mentioned here that make it hard to determine whether + // the VL can be optimized. 
As a result, a white-list of supported + // instructions is used. Over time, more instructions cam be supported + // upon careful examination of their semantics under the logic in this + // optimization. + // TODO: Use a better approach than a white-list, such as adding + // properties to instructions using something like TSFlags. + if (!isSupportedInstr(MI)) { + LLVM_DEBUG(dbgs() << " Not a candidate due to unsupported instruction\n"); + return false; + } + + return true; +} + +bool RISCVVLOptimizer::tryReduceVL(MachineInstr &OrigMI) { + SetVector Worklist; + Worklist.insert(&OrigMI); + + bool MadeChange = false; + while (!Worklist.empty()) { + MachineInstr &MI = *Worklist.pop_back_val(); + LLVM_DEBUG(dbgs() << "Try reduce VL for " << MI << "\n"); + std::optional CommonVL; + bool CanReduceVL = true; + for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg())) { + const MachineInstr &UserMI = *UserOp.getParent(); + LLVM_DEBUG(dbgs() << " Check user: " << UserMI << "\n"); + + // Instructions like reductions may use a vector register as a scalar + // register. In this case, we should treat it like a scalar register which + // does not impact the decision on whether to optimize VL. + if (isVectorOpUsedAsScalarOp(UserOp)) { + [[maybe_unused]] Register R = UserOp.getReg(); + [[maybe_unused]] const TargetRegisterClass *RC = MRI->getRegClass(R); + assert(RISCV::VRRegClass.hasSubClassEq(RC) && + "Expect LMUL 1 register class for vector as scalar operands!"); + LLVM_DEBUG(dbgs() << " Use this operand as a scalar operand\n"); + continue; + } + + if (!safeToPropgateVL(UserMI)) { + LLVM_DEBUG(dbgs() << " Abort due to used by unsafe instruction\n"); + CanReduceVL = false; + break; + } + + // Tied operands might pass through. + if (UserOp.isTied()) { + LLVM_DEBUG(dbgs() << " Abort due to user use it as tied operand\n"); + CanReduceVL = false; + break; + } + + const MCInstrDesc &Desc = UserMI.getDesc(); + if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) { + LLVM_DEBUG(dbgs() << " Abort due to lack of VL or SEW, assume that" + " use VLMAX.\n"); + CanReduceVL = false; + break; + } + + unsigned VLOpNum = RISCVII::getVLOpNum(Desc); + const MachineOperand &VLOp = UserMI.getOperand(VLOpNum); + // Looking for a register VL that isn't X0. + if (!VLOp.isReg() || VLOp.getReg() == RISCV::X0) { + LLVM_DEBUG(dbgs() << " Abort due to user use X0 as VL.\n"); + CanReduceVL = false; + break; + } + + if (!CommonVL) { + CommonVL = VLOp.getReg(); + } else if (*CommonVL != VLOp.getReg()) { + LLVM_DEBUG(dbgs() << " Abort due to users have different VL!\n"); + CanReduceVL = false; + break; + } + + // The SEW and LMUL of destination and source registers need to match. + + // If the produced Dest is not a vector register, then it has no EEW or + // EMUL, so there is no need to check that producer and consumer LMUL and + // SEW match. We've already checked above that UserOp is a vector + // register. 
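+ // (If the EEW/EMUL of the connecting operands differ, the same VL value
+ // covers a different amount of the register group for the producer and the
+ // consumer, so it cannot simply be reused.)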
+ if (!isVectorRegClass(MI.getOperand(0).getReg(), MRI)) { + LLVM_DEBUG(dbgs() << " Abort due to register class mismatch between " + "USE and DEF\n"); + continue; + } + + OperandInfo ConsumerInfo = getOperandInfo(UserMI, UserOp, MRI); + OperandInfo ProducerInfo = getOperandInfo(MI, MI.getOperand(0), MRI); + if (ConsumerInfo.isUnknown() || ProducerInfo.isUnknown() || + !OperandInfo::EMULAndEEWAreEqual(ConsumerInfo, ProducerInfo)) { + LLVM_DEBUG(dbgs() << " Abort due to incompatible or unknown " + "information for EMUL or EEW.\n"); + LLVM_DEBUG(dbgs() << " ConsumerInfo is: " << ConsumerInfo << "\n"); + LLVM_DEBUG(dbgs() << " ProducerInfo is: " << ProducerInfo << "\n"); + CanReduceVL = false; + break; + } + } + + if (!CanReduceVL || !CommonVL) + continue; + + if (!CommonVL->isVirtual()) { + LLVM_DEBUG( + dbgs() << " Abort due to new VL is not virtual register.\n"); + continue; + } + + const MachineInstr *VLMI = MRI->getVRegDef(*CommonVL); + if (!MDT->dominates(VLMI, &MI)) + continue; + + // All our checks passed. We can reduce VL. + unsigned VLOpNum = RISCVII::getVLOpNum(MI.getDesc()); + MachineOperand &VLOp = MI.getOperand(VLOpNum); + VLOp.ChangeToRegister(*CommonVL, false); + MadeChange = true; + + // Now add all inputs to this instruction to the worklist. + for (auto &Op : MI.operands()) { + if (!Op.isReg() || !Op.isUse() || !Op.getReg().isVirtual()) + continue; + + if (!isVectorRegClass(Op.getReg(), MRI)) + continue; + + MachineInstr *DefMI = MRI->getVRegDef(Op.getReg()); + + if (!isCandidate(*DefMI)) + continue; + + Worklist.insert(DefMI); + } + } + + return MadeChange; +} + +bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + MRI = &MF.getRegInfo(); + MDT = &getAnalysis().getDomTree(); + + const RISCVSubtarget &ST = MF.getSubtarget(); + if (!ST.hasVInstructions()) + return false; + + bool MadeChange = false; + for (MachineBasicBlock &MBB : MF) { + // Visit instructions in reverse order. 
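+ // Users later in the block are visited first, so once their VL has been
+ // reduced to a register the producers visited afterwards can be reduced to
+ // match.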
+ for (auto &MI : make_range(MBB.rbegin(), MBB.rend())) { + if (!isCandidate(MI)) + continue; + + MadeChange |= tryReduceVL(MI); + } + } + + return MadeChange; +} diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index 5d14d14d216244..4d24d3333f0467 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -118,6 +118,8 @@ ; RV64-NEXT: RISC-V Optimize W Instructions ; CHECK-NEXT: RISC-V Pre-RA pseudo instruction expansion pass ; CHECK-NEXT: RISC-V Merge Base Offset +; CHECK-NEXT: MachineDominator Tree Construction +; CHECK-NEXT: RISC-V VL Optimizer ; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass ; CHECK-NEXT: RISC-V Insert Write VXRM Pass ; CHECK-NEXT: RISC-V Landing Pad Setup @@ -128,7 +130,6 @@ ; CHECK-NEXT: Live Variable Analysis ; CHECK-NEXT: Eliminate PHI nodes for register allocation ; CHECK-NEXT: Two-Address instruction pass -; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Slot index numbering ; CHECK-NEXT: Live Interval Analysis ; CHECK-NEXT: Register Coalescer diff --git a/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll b/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll index e47517abacb4d3..63212e7cd5e004 100644 --- a/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll +++ b/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll @@ -10,10 +10,10 @@ declare @llvm.riscv.vloxei.nxv4i32.nxv4i64( define @test_vloxei(ptr %ptr, %offset, i64 %vl) { ; CHECK-LABEL: test_vloxei: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; CHECK-NEXT: vzext.vf8 v12, v8 ; CHECK-NEXT: vsll.vi v12, v12, 4 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vloxei64.v v8, (a0), v12 ; CHECK-NEXT: ret entry: @@ -30,10 +30,10 @@ entry: define @test_vloxei2(ptr %ptr, %offset, i64 %vl) { ; CHECK-LABEL: test_vloxei2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; CHECK-NEXT: vzext.vf8 v12, v8 ; CHECK-NEXT: vsll.vi v12, v12, 14 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vloxei64.v v8, (a0), v12 ; CHECK-NEXT: ret entry: @@ -50,10 +50,10 @@ entry: define @test_vloxei3(ptr %ptr, %offset, i64 %vl) { ; CHECK-LABEL: test_vloxei3: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; CHECK-NEXT: vzext.vf8 v12, v8 ; CHECK-NEXT: vsll.vi v12, v12, 26 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vloxei64.v v8, (a0), v12 ; CHECK-NEXT: ret entry: @@ -74,9 +74,8 @@ define @test_vloxei4(ptr %ptr, %offset, @llvm.riscv.vloxei.nxv4i32.nxv4i16( define @test_vloxei5(ptr %ptr, %offset, i64 %vl) { ; CHECK-LABEL: test_vloxei5: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vzext.vf2 v9, v8 ; CHECK-NEXT: vsll.vi v10, v9, 12 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vloxei16.v v8, (a0), v10 ; CHECK-NEXT: ret entry: @@ -123,10 +122,10 @@ define @test_vloxei6(ptr %ptr, %offset, i64 ; CHECK-NEXT: li a2, 127 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a2 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: 
vsetvli zero, a1, e64, m4, ta, ma ; CHECK-NEXT: vzext.vf8 v12, v8 ; CHECK-NEXT: vsll.vi v12, v12, 4 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vloxei64.v v8, (a0), v12 ; CHECK-NEXT: ret entry: @@ -146,8 +145,9 @@ define @test_vloxei7(ptr %ptr, %offset, i64 ; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; CHECK-NEXT: vsll.vi v12, v8, 2 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vloxei64.v v8, (a0), v12 ; CHECK-NEXT: ret entry: @@ -172,10 +172,10 @@ declare @llvm.riscv.vloxei.mask.nxv4i32.nxv4i64( define @test_vloxei_mask(ptr %ptr, %offset, %m, i64 %vl) { ; CHECK-LABEL: test_vloxei_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; CHECK-NEXT: vzext.vf8 v12, v8 ; CHECK-NEXT: vsll.vi v12, v12, 4 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vloxei64.v v8, (a0), v12, v0.t ; CHECK-NEXT: ret entry: @@ -199,10 +199,10 @@ declare @llvm.riscv.vluxei.nxv4i32.nxv4i64( define @test_vluxei(ptr %ptr, %offset, i64 %vl) { ; CHECK-LABEL: test_vluxei: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; CHECK-NEXT: vzext.vf8 v12, v8 ; CHECK-NEXT: vsll.vi v12, v12, 4 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vluxei64.v v8, (a0), v12 ; CHECK-NEXT: ret entry: @@ -227,10 +227,10 @@ declare @llvm.riscv.vluxei.mask.nxv4i32.nxv4i64( define @test_vluxei_mask(ptr %ptr, %offset, %m, i64 %vl) { ; CHECK-LABEL: test_vluxei_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; CHECK-NEXT: vzext.vf8 v12, v8 ; CHECK-NEXT: vsll.vi v12, v12, 4 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vluxei64.v v8, (a0), v12, v0.t ; CHECK-NEXT: ret entry: @@ -254,10 +254,10 @@ declare void @llvm.riscv.vsoxei.nxv4i32.nxv4i64( define void @test_vsoxei( %val, ptr %ptr, %offset, i64 %vl) { ; CHECK-LABEL: test_vsoxei: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; CHECK-NEXT: vzext.vf8 v12, v10 ; CHECK-NEXT: vsll.vi v12, v12, 4 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vsoxei64.v v8, (a0), v12 ; CHECK-NEXT: ret entry: @@ -281,10 +281,10 @@ declare void @llvm.riscv.vsoxei.mask.nxv4i32.nxv4i64( define void @test_vsoxei_mask( %val, ptr %ptr, %offset, %m, i64 %vl) { ; CHECK-LABEL: test_vsoxei_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; CHECK-NEXT: vzext.vf8 v12, v10 ; CHECK-NEXT: vsll.vi v12, v12, 4 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vsoxei64.v v8, (a0), v12, v0.t ; CHECK-NEXT: ret entry: @@ -308,10 +308,10 @@ declare void @llvm.riscv.vsuxei.nxv4i32.nxv4i64( define void @test_vsuxei( %val, ptr %ptr, %offset, i64 %vl) { ; CHECK-LABEL: test_vsuxei: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli 
zero, a1, e64, m4, ta, ma ; CHECK-NEXT: vzext.vf8 v12, v10 ; CHECK-NEXT: vsll.vi v12, v12, 4 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vsuxei64.v v8, (a0), v12 ; CHECK-NEXT: ret entry: @@ -335,10 +335,10 @@ declare void @llvm.riscv.vsuxei.mask.nxv4i32.nxv4i64( define void @test_vsuxei_mask( %val, ptr %ptr, %offset, %m, i64 %vl) { ; CHECK-LABEL: test_vsuxei_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; CHECK-NEXT: vzext.vf8 v12, v10 ; CHECK-NEXT: vsll.vi v12, v12, 4 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vsuxei64.v v8, (a0), v12, v0.t ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll index eb8c58d2d37790..073a0aecb8f732 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll @@ -184,9 +184,8 @@ define @icmp_uge_vv_nxv1i8( %va, @icmp_uge_vx_nxv1i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_uge_vx_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsleu.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -348,9 +347,8 @@ define @icmp_sge_vv_nxv1i8( %va, @icmp_sge_vx_nxv1i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sge_vx_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsle.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -470,9 +468,8 @@ define @icmp_sle_vx_nxv1i8( %va, i8 %b, @icmp_sle_vx_swap_nxv1i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sle_vx_swap_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsle.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -764,9 +761,8 @@ define @icmp_uge_vv_nxv8i8( %va, @icmp_uge_vx_nxv8i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_uge_vx_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsleu.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -928,9 +924,8 @@ define @icmp_sge_vv_nxv8i8( %va, @icmp_sge_vx_nxv8i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sge_vx_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsle.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -1050,9 +1045,8 @@ define @icmp_sle_vx_nxv8i8( %va, i8 %b, @icmp_sle_vx_swap_nxv8i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sle_vx_swap_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsle.vv v0, v9, 
v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -1375,9 +1369,8 @@ define @icmp_uge_vv_nxv1i32( %va, @icmp_uge_vx_nxv1i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_uge_vx_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsleu.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1539,9 +1532,8 @@ define @icmp_sge_vv_nxv1i32( %va, @icmp_sge_vx_nxv1i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sge_vx_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsle.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1661,9 +1653,8 @@ define @icmp_sle_vx_nxv1i32( %va, i32 %b, @icmp_sle_vx_swap_nxv1i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sle_vx_swap_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsle.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1885,9 +1876,8 @@ define @icmp_uge_vv_nxv8i32( %va, @icmp_uge_vx_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_uge_vx_nxv8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv.v.x v16, a0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 ; CHECK-NEXT: vmsleu.vv v12, v16, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -2064,9 +2054,8 @@ define @icmp_sge_vv_nxv8i32( %va, @icmp_sge_vx_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sge_vx_nxv8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv.v.x v16, a0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 ; CHECK-NEXT: vmsle.vv v12, v16, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -2197,9 +2186,8 @@ define @icmp_sle_vx_nxv8i32( %va, i32 %b, @icmp_sle_vx_swap_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sle_vx_swap_nxv8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv.v.x v16, a0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 ; CHECK-NEXT: vmsle.vv v12, v16, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -2633,9 +2621,8 @@ define @icmp_uge_vx_nxv1i64( %va, i64 %b, poison, i64 %b, i32 0 @@ -2881,9 +2868,8 @@ define @icmp_sge_vx_nxv1i64( %va, i64 %b, poison, i64 %b, i32 0 @@ -3073,9 +3059,8 @@ define @icmp_sle_vx_swap_nxv1i64( %va, i64 % ; ; RV64-LABEL: icmp_sle_vx_swap_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vmsle.vv v0, v9, v8, v0.t ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 @@ -3402,9 +3387,8 @@ define @icmp_uge_vx_nxv8i64( %va, i64 %b, @icmp_sge_vx_nxv8i64( %va, i64 %b, @icmp_sle_vx_swap_nxv8i64( %va, i64 % ; ; RV64-LABEL: icmp_sle_vx_swap_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vmv.v.x v24, a0 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; 
RV64-NEXT: vmv.v.x v24, a0 ; RV64-NEXT: vmsle.vv v16, v24, v8, v0.t ; RV64-NEXT: vmv1r.v v0, v16 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll index a4b7ca7f39768f..d276631c5883e8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll @@ -12,9 +12,7 @@ define @vdiv_vx_nxv8i7( %a, i7 signext %b, @vdivu_vx_nxv8i7( %a, i7 signext %b, < ; CHECK-NEXT: li a2, 127 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a2, v0.t -; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vand.vx v9, v9, a2, v0.t ; CHECK-NEXT: vdivu.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll index 80ada4670562d7..d4930bd2ae0396 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll @@ -143,9 +143,8 @@ define @vfmacc_vf_nxv1f32( %va, half %b, ; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -170,9 +169,8 @@ define @vfmacc_vf_nxv1f32_commute( %va, ; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v11, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v11, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -198,9 +196,8 @@ define @vfmacc_vf_nxv1f32_unmasked( %va, ; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -225,9 +222,8 @@ define @vfmacc_vf_nxv1f32_tu( %va, half ; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_tu: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, tu, mu @@ -254,9 +250,8 @@ define @vfmacc_vf_nxv1f32_commute_tu( %v ; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_commute_tu: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, tu, mu @@ -283,9 +278,8 @@ define @vfmacc_vf_nxv1f32_unmasked_tu( % ; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_unmasked_tu: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: 
vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, tu, ma @@ -362,9 +356,8 @@ define @vfmacc_vf_nxv2f32( %va, half %b, ; ZVFHMIN-LABEL: vfmacc_vf_nxv2f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -389,9 +382,8 @@ define @vfmacc_vf_nxv2f32_unmasked( %va, ; ZVFHMIN-LABEL: vfmacc_vf_nxv2f32_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -468,9 +460,8 @@ define @vfmacc_vf_nxv4f32( %va, half %b, ; ZVFHMIN-LABEL: vfmacc_vf_nxv4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v12, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v12, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma @@ -495,9 +486,8 @@ define @vfmacc_vf_nxv4f32_unmasked( %va, ; ZVFHMIN-LABEL: vfmacc_vf_nxv4f32_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v12, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v12, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma @@ -574,9 +564,8 @@ define @vfmacc_vf_nxv8f32( %va, half %b, ; ZVFHMIN-LABEL: vfmacc_vf_nxv8f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v16, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v16, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma @@ -601,9 +590,8 @@ define @vfmacc_vf_nxv8f32_unmasked( %va, ; ZVFHMIN-LABEL: vfmacc_vf_nxv8f32_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v16, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v16, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma @@ -694,9 +682,8 @@ define @vfmacc_vf_nxv16f32( %va, half ; ZVFHMIN-LABEL: vfmacc_vf_nxv16f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v4, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v4, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma @@ -721,9 +708,8 @@ define @vfmacc_vf_nxv16f32_unmasked( % ; ZVFHMIN-LABEL: vfmacc_vf_nxv16f32_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, 
e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v24, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v24, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll index c92a79e49c1642..94b80075ac14c5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll @@ -120,9 +120,8 @@ define @vmfsac_vf_nxv1f32( %a, half %b, ; ZVFHMIN-LABEL: vmfsac_vf_nxv1f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -148,9 +147,8 @@ define @vmfsac_vf_nxv1f32_commute( %a, h ; ZVFHMIN-LABEL: vmfsac_vf_nxv1f32_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v11, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v11, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -177,9 +175,8 @@ define @vmfsac_vf_nxv1f32_unmasked( %a, ; ZVFHMIN-LABEL: vmfsac_vf_nxv1f32_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -255,9 +252,8 @@ define @vmfsac_vf_nxv2f32( %a, half %b, ; ZVFHMIN-LABEL: vmfsac_vf_nxv2f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -283,9 +279,8 @@ define @vmfsac_vf_nxv2f32_commute( %a, h ; ZVFHMIN-LABEL: vmfsac_vf_nxv2f32_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v11, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v11, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -312,9 +307,8 @@ define @vmfsac_vf_nxv2f32_unmasked( %a, ; ZVFHMIN-LABEL: vmfsac_vf_nxv2f32_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -392,9 +386,8 @@ define @vmfsac_vf_nxv4f32( %a, half %b, ; ZVFHMIN-LABEL: vmfsac_vf_nxv4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v12, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v12, 
a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma @@ -420,9 +413,8 @@ define @vmfsac_vf_nxv4f32_commute( %a, h ; ZVFHMIN-LABEL: vmfsac_vf_nxv4f32_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma @@ -449,9 +441,8 @@ define @vmfsac_vf_nxv4f32_unmasked( %a, ; ZVFHMIN-LABEL: vmfsac_vf_nxv4f32_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v12, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v12, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma @@ -529,9 +520,8 @@ define @vmfsac_vf_nxv8f32( %a, half %b, ; ZVFHMIN-LABEL: vmfsac_vf_nxv8f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v16, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v16, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma @@ -557,9 +547,8 @@ define @vmfsac_vf_nxv8f32_commute( %a, h ; ZVFHMIN-LABEL: vmfsac_vf_nxv8f32_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma @@ -586,9 +575,8 @@ define @vmfsac_vf_nxv8f32_unmasked( %a, ; ZVFHMIN-LABEL: vmfsac_vf_nxv8f32_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v16, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v16, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll index 6ea58a4e768736..3fc58acd504e52 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll @@ -71,9 +71,8 @@ define @vfnmacc_vf_nxv1f32( %a, half %b, ; ZVFHMIN-LABEL: vfnmacc_vf_nxv1f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -101,9 +100,8 @@ define @vfnmacc_vf_nxv1f32_commute( %a, ; ZVFHMIN-LABEL: vfnmacc_vf_nxv1f32_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v11, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v11, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11, v0.t ; ZVFHMIN-NEXT: vsetvli 
zero, zero, e32, mf2, ta, ma @@ -131,9 +129,8 @@ define @vfnmacc_vf_nxv1f32_unmasked( %a, ; ZVFHMIN-LABEL: vfnmacc_vf_nxv1f32_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -212,9 +209,8 @@ define @vfnmacc_vf_nxv2f32( %a, half %b, ; ZVFHMIN-LABEL: vfnmacc_vf_nxv2f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -242,9 +238,8 @@ define @vfnmacc_vf_nxv2f32_commute( %a, ; ZVFHMIN-LABEL: vfnmacc_vf_nxv2f32_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v11, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v11, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -272,9 +267,8 @@ define @vfnmacc_vf_nxv2f32_unmasked( %a, ; ZVFHMIN-LABEL: vfnmacc_vf_nxv2f32_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -355,9 +349,8 @@ define @vfnmacc_vf_nxv4f32( %a, half %b, ; ZVFHMIN-LABEL: vfnmacc_vf_nxv4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v12, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v12, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma @@ -385,9 +378,8 @@ define @vfnmacc_vf_nxv4f32_commute( %a, ; ZVFHMIN-LABEL: vfnmacc_vf_nxv4f32_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma @@ -415,9 +407,8 @@ define @vfnmacc_vf_nxv4f32_unmasked( %a, ; ZVFHMIN-LABEL: vfnmacc_vf_nxv4f32_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v12, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v12, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma @@ -498,9 +489,8 @@ define @vfnmacc_vf_nxv8f32( %a, half %b, ; ZVFHMIN-LABEL: vfnmacc_vf_nxv8f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v16, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v16, a1 ; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma @@ -528,9 +518,8 @@ define @vfnmacc_vf_nxv8f32_commute( %a, ; ZVFHMIN-LABEL: vfnmacc_vf_nxv8f32_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma @@ -558,9 +547,8 @@ define @vfnmacc_vf_nxv8f32_unmasked( %a, ; ZVFHMIN-LABEL: vfnmacc_vf_nxv8f32_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v16, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v16, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma @@ -655,9 +643,8 @@ define @vfnmacc_vf_nxv16f32( %a, half ; ZVFHMIN-LABEL: vfnmacc_vf_nxv16f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v4, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v4, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma @@ -685,9 +672,8 @@ define @vfnmacc_vf_nxv16f32_commute( % ; ZVFHMIN-LABEL: vfnmacc_vf_nxv16f32_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v4, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v4, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma @@ -715,9 +701,8 @@ define @vfnmacc_vf_nxv16f32_unmasked( ; ZVFHMIN-LABEL: vfnmacc_vf_nxv16f32_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v24, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v24, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll index 0afbe58038c76f..692a22bde48822 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll @@ -69,9 +69,8 @@ define @vfnmsac_vf_nxv1f32( %a, half %b, ; ZVFHMIN-LABEL: vfnmsac_vf_nxv1f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -97,9 +96,8 @@ define @vfnmsac_vf_nxv1f32_commute( %a, ; ZVFHMIN-LABEL: vfnmsac_vf_nxv1f32_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v11, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v11, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, 
e32, mf2, ta, ma @@ -126,9 +124,8 @@ define @vfnmsac_vf_nxv1f32_unmasked( %a, ; ZVFHMIN-LABEL: vfnmsac_vf_nxv1f32_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -204,9 +201,8 @@ define @vfnmsac_vf_nxv2f32( %a, half %b, ; ZVFHMIN-LABEL: vfnmsac_vf_nxv2f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -232,9 +228,8 @@ define @vfnmsac_vf_nxv2f32_commute( %a, ; ZVFHMIN-LABEL: vfnmsac_vf_nxv2f32_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v11, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v11, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -261,9 +256,8 @@ define @vfnmsac_vf_nxv2f32_unmasked( %a, ; ZVFHMIN-LABEL: vfnmsac_vf_nxv2f32_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -341,9 +335,8 @@ define @vfnmsac_vf_nxv4f32( %a, half %b, ; ZVFHMIN-LABEL: vfnmsac_vf_nxv4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v12, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v12, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma @@ -369,9 +362,8 @@ define @vfnmsac_vf_nxv4f32_commute( %a, ; ZVFHMIN-LABEL: vfnmsac_vf_nxv4f32_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma @@ -398,9 +390,8 @@ define @vfnmsac_vf_nxv4f32_unmasked( %a, ; ZVFHMIN-LABEL: vfnmsac_vf_nxv4f32_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v12, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v12, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma @@ -478,9 +469,8 @@ define @vfnmsac_vf_nxv8f32( %a, half %b, ; ZVFHMIN-LABEL: vfnmsac_vf_nxv8f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v16, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v16, a1 ; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v20, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma @@ -506,9 +496,8 @@ define @vfnmsac_vf_nxv8f32_commute( %a, ; ZVFHMIN-LABEL: vfnmsac_vf_nxv8f32_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma @@ -535,9 +524,8 @@ define @vfnmsac_vf_nxv8f32_unmasked( %a, ; ZVFHMIN-LABEL: vfnmsac_vf_nxv8f32_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v16, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v16, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma @@ -629,9 +617,8 @@ define @vfnmsac_vf_nxv16f32( %a, half ; ZVFHMIN-LABEL: vfnmsac_vf_nxv16f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v4, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v4, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma @@ -657,9 +644,8 @@ define @vfnmsac_vf_nxv16f32_commute( % ; ZVFHMIN-LABEL: vfnmsac_vf_nxv16f32_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v4, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v4, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma @@ -686,9 +672,8 @@ define @vfnmsac_vf_nxv16f32_unmasked( ; ZVFHMIN-LABEL: vfnmsac_vf_nxv16f32_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v24, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v24, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-no-prop.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-no-prop.ll new file mode 100644 index 00000000000000..e1f641afd2cfe0 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-no-prop.ll @@ -0,0 +1,80 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s + +declare @llvm.riscv.vadd.nxv4i32.nxv4i32(, , , iXLen) +declare @llvm.riscv.vrgather.vv.nxv4i32.iXLen( + , + , + , + iXLen) + +declare @llvm.riscv.vslidedown.nxv4i32( + , + , + iXLen, + iXLen, + iXLen); + +declare @llvm.riscv.vslide1down.nxv4i32.i32( + , + , + i32, + iXLen); + +define @vrgather( %passthru, %a, %b, iXLen %vl1, iXLen %vl2) { +; CHECK-LABEL: vrgather: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vadd.vv v12, v10, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vrgather.vv v8, v12, v10 +; CHECK-NEXT: ret + %v = add %a, %b + %w = 
call @llvm.riscv.vrgather.vv.nxv4i32.iXLen( + poison, + %v, + %a, + iXLen %vl1) + + ret %w +} + +define @vslidedown( %0, %1, iXLen %2, %a, %b) nounwind { +; CHECK-LABEL: vslidedown: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vadd.vv v10, v12, v14 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v10, a0 +; CHECK-NEXT: ret +entry: + %v = add %a, %b + %w = call @llvm.riscv.vslidedown.nxv4i32( + %0, + %v, + iXLen %2, + iXLen %2, + iXLen 1) + + ret %w +} + +define @vslide1down( %0, i32 %1, iXLen %2, %a, %b) nounwind { +; CHECK-LABEL: vslide1down: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; CHECK-NEXT: vadd.vv v8, v10, v12 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: ret +entry: + %v = add %a, %b + %w = call @llvm.riscv.vslide1down.nxv4i32.i32( + poison, + %v, + i32 %1, + iXLen %2) + + ret %w +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll new file mode 100644 index 00000000000000..4f24da3d861afe --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll @@ -0,0 +1,81 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvl512b -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvl512b -verify-machineinstrs | FileCheck %s + +define <2 x i32> @vdot_lane_s32(<2 x i32> noundef %var_1, <8 x i8> noundef %var_3, <8 x i8> noundef %var_5, <8 x i16> %x) { +; CHECK-LABEL: vdot_lane_s32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v11, 0 +; CHECK-NEXT: vnsrl.wi v9, v11, 16 +; CHECK-NEXT: vwadd.vv v10, v8, v9 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v10, 0 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vnsrl.wx v9, v10, a0 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = shufflevector <8 x i16> %x, <8 x i16> poison, <4 x i32> + %b = shufflevector <8 x i16> %x, <8 x i16> poison, <4 x i32> + %c = sext <4 x i16> %a to <4 x i32> + %d = sext <4 x i16> %b to <4 x i32> + %e = add nsw <4 x i32> %c, %d + %z10 = shufflevector <4 x i32> %e, <4 x i32> poison, <2 x i32> + %z11 = shufflevector <4 x i32> %e, <4 x i32> poison, <2 x i32> + %y12 = add <2 x i32> %z10, %z11 + ret <2 x i32> %y12 +} + +declare @llvm.riscv.vnsrl.nxv2i16.nxv2i32.nxv2i16( + , + , + , + iXLen); + +define @intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16( %a, %b, iXLen %2, %3, %4, %z) nounwind { +; CHECK-LABEL: intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vwadd.vv v10, v8, v9 +; CHECK-NEXT: vnsrl.wv v8, v10, v12 +; CHECK-NEXT: ret +entry: + %c = sext %a to + %d = sext %b to + %v1 = add %c, %d + %x = call @llvm.riscv.vnsrl.nxv2i16.nxv2i32.nxv2i16( + undef, + %v1, + %z, + iXLen %2) + + ret %x +} + +declare @llvm.riscv.vnclip.nxv2i16.nxv2i32.nxv2i16( + , + , + , + iXLen, iXLen); + +define @vnclip( %a, %b, iXLen %2, %3, %4, %z) nounwind { +; CHECK-LABEL: vnclip: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vwadd.vv v10, v8, v9 +; CHECK-NEXT: csrwi vxrm, 0 +; CHECK-NEXT: vnclip.wv v8, v10, v12 +; CHECK-NEXT: ret +entry: + %c = sext %a to + %d = sext %b to + %v1 = add %c, %d + %x = call 
@llvm.riscv.vnclip.nxv2i16.nxv2i32.nxv2i16( + undef, + %v1, + %z, + iXLen 0, iXLen %2) + + ret %x +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll new file mode 100644 index 00000000000000..b03ba076059503 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll @@ -0,0 +1,106 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s + +declare @llvm.riscv.vadd.nxv4i32.nxv4i32(, , , iXLen) + +define @different_imm_vl_with_ta( %passthru, %a, %b, iXLen %vl1, iXLen %vl2) { +; CHECK-LABEL: different_imm_vl_with_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma +; CHECK-NEXT: vadd.vv v8, v10, v12 +; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %a, %b, iXLen 5) + %w = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %v, %a, iXLen 4) + ret %w +} + +; Not beneficial to propagate VL since VL is larger on the use side. +define @different_imm_vl_with_ta_larger_vl( %passthru, %a, %b, iXLen %vl1, iXLen %vl2) { +; CHECK-LABEL: different_imm_vl_with_ta_larger_vl: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma +; CHECK-NEXT: vadd.vv v8, v10, v12 +; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %a, %b, iXLen 4) + %w = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %v, %a, iXLen 5) + ret %w +} + +define @different_imm_reg_vl_with_ta( %passthru, %a, %b, iXLen %vl1, iXLen %vl2) { +; CHECK-LABEL: different_imm_reg_vl_with_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma +; CHECK-NEXT: vadd.vv v8, v10, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %a, %b, iXLen 4) + %w = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %v, %a, iXLen %vl1) + ret %w +} + + +; Not beneficial to propagate VL since VL is already one. +define @different_imm_vl_with_ta_1( %passthru, %a, %b, iXLen %vl1, iXLen %vl2) { +; CHECK-LABEL: different_imm_vl_with_ta_1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vadd.vv v8, v10, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %a, %b, iXLen 1) + %w = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %v, %a, iXLen %vl1) + ret %w +} + +; Propagate %vl2 to the last instruction since it may be smaller than %vl1. +; It is still safe even if %vl2 is larger than %vl1, because the rest of the +; vector is undefined. +define @different_vl_with_ta( %a, %b, iXLen %vl1, iXLen %vl2) { +; CHECK-LABEL: different_vl_with_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vadd.vv v10, v8, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: ret + %v = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %a, %b, iXLen %vl1) + %w = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %v, %a, iXLen %vl2) + ret %w +} + +; Test case to make sure VL won't be propagated when using the tail-undisturbed policy.
+define @different_vl_with_tu( %passthru, %a, %b, iXLen %vl1, iXLen %vl2) { +; CHECK-LABEL: different_vl_with_tu: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv2r.v v14, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma +; CHECK-NEXT: vadd.vv v14, v10, v12 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma +; CHECK-NEXT: vadd.vv v8, v14, v10 +; CHECK-NEXT: ret + %v = call @llvm.riscv.vadd.nxv4i32.nxv4i32( %a, %a, %b, iXLen %vl1) + %w = call @llvm.riscv.vadd.nxv4i32.nxv4i32( %passthru, %v, %a, iXLen %vl2) + ret %w +} + +; Test case to make sure VL won't be propagated when using the tail-undisturbed policy. +define @different_imm_vl_with_tu( %passthru, %a, %b, iXLen %vl1, iXLen %vl2) { +; CHECK-LABEL: different_imm_vl_with_tu: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv2r.v v14, v10 +; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-NEXT: vadd.vv v14, v10, v12 +; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-NEXT: vadd.vv v8, v14, v10 +; CHECK-NEXT: ret + %v = call @llvm.riscv.vadd.nxv4i32.nxv4i32( %a, %a, %b, iXLen 5) + %w = call @llvm.riscv.vadd.nxv4i32.nxv4i32( %passthru, %v, %a, iXLen 4) + ret %w +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll index f65e708f5303cc..cc02f54cda21a1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll @@ -12,9 +12,7 @@ define @vmax_vx_nxv8i7( %a, i7 signext %b, @vmaxu_vx_nxv8i7( %a, i7 signext %b, < ; CHECK-NEXT: li a2, 127 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a2, v0.t -; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vand.vx v9, v9, a2, v0.t ; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll index 0bf0638633aa45..f1edd99aa0b0dc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll @@ -12,9 +12,7 @@ define @vmin_vx_nxv8i7( %a, i7 signext %b, @vminu_vx_nxv8i7( %a, i7 signext %b, < ; CHECK-NEXT: li a2, 127 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a2, v0.t -; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vand.vx v9, v9, a2, v0.t ; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll index 51026cbcb8c4bf..0adee0157c8f38 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll @@ -1440,11 +1440,10 @@ define @vmul_vadd_vx_nxv8i64_unmasked( %va, ; CHECK-LABEL: vmul_vadd_vx_nxv8i64_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 21 -; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: li a1, 7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmadd.vx v8, a1, v16 +; CHECK-NEXT: vmv.v.x v16, a1 +; CHECK-NEXT: li a0, 7 +; CHECK-NEXT: vmadd.vx v8, a0, v16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll index c0d7ecf74956b9..5a9265fa207b28 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll @@ -231,17 +231,17 @@ define @vpgather_nxv8i8( %ptrs, @vpgather_baseidx_nxv8i8(ptr %base, 
%idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv8i8: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v12, v8 -; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv8i8: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v8 -; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i8, ptr %base, %idxs @@ -264,18 +264,18 @@ define @vpgather_baseidx_nxv32i8(ptr %base, @vpgather_baseidx_nxv32i8(ptr %base, @vpgather_baseidx_nxv32i8(ptr %base, %idxs @@ -525,19 +525,19 @@ define @vpgather_nxv8i16( %ptrs, @vpgather_baseidx_nxv8i8_nxv8i16(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v12, v8 ; RV32-NEXT: vadd.vv v12, v12, v12 -; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vadd.vv v16, v16, v16 -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, %idxs @@ -548,19 +548,19 @@ define @vpgather_baseidx_nxv8i8_nxv8i16(ptr %base, @vpgather_baseidx_sext_nxv8i8_nxv8i16(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v12, v8 ; RV32-NEXT: vadd.vv v12, v12, v12 -; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vadd.vv v16, v16, v16 -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -572,17 +572,17 @@ define @vpgather_baseidx_sext_nxv8i8_nxv8i16(ptr %base, @vpgather_baseidx_zext_nxv8i8_nxv8i16(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; RV32-NEXT: vwaddu.vv v10, v8, v8 -; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vluxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; RV64-NEXT: vwaddu.vv v10, v8, v8 -; RV64-NEXT: 
vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vluxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -594,18 +594,17 @@ define @vpgather_baseidx_zext_nxv8i8_nxv8i16(ptr %base, @vpgather_baseidx_nxv8i16(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv8i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; RV32-NEXT: vwadd.vv v12, v8, v8 ; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwadd.vv v12, v8, v8 ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv8i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v8 ; RV64-NEXT: vadd.vv v16, v16, v16 -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, %idxs @@ -751,19 +750,18 @@ define @vpgather_nxv8i32( %ptrs, @vpgather_baseidx_nxv8i8_nxv8i32(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v12, v8 ; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i32, ptr %base, %idxs @@ -774,19 +772,18 @@ define @vpgather_baseidx_nxv8i8_nxv8i32(ptr %base, @vpgather_baseidx_sext_nxv8i8_nxv8i32(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v12, v8 ; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -798,19 +795,19 @@ define @vpgather_baseidx_sext_nxv8i8_nxv8i32(ptr %base, @vpgather_baseidx_zext_nxv8i8_nxv8i32(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; RV32-NEXT: vzext.vf2 v10, v8 ; RV32-NEXT: vsll.vi v12, v10, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32-NEXT: vluxei16.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; 
RV64-NEXT: vzext.vf2 v10, v8 ; RV64-NEXT: vsll.vi v12, v10, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vluxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -822,19 +819,18 @@ define @vpgather_baseidx_zext_nxv8i8_nxv8i32(ptr %base, @vpgather_baseidx_nxv8i16_nxv8i32(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf2 v12, v8 ; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v8 ; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i32, ptr %base, %idxs @@ -845,19 +841,18 @@ define @vpgather_baseidx_nxv8i16_nxv8i32(ptr %base, @vpgather_baseidx_sext_nxv8i16_nxv8i32(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf2 v12, v8 ; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v8 ; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -869,19 +864,17 @@ define @vpgather_baseidx_sext_nxv8i16_nxv8i32(ptr %base, @vpgather_baseidx_zext_nxv8i16_nxv8i32(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vzext.vf2 v12, v8 ; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV64-NEXT: vzext.vf2 v12, v8 ; RV64-NEXT: vsll.vi v8, v12, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV64-NEXT: vluxei32.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -893,18 +886,17 @@ define @vpgather_baseidx_zext_nxv8i16_nxv8i32(ptr %base, @vpgather_baseidx_nxv8i32(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf2 v16, v8 ; 
RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i32, ptr %base, %idxs @@ -1008,19 +1000,18 @@ define @vpgather_nxv8i64( %ptrs, @vpgather_baseidx_nxv8i8_nxv8i64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v12, v8 ; RV32-NEXT: vsll.vi v16, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i64, ptr %base, %idxs @@ -1031,19 +1022,18 @@ define @vpgather_baseidx_nxv8i8_nxv8i64(ptr %base, @vpgather_baseidx_sext_nxv8i8_nxv8i64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v12, v8 ; RV32-NEXT: vsll.vi v16, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -1055,19 +1045,19 @@ define @vpgather_baseidx_sext_nxv8i8_nxv8i64(ptr %base, @vpgather_baseidx_zext_nxv8i8_nxv8i64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; RV32-NEXT: vzext.vf2 v10, v8 ; RV32-NEXT: vsll.vi v16, v10, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei16.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; RV64-NEXT: vzext.vf2 v10, v8 ; RV64-NEXT: vsll.vi v16, v10, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vluxei16.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -1079,19 +1069,18 @@ define @vpgather_baseidx_zext_nxv8i8_nxv8i64(ptr %base, @vpgather_baseidx_nxv8i16_nxv8i64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf2 v12, v8 ; RV32-NEXT: vsll.vi v16, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; 
RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v8 ; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i64, ptr %base, %idxs @@ -1102,19 +1091,18 @@ define @vpgather_baseidx_nxv8i16_nxv8i64(ptr %base, @vpgather_baseidx_sext_nxv8i16_nxv8i64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf2 v12, v8 ; RV32-NEXT: vsll.vi v16, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v8 ; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -1126,19 +1114,19 @@ define @vpgather_baseidx_sext_nxv8i16_nxv8i64(ptr %base, @vpgather_baseidx_zext_nxv8i16_nxv8i64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vzext.vf2 v12, v8 ; RV32-NEXT: vsll.vi v16, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV64-NEXT: vzext.vf2 v12, v8 ; RV64-NEXT: vsll.vi v16, v12, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -1150,18 +1138,17 @@ define @vpgather_baseidx_zext_nxv8i16_nxv8i64(ptr %base, @vpgather_baseidx_nxv8i32_nxv8i64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv8i32_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsll.vi v16, v8, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv8i32_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf2 v16, v8 ; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i64, ptr %base, %idxs @@ -1172,18 +1159,17 @@ define @vpgather_baseidx_nxv8i32_nxv8i64(ptr %base, @vpgather_baseidx_sext_nxv8i32_nxv8i64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsll.vi v16, v8, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 
+; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf2 v16, v8 ; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -1195,18 +1181,17 @@ define @vpgather_baseidx_sext_nxv8i32_nxv8i64(ptr %base, @vpgather_baseidx_zext_nxv8i32_nxv8i64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsll.vi v16, v8, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vzext.vf2 v16, v8 ; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -1218,18 +1203,17 @@ define @vpgather_baseidx_zext_nxv8i32_nxv8i64(ptr %base, @vpgather_baseidx_nxv8i64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vnsrl.wi v16, v8, 0 ; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vsll.vi v8, v8, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsll.vi v8, v8, 3 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i64, ptr %base, %idxs @@ -1338,19 +1322,19 @@ define @vpgather_nxv8f16( %ptrs, @vpgather_baseidx_nxv8i8_nxv8f16(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8f16: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v12, v8 ; RV32-NEXT: vadd.vv v12, v12, v12 -; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f16: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vadd.vv v16, v16, v16 -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds half, ptr %base, %idxs @@ -1361,19 +1345,19 @@ define @vpgather_baseidx_nxv8i8_nxv8f16(ptr %base, @vpgather_baseidx_sext_nxv8i8_nxv8f16(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f16: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v12, v8 ; RV32-NEXT: vadd.vv v12, v12, 
v12 -; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f16: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vadd.vv v16, v16, v16 -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -1385,17 +1369,17 @@ define @vpgather_baseidx_sext_nxv8i8_nxv8f16(ptr %base, @vpgather_baseidx_zext_nxv8i8_nxv8f16(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f16: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; RV32-NEXT: vwaddu.vv v10, v8, v8 -; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vluxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f16: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; RV64-NEXT: vwaddu.vv v10, v8, v8 -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vluxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -1407,18 +1391,17 @@ define @vpgather_baseidx_zext_nxv8i8_nxv8f16(ptr %base, @vpgather_baseidx_nxv8f16(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv8f16: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; RV32-NEXT: vwadd.vv v12, v8, v8 ; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwadd.vv v12, v8, v8 ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv8f16: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v8 ; RV64-NEXT: vadd.vv v16, v16, v16 -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds half, ptr %base, %idxs @@ -1522,19 +1505,18 @@ define @vpgather_nxv8f32( %ptrs, @vpgather_baseidx_nxv8i8_nxv8f32(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v12, v8 ; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds float, ptr %base, %idxs @@ -1545,19 +1527,18 @@ define @vpgather_baseidx_nxv8i8_nxv8f32(ptr %base, @vpgather_baseidx_sext_nxv8i8_nxv8f32(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, 
m4, ta, ma ; RV32-NEXT: vsext.vf4 v12, v8 ; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -1569,19 +1550,19 @@ define @vpgather_baseidx_sext_nxv8i8_nxv8f32(ptr %base, @vpgather_baseidx_zext_nxv8i8_nxv8f32(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; RV32-NEXT: vzext.vf2 v10, v8 ; RV32-NEXT: vsll.vi v12, v10, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32-NEXT: vluxei16.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; RV64-NEXT: vzext.vf2 v10, v8 ; RV64-NEXT: vsll.vi v12, v10, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vluxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -1593,19 +1574,18 @@ define @vpgather_baseidx_zext_nxv8i8_nxv8f32(ptr %base, @vpgather_baseidx_nxv8i16_nxv8f32(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf2 v12, v8 ; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v8 ; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds float, ptr %base, %idxs @@ -1616,19 +1596,18 @@ define @vpgather_baseidx_nxv8i16_nxv8f32(ptr %base, @vpgather_baseidx_sext_nxv8i16_nxv8f32(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf2 v12, v8 ; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v8 ; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -1640,19 +1619,17 @@ define @vpgather_baseidx_sext_nxv8i16_nxv8f32(ptr %base, @vpgather_baseidx_zext_nxv8i16_nxv8f32(ptr %base, %idxs, %m, i32 zeroext %evl) { 
; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vzext.vf2 v12, v8 ; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV64-NEXT: vzext.vf2 v12, v8 ; RV64-NEXT: vsll.vi v8, v12, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV64-NEXT: vluxei32.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -1664,18 +1641,17 @@ define @vpgather_baseidx_zext_nxv8i16_nxv8f32(ptr %base, @vpgather_baseidx_nxv8f32(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf2 v16, v8 ; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds float, ptr %base, %idxs @@ -1779,19 +1755,18 @@ define @vpgather_nxv6f64( %ptrs, @vpgather_baseidx_nxv6i8_nxv6f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv6i8_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v12, v8 ; RV32-NEXT: vsll.vi v16, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv6i8_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, %idxs @@ -1802,19 +1777,18 @@ define @vpgather_baseidx_nxv6i8_nxv6f64(ptr %base, @vpgather_baseidx_sext_nxv6i8_nxv6f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv6i8_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v12, v8 ; RV32-NEXT: vsll.vi v16, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_nxv6i8_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -1826,19 +1800,19 @@ define @vpgather_baseidx_sext_nxv6i8_nxv6f64(ptr %base, @vpgather_baseidx_zext_nxv6i8_nxv6f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: 
vpgather_baseidx_zext_nxv6i8_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; RV32-NEXT: vzext.vf2 v10, v8 ; RV32-NEXT: vsll.vi v16, v10, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei16.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv6i8_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; RV64-NEXT: vzext.vf2 v10, v8 ; RV64-NEXT: vsll.vi v16, v10, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vluxei16.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -1850,19 +1824,18 @@ define @vpgather_baseidx_zext_nxv6i8_nxv6f64(ptr %base, @vpgather_baseidx_nxv6i16_nxv6f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv6i16_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf2 v12, v8 ; RV32-NEXT: vsll.vi v16, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv6i16_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v8 ; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, %idxs @@ -1873,19 +1846,18 @@ define @vpgather_baseidx_nxv6i16_nxv6f64(ptr %base, @vpgather_baseidx_sext_nxv6i16_nxv6f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv6i16_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf2 v12, v8 ; RV32-NEXT: vsll.vi v16, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_nxv6i16_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v8 ; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -1897,19 +1869,19 @@ define @vpgather_baseidx_sext_nxv6i16_nxv6f64(ptr %base, < define @vpgather_baseidx_zext_nxv6i16_nxv6f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv6i16_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vzext.vf2 v12, v8 ; RV32-NEXT: vsll.vi v16, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv6i16_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV64-NEXT: vzext.vf2 v12, v8 ; RV64-NEXT: vsll.vi v16, v12, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vluxei32.v v8, (a0), v16, 
v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -1921,18 +1893,17 @@ define @vpgather_baseidx_zext_nxv6i16_nxv6f64(ptr %base, < define @vpgather_baseidx_nxv6i32_nxv6f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv6i32_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsll.vi v16, v8, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv6i32_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf2 v16, v8 ; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, %idxs @@ -1943,18 +1914,17 @@ define @vpgather_baseidx_nxv6i32_nxv6f64(ptr %base, @vpgather_baseidx_sext_nxv6i32_nxv6f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv6i32_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsll.vi v16, v8, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_nxv6i32_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf2 v16, v8 ; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -1966,18 +1936,17 @@ define @vpgather_baseidx_sext_nxv6i32_nxv6f64(ptr %base, < define @vpgather_baseidx_zext_nxv6i32_nxv6f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv6i32_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsll.vi v16, v8, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv6i32_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vzext.vf2 v16, v8 ; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -1989,18 +1958,17 @@ define @vpgather_baseidx_zext_nxv6i32_nxv6f64(ptr %base, < define @vpgather_baseidx_nxv6f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vnsrl.wi v16, v8, 0 ; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vsll.vi v8, v8, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsll.vi v8, v8, 3 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %ptrs = getelementptr 
inbounds double, ptr %base, %idxs @@ -2030,19 +1998,18 @@ define @vpgather_nxv8f64( %ptrs, @vpgather_baseidx_nxv8i8_nxv8f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v12, v8 ; RV32-NEXT: vsll.vi v16, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, %idxs @@ -2053,19 +2020,18 @@ define @vpgather_baseidx_nxv8i8_nxv8f64(ptr %base, @vpgather_baseidx_sext_nxv8i8_nxv8f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v12, v8 ; RV32-NEXT: vsll.vi v16, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -2077,19 +2043,19 @@ define @vpgather_baseidx_sext_nxv8i8_nxv8f64(ptr %base, @vpgather_baseidx_zext_nxv8i8_nxv8f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; RV32-NEXT: vzext.vf2 v10, v8 ; RV32-NEXT: vsll.vi v16, v10, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei16.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; RV64-NEXT: vzext.vf2 v10, v8 ; RV64-NEXT: vsll.vi v16, v10, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vluxei16.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -2101,19 +2067,18 @@ define @vpgather_baseidx_zext_nxv8i8_nxv8f64(ptr %base, @vpgather_baseidx_nxv8i16_nxv8f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf2 v12, v8 ; RV32-NEXT: vsll.vi v16, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v8 ; RV64-NEXT: 
vsll.vi v8, v16, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, %idxs @@ -2124,19 +2089,18 @@ define @vpgather_baseidx_nxv8i16_nxv8f64(ptr %base, @vpgather_baseidx_sext_nxv8i16_nxv8f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf2 v12, v8 ; RV32-NEXT: vsll.vi v16, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v8 ; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -2148,19 +2112,19 @@ define @vpgather_baseidx_sext_nxv8i16_nxv8f64(ptr %base, < define @vpgather_baseidx_zext_nxv8i16_nxv8f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vzext.vf2 v12, v8 ; RV32-NEXT: vsll.vi v16, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV64-NEXT: vzext.vf2 v12, v8 ; RV64-NEXT: vsll.vi v16, v12, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -2172,18 +2136,17 @@ define @vpgather_baseidx_zext_nxv8i16_nxv8f64(ptr %base, < define @vpgather_baseidx_nxv8i32_nxv8f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv8i32_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsll.vi v16, v8, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv8i32_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf2 v16, v8 ; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, %idxs @@ -2194,18 +2157,17 @@ define @vpgather_baseidx_nxv8i32_nxv8f64(ptr %base, @vpgather_baseidx_sext_nxv8i32_nxv8f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsll.vi v16, v8, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8f64: ; RV64: # 
%bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf2 v16, v8 ; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -2217,18 +2179,17 @@ define @vpgather_baseidx_sext_nxv8i32_nxv8f64(ptr %base, < define @vpgather_baseidx_zext_nxv8i32_nxv8f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsll.vi v16, v8, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vzext.vf2 v16, v8 ; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -2240,18 +2201,17 @@ define @vpgather_baseidx_zext_nxv8i32_nxv8f64(ptr %base, < define @vpgather_baseidx_nxv8f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vnsrl.wi v16, v8, 0 ; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vsll.vi v8, v8, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsll.vi v8, v8, 3 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, %idxs diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll index 59662db42898fc..bf8653070141d9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll @@ -43,17 +43,15 @@ define void @vpscatter_nxv2i8( %val, %ptrs, define void @vpscatter_nxv2i16_truncstore_nxv2i8( %val, %ptrs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_nxv2i16_truncstore_nxv2i8: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; RV32-NEXT: vnsrl.wi v8, v8, 0 ; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; RV32-NEXT: vnsrl.wi v8, v8, 0 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_nxv2i16_truncstore_nxv2i8: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; RV64-NEXT: vnsrl.wi v8, v8, 0 ; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; RV64-NEXT: vnsrl.wi v8, v8, 0 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t ; RV64-NEXT: ret %tval = trunc %val to @@ -64,21 +62,19 @@ define void @vpscatter_nxv2i16_truncstore_nxv2i8( %val, %val, %ptrs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_nxv2i32_truncstore_nxv2i8: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; RV32-NEXT: vnsrl.wi v8, v8, 0 ; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV32-NEXT: vnsrl.wi v8, v8, 0 -; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; RV32-NEXT: vsoxei32.v v8, (zero), 
v9, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_nxv2i32_truncstore_nxv2i8: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; RV64-NEXT: vnsrl.wi v8, v8, 0 ; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV64-NEXT: vnsrl.wi v8, v8, 0 -; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t ; RV64-NEXT: ret %tval = trunc %val to @@ -89,25 +85,23 @@ define void @vpscatter_nxv2i32_truncstore_nxv2i8( %val, %val, %ptrs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i8: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; RV32-NEXT: vnsrl.wi v11, v8, 0 ; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV32-NEXT: vnsrl.wi v8, v11, 0 ; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV32-NEXT: vnsrl.wi v8, v8, 0 -; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i8: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; RV64-NEXT: vnsrl.wi v12, v8, 0 ; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64-NEXT: vnsrl.wi v8, v12, 0 ; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV64-NEXT: vnsrl.wi v8, v8, 0 -; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t ; RV64-NEXT: ret %tval = trunc %val to @@ -170,17 +164,17 @@ define void @vpscatter_nxv8i8( %val, %ptrs, define void @vpscatter_baseidx_nxv8i8( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv8i8: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v12, v9 -; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv8i8: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v9 -; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i8, ptr %base, %idxs @@ -227,17 +221,15 @@ define void @vpscatter_nxv2i16( %val, %ptrs define void @vpscatter_nxv2i32_truncstore_nxv2i16( %val, %ptrs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_nxv2i32_truncstore_nxv2i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; RV32-NEXT: vnsrl.wi v8, v8, 0 ; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; RV32-NEXT: vnsrl.wi v8, v8, 0 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_nxv2i32_truncstore_nxv2i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; RV64-NEXT: vnsrl.wi v8, v8, 0 ; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; RV64-NEXT: vnsrl.wi v8, v8, 0 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t ; RV64-NEXT: ret %tval = trunc %val to @@ -248,21 +240,19 @@ define void @vpscatter_nxv2i32_truncstore_nxv2i16( %val, %val, %ptrs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; RV32-NEXT: vnsrl.wi v11, v8, 0 ; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV32-NEXT: 
vnsrl.wi v8, v11, 0 -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; RV64-NEXT: vnsrl.wi v12, v8, 0 ; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64-NEXT: vnsrl.wi v8, v12, 0 -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t ; RV64-NEXT: ret %tval = trunc %val to @@ -325,19 +315,19 @@ define void @vpscatter_nxv8i16( %val, %ptrs define void @vpscatter_baseidx_nxv8i8_nxv8i16( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v12, v10 ; RV32-NEXT: vadd.vv v12, v12, v12 -; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v10 ; RV64-NEXT: vadd.vv v16, v16, v16 -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, %idxs @@ -348,19 +338,19 @@ define void @vpscatter_baseidx_nxv8i8_nxv8i16( %val, ptr %base define void @vpscatter_baseidx_sext_nxv8i8_nxv8i16( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v12, v10 ; RV32-NEXT: vadd.vv v12, v12, v12 -; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v10 ; RV64-NEXT: vadd.vv v16, v16, v16 -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -372,17 +362,17 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8i16( %val, ptr define void @vpscatter_baseidx_zext_nxv8i8_nxv8i16( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; RV32-NEXT: vwaddu.vv v12, v10, v10 -; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; RV64-NEXT: vwaddu.vv v12, v10, v10 -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -394,18 +384,17 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8i16( %val, ptr define 
void @vpscatter_baseidx_nxv8i16( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv8i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; RV32-NEXT: vwadd.vv v12, v10, v10 ; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwadd.vv v12, v10, v10 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv8i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v10 ; RV64-NEXT: vadd.vv v16, v16, v16 -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, %idxs @@ -426,9 +415,8 @@ define void @vpscatter_baseidx_vpsext_nxv8i16_nxv8i16( %val, p ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV64-NEXT: vsext.vf2 v12, v10, v0.t -; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma ; RV64-NEXT: vwadd.vv v16, v12, v12 -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = call @llvm.vp.sext.nxv8i16.nxv8i32( %idxs, %m, i32 %evl) @@ -450,9 +438,8 @@ define void @vpscatter_baseidx_vpzext_nxv8i16_nxv8i16( %val, p ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV64-NEXT: vzext.vf2 v12, v10, v0.t -; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma ; RV64-NEXT: vwadd.vv v16, v12, v12 -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = call @llvm.vp.zext.nxv8i16.nxv8i32( %idxs, %m, i32 %evl) @@ -467,10 +454,10 @@ define void @vpscatter_baseidx_vpsext_nxv8i32_nxv8i16( %val, p ; RV32: # %bb.0: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsext.vf2 v16, v12, v0.t -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32-NEXT: vnsrl.wi v12, v16, 0 ; RV32-NEXT: vadd.vv v12, v12, v12 -; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; @@ -493,10 +480,10 @@ define void @vpscatter_baseidx_vpzext_nxv8i32_nxv8i16( %val, p ; RV32: # %bb.0: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vzext.vf2 v16, v12, v0.t -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32-NEXT: vnsrl.wi v12, v16, 0 ; RV32-NEXT: vadd.vv v12, v12, v12 -; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; @@ -552,17 +539,15 @@ define void @vpscatter_nxv2i32( %val, %ptrs define void @vpscatter_nxv2i64_truncstore_nxv2i32( %val, %ptrs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV32-NEXT: vnsrl.wi v11, v8, 0 ; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; RV32-NEXT: vnsrl.wi v11, v8, 0 ; RV32-NEXT: vsoxei32.v v11, (zero), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV64-NEXT: vnsrl.wi v12, v8, 0 ; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; RV64-NEXT: vnsrl.wi v12, v8, 0 ; RV64-NEXT: vsoxei64.v v12, (zero), v10, v0.t ; 
RV64-NEXT: ret %tval = trunc %val to @@ -625,19 +610,18 @@ define void @vpscatter_nxv8i32( %val, %ptrs define void @vpscatter_baseidx_nxv8i8_nxv8i32( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v16, v12 ; RV32-NEXT: vsll.vi v12, v16, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v12 ; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i32, ptr %base, %idxs @@ -648,19 +632,18 @@ define void @vpscatter_baseidx_nxv8i8_nxv8i32( %val, ptr %base define void @vpscatter_baseidx_sext_nxv8i8_nxv8i32( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v16, v12 ; RV32-NEXT: vsll.vi v12, v16, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v12 ; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -672,19 +655,19 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8i32( %val, ptr define void @vpscatter_baseidx_zext_nxv8i8_nxv8i32( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; RV32-NEXT: vzext.vf2 v14, v12 ; RV32-NEXT: vsll.vi v12, v14, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; RV64-NEXT: vzext.vf2 v14, v12 ; RV64-NEXT: vsll.vi v12, v14, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -696,19 +679,18 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8i32( %val, ptr define void @vpscatter_baseidx_nxv8i16_nxv8i32( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf2 v16, v12 ; RV32-NEXT: vsll.vi v12, v16, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: 
vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v12 ; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i32, ptr %base, %idxs @@ -719,19 +701,18 @@ define void @vpscatter_baseidx_nxv8i16_nxv8i32( %val, ptr %bas define void @vpscatter_baseidx_sext_nxv8i16_nxv8i32( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf2 v16, v12 ; RV32-NEXT: vsll.vi v12, v16, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v12 ; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -743,19 +724,17 @@ define void @vpscatter_baseidx_sext_nxv8i16_nxv8i32( %val, ptr define void @vpscatter_baseidx_zext_nxv8i16_nxv8i32( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vzext.vf2 v16, v12 ; RV32-NEXT: vsll.vi v12, v16, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV64-NEXT: vzext.vf2 v16, v12 ; RV64-NEXT: vsll.vi v12, v16, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -767,18 +746,17 @@ define void @vpscatter_baseidx_zext_nxv8i16_nxv8i32( %val, ptr define void @vpscatter_baseidx_nxv8i32( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vsll.vi v12, v12, 2 ; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV32-NEXT: vsll.vi v12, v12, 2 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf2 v16, v12 ; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i32, ptr %base, %idxs @@ -877,19 +855,18 @@ define void @vpscatter_nxv8i64( %val, %ptrs define void @vpscatter_baseidx_nxv8i8_nxv8i64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v20, v16 ; RV32-NEXT: vsll.vi v16, v20, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), 
v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i64, ptr %base, %idxs @@ -900,19 +877,18 @@ define void @vpscatter_baseidx_nxv8i8_nxv8i64( %val, ptr %base define void @vpscatter_baseidx_sext_nxv8i8_nxv8i64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v20, v16 ; RV32-NEXT: vsll.vi v16, v20, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -924,19 +900,19 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8i64( %val, ptr define void @vpscatter_baseidx_zext_nxv8i8_nxv8i64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; RV32-NEXT: vzext.vf2 v18, v16 ; RV32-NEXT: vsll.vi v16, v18, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei16.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; RV64-NEXT: vzext.vf2 v18, v16 ; RV64-NEXT: vsll.vi v16, v18, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vsoxei16.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -948,19 +924,18 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8i64( %val, ptr define void @vpscatter_baseidx_nxv8i16_nxv8i64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf2 v20, v16 ; RV32-NEXT: vsll.vi v16, v20, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i64, ptr %base, %idxs @@ -971,19 +946,18 @@ define void @vpscatter_baseidx_nxv8i16_nxv8i64( %val, ptr %bas define void @vpscatter_baseidx_sext_nxv8i16_nxv8i64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i64: ; RV32: # 
%bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf2 v20, v16 ; RV32-NEXT: vsll.vi v16, v20, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -995,19 +969,19 @@ define void @vpscatter_baseidx_sext_nxv8i16_nxv8i64( %val, ptr define void @vpscatter_baseidx_zext_nxv8i16_nxv8i64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vzext.vf2 v20, v16 ; RV32-NEXT: vsll.vi v16, v20, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV64-NEXT: vzext.vf2 v20, v16 ; RV64-NEXT: vsll.vi v16, v20, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -1019,18 +993,17 @@ define void @vpscatter_baseidx_zext_nxv8i16_nxv8i64( %val, ptr define void @vpscatter_baseidx_nxv8i32_nxv8i64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv8i32_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv8i32_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf2 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i64, ptr %base, %idxs @@ -1041,18 +1014,17 @@ define void @vpscatter_baseidx_nxv8i32_nxv8i64( %val, ptr %bas define void @vpscatter_baseidx_sext_nxv8i32_nxv8i64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf2 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -1064,18 +1036,17 @@ 
define void @vpscatter_baseidx_sext_nxv8i32_nxv8i64( %val, ptr define void @vpscatter_baseidx_zext_nxv8i32_nxv8i64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vzext.vf2 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -1087,18 +1058,17 @@ define void @vpscatter_baseidx_zext_nxv8i32_nxv8i64( %val, ptr define void @vpscatter_baseidx_nxv8i64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vnsrl.wi v24, v16, 0 ; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i64, ptr %base, %idxs @@ -1197,19 +1167,19 @@ define void @vpscatter_nxv8f16( %val, %ptr define void @vpscatter_baseidx_nxv8i8_nxv8f16( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f16: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v12, v10 ; RV32-NEXT: vadd.vv v12, v12, v12 -; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f16: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v10 ; RV64-NEXT: vadd.vv v16, v16, v16 -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds half, ptr %base, %idxs @@ -1220,19 +1190,19 @@ define void @vpscatter_baseidx_nxv8i8_nxv8f16( %val, ptr %bas define void @vpscatter_baseidx_sext_nxv8i8_nxv8f16( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f16: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v12, v10 ; RV32-NEXT: vadd.vv v12, v12, v12 -; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f16: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v10 ; 
RV64-NEXT: vadd.vv v16, v16, v16 -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -1244,17 +1214,17 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8f16( %val, ptr define void @vpscatter_baseidx_zext_nxv8i8_nxv8f16( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f16: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; RV32-NEXT: vwaddu.vv v12, v10, v10 -; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f16: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; RV64-NEXT: vwaddu.vv v12, v10, v10 -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -1266,18 +1236,17 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8f16( %val, ptr define void @vpscatter_baseidx_nxv8f16( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv8f16: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; RV32-NEXT: vwadd.vv v12, v10, v10 ; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwadd.vv v12, v10, v10 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv8f16: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v10 ; RV64-NEXT: vadd.vv v16, v16, v16 -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds half, ptr %base, %idxs @@ -1376,19 +1345,18 @@ define void @vpscatter_nxv8f32( %val, %pt define void @vpscatter_baseidx_nxv8i8_nxv8f32( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v16, v12 ; RV32-NEXT: vsll.vi v12, v16, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v12 ; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds float, ptr %base, %idxs @@ -1399,19 +1367,18 @@ define void @vpscatter_baseidx_nxv8i8_nxv8f32( %val, ptr %ba define void @vpscatter_baseidx_sext_nxv8i8_nxv8f32( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v16, v12 ; RV32-NEXT: vsll.vi v12, v16, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: 
vpscatter_baseidx_sext_nxv8i8_nxv8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v12 ; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -1423,19 +1390,19 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8f32( %val, pt define void @vpscatter_baseidx_zext_nxv8i8_nxv8f32( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; RV32-NEXT: vzext.vf2 v14, v12 ; RV32-NEXT: vsll.vi v12, v14, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; RV64-NEXT: vzext.vf2 v14, v12 ; RV64-NEXT: vsll.vi v12, v14, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -1447,19 +1414,18 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8f32( %val, pt define void @vpscatter_baseidx_nxv8i16_nxv8f32( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf2 v16, v12 ; RV32-NEXT: vsll.vi v12, v16, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v12 ; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds float, ptr %base, %idxs @@ -1470,19 +1436,18 @@ define void @vpscatter_baseidx_nxv8i16_nxv8f32( %val, ptr %b define void @vpscatter_baseidx_sext_nxv8i16_nxv8f32( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf2 v16, v12 ; RV32-NEXT: vsll.vi v12, v16, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v12 ; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -1494,19 +1459,17 @@ define void @vpscatter_baseidx_sext_nxv8i16_nxv8f32( %val, p define void @vpscatter_baseidx_zext_nxv8i16_nxv8f32( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f32: ; RV32: # %bb.0: -; 
RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vzext.vf2 v16, v12 ; RV32-NEXT: vsll.vi v12, v16, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV64-NEXT: vzext.vf2 v16, v12 ; RV64-NEXT: vsll.vi v12, v16, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -1518,18 +1481,17 @@ define void @vpscatter_baseidx_zext_nxv8i16_nxv8f32( %val, p define void @vpscatter_baseidx_nxv8f32( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vsll.vi v12, v12, 2 ; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV32-NEXT: vsll.vi v12, v12, 2 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf2 v16, v12 ; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds float, ptr %base, %idxs @@ -1628,19 +1590,18 @@ define void @vpscatter_nxv6f64( %val, %p define void @vpscatter_baseidx_nxv6i8_nxv6f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv6i8_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v20, v16 ; RV32-NEXT: vsll.vi v16, v20, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv6i8_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, %idxs @@ -1651,19 +1612,18 @@ define void @vpscatter_baseidx_nxv6i8_nxv6f64( %val, ptr %b define void @vpscatter_baseidx_sext_nxv6i8_nxv6f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv6i8_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v20, v16 ; RV32-NEXT: vsll.vi v16, v20, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv6i8_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -1675,19 +1635,19 @@ define void @vpscatter_baseidx_sext_nxv6i8_nxv6f64( %val, p define void @vpscatter_baseidx_zext_nxv6i8_nxv6f64( %val, ptr %base, 
%idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; RV32-NEXT: vzext.vf2 v18, v16 ; RV32-NEXT: vsll.vi v16, v18, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei16.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; RV64-NEXT: vzext.vf2 v18, v16 ; RV64-NEXT: vsll.vi v16, v18, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vsoxei16.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -1699,19 +1659,18 @@ define void @vpscatter_baseidx_zext_nxv6i8_nxv6f64( %val, p define void @vpscatter_baseidx_nxv6i16_nxv6f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv6i16_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf2 v20, v16 ; RV32-NEXT: vsll.vi v16, v20, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv6i16_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, %idxs @@ -1722,19 +1681,18 @@ define void @vpscatter_baseidx_nxv6i16_nxv6f64( %val, ptr % define void @vpscatter_baseidx_sext_nxv6i16_nxv6f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf2 v20, v16 ; RV32-NEXT: vsll.vi v16, v20, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -1746,19 +1704,19 @@ define void @vpscatter_baseidx_sext_nxv6i16_nxv6f64( %val, define void @vpscatter_baseidx_zext_nxv6i16_nxv6f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vzext.vf2 v20, v16 ; RV32-NEXT: vsll.vi v16, v20, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV64-NEXT: vzext.vf2 v20, v16 ; RV64-NEXT: vsll.vi v16, v20, 3 -; 
RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -1770,18 +1728,17 @@ define void @vpscatter_baseidx_zext_nxv6i16_nxv6f64( %val, define void @vpscatter_baseidx_nxv6i32_nxv6f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv6i32_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv6i32_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf2 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, %idxs @@ -1792,18 +1749,17 @@ define void @vpscatter_baseidx_nxv6i32_nxv6f64( %val, ptr % define void @vpscatter_baseidx_sext_nxv6i32_nxv6f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf2 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -1815,18 +1771,17 @@ define void @vpscatter_baseidx_sext_nxv6i32_nxv6f64( %val, define void @vpscatter_baseidx_zext_nxv6i32_nxv6f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vzext.vf2 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -1838,18 +1793,17 @@ define void @vpscatter_baseidx_zext_nxv6i32_nxv6f64( %val, define void @vpscatter_baseidx_nxv6f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vnsrl.wi v24, v16, 0 ; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: 
vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, %idxs @@ -1878,19 +1832,18 @@ define void @vpscatter_nxv8f64( %val, %p define void @vpscatter_baseidx_nxv8i8_nxv8f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v20, v16 ; RV32-NEXT: vsll.vi v16, v20, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, %idxs @@ -1901,19 +1854,18 @@ define void @vpscatter_baseidx_nxv8i8_nxv8f64( %val, ptr %b define void @vpscatter_baseidx_sext_nxv8i8_nxv8f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf4 v20, v16 ; RV32-NEXT: vsll.vi v16, v20, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -1925,19 +1877,19 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8f64( %val, p define void @vpscatter_baseidx_zext_nxv8i8_nxv8f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; RV32-NEXT: vzext.vf2 v18, v16 ; RV32-NEXT: vsll.vi v16, v18, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei16.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; RV64-NEXT: vzext.vf2 v18, v16 ; RV64-NEXT: vsll.vi v16, v18, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vsoxei16.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -1949,19 +1901,18 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8f64( %val, p define void @vpscatter_baseidx_nxv8i16_nxv8f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf2 v20, v16 ; RV32-NEXT: vsll.vi v16, v20, 3 -; RV32-NEXT: 
vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, %idxs @@ -1972,19 +1923,18 @@ define void @vpscatter_baseidx_nxv8i16_nxv8f64( %val, ptr % define void @vpscatter_baseidx_sext_nxv8i16_nxv8f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsext.vf2 v20, v16 ; RV32-NEXT: vsll.vi v16, v20, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -1996,19 +1946,19 @@ define void @vpscatter_baseidx_sext_nxv8i16_nxv8f64( %val, define void @vpscatter_baseidx_zext_nxv8i16_nxv8f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vzext.vf2 v20, v16 ; RV32-NEXT: vsll.vi v16, v20, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV64-NEXT: vzext.vf2 v20, v16 ; RV64-NEXT: vsll.vi v16, v20, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -2020,18 +1970,17 @@ define void @vpscatter_baseidx_zext_nxv8i16_nxv8f64( %val, define void @vpscatter_baseidx_nxv8i32_nxv8f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv8i32_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv8i32_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf2 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, %idxs @@ -2042,18 +1991,17 @@ define void @vpscatter_baseidx_nxv8i32_nxv8f64( %val, ptr % define void @vpscatter_baseidx_sext_nxv8i32_nxv8f64( %val, ptr %base, 
%idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsext.vf2 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -2065,18 +2013,17 @@ define void @vpscatter_baseidx_sext_nxv8i32_nxv8f64( %val, define void @vpscatter_baseidx_zext_nxv8i32_nxv8f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vzext.vf2 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to @@ -2088,18 +2035,17 @@ define void @vpscatter_baseidx_zext_nxv8i32_nxv8f64( %val, define void @vpscatter_baseidx_nxv8f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vnsrl.wi v24, v16, 0 ; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, %idxs diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll index 2ef96f4b3896fc..9304e8c58f90db 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll @@ -12,9 +12,7 @@ define @vrem_vx_nxv8i7( %a, i7 signext %b, @vremu_vx_nxv8i7( %a, i7 signext %b, < ; CHECK-NEXT: li a2, 127 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a2, v0.t -; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vand.vx v9, v9, a2, v0.t ; CHECK-NEXT: vremu.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll index 027c81180d5f19..f28153d427ebfe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -1125,7 +1125,6 @@ exit: define 
@clobbered_forwarded_avl(i64 %n, %v, i1 %cmp) {
; CHECK-LABEL: clobbered_forwarded_avl:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: mv a2, a0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: .LBB27_1: # %for.body
@@ -1133,9 +1132,7 @@ define @clobbered_forwarded_avl(i64 %n, %v
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: bnez a1, .LBB27_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
-; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vadd.vv v10, v8, v8
-; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT: vadd.vv v8, v10, v8
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll
index 380835494ed17d..f5c46aec86b864 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll
@@ -9,10 +9,9 @@ declare @llvm.vp.shl.nxv8i7(, @vsll_vx_nxv8i7( %a, i7 signext %b, %mask, i32 zeroext %evl) {
; CHECK-LABEL: vsll_vx_nxv8i7:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.x v9, a0
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: vsll.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll
index 382c8297473b78..68ea51845392c9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll
@@ -944,9 +944,9 @@ define @vsra_vv_nxv1i8_sext_zext_mixed_trunc(
; CHECK-NEXT: vsext.vf4 v9, v8
; CHECK-NEXT: vzext.vf4 v10, v8
; CHECK-NEXT: vsra.vv v8, v9, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 0
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
; CHECK-NEXT: ret
%sexted_va = sext %va to
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll
index cff8cc710d21f3..ecce91982b14a4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll
@@ -12,10 +12,8 @@ define @vsra_vx_nxv8i7( %a, i7 signext %b, @vsrl_vx_nxv8i7( %a, i7 signext %b, @vssub_vx_nxv1i8( %va, i8 %b, @vssub_vx_nxv1i8_commute( %va, i8 %b, %m, i32 zeroext %evl) {
; CHECK-LABEL: vssub_vx_nxv1i8_commute:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
; CHECK-NEXT: vssub.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement poison, i8 %b, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll
index 8c729d7d9bfb6e..ca56145260f51c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll
@@ -60,9 +60,8 @@ define @vssubu_vx_nxv1i8( %va, i8 %b, @vssubu_vx_nxv1i8_commute( %va, i8 %b, %m, i32 zeroext %evl) {
; CHECK-LABEL: vssubu_vx_nxv1i8_commute:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
; CHECK-NEXT: vssubu.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement poison, i8 %b, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll
index
bb3076b3a945e8..c30c4763dd46d5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll @@ -13,10 +13,9 @@ declare @llvm.vp.shl.nxv2i64(, @vwsll_vv_nxv2i64_sext( %a, %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vv_nxv2i64_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsext.vf2 v12, v9 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -35,10 +34,9 @@ define @vwsll_vv_nxv2i64_sext( %a, @vwsll_vv_nxv2i64_zext( %a, %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vv_nxv2i64_zext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vzext.vf2 v12, v9 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -57,17 +55,15 @@ define @vwsll_vv_nxv2i64_zext( %a, @vwsll_vx_i64_nxv2i64( %a, i64 %b, %m, i32 zeroext %vl) { ; CHECK-RV32-LABEL: vwsll_vx_i64_nxv2i64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; CHECK-RV32-NEXT: vzext.vf2 v10, v8 ; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; CHECK-RV32-NEXT: vzext.vf2 v10, v8 ; CHECK-RV32-NEXT: vsll.vx v8, v10, a0, v0.t ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vwsll_vx_i64_nxv2i64: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; CHECK-RV64-NEXT: vzext.vf2 v10, v8 ; CHECK-RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; CHECK-RV64-NEXT: vzext.vf2 v10, v8 ; CHECK-RV64-NEXT: vsll.vx v8, v10, a0, v0.t ; CHECK-RV64-NEXT: ret ; @@ -94,12 +90,11 @@ define @vwsll_vx_i64_nxv2i64( %a, i64 %b, < define @vwsll_vx_i32_nxv2i64_sext( %a, i32 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i32_nxv2i64_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsext.vf2 v12, v9 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -120,12 +115,11 @@ define @vwsll_vx_i32_nxv2i64_sext( %a, i32 define @vwsll_vx_i32_nxv2i64_zext( %a, i32 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i32_nxv2i64_zext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vzext.vf2 v12, v9 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -146,12 +140,11 @@ define @vwsll_vx_i32_nxv2i64_zext( %a, i32 define @vwsll_vx_i16_nxv2i64_sext( %a, i16 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i16_nxv2i64_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsext.vf4 v12, v9 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -172,12 +165,11 @@ define @vwsll_vx_i16_nxv2i64_sext( %a, i16 define @vwsll_vx_i16_nxv2i64_zext( %a, i16 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: 
vwsll_vx_i16_nxv2i64_zext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vzext.vf4 v12, v9 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -198,12 +190,11 @@ define @vwsll_vx_i16_nxv2i64_zext( %a, i16 define @vwsll_vx_i8_nxv2i64_sext( %a, i8 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i8_nxv2i64_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsext.vf8 v12, v9 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -224,12 +215,11 @@ define @vwsll_vx_i8_nxv2i64_sext( %a, i8 %b define @vwsll_vx_i8_nxv2i64_zext( %a, i8 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i8_nxv2i64_zext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vzext.vf8 v12, v9 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -250,9 +240,8 @@ define @vwsll_vx_i8_nxv2i64_zext( %a, i8 %b define @vwsll_vi_nxv2i64( %a, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vi_nxv2i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsll.vi v8, v10, 2, v0.t ; CHECK-NEXT: ret ; @@ -276,10 +265,9 @@ declare @llvm.vp.shl.nxv4i32(, @vwsll_vv_nxv4i32_sext( %a, %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vv_nxv4i32_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsext.vf2 v12, v9 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -298,10 +286,9 @@ define @vwsll_vv_nxv4i32_sext( %a, @vwsll_vv_nxv4i32_zext( %a, %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vv_nxv4i32_zext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vzext.vf2 v12, v9 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -320,17 +307,15 @@ define @vwsll_vv_nxv4i32_zext( %a, @vwsll_vx_i64_nxv4i32( %a, i64 %b, %m, i32 zeroext %vl) { ; CHECK-RV32-LABEL: vwsll_vx_i64_nxv4i32: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; CHECK-RV32-NEXT: vzext.vf2 v10, v8 ; CHECK-RV32-NEXT: vsetvli zero, a2, e32, m2, ta, ma +; CHECK-RV32-NEXT: vzext.vf2 v10, v8 ; CHECK-RV32-NEXT: vsll.vx v8, v10, a0, v0.t ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vwsll_vx_i64_nxv4i32: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; CHECK-RV64-NEXT: vzext.vf2 v10, v8 ; CHECK-RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-RV64-NEXT: vzext.vf2 v10, v8 ; CHECK-RV64-NEXT: vsll.vx v8, v10, a0, v0.t ; CHECK-RV64-NEXT: ret ; @@ -358,9 +343,8 @@ define @vwsll_vx_i64_nxv4i32( %a, 
i64 %b, < define @vwsll_vx_i32_nxv4i32( %a, i32 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i32_nxv4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsll.vx v8, v10, a0, v0.t ; CHECK-NEXT: ret ; @@ -380,12 +364,11 @@ define @vwsll_vx_i32_nxv4i32( %a, i32 %b, < define @vwsll_vx_i16_nxv4i32_sext( %a, i16 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i16_nxv4i32_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsext.vf2 v12, v9 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -406,12 +389,11 @@ define @vwsll_vx_i16_nxv4i32_sext( %a, i16 define @vwsll_vx_i16_nxv4i32_zext( %a, i16 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i16_nxv4i32_zext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vzext.vf2 v12, v9 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -432,12 +414,11 @@ define @vwsll_vx_i16_nxv4i32_zext( %a, i16 define @vwsll_vx_i8_nxv4i32_sext( %a, i8 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i8_nxv4i32_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsext.vf4 v12, v9 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -458,12 +439,11 @@ define @vwsll_vx_i8_nxv4i32_sext( %a, i8 %b define @vwsll_vx_i8_nxv4i32_zext( %a, i8 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i8_nxv4i32_zext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vzext.vf4 v12, v9 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -484,9 +464,8 @@ define @vwsll_vx_i8_nxv4i32_zext( %a, i8 %b define @vwsll_vi_nxv4i32( %a, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vi_nxv4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsll.vi v8, v10, 2, v0.t ; CHECK-NEXT: ret ; @@ -511,10 +490,9 @@ declare @llvm.vp.shl.nxv8i16(, @vwsll_vv_nxv8i16_sext( %a, %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vv_nxv8i16_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsext.vf2 v12, v9 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -533,10 +511,9 @@ define @vwsll_vv_nxv8i16_sext( %a, @vwsll_vv_nxv8i16_zext( %a, %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vv_nxv8i16_zext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, 
zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vzext.vf2 v12, v9 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -555,17 +532,15 @@ define @vwsll_vv_nxv8i16_zext( %a, @vwsll_vx_i64_nxv8i16( %a, i64 %b, %m, i32 zeroext %vl) { ; CHECK-RV32-LABEL: vwsll_vx_i64_nxv8i16: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-RV32-NEXT: vzext.vf2 v10, v8 ; CHECK-RV32-NEXT: vsetvli zero, a2, e16, m2, ta, ma +; CHECK-RV32-NEXT: vzext.vf2 v10, v8 ; CHECK-RV32-NEXT: vsll.vx v8, v10, a0, v0.t ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vwsll_vx_i64_nxv8i16: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; CHECK-RV64-NEXT: vzext.vf2 v10, v8 ; CHECK-RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-RV64-NEXT: vzext.vf2 v10, v8 ; CHECK-RV64-NEXT: vsll.vx v8, v10, a0, v0.t ; CHECK-RV64-NEXT: ret ; @@ -593,9 +568,8 @@ define @vwsll_vx_i64_nxv8i16( %a, i64 %b, @vwsll_vx_i32_nxv8i16( %a, i32 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i32_nxv8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsll.vx v8, v10, a0, v0.t ; CHECK-NEXT: ret ; @@ -616,9 +590,8 @@ define @vwsll_vx_i32_nxv8i16( %a, i32 %b, @vwsll_vx_i16_nxv8i16( %a, i16 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i16_nxv8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsll.vx v8, v10, a0, v0.t ; CHECK-NEXT: ret ; @@ -638,12 +611,11 @@ define @vwsll_vx_i16_nxv8i16( %a, i16 %b, @vwsll_vx_i8_nxv8i16_sext( %a, i8 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i8_nxv8i16_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsext.vf2 v12, v9 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -664,12 +636,11 @@ define @vwsll_vx_i8_nxv8i16_sext( %a, i8 %b, define @vwsll_vx_i8_nxv8i16_zext( %a, i8 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i8_nxv8i16_zext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vzext.vf2 v12, v9 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -690,9 +661,8 @@ define @vwsll_vx_i8_nxv8i16_zext( %a, i8 %b, define @vwsll_vi_nxv8i16( %a, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vi_nxv8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsll.vi v8, v10, 2, v0.t ; CHECK-NEXT: ret ;