From b2223b4d7efa4ed003a1b3ce7439106ddc63125f Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Wed, 4 Sep 2024 09:52:42 -0700 Subject: [PATCH 1/6] [WebAssembly] Rename legacy EH mir tests (#107189) We added `-legacy` suffix to the legacy EH `ll` tests in #107166 but forgot to do the same for `mir` tests. --- .../{cfg-stackify-eh.mir => cfg-stackify-eh-legacy.mir} | 0 .../CodeGen/WebAssembly/{exception.mir => exception-legacy.mir} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename llvm/test/CodeGen/WebAssembly/{cfg-stackify-eh.mir => cfg-stackify-eh-legacy.mir} (100%) rename llvm/test/CodeGen/WebAssembly/{exception.mir => exception-legacy.mir} (100%) diff --git a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.mir b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh-legacy.mir similarity index 100% rename from llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.mir rename to llvm/test/CodeGen/WebAssembly/cfg-stackify-eh-legacy.mir diff --git a/llvm/test/CodeGen/WebAssembly/exception.mir b/llvm/test/CodeGen/WebAssembly/exception-legacy.mir similarity index 100% rename from llvm/test/CodeGen/WebAssembly/exception.mir rename to llvm/test/CodeGen/WebAssembly/exception-legacy.mir From 32bc670609fe9c938bca5b3c0e70e6b3934b4641 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Wed, 4 Sep 2024 09:53:38 -0700 Subject: [PATCH 2/6] [WebAssembly] Misc. fixes in CFGStackify (NFC) (#107182) This contains misc. small fixes in CFGStackify. Most of them are comment fixes and variable name changes. Two code changes are removing the cases that can never occur. Another is extracting a routine as a lambda function. I will add explanations inline in the code as Github comments. --- .../WebAssembly/WebAssemblyCFGStackify.cpp | 141 +++++++++--------- 1 file changed, 67 insertions(+), 74 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index c7001ef2b33e62..6fd882f62f3f09 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -63,8 +63,9 @@ class WebAssemblyCFGStackify final : public MachineFunctionPass { // over scoped regions when walking blocks. SmallVector ScopeTops; void updateScopeTops(MachineBasicBlock *Begin, MachineBasicBlock *End) { + int BeginNo = Begin->getNumber(); int EndNo = End->getNumber(); - if (!ScopeTops[EndNo] || ScopeTops[EndNo]->getNumber() > Begin->getNumber()) + if (!ScopeTops[EndNo] || ScopeTops[EndNo]->getNumber() > BeginNo) ScopeTops[EndNo] = Begin; } @@ -77,8 +78,8 @@ class WebAssemblyCFGStackify final : public MachineFunctionPass { // Exception handling related functions bool fixCallUnwindMismatches(MachineFunction &MF); bool fixCatchUnwindMismatches(MachineFunction &MF); - void addTryDelegate(MachineInstr *RangeBegin, MachineInstr *RangeEnd, - MachineBasicBlock *DelegateDest); + void addNestedTryDelegate(MachineInstr *RangeBegin, MachineInstr *RangeEnd, + MachineBasicBlock *UnwindDest); void recalculateScopeTops(MachineFunction &MF); void removeUnnecessaryInstrs(MachineFunction &MF); @@ -225,7 +226,7 @@ void WebAssemblyCFGStackify::registerScope(MachineInstr *Begin, EndToBegin[End] = Begin; } -// When 'End' is not an 'end_try' but 'delegate, EHPad is nullptr. +// When 'End' is not an 'end_try' but a 'delegate', EHPad is nullptr. void WebAssemblyCFGStackify::registerTryScope(MachineInstr *Begin, MachineInstr *End, MachineBasicBlock *EHPad) { @@ -293,7 +294,7 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) { } } - // Decide where in Header to put the BLOCK. + // Decide where in MBB to put the BLOCK. // Instructions that should go before the BLOCK. SmallPtrSet BeforeSet; @@ -359,21 +360,20 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) { TII.get(WebAssembly::BLOCK)) .addImm(int64_t(ReturnType)); - // Decide where in Header to put the END_BLOCK. + // Decide where in MBB to put the END_BLOCK. BeforeSet.clear(); AfterSet.clear(); for (auto &MI : MBB) { #ifndef NDEBUG - // END_BLOCK should precede existing LOOP and TRY markers. - if (MI.getOpcode() == WebAssembly::LOOP || - MI.getOpcode() == WebAssembly::TRY) + // END_BLOCK should precede existing LOOP markers. + if (MI.getOpcode() == WebAssembly::LOOP) AfterSet.insert(&MI); #endif // If there is a previously placed END_LOOP marker and the header of the // loop is above this block's header, the END_LOOP should be placed after - // the BLOCK, because the loop contains this block. Otherwise the END_LOOP - // should be placed before the BLOCK. The same for END_TRY. + // the END_BLOCK, because the loop contains this block. Otherwise the + // END_LOOP should be placed before the END_BLOCK. The same for END_TRY. if (MI.getOpcode() == WebAssembly::END_LOOP || MI.getOpcode() == WebAssembly::END_TRY) { if (EndToBegin[&MI]->getParent()->getNumber() >= Header->getNumber()) @@ -437,7 +437,7 @@ void WebAssemblyCFGStackify::placeLoopMarker(MachineBasicBlock &MBB) { TII.get(WebAssembly::LOOP)) .addImm(int64_t(WebAssembly::BlockType::Void)); - // Decide where in Header to put the END_LOOP. + // Decide where in MBB to put the END_LOOP. BeforeSet.clear(); AfterSet.clear(); #ifndef NDEBUG @@ -491,7 +491,6 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) { WebAssemblyException *WE = WEI.getExceptionFor(&MBB); assert(WE); MachineBasicBlock *Bottom = SRI.getBottom(WE); - auto Iter = std::next(Bottom->getIterator()); if (Iter == MF.end()) { getAppendixBlock(MF); @@ -499,12 +498,9 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) { } MachineBasicBlock *Cont = &*Iter; - assert(Cont != &MF.front()); - MachineBasicBlock *LayoutPred = Cont->getPrevNode(); - // If the nearest common dominator is inside a more deeply nested context, // walk out to the nearest scope which isn't more deeply nested. - for (MachineFunction::iterator I(LayoutPred), E(Header); I != E; --I) { + for (MachineFunction::iterator I(Bottom), E(Header); I != E; --I) { if (MachineBasicBlock *ScopeTop = ScopeTops[I->getNumber()]) { if (ScopeTop->getNumber() > Header->getNumber()) { // Skip over an intervening scope. @@ -538,7 +534,7 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) { } // All previously inserted BLOCK/TRY markers should be after the TRY because - // they are all nested trys. + // they are all nested blocks/trys. if (MI.getOpcode() == WebAssembly::BLOCK || MI.getOpcode() == WebAssembly::TRY) AfterSet.insert(&MI); @@ -607,14 +603,13 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) { TII.get(WebAssembly::TRY)) .addImm(int64_t(WebAssembly::BlockType::Void)); - // Decide where in Header to put the END_TRY. + // Decide where in Cont to put the END_TRY. BeforeSet.clear(); AfterSet.clear(); for (const auto &MI : *Cont) { #ifndef NDEBUG - // END_TRY should precede existing LOOP and BLOCK markers. - if (MI.getOpcode() == WebAssembly::LOOP || - MI.getOpcode() == WebAssembly::BLOCK) + // END_TRY should precede existing LOOP markers. + if (MI.getOpcode() == WebAssembly::LOOP) AfterSet.insert(&MI); // All END_TRY markers placed earlier belong to exceptions that contains @@ -643,9 +638,8 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) { // Mark the end of the TRY. InsertPos = getEarliestInsertPos(Cont, BeforeSet, AfterSet); - MachineInstr *End = - BuildMI(*Cont, InsertPos, Bottom->findBranchDebugLoc(), - TII.get(WebAssembly::END_TRY)); + MachineInstr *End = BuildMI(*Cont, InsertPos, Bottom->findBranchDebugLoc(), + TII.get(WebAssembly::END_TRY)); registerTryScope(Begin, End, &MBB); // Track the farthest-spanning scope that ends at this point. We create two @@ -845,9 +839,9 @@ static void unstackifyVRegsUsedInSplitBB(MachineBasicBlock &MBB, // Wrap the given range of instruction with try-delegate. RangeBegin and // RangeEnd are inclusive. -void WebAssemblyCFGStackify::addTryDelegate(MachineInstr *RangeBegin, - MachineInstr *RangeEnd, - MachineBasicBlock *DelegateDest) { +void WebAssemblyCFGStackify::addNestedTryDelegate( + MachineInstr *RangeBegin, MachineInstr *RangeEnd, + MachineBasicBlock *UnwindDest) { auto *BeginBB = RangeBegin->getParent(); auto *EndBB = RangeEnd->getParent(); MachineFunction &MF = *BeginBB->getParent(); @@ -879,8 +873,8 @@ void WebAssemblyCFGStackify::addTryDelegate(MachineInstr *RangeBegin, MachineBasicBlock *DelegateBB = MF.CreateMachineBasicBlock(); // If the destination of 'delegate' is not the caller, adds the destination to // the BB's successors. - if (DelegateDest != FakeCallerBB) - DelegateBB->addSuccessor(DelegateDest); + if (UnwindDest != FakeCallerBB) + DelegateBB->addSuccessor(UnwindDest); auto SplitPos = std::next(RangeEnd->getIterator()); if (SplitPos == EndBB->end()) { @@ -962,7 +956,7 @@ void WebAssemblyCFGStackify::addTryDelegate(MachineInstr *RangeBegin, // Add 'delegate' instruction in the delegate BB created above. MachineInstr *Delegate = BuildMI(DelegateBB, RangeEnd->getDebugLoc(), TII.get(WebAssembly::DELEGATE)) - .addMBB(DelegateDest); + .addMBB(UnwindDest); registerTryScope(Try, Delegate, nullptr); } @@ -1130,7 +1124,7 @@ bool WebAssemblyCFGStackify::fixCallUnwindMismatches(MachineFunction &MF) { if (EHPadStack.back() == UnwindDest) continue; - // Include EH_LABELs in the range before and afer the invoke + // Include EH_LABELs in the range before and after the invoke MachineInstr *RangeBegin = &MI, *RangeEnd = &MI; if (RangeBegin->getIterator() != MBB.begin() && std::prev(RangeBegin->getIterator())->isEHLabel()) @@ -1231,22 +1225,24 @@ bool WebAssemblyCFGStackify::fixCallUnwindMismatches(MachineFunction &MF) { std::tie(RangeBegin, RangeEnd) = Range; auto *MBB = RangeBegin->getParent(); - // If this BB has an EH pad successor, i.e., ends with an 'invoke', now we - // are going to wrap the invoke with try-delegate, making the 'delegate' - // BB the new successor instead, so remove the EH pad succesor here. The - // BB may not have an EH pad successor if calls in this BB throw to the - // caller. - MachineBasicBlock *EHPad = nullptr; - for (auto *Succ : MBB->successors()) { - if (Succ->isEHPad()) { - EHPad = Succ; - break; + // If this BB has an EH pad successor, i.e., ends with an 'invoke', and if + // the current range contains the invoke, now we are going to wrap the + // invoke with try-delegate, making the 'delegate' BB the new successor + // instead, so remove the EH pad succesor here. The BB may not have an EH + // pad successor if calls in this BB throw to the caller. + if (UnwindDest != getFakeCallerBlock(MF)) { + MachineBasicBlock *EHPad = nullptr; + for (auto *Succ : MBB->successors()) { + if (Succ->isEHPad()) { + EHPad = Succ; + break; + } } + if (EHPad) + MBB->removeSuccessor(EHPad); } - if (EHPad) - MBB->removeSuccessor(EHPad); - addTryDelegate(RangeBegin, RangeEnd, UnwindDest); + addNestedTryDelegate(RangeBegin, RangeEnd, UnwindDest); } } @@ -1354,12 +1350,10 @@ bool WebAssemblyCFGStackify::fixCatchUnwindMismatches(MachineFunction &MF) { NumCatchUnwindMismatches += EHPadToUnwindDest.size(); SmallPtrSet NewEndTryBBs; - for (auto &P : EHPadToUnwindDest) { - MachineBasicBlock *EHPad = P.first; - MachineBasicBlock *UnwindDest = P.second; + for (auto &[EHPad, UnwindDest] : EHPadToUnwindDest) { MachineInstr *Try = EHPadToTry[EHPad]; MachineInstr *EndTry = BeginToEnd[Try]; - addTryDelegate(Try, EndTry, UnwindDest); + addNestedTryDelegate(Try, EndTry, UnwindDest); NewEndTryBBs.insert(EndTry->getParent()); } @@ -1534,7 +1528,7 @@ static void appendEndToFunction(MachineFunction &MF, TII.get(WebAssembly::END_FUNCTION)); } -/// Insert LOOP/TRY/BLOCK markers at appropriate places. +/// Insert BLOCK/LOOP/TRY markers at appropriate places. void WebAssemblyCFGStackify::placeMarkers(MachineFunction &MF) { // We allocate one more than the number of blocks in the function to // accommodate for the possible fake block we may insert at the end. @@ -1558,9 +1552,9 @@ void WebAssemblyCFGStackify::placeMarkers(MachineFunction &MF) { // Fix mismatches in unwind destinations induced by linearizing the code. if (MCAI->getExceptionHandlingType() == ExceptionHandling::Wasm && MF.getFunction().hasPersonalityFn()) { - bool Changed = fixCallUnwindMismatches(MF); - Changed |= fixCatchUnwindMismatches(MF); - if (Changed) + bool MismatchFixed = fixCallUnwindMismatches(MF); + MismatchFixed |= fixCatchUnwindMismatches(MF); + if (MismatchFixed) recalculateScopeTops(MF); } } @@ -1654,6 +1648,23 @@ void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) { // Now rewrite references to basic blocks to be depth immediates. SmallVector Stack; SmallVector EHPadStack; + + auto RewriteOperands = [&](MachineInstr &MI) { + // Rewrite MBB operands to be depth immediates. + SmallVector Ops(MI.operands()); + while (MI.getNumOperands() > 0) + MI.removeOperand(MI.getNumOperands() - 1); + for (auto MO : Ops) { + if (MO.isMBB()) { + if (MI.getOpcode() == WebAssembly::DELEGATE) + MO = MachineOperand::CreateImm(getDelegateDepth(Stack, MO.getMBB())); + else + MO = MachineOperand::CreateImm(getBranchDepth(Stack, MO.getMBB())); + } + MI.addOperand(MF, MO); + } + }; + for (auto &MBB : reverse(MF)) { for (MachineInstr &MI : llvm::reverse(MBB)) { switch (MI.getOpcode()) { @@ -1697,23 +1708,8 @@ void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) { break; default: - if (MI.isTerminator()) { - // Rewrite MBB operands to be depth immediates. - SmallVector Ops(MI.operands()); - while (MI.getNumOperands() > 0) - MI.removeOperand(MI.getNumOperands() - 1); - for (auto MO : Ops) { - if (MO.isMBB()) { - if (MI.getOpcode() == WebAssembly::DELEGATE) - MO = MachineOperand::CreateImm( - getDelegateDepth(Stack, MO.getMBB())); - else - MO = MachineOperand::CreateImm( - getBranchDepth(Stack, MO.getMBB())); - } - MI.addOperand(MF, MO); - } - } + if (MI.isTerminator()) + RewriteOperands(MI); if (MI.getOpcode() == WebAssembly::DELEGATE) Stack.push_back(std::make_pair(&MBB, &MI)); @@ -1767,10 +1763,7 @@ bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) { // Add an end instruction at the end of the function body. const auto &TII = *MF.getSubtarget().getInstrInfo(); - if (!MF.getSubtarget() - .getTargetTriple() - .isOSBinFormatELF()) - appendEndToFunction(MF, TII); + appendEndToFunction(MF, TII); cleanupFunctionData(MF); From 26ba186bd0a22fac7d08ed566b00c03236b6b7a9 Mon Sep 17 00:00:00 2001 From: RolandF77 <55763885+RolandF77@users.noreply.github.com> Date: Wed, 4 Sep 2024 12:55:27 -0400 Subject: [PATCH 3/6] [PowerPC] Improve pwr7 codegen for v4i8 load (#104507) There are no partial vector loads on pwr7 so current v4i8 codegen is an int load then store to vector sized temp and re-load as vector. Try to use lfiwax to load 32 bits into an FP reg and take advantage of VSX FP and vector reg sharing to move the result to the right vector position. --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 24 +++- .../build-vector-from-load-and-zeros.ll | 119 ++++++---------- .../PowerPC/canonical-merge-shuffles.ll | 53 +++---- llvm/test/CodeGen/PowerPC/load-and-splat.ll | 117 +++++++-------- llvm/test/CodeGen/PowerPC/pre-inc-disable.ll | 21 ++- .../CodeGen/PowerPC/scalar_vector_test_4.ll | 42 +++--- .../CodeGen/PowerPC/test-vector-insert.ll | 92 +++++------- .../PowerPC/v16i8_scalar_to_vector_shuffle.ll | 28 ++-- .../PowerPC/v2i64_scalar_to_vector_shuffle.ll | 44 ++---- .../PowerPC/v4i32_scalar_to_vector_shuffle.ll | 134 +++++++----------- .../PowerPC/v8i16_scalar_to_vector_shuffle.ll | 94 +++++------- 11 files changed, 303 insertions(+), 465 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 8a0858e2462520..f1bd14d7ee0116 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -11492,13 +11492,33 @@ SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op, SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); + + MachineFunction &MF = DAG.getMachineFunction(); + SDValue Op0 = Op.getOperand(0); + ReuseLoadInfo RLI; + if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() && + Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD && + Op0.getValueType() == MVT::i32 && Op0.hasOneUse() && + canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) { + + MachineMemOperand *MMO = + MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, + RLI.Alignment, RLI.AAInfo, RLI.Ranges); + SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())}; + SDValue Bits = DAG.getMemIntrinsicNode( + PPCISD::LD_SPLAT, dl, DAG.getVTList(MVT::v4i32, MVT::Other), Ops, + MVT::i32, MMO); + spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); + return Bits.getValue(0); + } + // Create a stack slot that is 16-byte aligned. - MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); int FrameIdx = MFI.CreateStackObject(16, Align(16), false); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); - SDValue Val = Op.getOperand(0); + SDValue Val = Op0; EVT ValVT = Val.getValueType(); // P10 hardware store forwarding requires that a single store contains all // the data for the load. P10 is able to merge a pair of adjacent stores. Try diff --git a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll index 6d35a7281de6b4..fba6725e2b2a3f 100644 --- a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll +++ b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll @@ -27,20 +27,17 @@ define <2 x i64> @build_v2i64_extload_0(ptr nocapture noundef readonly %p) { ; PWR7-LE-LABEL: build_v2i64_extload_0: ; PWR7-LE: # %bb.0: # %entry ; PWR7-LE-NEXT: li 4, 0 -; PWR7-LE-NEXT: lwz 3, 0(3) ; PWR7-LE-NEXT: stw 4, -16(1) ; PWR7-LE-NEXT: addis 4, 2, .LCPI0_0@toc@ha +; PWR7-LE-NEXT: lfiwzx 0, 0, 3 +; PWR7-LE-NEXT: addi 3, 1, -16 ; PWR7-LE-NEXT: addi 4, 4, .LCPI0_0@toc@l -; PWR7-LE-NEXT: stw 3, -32(1) -; PWR7-LE-NEXT: addi 3, 1, -32 -; PWR7-LE-NEXT: lxvd2x 0, 0, 4 -; PWR7-LE-NEXT: addi 4, 1, -16 ; PWR7-LE-NEXT: lxvd2x 1, 0, 4 -; PWR7-LE-NEXT: xxswapd 34, 0 +; PWR7-LE-NEXT: xxspltw 35, 0, 1 ; PWR7-LE-NEXT: lxvd2x 0, 0, 3 -; PWR7-LE-NEXT: xxswapd 35, 1 +; PWR7-LE-NEXT: xxswapd 34, 1 ; PWR7-LE-NEXT: xxswapd 36, 0 -; PWR7-LE-NEXT: vperm 2, 3, 4, 2 +; PWR7-LE-NEXT: vperm 2, 4, 3, 2 ; PWR7-LE-NEXT: blr ; ; PWR8-LE-LABEL: build_v2i64_extload_0: @@ -337,17 +334,13 @@ entry: define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) { ; PWR7-BE-LABEL: build_v4i32_load_0: ; PWR7-BE: # %bb.0: # %entry -; PWR7-BE-NEXT: lwz 3, 0(3) -; PWR7-BE-NEXT: xxlxor 36, 36, 36 -; PWR7-BE-NEXT: sldi 3, 3, 32 -; PWR7-BE-NEXT: std 3, -32(1) -; PWR7-BE-NEXT: std 3, -24(1) +; PWR7-BE-NEXT: lfiwzx 0, 0, 3 ; PWR7-BE-NEXT: addis 3, 2, .LCPI8_0@toc@ha +; PWR7-BE-NEXT: xxlxor 36, 36, 36 ; PWR7-BE-NEXT: addi 3, 3, .LCPI8_0@toc@l -; PWR7-BE-NEXT: lxvw4x 34, 0, 3 -; PWR7-BE-NEXT: addi 3, 1, -32 ; PWR7-BE-NEXT: lxvw4x 35, 0, 3 -; PWR7-BE-NEXT: vperm 2, 3, 4, 2 +; PWR7-BE-NEXT: xxspltw 34, 0, 1 +; PWR7-BE-NEXT: vperm 2, 2, 4, 3 ; PWR7-BE-NEXT: blr ; ; PWR8-BE-LABEL: build_v4i32_load_0: @@ -365,20 +358,17 @@ define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) { ; PWR7-LE-LABEL: build_v4i32_load_0: ; PWR7-LE: # %bb.0: # %entry ; PWR7-LE-NEXT: li 4, 0 -; PWR7-LE-NEXT: lwz 3, 0(3) ; PWR7-LE-NEXT: stw 4, -16(1) ; PWR7-LE-NEXT: addis 4, 2, .LCPI8_0@toc@ha +; PWR7-LE-NEXT: lfiwzx 0, 0, 3 +; PWR7-LE-NEXT: addi 3, 1, -16 ; PWR7-LE-NEXT: addi 4, 4, .LCPI8_0@toc@l -; PWR7-LE-NEXT: stw 3, -32(1) -; PWR7-LE-NEXT: addi 3, 1, -32 -; PWR7-LE-NEXT: lxvd2x 0, 0, 4 -; PWR7-LE-NEXT: addi 4, 1, -16 ; PWR7-LE-NEXT: lxvd2x 1, 0, 4 -; PWR7-LE-NEXT: xxswapd 34, 0 +; PWR7-LE-NEXT: xxspltw 35, 0, 1 ; PWR7-LE-NEXT: lxvd2x 0, 0, 3 -; PWR7-LE-NEXT: xxswapd 35, 1 +; PWR7-LE-NEXT: xxswapd 34, 1 ; PWR7-LE-NEXT: xxswapd 36, 0 -; PWR7-LE-NEXT: vperm 2, 3, 4, 2 +; PWR7-LE-NEXT: vperm 2, 4, 3, 2 ; PWR7-LE-NEXT: blr ; ; PWR8-LE-LABEL: build_v4i32_load_0: @@ -400,17 +390,13 @@ entry: define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) { ; PWR7-BE-LABEL: build_v4i32_load_1: ; PWR7-BE: # %bb.0: # %entry -; PWR7-BE-NEXT: lwz 3, 0(3) -; PWR7-BE-NEXT: xxlxor 36, 36, 36 -; PWR7-BE-NEXT: sldi 3, 3, 32 -; PWR7-BE-NEXT: std 3, -16(1) -; PWR7-BE-NEXT: std 3, -8(1) +; PWR7-BE-NEXT: lfiwzx 0, 0, 3 ; PWR7-BE-NEXT: addis 3, 2, .LCPI9_0@toc@ha +; PWR7-BE-NEXT: xxlxor 36, 36, 36 ; PWR7-BE-NEXT: addi 3, 3, .LCPI9_0@toc@l -; PWR7-BE-NEXT: lxvw4x 34, 0, 3 -; PWR7-BE-NEXT: addi 3, 1, -16 ; PWR7-BE-NEXT: lxvw4x 35, 0, 3 -; PWR7-BE-NEXT: vperm 2, 4, 3, 2 +; PWR7-BE-NEXT: xxspltw 34, 0, 1 +; PWR7-BE-NEXT: vperm 2, 4, 2, 3 ; PWR7-BE-NEXT: blr ; ; PWR8-BE-LABEL: build_v4i32_load_1: @@ -427,20 +413,17 @@ define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) { ; PWR7-LE-LABEL: build_v4i32_load_1: ; PWR7-LE: # %bb.0: # %entry ; PWR7-LE-NEXT: li 4, 0 -; PWR7-LE-NEXT: lwz 3, 0(3) -; PWR7-LE-NEXT: stw 4, -32(1) +; PWR7-LE-NEXT: stw 4, -16(1) ; PWR7-LE-NEXT: addis 4, 2, .LCPI9_0@toc@ha -; PWR7-LE-NEXT: addi 4, 4, .LCPI9_0@toc@l -; PWR7-LE-NEXT: stw 3, -16(1) +; PWR7-LE-NEXT: lfiwzx 0, 0, 3 ; PWR7-LE-NEXT: addi 3, 1, -16 -; PWR7-LE-NEXT: lxvd2x 0, 0, 4 -; PWR7-LE-NEXT: addi 4, 1, -32 +; PWR7-LE-NEXT: addi 4, 4, .LCPI9_0@toc@l ; PWR7-LE-NEXT: lxvd2x 1, 0, 4 -; PWR7-LE-NEXT: xxswapd 34, 0 +; PWR7-LE-NEXT: xxspltw 35, 0, 1 ; PWR7-LE-NEXT: lxvd2x 0, 0, 3 -; PWR7-LE-NEXT: xxswapd 35, 1 +; PWR7-LE-NEXT: xxswapd 34, 1 ; PWR7-LE-NEXT: xxswapd 36, 0 -; PWR7-LE-NEXT: vperm 2, 4, 3, 2 +; PWR7-LE-NEXT: vperm 2, 3, 4, 2 ; PWR7-LE-NEXT: blr ; ; PWR8-LE-LABEL: build_v4i32_load_1: @@ -463,17 +446,13 @@ entry: define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) { ; PWR7-BE-LABEL: build_v4i32_load_2: ; PWR7-BE: # %bb.0: # %entry -; PWR7-BE-NEXT: lwz 3, 0(3) -; PWR7-BE-NEXT: xxlxor 36, 36, 36 -; PWR7-BE-NEXT: sldi 3, 3, 32 -; PWR7-BE-NEXT: std 3, -16(1) -; PWR7-BE-NEXT: std 3, -8(1) +; PWR7-BE-NEXT: lfiwzx 0, 0, 3 ; PWR7-BE-NEXT: addis 3, 2, .LCPI10_0@toc@ha +; PWR7-BE-NEXT: xxlxor 36, 36, 36 ; PWR7-BE-NEXT: addi 3, 3, .LCPI10_0@toc@l -; PWR7-BE-NEXT: lxvw4x 34, 0, 3 -; PWR7-BE-NEXT: addi 3, 1, -16 ; PWR7-BE-NEXT: lxvw4x 35, 0, 3 -; PWR7-BE-NEXT: vperm 2, 4, 3, 2 +; PWR7-BE-NEXT: xxspltw 34, 0, 1 +; PWR7-BE-NEXT: vperm 2, 4, 2, 3 ; PWR7-BE-NEXT: blr ; ; PWR8-BE-LABEL: build_v4i32_load_2: @@ -491,20 +470,17 @@ define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) { ; PWR7-LE-LABEL: build_v4i32_load_2: ; PWR7-LE: # %bb.0: # %entry ; PWR7-LE-NEXT: li 4, 0 -; PWR7-LE-NEXT: lwz 3, 0(3) -; PWR7-LE-NEXT: stw 4, -32(1) +; PWR7-LE-NEXT: stw 4, -16(1) ; PWR7-LE-NEXT: addis 4, 2, .LCPI10_0@toc@ha -; PWR7-LE-NEXT: addi 4, 4, .LCPI10_0@toc@l -; PWR7-LE-NEXT: stw 3, -16(1) +; PWR7-LE-NEXT: lfiwzx 0, 0, 3 ; PWR7-LE-NEXT: addi 3, 1, -16 -; PWR7-LE-NEXT: lxvd2x 0, 0, 4 -; PWR7-LE-NEXT: addi 4, 1, -32 +; PWR7-LE-NEXT: addi 4, 4, .LCPI10_0@toc@l ; PWR7-LE-NEXT: lxvd2x 1, 0, 4 -; PWR7-LE-NEXT: xxswapd 34, 0 +; PWR7-LE-NEXT: xxspltw 35, 0, 1 ; PWR7-LE-NEXT: lxvd2x 0, 0, 3 -; PWR7-LE-NEXT: xxswapd 35, 1 +; PWR7-LE-NEXT: xxswapd 34, 1 ; PWR7-LE-NEXT: xxswapd 36, 0 -; PWR7-LE-NEXT: vperm 2, 4, 3, 2 +; PWR7-LE-NEXT: vperm 2, 3, 4, 2 ; PWR7-LE-NEXT: blr ; ; PWR8-LE-LABEL: build_v4i32_load_2: @@ -526,17 +502,13 @@ entry: define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) { ; PWR7-BE-LABEL: build_v4i32_load_3: ; PWR7-BE: # %bb.0: # %entry -; PWR7-BE-NEXT: lwz 3, 0(3) -; PWR7-BE-NEXT: xxlxor 36, 36, 36 -; PWR7-BE-NEXT: sldi 3, 3, 32 -; PWR7-BE-NEXT: std 3, -16(1) -; PWR7-BE-NEXT: std 3, -8(1) +; PWR7-BE-NEXT: lfiwzx 0, 0, 3 ; PWR7-BE-NEXT: addis 3, 2, .LCPI11_0@toc@ha +; PWR7-BE-NEXT: xxlxor 36, 36, 36 ; PWR7-BE-NEXT: addi 3, 3, .LCPI11_0@toc@l -; PWR7-BE-NEXT: lxvw4x 34, 0, 3 -; PWR7-BE-NEXT: addi 3, 1, -16 ; PWR7-BE-NEXT: lxvw4x 35, 0, 3 -; PWR7-BE-NEXT: vperm 2, 4, 3, 2 +; PWR7-BE-NEXT: xxspltw 34, 0, 1 +; PWR7-BE-NEXT: vperm 2, 4, 2, 3 ; PWR7-BE-NEXT: blr ; ; PWR8-BE-LABEL: build_v4i32_load_3: @@ -553,20 +525,17 @@ define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) { ; PWR7-LE-LABEL: build_v4i32_load_3: ; PWR7-LE: # %bb.0: # %entry ; PWR7-LE-NEXT: li 4, 0 -; PWR7-LE-NEXT: lwz 3, 0(3) -; PWR7-LE-NEXT: stw 4, -32(1) +; PWR7-LE-NEXT: stw 4, -16(1) ; PWR7-LE-NEXT: addis 4, 2, .LCPI11_0@toc@ha -; PWR7-LE-NEXT: addi 4, 4, .LCPI11_0@toc@l -; PWR7-LE-NEXT: stw 3, -16(1) +; PWR7-LE-NEXT: lfiwzx 0, 0, 3 ; PWR7-LE-NEXT: addi 3, 1, -16 -; PWR7-LE-NEXT: lxvd2x 0, 0, 4 -; PWR7-LE-NEXT: addi 4, 1, -32 +; PWR7-LE-NEXT: addi 4, 4, .LCPI11_0@toc@l ; PWR7-LE-NEXT: lxvd2x 1, 0, 4 -; PWR7-LE-NEXT: xxswapd 34, 0 +; PWR7-LE-NEXT: xxspltw 35, 0, 1 ; PWR7-LE-NEXT: lxvd2x 0, 0, 3 -; PWR7-LE-NEXT: xxswapd 35, 1 +; PWR7-LE-NEXT: xxswapd 34, 1 ; PWR7-LE-NEXT: xxswapd 36, 0 -; PWR7-LE-NEXT: vperm 2, 4, 3, 2 +; PWR7-LE-NEXT: vperm 2, 3, 4, 2 ; PWR7-LE-NEXT: blr ; ; PWR8-LE-LABEL: build_v4i32_load_3: diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll index c26f98c5b0495d..e1159e56e23ebe 100644 --- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll +++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll @@ -536,15 +536,12 @@ define dso_local <8 x i16> @testmrglb3(ptr nocapture readonly %a) local_unnamed_ ; ; P8-AIX-32-LABEL: testmrglb3: ; P8-AIX-32: # %bb.0: # %entry -; P8-AIX-32-NEXT: lwz r4, 4(r3) +; P8-AIX-32-NEXT: li r4, 4 +; P8-AIX-32-NEXT: lfiwzx f1, 0, r3 ; P8-AIX-32-NEXT: xxlxor v3, v3, v3 -; P8-AIX-32-NEXT: stw r4, -16(r1) -; P8-AIX-32-NEXT: lwz r3, 0(r3) -; P8-AIX-32-NEXT: stw r3, -32(r1) -; P8-AIX-32-NEXT: addi r3, r1, -16 -; P8-AIX-32-NEXT: lxvw4x vs0, 0, r3 -; P8-AIX-32-NEXT: addi r3, r1, -32 -; P8-AIX-32-NEXT: lxvw4x vs1, 0, r3 +; P8-AIX-32-NEXT: lfiwzx f0, r3, r4 +; P8-AIX-32-NEXT: xxspltw vs1, vs1, 1 +; P8-AIX-32-NEXT: xxspltw vs0, vs0, 1 ; P8-AIX-32-NEXT: xxmrghw v2, vs1, vs0 ; P8-AIX-32-NEXT: vmrghb v2, v3, v2 ; P8-AIX-32-NEXT: blr @@ -852,17 +849,15 @@ define dso_local <16 x i8> @no_RAUW_in_combine_during_legalize(ptr nocapture rea ; ; P8-AIX-32-LABEL: no_RAUW_in_combine_during_legalize: ; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: li r5, 0 ; P8-AIX-32-NEXT: slwi r4, r4, 2 ; P8-AIX-32-NEXT: xxlxor v3, v3, v3 -; P8-AIX-32-NEXT: lwzx r3, r3, r4 -; P8-AIX-32-NEXT: li r4, 0 -; P8-AIX-32-NEXT: stw r4, -32(r1) -; P8-AIX-32-NEXT: stw r3, -16(r1) -; P8-AIX-32-NEXT: addi r3, r1, -32 -; P8-AIX-32-NEXT: lxvw4x vs0, 0, r3 +; P8-AIX-32-NEXT: stw r5, -16(r1) +; P8-AIX-32-NEXT: lfiwzx f0, r3, r4 ; P8-AIX-32-NEXT: addi r3, r1, -16 ; P8-AIX-32-NEXT: lxvw4x vs1, 0, r3 -; P8-AIX-32-NEXT: xxmrghw v2, vs0, vs1 +; P8-AIX-32-NEXT: xxspltw vs0, vs0, 1 +; P8-AIX-32-NEXT: xxmrghw v2, vs1, vs0 ; P8-AIX-32-NEXT: vmrghb v2, v2, v3 ; P8-AIX-32-NEXT: blr entry: @@ -1026,14 +1021,11 @@ define dso_local <2 x i64> @testSplat8(ptr nocapture readonly %ptr) local_unname ; ; P8-AIX-32-LABEL: testSplat8: ; P8-AIX-32: # %bb.0: # %entry -; P8-AIX-32-NEXT: lwz r4, 4(r3) -; P8-AIX-32-NEXT: stw r4, -16(r1) -; P8-AIX-32-NEXT: lwz r3, 0(r3) -; P8-AIX-32-NEXT: stw r3, -32(r1) -; P8-AIX-32-NEXT: addi r3, r1, -16 -; P8-AIX-32-NEXT: lxvw4x vs0, 0, r3 -; P8-AIX-32-NEXT: addi r3, r1, -32 -; P8-AIX-32-NEXT: lxvw4x vs1, 0, r3 +; P8-AIX-32-NEXT: li r4, 4 +; P8-AIX-32-NEXT: lfiwzx f1, 0, r3 +; P8-AIX-32-NEXT: lfiwzx f0, r3, r4 +; P8-AIX-32-NEXT: xxspltw vs1, vs1, 1 +; P8-AIX-32-NEXT: xxspltw vs0, vs0, 1 ; P8-AIX-32-NEXT: xxmrghw vs0, vs1, vs0 ; P8-AIX-32-NEXT: xxmrghd v2, vs0, vs0 ; P8-AIX-32-NEXT: blr @@ -1081,17 +1073,14 @@ define <2 x i64> @testSplati64_0(ptr nocapture readonly %ptr) #0 { ; ; P8-AIX-32-LABEL: testSplati64_0: ; P8-AIX-32: # %bb.0: # %entry -; P8-AIX-32-NEXT: lwz r4, 0(r3) -; P8-AIX-32-NEXT: lwz r3, 4(r3) -; P8-AIX-32-NEXT: stw r3, -16(r1) +; P8-AIX-32-NEXT: li r4, 4 +; P8-AIX-32-NEXT: lfiwzx f0, r3, r4 +; P8-AIX-32-NEXT: xxspltw v2, vs0, 1 +; P8-AIX-32-NEXT: lfiwzx f0, 0, r3 ; P8-AIX-32-NEXT: lwz r3, L..C3(r2) # %const.0 -; P8-AIX-32-NEXT: stw r4, -32(r1) -; P8-AIX-32-NEXT: lxvw4x v2, 0, r3 -; P8-AIX-32-NEXT: addi r3, r1, -16 -; P8-AIX-32-NEXT: lxvw4x v3, 0, r3 -; P8-AIX-32-NEXT: addi r3, r1, -32 ; P8-AIX-32-NEXT: lxvw4x v4, 0, r3 -; P8-AIX-32-NEXT: vperm v2, v4, v3, v2 +; P8-AIX-32-NEXT: xxspltw v3, vs0, 1 +; P8-AIX-32-NEXT: vperm v2, v3, v2, v4 ; P8-AIX-32-NEXT: blr entry: %0 = load <1 x i64>, ptr %ptr, align 8 diff --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll index bc68ad2a67bf5d..c9ee3a51f41724 100644 --- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll +++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll @@ -208,47 +208,41 @@ define dso_local void @test4(ptr nocapture %c, ptr nocapture readonly %a) local_ ; ; P9-AIX32-LABEL: test4: ; P9-AIX32: # %bb.0: # %entry -; P9-AIX32-NEXT: lwz r5, 24(r4) -; P9-AIX32-NEXT: lwz r4, 28(r4) -; P9-AIX32-NEXT: stw r4, -16(r1) +; P9-AIX32-NEXT: li r5, 28 +; P9-AIX32-NEXT: lxvwsx vs0, r4, r5 +; P9-AIX32-NEXT: li r5, 24 +; P9-AIX32-NEXT: lxvwsx vs1, r4, r5 ; P9-AIX32-NEXT: lwz r4, L..C0(r2) # %const.0 -; P9-AIX32-NEXT: stw r5, -32(r1) -; P9-AIX32-NEXT: lxv vs1, -16(r1) -; P9-AIX32-NEXT: lxv vs2, -32(r1) -; P9-AIX32-NEXT: lxv vs0, 0(r4) -; P9-AIX32-NEXT: xxperm vs1, vs2, vs0 -; P9-AIX32-NEXT: stxv vs1, 0(r3) +; P9-AIX32-NEXT: lxv vs2, 0(r4) +; P9-AIX32-NEXT: xxperm vs0, vs1, vs2 +; P9-AIX32-NEXT: stxv vs0, 0(r3) ; P9-AIX32-NEXT: blr ; ; P8-AIX32-LABEL: test4: ; P8-AIX32: # %bb.0: # %entry -; P8-AIX32-NEXT: lwz r5, 24(r4) -; P8-AIX32-NEXT: lwz r4, 28(r4) -; P8-AIX32-NEXT: stw r4, -16(r1) +; P8-AIX32-NEXT: li r5, 28 +; P8-AIX32-NEXT: lfiwzx f0, r4, r5 +; P8-AIX32-NEXT: li r5, 24 +; P8-AIX32-NEXT: xxspltw v2, vs0, 1 +; P8-AIX32-NEXT: lfiwzx f0, r4, r5 ; P8-AIX32-NEXT: lwz r4, L..C0(r2) # %const.0 -; P8-AIX32-NEXT: stw r5, -32(r1) -; P8-AIX32-NEXT: lxvw4x v2, 0, r4 -; P8-AIX32-NEXT: addi r4, r1, -16 -; P8-AIX32-NEXT: lxvw4x v3, 0, r4 -; P8-AIX32-NEXT: addi r4, r1, -32 ; P8-AIX32-NEXT: lxvw4x v4, 0, r4 -; P8-AIX32-NEXT: vperm v2, v4, v3, v2 +; P8-AIX32-NEXT: xxspltw v3, vs0, 1 +; P8-AIX32-NEXT: vperm v2, v3, v2, v4 ; P8-AIX32-NEXT: stxvw4x v2, 0, r3 ; P8-AIX32-NEXT: blr ; ; P7-AIX32-LABEL: test4: ; P7-AIX32: # %bb.0: # %entry -; P7-AIX32-NEXT: lwz r5, 24(r4) -; P7-AIX32-NEXT: lwz r4, 28(r4) -; P7-AIX32-NEXT: stw r4, -16(r1) +; P7-AIX32-NEXT: li r5, 28 +; P7-AIX32-NEXT: lfiwzx f0, r4, r5 +; P7-AIX32-NEXT: li r5, 24 +; P7-AIX32-NEXT: xxspltw v2, vs0, 1 +; P7-AIX32-NEXT: lfiwzx f0, r4, r5 ; P7-AIX32-NEXT: lwz r4, L..C0(r2) # %const.0 -; P7-AIX32-NEXT: stw r5, -32(r1) -; P7-AIX32-NEXT: lxvw4x v2, 0, r4 -; P7-AIX32-NEXT: addi r4, r1, -16 -; P7-AIX32-NEXT: lxvw4x v3, 0, r4 -; P7-AIX32-NEXT: addi r4, r1, -32 ; P7-AIX32-NEXT: lxvw4x v4, 0, r4 -; P7-AIX32-NEXT: vperm v2, v4, v3, v2 +; P7-AIX32-NEXT: xxspltw v3, vs0, 1 +; P7-AIX32-NEXT: vperm v2, v3, v2, v4 ; P7-AIX32-NEXT: stxvw4x v2, 0, r3 ; P7-AIX32-NEXT: blr entry: @@ -362,47 +356,41 @@ define void @test6(ptr %a, ptr %in) { ; ; P9-AIX32-LABEL: test6: ; P9-AIX32: # %bb.0: # %entry -; P9-AIX32-NEXT: lwz r4, 0(r4) ; P9-AIX32-NEXT: li r5, 0 -; P9-AIX32-NEXT: stw r5, -32(r1) -; P9-AIX32-NEXT: lxv vs1, -32(r1) -; P9-AIX32-NEXT: stw r4, -16(r1) -; P9-AIX32-NEXT: lwz r4, L..C2(r2) # %const.0 +; P9-AIX32-NEXT: stw r5, -16(r1) +; P9-AIX32-NEXT: lwz r5, L..C2(r2) # %const.0 +; P9-AIX32-NEXT: lxvwsx vs1, 0, r4 ; P9-AIX32-NEXT: lxv vs2, -16(r1) -; P9-AIX32-NEXT: lxv vs0, 0(r4) -; P9-AIX32-NEXT: xxperm vs2, vs1, vs0 -; P9-AIX32-NEXT: stxv vs2, 0(r3) +; P9-AIX32-NEXT: lxv vs0, 0(r5) +; P9-AIX32-NEXT: xxperm vs1, vs2, vs0 +; P9-AIX32-NEXT: stxv vs1, 0(r3) ; P9-AIX32-NEXT: blr ; ; P8-AIX32-LABEL: test6: ; P8-AIX32: # %bb.0: # %entry -; P8-AIX32-NEXT: lwz r4, 0(r4) ; P8-AIX32-NEXT: li r5, 0 -; P8-AIX32-NEXT: stw r5, -32(r1) -; P8-AIX32-NEXT: stw r4, -16(r1) +; P8-AIX32-NEXT: stw r5, -16(r1) +; P8-AIX32-NEXT: lfiwzx f0, 0, r4 ; P8-AIX32-NEXT: lwz r4, L..C2(r2) # %const.0 -; P8-AIX32-NEXT: lxvw4x v2, 0, r4 -; P8-AIX32-NEXT: addi r4, r1, -32 ; P8-AIX32-NEXT: lxvw4x v3, 0, r4 ; P8-AIX32-NEXT: addi r4, r1, -16 ; P8-AIX32-NEXT: lxvw4x v4, 0, r4 -; P8-AIX32-NEXT: vperm v2, v3, v4, v2 +; P8-AIX32-NEXT: xxspltw v2, vs0, 1 +; P8-AIX32-NEXT: vperm v2, v4, v2, v3 ; P8-AIX32-NEXT: stxvw4x v2, 0, r3 ; P8-AIX32-NEXT: blr ; ; P7-AIX32-LABEL: test6: ; P7-AIX32: # %bb.0: # %entry -; P7-AIX32-NEXT: lwz r4, 0(r4) ; P7-AIX32-NEXT: li r5, 0 -; P7-AIX32-NEXT: stw r5, -32(r1) -; P7-AIX32-NEXT: stw r4, -16(r1) +; P7-AIX32-NEXT: stw r5, -16(r1) +; P7-AIX32-NEXT: lfiwzx f0, 0, r4 ; P7-AIX32-NEXT: lwz r4, L..C2(r2) # %const.0 -; P7-AIX32-NEXT: lxvw4x v2, 0, r4 -; P7-AIX32-NEXT: addi r4, r1, -32 ; P7-AIX32-NEXT: lxvw4x v3, 0, r4 ; P7-AIX32-NEXT: addi r4, r1, -16 ; P7-AIX32-NEXT: lxvw4x v4, 0, r4 -; P7-AIX32-NEXT: vperm v2, v3, v4, v2 +; P7-AIX32-NEXT: xxspltw v2, vs0, 1 +; P7-AIX32-NEXT: vperm v2, v4, v2, v3 ; P7-AIX32-NEXT: stxvw4x v2, 0, r3 ; P7-AIX32-NEXT: blr entry: @@ -810,40 +798,31 @@ define <16 x i8> @unadjusted_lxvdsx(ptr %s, ptr %t) { ; ; P9-AIX32-LABEL: unadjusted_lxvdsx: ; P9-AIX32: # %bb.0: # %entry -; P9-AIX32-NEXT: lwz r4, 4(r3) -; P9-AIX32-NEXT: stw r4, -16(r1) -; P9-AIX32-NEXT: lwz r3, 0(r3) -; P9-AIX32-NEXT: lxv vs0, -16(r1) -; P9-AIX32-NEXT: stw r3, -32(r1) -; P9-AIX32-NEXT: lxv vs1, -32(r1) +; P9-AIX32-NEXT: li r4, 4 +; P9-AIX32-NEXT: lxvwsx vs1, 0, r3 +; P9-AIX32-NEXT: lxvwsx vs0, r3, r4 ; P9-AIX32-NEXT: xxmrghw vs0, vs1, vs0 ; P9-AIX32-NEXT: xxmrghd v2, vs0, vs0 ; P9-AIX32-NEXT: blr ; ; P8-AIX32-LABEL: unadjusted_lxvdsx: ; P8-AIX32: # %bb.0: # %entry -; P8-AIX32-NEXT: lwz r4, 4(r3) -; P8-AIX32-NEXT: stw r4, -16(r1) -; P8-AIX32-NEXT: lwz r3, 0(r3) -; P8-AIX32-NEXT: stw r3, -32(r1) -; P8-AIX32-NEXT: addi r3, r1, -16 -; P8-AIX32-NEXT: lxvw4x vs0, 0, r3 -; P8-AIX32-NEXT: addi r3, r1, -32 -; P8-AIX32-NEXT: lxvw4x vs1, 0, r3 +; P8-AIX32-NEXT: li r4, 4 +; P8-AIX32-NEXT: lfiwzx f1, 0, r3 +; P8-AIX32-NEXT: lfiwzx f0, r3, r4 +; P8-AIX32-NEXT: xxspltw vs1, vs1, 1 +; P8-AIX32-NEXT: xxspltw vs0, vs0, 1 ; P8-AIX32-NEXT: xxmrghw vs0, vs1, vs0 ; P8-AIX32-NEXT: xxmrghd v2, vs0, vs0 ; P8-AIX32-NEXT: blr ; ; P7-AIX32-LABEL: unadjusted_lxvdsx: ; P7-AIX32: # %bb.0: # %entry -; P7-AIX32-NEXT: lwz r4, 4(r3) -; P7-AIX32-NEXT: stw r4, -16(r1) -; P7-AIX32-NEXT: lwz r3, 0(r3) -; P7-AIX32-NEXT: stw r3, -32(r1) -; P7-AIX32-NEXT: addi r3, r1, -16 -; P7-AIX32-NEXT: lxvw4x vs0, 0, r3 -; P7-AIX32-NEXT: addi r3, r1, -32 -; P7-AIX32-NEXT: lxvw4x vs1, 0, r3 +; P7-AIX32-NEXT: li r4, 4 +; P7-AIX32-NEXT: lfiwzx f1, 0, r3 +; P7-AIX32-NEXT: lfiwzx f0, r3, r4 +; P7-AIX32-NEXT: xxspltw vs1, vs1, 1 +; P7-AIX32-NEXT: xxspltw vs0, vs0, 1 ; P7-AIX32-NEXT: xxmrghw vs0, vs1, vs0 ; P7-AIX32-NEXT: xxmrghd v2, vs0, vs0 ; P7-AIX32-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll index 4da36c9af5c101..4435484ae0b947 100644 --- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll +++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll @@ -85,23 +85,20 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) { ; ; P9BE-AIX32-LABEL: test64: ; P9BE-AIX32: # %bb.0: # %entry -; P9BE-AIX32-NEXT: lwzux 4, 3, 4 +; P9BE-AIX32-NEXT: add 5, 3, 4 +; P9BE-AIX32-NEXT: lxvwsx 0, 3, 4 +; P9BE-AIX32-NEXT: li 3, 4 ; P9BE-AIX32-NEXT: xxlxor 2, 2, 2 ; P9BE-AIX32-NEXT: vspltisw 4, 8 -; P9BE-AIX32-NEXT: stw 4, -48(1) +; P9BE-AIX32-NEXT: lxvwsx 1, 5, 3 +; P9BE-AIX32-NEXT: lwz 3, L..C0(2) # %const.0 ; P9BE-AIX32-NEXT: vadduwm 4, 4, 4 -; P9BE-AIX32-NEXT: lwz 4, 4(3) -; P9BE-AIX32-NEXT: lxv 0, -48(1) -; P9BE-AIX32-NEXT: stw 4, -32(1) -; P9BE-AIX32-NEXT: lwz 4, L..C0(2) # %const.0 -; P9BE-AIX32-NEXT: lxv 1, -32(1) -; P9BE-AIX32-NEXT: lwz 3, 8(3) -; P9BE-AIX32-NEXT: stw 3, -16(1) -; P9BE-AIX32-NEXT: lwz 3, L..C1(2) # %const.1 ; P9BE-AIX32-NEXT: xxmrghw 2, 0, 1 -; P9BE-AIX32-NEXT: lxv 0, 0(4) +; P9BE-AIX32-NEXT: lxv 0, 0(3) +; P9BE-AIX32-NEXT: li 3, 8 ; P9BE-AIX32-NEXT: xxperm 2, 2, 0 -; P9BE-AIX32-NEXT: lxv 0, -16(1) +; P9BE-AIX32-NEXT: lxvwsx 0, 5, 3 +; P9BE-AIX32-NEXT: lwz 3, L..C1(2) # %const.1 ; P9BE-AIX32-NEXT: xxmrghw 3, 1, 0 ; P9BE-AIX32-NEXT: lxv 0, 0(3) ; P9BE-AIX32-NEXT: xxperm 3, 3, 0 diff --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll index 25e1baa28f7ef3..c8e0d0d25f4f7e 100644 --- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll +++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll @@ -73,13 +73,11 @@ define <4 x i32> @s2v_test1(ptr nocapture readonly %int32, <4 x i32> %vec) { ; ; P8-AIX-32-LABEL: s2v_test1: ; P8-AIX-32: # %bb.0: # %entry -; P8-AIX-32-NEXT: lwz r3, 0(r3) -; P8-AIX-32-NEXT: stw r3, -16(r1) +; P8-AIX-32-NEXT: lfiwzx f0, 0, r3 ; P8-AIX-32-NEXT: lwz r3, L..C0(r2) # %const.0 -; P8-AIX-32-NEXT: lxvw4x v3, 0, r3 -; P8-AIX-32-NEXT: addi r3, r1, -16 ; P8-AIX-32-NEXT: lxvw4x v4, 0, r3 -; P8-AIX-32-NEXT: vperm v2, v4, v2, v3 +; P8-AIX-32-NEXT: xxspltw v3, vs0, 1 +; P8-AIX-32-NEXT: vperm v2, v3, v2, v4 ; P8-AIX-32-NEXT: blr entry: %0 = load i32, ptr %int32, align 4 @@ -142,13 +140,12 @@ define <4 x i32> @s2v_test2(ptr nocapture readonly %int32, <4 x i32> %vec) { ; ; P8-AIX-32-LABEL: s2v_test2: ; P8-AIX-32: # %bb.0: # %entry -; P8-AIX-32-NEXT: lwz r3, 4(r3) -; P8-AIX-32-NEXT: stw r3, -16(r1) +; P8-AIX-32-NEXT: addi r3, r3, 4 +; P8-AIX-32-NEXT: lfiwzx f0, 0, r3 ; P8-AIX-32-NEXT: lwz r3, L..C1(r2) # %const.0 -; P8-AIX-32-NEXT: lxvw4x v3, 0, r3 -; P8-AIX-32-NEXT: addi r3, r1, -16 ; P8-AIX-32-NEXT: lxvw4x v4, 0, r3 -; P8-AIX-32-NEXT: vperm v2, v4, v2, v3 +; P8-AIX-32-NEXT: xxspltw v3, vs0, 1 +; P8-AIX-32-NEXT: vperm v2, v3, v2, v4 ; P8-AIX-32-NEXT: blr entry: %arrayidx = getelementptr inbounds i32, ptr %int32, i64 1 @@ -224,13 +221,11 @@ define <4 x i32> @s2v_test3(ptr nocapture readonly %int32, <4 x i32> %vec, i32 s ; P8-AIX-32-LABEL: s2v_test3: ; P8-AIX-32: # %bb.0: # %entry ; P8-AIX-32-NEXT: slwi r4, r4, 2 -; P8-AIX-32-NEXT: lwzx r3, r3, r4 -; P8-AIX-32-NEXT: stw r3, -16(r1) +; P8-AIX-32-NEXT: lfiwzx f0, r3, r4 ; P8-AIX-32-NEXT: lwz r3, L..C2(r2) # %const.0 -; P8-AIX-32-NEXT: lxvw4x v3, 0, r3 -; P8-AIX-32-NEXT: addi r3, r1, -16 ; P8-AIX-32-NEXT: lxvw4x v4, 0, r3 -; P8-AIX-32-NEXT: vperm v2, v4, v2, v3 +; P8-AIX-32-NEXT: xxspltw v3, vs0, 1 +; P8-AIX-32-NEXT: vperm v2, v3, v2, v4 ; P8-AIX-32-NEXT: blr entry: %idxprom = sext i32 %Idx to i64 @@ -295,13 +290,12 @@ define <4 x i32> @s2v_test4(ptr nocapture readonly %int32, <4 x i32> %vec) { ; ; P8-AIX-32-LABEL: s2v_test4: ; P8-AIX-32: # %bb.0: # %entry -; P8-AIX-32-NEXT: lwz r3, 4(r3) -; P8-AIX-32-NEXT: stw r3, -16(r1) +; P8-AIX-32-NEXT: addi r3, r3, 4 +; P8-AIX-32-NEXT: lfiwzx f0, 0, r3 ; P8-AIX-32-NEXT: lwz r3, L..C3(r2) # %const.0 -; P8-AIX-32-NEXT: lxvw4x v3, 0, r3 -; P8-AIX-32-NEXT: addi r3, r1, -16 ; P8-AIX-32-NEXT: lxvw4x v4, 0, r3 -; P8-AIX-32-NEXT: vperm v2, v4, v2, v3 +; P8-AIX-32-NEXT: xxspltw v3, vs0, 1 +; P8-AIX-32-NEXT: vperm v2, v3, v2, v4 ; P8-AIX-32-NEXT: blr entry: %arrayidx = getelementptr inbounds i32, ptr %int32, i64 1 @@ -362,13 +356,11 @@ define <4 x i32> @s2v_test5(<4 x i32> %vec, ptr nocapture readonly %ptr1) { ; ; P8-AIX-32-LABEL: s2v_test5: ; P8-AIX-32: # %bb.0: # %entry -; P8-AIX-32-NEXT: lwz r3, 0(r3) -; P8-AIX-32-NEXT: stw r3, -16(r1) +; P8-AIX-32-NEXT: lfiwzx f0, 0, r3 ; P8-AIX-32-NEXT: lwz r3, L..C4(r2) # %const.0 -; P8-AIX-32-NEXT: lxvw4x v3, 0, r3 -; P8-AIX-32-NEXT: addi r3, r1, -16 ; P8-AIX-32-NEXT: lxvw4x v4, 0, r3 -; P8-AIX-32-NEXT: vperm v2, v4, v2, v3 +; P8-AIX-32-NEXT: xxspltw v3, vs0, 1 +; P8-AIX-32-NEXT: vperm v2, v3, v2, v4 ; P8-AIX-32-NEXT: blr entry: %0 = load i32, ptr %ptr1, align 4 diff --git a/llvm/test/CodeGen/PowerPC/test-vector-insert.ll b/llvm/test/CodeGen/PowerPC/test-vector-insert.ll index 73b4ad8a507b82..47fa6f2a5b4d29 100644 --- a/llvm/test/CodeGen/PowerPC/test-vector-insert.ll +++ b/llvm/test/CodeGen/PowerPC/test-vector-insert.ll @@ -25,16 +25,13 @@ define dso_local <4 x i32> @test(<4 x i32> %a, double %b) { ; CHECK-LE-P7: # %bb.0: # %entry ; CHECK-LE-P7-NEXT: xscvdpsxws f0, f1 ; CHECK-LE-P7-NEXT: addi r3, r1, -4 +; CHECK-LE-P7-NEXT: addis r4, r2, .LCPI0_0@toc@ha +; CHECK-LE-P7-NEXT: addi r4, r4, .LCPI0_0@toc@l ; CHECK-LE-P7-NEXT: stfiwx f0, 0, r3 -; CHECK-LE-P7-NEXT: lwz r3, -4(r1) -; CHECK-LE-P7-NEXT: stw r3, -32(r1) -; CHECK-LE-P7-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-LE-P7-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r3 -; CHECK-LE-P7-NEXT: addi r3, r1, -32 +; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r4 ; CHECK-LE-P7-NEXT: xxswapd v3, vs0 -; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r3 -; CHECK-LE-P7-NEXT: xxswapd v4, vs0 +; CHECK-LE-P7-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-P7-NEXT: xxspltw v4, vs0, 1 ; CHECK-LE-P7-NEXT: vperm v2, v4, v2, v3 ; CHECK-LE-P7-NEXT: blr ; @@ -59,16 +56,12 @@ define dso_local <4 x i32> @test(<4 x i32> %a, double %b) { ; CHECK-BE-P7-NEXT: xscvdpsxws f0, f1 ; CHECK-BE-P7-NEXT: addi r3, r1, -4 ; CHECK-BE-P7-NEXT: stfiwx f0, 0, r3 -; CHECK-BE-P7-NEXT: lwz r3, -4(r1) -; CHECK-BE-P7-NEXT: sldi r3, r3, 32 -; CHECK-BE-P7-NEXT: std r3, -32(r1) -; CHECK-BE-P7-NEXT: std r3, -24(r1) +; CHECK-BE-P7-NEXT: lfiwzx f0, 0, r3 ; CHECK-BE-P7-NEXT: addis r3, r2, .LCPI0_0@toc@ha ; CHECK-BE-P7-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-BE-P7-NEXT: lxvw4x v3, 0, r3 -; CHECK-BE-P7-NEXT: addi r3, r1, -32 ; CHECK-BE-P7-NEXT: lxvw4x v4, 0, r3 -; CHECK-BE-P7-NEXT: vperm v2, v2, v4, v3 +; CHECK-BE-P7-NEXT: xxspltw v3, vs0, 1 +; CHECK-BE-P7-NEXT: vperm v2, v2, v3, v4 ; CHECK-BE-P7-NEXT: blr ; ; CHECK-BE-P8-LABEL: test: @@ -96,16 +89,13 @@ define dso_local <4 x i32> @test2(<4 x i32> %a, float %b) { ; CHECK-LE-P7: # %bb.0: # %entry ; CHECK-LE-P7-NEXT: xscvdpsxws f0, f1 ; CHECK-LE-P7-NEXT: addi r3, r1, -4 +; CHECK-LE-P7-NEXT: addis r4, r2, .LCPI1_0@toc@ha +; CHECK-LE-P7-NEXT: addi r4, r4, .LCPI1_0@toc@l ; CHECK-LE-P7-NEXT: stfiwx f0, 0, r3 -; CHECK-LE-P7-NEXT: lwz r3, -4(r1) -; CHECK-LE-P7-NEXT: stw r3, -32(r1) -; CHECK-LE-P7-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-LE-P7-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r3 -; CHECK-LE-P7-NEXT: addi r3, r1, -32 +; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r4 ; CHECK-LE-P7-NEXT: xxswapd v3, vs0 -; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r3 -; CHECK-LE-P7-NEXT: xxswapd v4, vs0 +; CHECK-LE-P7-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-P7-NEXT: xxspltw v4, vs0, 1 ; CHECK-LE-P7-NEXT: vperm v2, v4, v2, v3 ; CHECK-LE-P7-NEXT: blr ; @@ -130,16 +120,12 @@ define dso_local <4 x i32> @test2(<4 x i32> %a, float %b) { ; CHECK-BE-P7-NEXT: xscvdpsxws f0, f1 ; CHECK-BE-P7-NEXT: addi r3, r1, -4 ; CHECK-BE-P7-NEXT: stfiwx f0, 0, r3 -; CHECK-BE-P7-NEXT: lwz r3, -4(r1) -; CHECK-BE-P7-NEXT: sldi r3, r3, 32 -; CHECK-BE-P7-NEXT: std r3, -32(r1) -; CHECK-BE-P7-NEXT: std r3, -24(r1) +; CHECK-BE-P7-NEXT: lfiwzx f0, 0, r3 ; CHECK-BE-P7-NEXT: addis r3, r2, .LCPI1_0@toc@ha ; CHECK-BE-P7-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-BE-P7-NEXT: lxvw4x v3, 0, r3 -; CHECK-BE-P7-NEXT: addi r3, r1, -32 ; CHECK-BE-P7-NEXT: lxvw4x v4, 0, r3 -; CHECK-BE-P7-NEXT: vperm v2, v2, v4, v3 +; CHECK-BE-P7-NEXT: xxspltw v3, vs0, 1 +; CHECK-BE-P7-NEXT: vperm v2, v2, v3, v4 ; CHECK-BE-P7-NEXT: blr ; ; CHECK-BE-P8-LABEL: test2: @@ -167,16 +153,13 @@ define dso_local <4 x i32> @test3(<4 x i32> %a, double %b) { ; CHECK-LE-P7: # %bb.0: # %entry ; CHECK-LE-P7-NEXT: xscvdpuxws f0, f1 ; CHECK-LE-P7-NEXT: addi r3, r1, -4 +; CHECK-LE-P7-NEXT: addis r4, r2, .LCPI2_0@toc@ha +; CHECK-LE-P7-NEXT: addi r4, r4, .LCPI2_0@toc@l ; CHECK-LE-P7-NEXT: stfiwx f0, 0, r3 -; CHECK-LE-P7-NEXT: lwz r3, -4(r1) -; CHECK-LE-P7-NEXT: stw r3, -32(r1) -; CHECK-LE-P7-NEXT: addis r3, r2, .LCPI2_0@toc@ha -; CHECK-LE-P7-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r3 -; CHECK-LE-P7-NEXT: addi r3, r1, -32 +; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r4 ; CHECK-LE-P7-NEXT: xxswapd v3, vs0 -; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r3 -; CHECK-LE-P7-NEXT: xxswapd v4, vs0 +; CHECK-LE-P7-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-P7-NEXT: xxspltw v4, vs0, 1 ; CHECK-LE-P7-NEXT: vperm v2, v4, v2, v3 ; CHECK-LE-P7-NEXT: blr ; @@ -201,16 +184,12 @@ define dso_local <4 x i32> @test3(<4 x i32> %a, double %b) { ; CHECK-BE-P7-NEXT: xscvdpuxws f0, f1 ; CHECK-BE-P7-NEXT: addi r3, r1, -4 ; CHECK-BE-P7-NEXT: stfiwx f0, 0, r3 -; CHECK-BE-P7-NEXT: lwz r3, -4(r1) -; CHECK-BE-P7-NEXT: sldi r3, r3, 32 -; CHECK-BE-P7-NEXT: std r3, -32(r1) -; CHECK-BE-P7-NEXT: std r3, -24(r1) +; CHECK-BE-P7-NEXT: lfiwzx f0, 0, r3 ; CHECK-BE-P7-NEXT: addis r3, r2, .LCPI2_0@toc@ha ; CHECK-BE-P7-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-BE-P7-NEXT: lxvw4x v3, 0, r3 -; CHECK-BE-P7-NEXT: addi r3, r1, -32 ; CHECK-BE-P7-NEXT: lxvw4x v4, 0, r3 -; CHECK-BE-P7-NEXT: vperm v2, v2, v4, v3 +; CHECK-BE-P7-NEXT: xxspltw v3, vs0, 1 +; CHECK-BE-P7-NEXT: vperm v2, v2, v3, v4 ; CHECK-BE-P7-NEXT: blr ; ; CHECK-BE-P8-LABEL: test3: @@ -238,16 +217,13 @@ define dso_local <4 x i32> @test4(<4 x i32> %a, float %b) { ; CHECK-LE-P7: # %bb.0: # %entry ; CHECK-LE-P7-NEXT: xscvdpuxws f0, f1 ; CHECK-LE-P7-NEXT: addi r3, r1, -4 +; CHECK-LE-P7-NEXT: addis r4, r2, .LCPI3_0@toc@ha +; CHECK-LE-P7-NEXT: addi r4, r4, .LCPI3_0@toc@l ; CHECK-LE-P7-NEXT: stfiwx f0, 0, r3 -; CHECK-LE-P7-NEXT: lwz r3, -4(r1) -; CHECK-LE-P7-NEXT: stw r3, -32(r1) -; CHECK-LE-P7-NEXT: addis r3, r2, .LCPI3_0@toc@ha -; CHECK-LE-P7-NEXT: addi r3, r3, .LCPI3_0@toc@l -; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r3 -; CHECK-LE-P7-NEXT: addi r3, r1, -32 +; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r4 ; CHECK-LE-P7-NEXT: xxswapd v3, vs0 -; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r3 -; CHECK-LE-P7-NEXT: xxswapd v4, vs0 +; CHECK-LE-P7-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-P7-NEXT: xxspltw v4, vs0, 1 ; CHECK-LE-P7-NEXT: vperm v2, v4, v2, v3 ; CHECK-LE-P7-NEXT: blr ; @@ -272,16 +248,12 @@ define dso_local <4 x i32> @test4(<4 x i32> %a, float %b) { ; CHECK-BE-P7-NEXT: xscvdpuxws f0, f1 ; CHECK-BE-P7-NEXT: addi r3, r1, -4 ; CHECK-BE-P7-NEXT: stfiwx f0, 0, r3 -; CHECK-BE-P7-NEXT: lwz r3, -4(r1) -; CHECK-BE-P7-NEXT: sldi r3, r3, 32 -; CHECK-BE-P7-NEXT: std r3, -32(r1) -; CHECK-BE-P7-NEXT: std r3, -24(r1) +; CHECK-BE-P7-NEXT: lfiwzx f0, 0, r3 ; CHECK-BE-P7-NEXT: addis r3, r2, .LCPI3_0@toc@ha ; CHECK-BE-P7-NEXT: addi r3, r3, .LCPI3_0@toc@l -; CHECK-BE-P7-NEXT: lxvw4x v3, 0, r3 -; CHECK-BE-P7-NEXT: addi r3, r1, -32 ; CHECK-BE-P7-NEXT: lxvw4x v4, 0, r3 -; CHECK-BE-P7-NEXT: vperm v2, v2, v4, v3 +; CHECK-BE-P7-NEXT: xxspltw v3, vs0, 1 +; CHECK-BE-P7-NEXT: vperm v2, v2, v3, v4 ; CHECK-BE-P7-NEXT: blr ; ; CHECK-BE-P8-LABEL: test4: diff --git a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll index 11cc8abd2c7fa3..31d0960e19f4ef 100644 --- a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll +++ b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll @@ -2045,31 +2045,25 @@ define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapt ; ; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64: ; CHECK-AIX-32-P8: # %bb.0: # %entry -; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3 -; CHECK-AIX-32-P8-NEXT: lwz r3, 4(r4) -; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) -; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4) -; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 -; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 -; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: li r5, 4 +; CHECK-AIX-32-P8-NEXT: lfiwzx f1, 0, r4 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r3 ; CHECK-AIX-32-P8-NEXT: lwz r3, L..C9(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lfiwzx f0, r4, r5 ; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 -; CHECK-AIX-32-P8-NEXT: xxmrghw v3, vs1, vs0 -; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P8-NEXT: xxspltw vs1, vs1, 1 +; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1 +; CHECK-AIX-32-P8-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4 ; CHECK-AIX-32-P8-NEXT: blr ; ; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64: ; CHECK-AIX-32-P9: # %bb.0: # %entry ; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r3 -; CHECK-AIX-32-P9-NEXT: lwz r3, 4(r4) -; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) -; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4) -; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1) -; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: li r3, 4 +; CHECK-AIX-32-P9-NEXT: lxvwsx vs2, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, r4, r3 ; CHECK-AIX-32-P9-NEXT: lwz r3, L..C5(r2) # %const.0 -; CHECK-AIX-32-P9-NEXT: lxv vs2, -32(r1) ; CHECK-AIX-32-P9-NEXT: xxmrghw v2, vs2, vs1 ; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3) ; CHECK-AIX-32-P9-NEXT: xxperm v2, vs0, vs1 diff --git a/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll index 8bb71e073e8146..56c8c128ba9f40 100644 --- a/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll +++ b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll @@ -1685,43 +1685,29 @@ define <2 x i64> @test_v2i64_v2i64(ptr nocapture noundef readonly %a, ptr nocapt ; ; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64: ; CHECK-AIX-32-P8: # %bb.0: # %entry -; CHECK-AIX-32-P8-NEXT: lwz r5, 4(r3) -; CHECK-AIX-32-P8-NEXT: stw r5, -16(r1) -; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) -; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 -; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 -; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 -; CHECK-AIX-32-P8-NEXT: lwz r3, 4(r4) -; CHECK-AIX-32-P8-NEXT: stw r3, -48(r1) -; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4) -; CHECK-AIX-32-P8-NEXT: stw r3, -64(r1) -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -48 +; CHECK-AIX-32-P8-NEXT: li r5, 4 +; CHECK-AIX-32-P8-NEXT: lfiwzx f1, 0, r3 +; CHECK-AIX-32-P8-NEXT: lfiwzx f3, 0, r4 +; CHECK-AIX-32-P8-NEXT: lfiwzx f0, r3, r5 +; CHECK-AIX-32-P8-NEXT: lfiwzx f2, r4, r5 +; CHECK-AIX-32-P8-NEXT: xxspltw vs1, vs1, 1 +; CHECK-AIX-32-P8-NEXT: xxspltw vs3, vs3, 1 +; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1 +; CHECK-AIX-32-P8-NEXT: xxspltw vs2, vs2, 1 ; CHECK-AIX-32-P8-NEXT: xxmrghw v2, vs1, vs0 -; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -64 -; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 -; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs3, vs2 ; CHECK-AIX-32-P8-NEXT: xxmrghd v3, v2, vs0 ; CHECK-AIX-32-P8-NEXT: vaddudm v2, v3, v2 ; CHECK-AIX-32-P8-NEXT: blr ; ; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64: ; CHECK-AIX-32-P9: # %bb.0: # %entry -; CHECK-AIX-32-P9-NEXT: lwz r5, 4(r3) -; CHECK-AIX-32-P9-NEXT: stw r5, -16(r1) -; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) -; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) -; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) -; CHECK-AIX-32-P9-NEXT: lwz r3, 4(r4) -; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) -; CHECK-AIX-32-P9-NEXT: stw r3, -48(r1) -; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4) +; CHECK-AIX-32-P9-NEXT: li r5, 4 +; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3 +; CHECK-AIX-32-P9-NEXT: lxvwsx vs0, r3, r5 ; CHECK-AIX-32-P9-NEXT: xxmrghw v2, vs1, vs0 -; CHECK-AIX-32-P9-NEXT: lxv vs0, -48(r1) -; CHECK-AIX-32-P9-NEXT: stw r3, -64(r1) -; CHECK-AIX-32-P9-NEXT: lxv vs1, -64(r1) +; CHECK-AIX-32-P9-NEXT: lxvwsx vs0, r4, r5 +; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r4 ; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 ; CHECK-AIX-32-P9-NEXT: xxmrghd v3, v2, vs0 ; CHECK-AIX-32-P9-NEXT: vaddudm v2, v3, v2 diff --git a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll index 4ca55d276647bf..c8e7b20e4b8c37 100644 --- a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll +++ b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll @@ -743,25 +743,21 @@ define void @test_v8i16_v4i32(ptr %a) { ; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32: ; CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3) -; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1) -; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 -; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) -; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4 -; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: sth r4, -16(r1) +; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3 ; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 ; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 -; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 ; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 ; CHECK-AIX-32-P8-NEXT: blr ; ; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32: ; CHECK-AIX-32-P9: # %bb.0: # %entry ; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3) -; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1) -; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) -; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1) -; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1) +; CHECK-AIX-32-P9-NEXT: sth r4, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3 ; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs1 ; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) ; CHECK-AIX-32-P9-NEXT: blr @@ -842,25 +838,21 @@ define void @test_v8i16_v2i64(ptr %a) { ; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64: ; CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3) -; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1) -; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 -; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) -; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4 -; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: sth r4, -16(r1) +; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3 ; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 ; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 -; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 ; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 ; CHECK-AIX-32-P8-NEXT: blr ; ; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64: ; CHECK-AIX-32-P9: # %bb.0: # %entry ; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3) -; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1) -; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) -; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1) -; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1) +; CHECK-AIX-32-P9-NEXT: sth r4, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3 ; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs1 ; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) ; CHECK-AIX-32-P9-NEXT: blr @@ -1030,25 +1022,21 @@ define void @test_v4i32_v8i16(ptr %a) { ; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16: ; CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3) -; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1) -; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 -; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) -; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4 -; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: sth r4, -16(r1) +; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3 ; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 ; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 -; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs1 ; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 ; CHECK-AIX-32-P8-NEXT: blr ; ; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16: ; CHECK-AIX-32-P9: # %bb.0: # %entry ; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3) -; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1) -; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) -; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1) -; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1) +; CHECK-AIX-32-P9-NEXT: sth r4, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3 ; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 ; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) ; CHECK-AIX-32-P9-NEXT: blr @@ -1125,26 +1113,18 @@ define void @test_v4i32_v2i64(ptr %a) { ; ; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64: ; CHECK-AIX-32-P8: # %bb.0: # %entry -; CHECK-AIX-32-P8-NEXT: lwz r4, 0(r3) -; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) -; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 -; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1) -; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 -; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-32-P8-NEXT: lfiwzx f1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1 +; CHECK-AIX-32-P8-NEXT: xxspltw vs1, vs1, 1 ; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 ; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 ; CHECK-AIX-32-P8-NEXT: blr ; ; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64: ; CHECK-AIX-32-P9: # %bb.0: # %entry -; CHECK-AIX-32-P9-NEXT: lwz r4, 0(r3) -; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) -; CHECK-AIX-32-P9-NEXT: stw r4, -16(r1) -; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) -; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxvwsx vs0, 0, r3 +; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3 ; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 ; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) ; CHECK-AIX-32-P9-NEXT: blr @@ -1212,14 +1192,11 @@ define void @test_v2i64_v2i64(ptr %a) { ; ; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64: ; CHECK-AIX-32-P8: # %bb.0: # %entry -; CHECK-AIX-32-P8-NEXT: lwz r4, 4(r3) -; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1) -; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) -; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 -; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 -; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: li r4, 4 +; CHECK-AIX-32-P8-NEXT: lfiwzx f1, 0, r3 +; CHECK-AIX-32-P8-NEXT: lfiwzx f0, r3, r4 +; CHECK-AIX-32-P8-NEXT: xxspltw vs1, vs1, 1 +; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1 ; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 ; CHECK-AIX-32-P8-NEXT: lfiwzx f1, 0, r3 ; CHECK-AIX-32-P8-NEXT: xxspltw vs1, vs1, 1 @@ -1229,12 +1206,9 @@ define void @test_v2i64_v2i64(ptr %a) { ; ; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64: ; CHECK-AIX-32-P9: # %bb.0: # %entry -; CHECK-AIX-32-P9-NEXT: lwz r4, 4(r3) -; CHECK-AIX-32-P9-NEXT: stw r4, -16(r1) -; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) -; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) -; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) -; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: li r4, 4 +; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3 +; CHECK-AIX-32-P9-NEXT: lxvwsx vs0, r3, r4 ; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 ; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3 ; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 @@ -1308,26 +1282,18 @@ define void @test_v2i64_v4i32(ptr %a) { ; ; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32: ; CHECK-AIX-32-P8: # %bb.0: # %entry -; CHECK-AIX-32-P8-NEXT: lwz r4, 0(r3) -; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) -; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 -; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) -; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 -; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-32-P8-NEXT: lfiwzx f1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1 +; CHECK-AIX-32-P8-NEXT: xxspltw vs1, vs1, 1 ; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 ; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 ; CHECK-AIX-32-P8-NEXT: blr ; ; CHECK-AIX-32-P9-LABEL: test_v2i64_v4i32: ; CHECK-AIX-32-P9: # %bb.0: # %entry -; CHECK-AIX-32-P9-NEXT: lwz r4, 0(r3) -; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) -; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) -; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1) -; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxvwsx vs0, 0, r3 +; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3 ; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 ; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) ; CHECK-AIX-32-P9-NEXT: blr @@ -1407,25 +1373,21 @@ define void @test_v2i64_v8i16(ptr %a) { ; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16: ; CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3) -; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1) -; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 -; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) -; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4 -; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: sth r4, -16(r1) +; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3 ; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 ; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 -; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs1 ; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 ; CHECK-AIX-32-P8-NEXT: blr ; ; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16: ; CHECK-AIX-32-P9: # %bb.0: # %entry ; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3) -; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1) -; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) -; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1) -; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1) +; CHECK-AIX-32-P9-NEXT: sth r4, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3 ; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 ; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) ; CHECK-AIX-32-P9-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll index 201bc5be545068..e1aa531db449e5 100644 --- a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll +++ b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll @@ -654,17 +654,14 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) { ; ; CHECK-AIX-32-P8-LABEL: test_v2i64_none: ; CHECK-AIX-32-P8: # %bb.0: # %entry -; CHECK-AIX-32-P8-NEXT: lwz r4, 4(r3) +; CHECK-AIX-32-P8-NEXT: li r4, 4 +; CHECK-AIX-32-P8-NEXT: lfiwzx f1, 0, r3 ; CHECK-AIX-32-P8-NEXT: xxlxor v4, v4, v4 -; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1) -; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) -; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 -; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 -; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: lfiwzx f0, r3, r4 ; CHECK-AIX-32-P8-NEXT: lwz r3, L..C6(r2) # %const.0 ; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxspltw vs1, vs1, 1 +; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1 ; CHECK-AIX-32-P8-NEXT: xxmrghw v2, vs1, vs0 ; CHECK-AIX-32-P8-NEXT: vperm v2, v4, v2, v3 ; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 @@ -672,14 +669,11 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) { ; ; CHECK-AIX-32-P9-LABEL: test_v2i64_none: ; CHECK-AIX-32-P9: # %bb.0: # %entry -; CHECK-AIX-32-P9-NEXT: lwz r4, 4(r3) +; CHECK-AIX-32-P9-NEXT: li r4, 4 +; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3 ; CHECK-AIX-32-P9-NEXT: xxlxor vs2, vs2, vs2 -; CHECK-AIX-32-P9-NEXT: stw r4, -16(r1) -; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) -; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) -; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxvwsx vs0, r3, r4 ; CHECK-AIX-32-P9-NEXT: lwz r3, L..C5(r2) # %const.0 -; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) ; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 ; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3) ; CHECK-AIX-32-P9-NEXT: xxperm vs0, vs2, vs1 @@ -847,24 +841,20 @@ define <16 x i8> @test_v8i16_v4i32(ptr %a, ptr %b) local_unnamed_addr { ; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32: ; CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3) -; CHECK-AIX-32-P8-NEXT: sth r3, -32(r1) -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 -; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r3 -; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4) -; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) ; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r4 ; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 -; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: xxspltw v2, vs0, 1 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 ; CHECK-AIX-32-P8-NEXT: blr ; ; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32: ; CHECK-AIX-32-P9: # %bb.0: # %entry ; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3) -; CHECK-AIX-32-P9-NEXT: sth r3, -32(r1) -; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4) -; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1) -; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1) +; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxvwsx v3, 0, r4 ; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-AIX-32-P9-NEXT: blr entry: @@ -937,24 +927,20 @@ define <16 x i8> @test_v8i16_v2i64(ptr %a, ptr %b) local_unnamed_addr { ; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64: ; CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3) -; CHECK-AIX-32-P8-NEXT: sth r3, -32(r1) -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 -; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r3 -; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4) -; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) ; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r4 ; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 -; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: xxspltw v2, vs0, 1 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 ; CHECK-AIX-32-P8-NEXT: blr ; ; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64: ; CHECK-AIX-32-P9: # %bb.0: # %entry ; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3) -; CHECK-AIX-32-P9-NEXT: sth r3, -32(r1) -; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4) -; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1) -; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1) +; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxvwsx v3, 0, r4 ; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-AIX-32-P9-NEXT: blr entry: @@ -1149,24 +1135,20 @@ define <16 x i8> @test_v4i32_v8i16(ptr %a, ptr %b) local_unnamed_addr { ; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16: ; CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3) -; CHECK-AIX-32-P8-NEXT: sth r3, -32(r1) -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 -; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r3 -; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4) -; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) ; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r4 ; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 -; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: xxspltw v2, vs0, 1 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3 ; CHECK-AIX-32-P8-NEXT: blr ; ; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16: ; CHECK-AIX-32-P9: # %bb.0: # %entry ; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3) -; CHECK-AIX-32-P9-NEXT: sth r3, -32(r1) -; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4) -; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1) -; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1) +; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxvwsx v3, 0, r4 ; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-AIX-32-P9-NEXT: blr entry: @@ -1519,24 +1501,20 @@ define <16 x i8> @test_v2i64_v8i16(ptr %a, ptr %b) local_unnamed_addr { ; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16: ; CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3) -; CHECK-AIX-32-P8-NEXT: sth r3, -32(r1) -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 -; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r3 -; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4) -; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) ; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r4 ; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 -; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: xxspltw v2, vs0, 1 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3 ; CHECK-AIX-32-P8-NEXT: blr ; ; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16: ; CHECK-AIX-32-P9: # %bb.0: # %entry ; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3) -; CHECK-AIX-32-P9-NEXT: sth r3, -32(r1) -; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4) -; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1) -; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1) +; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxvwsx v3, 0, r4 ; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-AIX-32-P9-NEXT: blr entry: From a724f9a7e5d46c9bf49c7b5e207f792fb5214c10 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 4 Sep 2024 10:08:05 -0700 Subject: [PATCH 4/6] [SLP][NFC]Make whole reg non-power-2 test for x86 and aarch64 along with risc-v --- .../SLPVectorizer/{RISCV => }/reduction-whole-regs-loads.ll | 3 +++ 1 file changed, 3 insertions(+) rename llvm/test/Transforms/SLPVectorizer/{RISCV => }/reduction-whole-regs-loads.ll (87%) diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-whole-regs-loads.ll b/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll similarity index 87% rename from llvm/test/Transforms/SLPVectorizer/RISCV/reduction-whole-regs-loads.ll rename to llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll index 54dc33dbc0d00b..c077181c35063b 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-whole-regs-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll @@ -1,5 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux -mattr=+v -slp-threshold=-100 | FileCheck %s +; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -mattr=+v -slp-threshold=-100 | FileCheck %s +; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=aarch64-unknown-linux -mattr=+v -slp-threshold=-100 | FileCheck %s +; REQUIRES: aarch64-registered-target, x86-registered-target, riscv-registered-target define i64 @test(ptr %p) { ; CHECK-LABEL: @test( From 2092f3527ed743a8fb9e0858c839cd4b26907f2a Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 4 Sep 2024 10:20:13 -0700 Subject: [PATCH 5/6] [SLP][NFC]Remove unsupported attribute --- .../Transforms/SLPVectorizer/reduction-whole-regs-loads.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll b/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll index c077181c35063b..281b5f99540eab 100644 --- a/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux -mattr=+v -slp-threshold=-100 | FileCheck %s -; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -mattr=+v -slp-threshold=-100 | FileCheck %s -; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=aarch64-unknown-linux -mattr=+v -slp-threshold=-100 | FileCheck %s +; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -slp-threshold=-100 | FileCheck %s +; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=aarch64-unknown-linux -slp-threshold=-100 | FileCheck %s ; REQUIRES: aarch64-registered-target, x86-registered-target, riscv-registered-target define i64 @test(ptr %p) { From 601645c3b70e2a17d18779a3a51b8bc9ecdc9aa6 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Wed, 4 Sep 2024 16:52:49 +0000 Subject: [PATCH 6/6] [clang] Fix FIXME in dynamic initializer emission, NFCI This potentially affects platforms that support comdats other than ELF, COFF, or wasm, but that is the intention of the FIXME, and if they don't want this behavior, they probably shouldn't advertise comdat support. --- clang/lib/CodeGen/CGDeclCXX.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp index 8dcb5f61006196..c44f38ef02a3f1 100644 --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -640,13 +640,13 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, addUsedGlobal(COMDATKey); } - // If we used a COMDAT key for the global ctor, the init function can be - // discarded if the global ctor entry is discarded. - // FIXME: Do we need to restrict this to ELF and Wasm? + // If comdats are in use and supported, place the initializer function into + // the comdat group of the global. In the MS ABI, initializers are mangled + // and have their own comdat, so we don't include them in the group for + // consistency with MSVC. llvm::Comdat *C = Addr->getComdat(); - if (COMDATKey && C && - (getTarget().getTriple().isOSBinFormatELF() || - getTarget().getTriple().isOSBinFormatWasm())) { + if (COMDATKey && C && getTriple().supportsCOMDAT() && + !getTarget().getCXXABI().isMicrosoft()) { Fn->setComdat(C); } } else {