From 6d103d7746c94cc865138093c7c65138b89aa77c Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Wed, 31 Jul 2024 19:20:49 +0200 Subject: [PATCH 001/114] [Support] Erase blocks after DomTree::eraseNode (#101195) Change eraseNode to require that the basic block is still contained inside the function. This is a preparation for using numbers of basic blocks inside the dominator tree, which are invalid for blocks that are not inside a function. --- .../llvm/Analysis/GenericDomTreeUpdater.h | 2 +- llvm/lib/Analysis/DomTreeUpdater.cpp | 8 ++-- llvm/lib/CodeGen/EarlyIfConversion.cpp | 46 +++++++++++-------- .../AArch64/AArch64ConditionalCompares.cpp | 3 +- llvm/unittests/IR/DominatorTreeTest.cpp | 3 +- 5 files changed, 34 insertions(+), 28 deletions(-) diff --git a/llvm/include/llvm/Analysis/GenericDomTreeUpdater.h b/llvm/include/llvm/Analysis/GenericDomTreeUpdater.h index 84ed882c6de84d..ca4ce68b85cbcf 100644 --- a/llvm/include/llvm/Analysis/GenericDomTreeUpdater.h +++ b/llvm/include/llvm/Analysis/GenericDomTreeUpdater.h @@ -232,7 +232,7 @@ class GenericDomTreeUpdater { /// insertEdge/deleteEdge or is unnecessary in the batch update. bool isUpdateValid(typename DomTreeT::UpdateType Update) const; - /// Erase Basic Block node that has been unlinked from Function + /// Erase Basic Block node before it is unlinked from Function /// in the DomTree and PostDomTree. void eraseDelBBNode(BasicBlockT *DelBB); diff --git a/llvm/lib/Analysis/DomTreeUpdater.cpp b/llvm/lib/Analysis/DomTreeUpdater.cpp index 6895317c1d03ae..351bd66e389bcd 100644 --- a/llvm/lib/Analysis/DomTreeUpdater.cpp +++ b/llvm/lib/Analysis/DomTreeUpdater.cpp @@ -42,9 +42,8 @@ bool DomTreeUpdater::forceFlushDeletedBB() { // delete only has an UnreachableInst inside. assert(BB->size() == 1 && isa(BB->getTerminator()) && "DelBB has been modified while awaiting deletion."); - BB->removeFromParent(); eraseDelBBNode(BB); - delete BB; + BB->eraseFromParent(); } DeletedBBs.clear(); Callbacks.clear(); @@ -63,9 +62,8 @@ void DomTreeUpdater::deleteBB(BasicBlock *DelBB) { return; } - DelBB->removeFromParent(); eraseDelBBNode(DelBB); - delete DelBB; + DelBB->eraseFromParent(); } void DomTreeUpdater::callbackDeleteBB( @@ -77,8 +75,8 @@ void DomTreeUpdater::callbackDeleteBB( return; } - DelBB->removeFromParent(); eraseDelBBNode(DelBB); + DelBB->removeFromParent(); Callback(DelBB); delete DelBB; } diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp index d506c625d8ca56..0de8112fb72c89 100644 --- a/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -181,8 +181,8 @@ class SSAIfConv { bool canConvertIf(MachineBasicBlock *MBB, bool Predicate = false); /// convertIf - If-convert the last block passed to canConvertIf(), assuming - /// it is possible. Add any erased blocks to RemovedBlocks. - void convertIf(SmallVectorImpl &RemovedBlocks, + /// it is possible. Add any blocks that are to be erased to RemoveBlocks. + void convertIf(SmallVectorImpl &RemoveBlocks, bool Predicate = false); }; } // end anonymous namespace @@ -678,9 +678,9 @@ void SSAIfConv::rewritePHIOperands() { /// convertIf - Execute the if conversion after canConvertIf has determined the /// feasibility. /// -/// Any basic blocks erased will be added to RemovedBlocks. +/// Any basic blocks that need to be erased will be added to RemoveBlocks. 
/// -void SSAIfConv::convertIf(SmallVectorImpl &RemovedBlocks, +void SSAIfConv::convertIf(SmallVectorImpl &RemoveBlocks, bool Predicate) { assert(Head && Tail && TBB && FBB && "Call canConvertIf first."); @@ -721,15 +721,18 @@ void SSAIfConv::convertIf(SmallVectorImpl &RemovedBlocks, DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc(); TII->removeBranch(*Head); - // Erase the now empty conditional blocks. It is likely that Head can fall + // Mark the now empty conditional blocks for removal and move them to the end. + // It is likely that Head can fall // through to Tail, and we can join the two blocks. if (TBB != Tail) { - RemovedBlocks.push_back(TBB); - TBB->eraseFromParent(); + RemoveBlocks.push_back(TBB); + if (TBB != &TBB->getParent()->back()) + TBB->moveAfter(&TBB->getParent()->back()); } if (FBB != Tail) { - RemovedBlocks.push_back(FBB); - FBB->eraseFromParent(); + RemoveBlocks.push_back(FBB); + if (FBB != &FBB->getParent()->back()) + FBB->moveAfter(&FBB->getParent()->back()); } assert(Head->succ_empty() && "Additional head successors?"); @@ -740,8 +743,9 @@ void SSAIfConv::convertIf(SmallVectorImpl &RemovedBlocks, Head->splice(Head->end(), Tail, Tail->begin(), Tail->end()); Head->transferSuccessorsAndUpdatePHIs(Tail); - RemovedBlocks.push_back(Tail); - Tail->eraseFromParent(); + RemoveBlocks.push_back(Tail); + if (Tail != &Tail->getParent()->back()) + Tail->moveAfter(&Tail->getParent()->back()); } else { // We need a branch to Tail, let code placement work it out later. LLVM_DEBUG(dbgs() << "Converting to unconditional branch.\n"); @@ -1062,11 +1066,13 @@ bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { while (IfConv.canConvertIf(MBB) && shouldConvertIf()) { // If-convert MBB and update analyses. invalidateTraces(); - SmallVector RemovedBlocks; - IfConv.convertIf(RemovedBlocks); + SmallVector RemoveBlocks; + IfConv.convertIf(RemoveBlocks); Changed = true; - updateDomTree(DomTree, IfConv, RemovedBlocks); - updateLoops(Loops, RemovedBlocks); + updateDomTree(DomTree, IfConv, RemoveBlocks); + for (MachineBasicBlock *MBB : RemoveBlocks) + MBB->eraseFromParent(); + updateLoops(Loops, RemoveBlocks); } return Changed; } @@ -1200,11 +1206,13 @@ bool EarlyIfPredicator::tryConvertIf(MachineBasicBlock *MBB) { bool Changed = false; while (IfConv.canConvertIf(MBB, /*Predicate*/ true) && shouldConvertIf()) { // If-convert MBB and update analyses. 
- SmallVector RemovedBlocks; - IfConv.convertIf(RemovedBlocks, /*Predicate*/ true); + SmallVector RemoveBlocks; + IfConv.convertIf(RemoveBlocks, /*Predicate*/ true); Changed = true; - updateDomTree(DomTree, IfConv, RemovedBlocks); - updateLoops(Loops, RemovedBlocks); + updateDomTree(DomTree, IfConv, RemoveBlocks); + for (MachineBasicBlock *MBB : RemoveBlocks) + MBB->eraseFromParent(); + updateLoops(Loops, RemoveBlocks); } return Changed; } diff --git a/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp index 49e5211af50ccd..9669a393bc2b94 100644 --- a/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp +++ b/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp @@ -711,7 +711,6 @@ void SSACCmpConv::convert(SmallVectorImpl &RemovedBlocks) { Head->updateTerminator(CmpBB->getNextNode()); RemovedBlocks.push_back(CmpBB); - CmpBB->eraseFromParent(); LLVM_DEBUG(dbgs() << "Result:\n" << *Head); ++NumConverted; } @@ -918,6 +917,8 @@ bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) { CmpConv.convert(RemovedBlocks); Changed = true; updateDomTree(RemovedBlocks); + for (MachineBasicBlock *MBB : RemovedBlocks) + MBB->eraseFromParent(); updateLoops(RemovedBlocks); } return Changed; diff --git a/llvm/unittests/IR/DominatorTreeTest.cpp b/llvm/unittests/IR/DominatorTreeTest.cpp index 44bde74ad350f9..555348c65a63d0 100644 --- a/llvm/unittests/IR/DominatorTreeTest.cpp +++ b/llvm/unittests/IR/DominatorTreeTest.cpp @@ -607,11 +607,10 @@ TEST(DominatorTree, DeletingEdgesIntroducesInfiniteLoop2) { SwitchC->removeCase(SwitchC->case_begin()); DT->deleteEdge(C, C2); PDT->deleteEdge(C, C2); - C2->removeFromParent(); EXPECT_EQ(DT->getNode(C2), nullptr); PDT->eraseNode(C2); - delete C2; + C2->eraseFromParent(); EXPECT_TRUE(DT->verify()); EXPECT_TRUE(PDT->verify()); From 9fe455fd0c7d6f2107b33b37c04bbd3b12fe65b3 Mon Sep 17 00:00:00 2001 From: Felipe de Azevedo Piovezan Date: Wed, 31 Jul 2024 10:25:31 -0700 Subject: [PATCH 002/114] [lldb] Add constant value mode for RegisterLocation in UnwindPlans (#100624) This is useful for language runtimes that compute register values by inspecting the state of the currently running process. Currently, there are no mechanisms enabling these runtimes to set register values to arbitrary values. The alternative considered would involve creating a dwarf expression that produces an arbitrary integer (e.g. using OP_constu). However, the current data structure for Rows is such that they do not own any memory associated with dwarf expressions, which implies any such expression would need to have static storage and therefore could not contain a runtime value. Adding a new rule for constants leads to a simpler implementation. It's also worth noting that this does not make the "Location" union any bigger, since it already contains a pointer+size pair. 
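For illustration, a language runtime could describe such a register roughly as
follows (a minimal sketch: only the Row API added by this patch is shown, and
the register number and constant value are invented):

  UnwindPlan::Row row;
  // Suppose the runtime has computed that the caller's register 22 always
  // holds the constant 0x20 at this point in the function.
  row.SetRegisterLocationToIsConstant(/*reg_num=*/22, /*constant=*/0x20,
                                      /*can_replace=*/false);

On the consumer side, RegisterContextUnwind reports such a location as an
inferred register value (see the RegisterContextUnwind.cpp hunk below).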
--- lldb/include/lldb/Symbol/UnwindPlan.h | 17 ++++++++++++++++- lldb/source/Symbol/UnwindPlan.cpp | 17 +++++++++++++++++ lldb/source/Target/RegisterContextUnwind.cpp | 9 +++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/lldb/include/lldb/Symbol/UnwindPlan.h b/lldb/include/lldb/Symbol/UnwindPlan.h index ebb0ec421da72d..a9e8406608ff31 100644 --- a/lldb/include/lldb/Symbol/UnwindPlan.h +++ b/lldb/include/lldb/Symbol/UnwindPlan.h @@ -68,7 +68,8 @@ class UnwindPlan { isAFAPlusOffset, // reg = AFA + offset inOtherRegister, // reg = other reg atDWARFExpression, // reg = deref(eval(dwarf_expr)) - isDWARFExpression // reg = eval(dwarf_expr) + isDWARFExpression, // reg = eval(dwarf_expr) + isConstant // reg = constant }; RegisterLocation() : m_location() {} @@ -105,6 +106,15 @@ class UnwindPlan { bool IsDWARFExpression() const { return m_type == isDWARFExpression; } + bool IsConstant() const { return m_type == isConstant; } + + void SetIsConstant(uint64_t value) { + m_type = isConstant; + m_location.constant_value = value; + } + + uint64_t GetConstant() const { return m_location.constant_value; } + void SetAtCFAPlusOffset(int32_t offset) { m_type = atCFAPlusOffset; m_location.offset = offset; @@ -192,6 +202,8 @@ class UnwindPlan { const uint8_t *opcodes; uint16_t length; } expr; + // For m_type == isConstant + uint64_t constant_value; } m_location; }; @@ -358,6 +370,9 @@ class UnwindPlan { bool SetRegisterLocationToSame(uint32_t reg_num, bool must_replace); + bool SetRegisterLocationToIsConstant(uint32_t reg_num, uint64_t constant, + bool can_replace); + // When this UnspecifiedRegistersAreUndefined mode is // set, any register that is not specified by this Row will // be described as Undefined. diff --git a/lldb/source/Symbol/UnwindPlan.cpp b/lldb/source/Symbol/UnwindPlan.cpp index e258a4e3d82f24..e2dbd81a82c84c 100644 --- a/lldb/source/Symbol/UnwindPlan.cpp +++ b/lldb/source/Symbol/UnwindPlan.cpp @@ -46,6 +46,8 @@ operator==(const UnwindPlan::Row::RegisterLocation &rhs) const { return !memcmp(m_location.expr.opcodes, rhs.m_location.expr.opcodes, m_location.expr.length); break; + case isConstant: + return m_location.constant_value == rhs.m_location.constant_value; } } return false; @@ -153,6 +155,9 @@ void UnwindPlan::Row::RegisterLocation::Dump(Stream &s, if (m_type == atDWARFExpression) s.PutChar(']'); } break; + case isConstant: + s.Printf("=0x%" PRIx64, m_location.constant_value); + break; } } @@ -351,6 +356,18 @@ bool UnwindPlan::Row::SetRegisterLocationToSame(uint32_t reg_num, return true; } +bool UnwindPlan::Row::SetRegisterLocationToIsConstant(uint32_t reg_num, + uint64_t constant, + bool can_replace) { + if (!can_replace && + m_register_locations.find(reg_num) != m_register_locations.end()) + return false; + RegisterLocation reg_loc; + reg_loc.SetIsConstant(constant); + m_register_locations[reg_num] = reg_loc; + return true; +} + bool UnwindPlan::Row::operator==(const UnwindPlan::Row &rhs) const { return m_offset == rhs.m_offset && m_cfa_value == rhs.m_cfa_value && m_afa_value == rhs.m_afa_value && diff --git a/lldb/source/Target/RegisterContextUnwind.cpp b/lldb/source/Target/RegisterContextUnwind.cpp index bc8081f4e3b316..a61228d092d898 100644 --- a/lldb/source/Target/RegisterContextUnwind.cpp +++ b/lldb/source/Target/RegisterContextUnwind.cpp @@ -1694,6 +1694,15 @@ RegisterContextUnwind::SavedLocationForRegister( return UnwindLLDB::RegisterSearchResult::eRegisterNotFound; } + if (unwindplan_regloc.IsConstant()) { + regloc.type = 
UnwindLLDB::RegisterLocation::eRegisterValueInferred; + regloc.location.inferred_value = unwindplan_regloc.GetConstant(); + m_registers[regnum.GetAsKind(eRegisterKindLLDB)] = regloc; + UnwindLogMsg("supplying caller's register %s (%d) via constant value", + regnum.GetName(), regnum.GetAsKind(eRegisterKindLLDB)); + return UnwindLLDB::RegisterSearchResult::eRegisterFound; + } + UnwindLogMsg("no save location for %s (%d) in this stack frame", regnum.GetName(), regnum.GetAsKind(eRegisterKindLLDB)); From 8b17b12912ff59b0c9f2825ac2cc258418ed349b Mon Sep 17 00:00:00 2001 From: vporpo Date: Wed, 31 Jul 2024 10:37:54 -0700 Subject: [PATCH 003/114] [SandboxIR] Implement FPToUIInst (#101369) This patch implements sandboxir::FPToUIInst which mirrors llvm::FPToUIInst. --- llvm/include/llvm/SandboxIR/SandboxIR.h | 23 +++++++ llvm/lib/SandboxIR/SandboxIR.cpp | 28 +++++++++ llvm/unittests/SandboxIR/SandboxIRTest.cpp | 70 ++++++++++++++++++++++ 3 files changed, 121 insertions(+) diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h index 190225e5528d8d..a0a31e659dcc57 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIR.h +++ b/llvm/include/llvm/SandboxIR/SandboxIR.h @@ -34,6 +34,8 @@ // | | // | +- FPToSIInst // | | +// | +- FPToUIInst +// | | // | +- IntToPtrInst // | | // | +- PtrToIntInst @@ -1376,6 +1378,27 @@ class CastInst : public Instruction { #endif }; +class FPToUIInst final : public CastInst { +public: + static Value *create(Value *Src, Type *DestTy, BBIterator WhereIt, + BasicBlock *WhereBB, Context &Ctx, + const Twine &Name = ""); + static Value *create(Value *Src, Type *DestTy, Instruction *InsertBefore, + Context &Ctx, const Twine &Name = ""); + static Value *create(Value *Src, Type *DestTy, BasicBlock *InsertAtEnd, + Context &Ctx, const Twine &Name = ""); + + static bool classof(const Value *From) { + if (auto *I = dyn_cast(From)) + return I->getOpcode() == Opcode::FPToUI; + return false; + } +#ifndef NDEBUG + void dump(raw_ostream &OS) const final; + LLVM_DUMP_METHOD void dump() const final; +#endif // NDEBUG +}; + class FPToSIInst final : public CastInst { public: static Value *create(Value *Src, Type *DestTy, BBIterator WhereIt, diff --git a/llvm/lib/SandboxIR/SandboxIR.cpp b/llvm/lib/SandboxIR/SandboxIR.cpp index 9ba168f88cc212..a6de44679d0968 100644 --- a/llvm/lib/SandboxIR/SandboxIR.cpp +++ b/llvm/lib/SandboxIR/SandboxIR.cpp @@ -1143,6 +1143,34 @@ void CastInst::dump() const { } #endif // NDEBUG +Value *FPToUIInst::create(Value *Src, Type *DestTy, BBIterator WhereIt, + BasicBlock *WhereBB, Context &Ctx, + const Twine &Name) { + return CastInst::create(DestTy, Instruction::Opcode::FPToUI, Src, WhereIt, + WhereBB, Ctx, Name); +} +Value *FPToUIInst::create(Value *Src, Type *DestTy, Instruction *InsertBefore, + Context &Ctx, const Twine &Name) { + return create(Src, DestTy, InsertBefore->getIterator(), + InsertBefore->getParent(), Ctx, Name); +} +Value *FPToUIInst::create(Value *Src, Type *DestTy, BasicBlock *InsertAtEnd, + Context &Ctx, const Twine &Name) { + return create(Src, DestTy, InsertAtEnd->end(), InsertAtEnd, Ctx, Name); +} + +#ifndef NDEBUG +void FPToUIInst::dump(raw_ostream &OS) const { + dumpCommonPrefix(OS); + dumpCommonSuffix(OS); +} + +void FPToUIInst::dump() const { + dump(dbgs()); + dbgs() << "\n"; +} +#endif // NDEBUG + Value *FPToSIInst::create(Value *Src, Type *DestTy, BBIterator WhereIt, BasicBlock *WhereBB, Context &Ctx, const Twine &Name) { diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp 
b/llvm/unittests/SandboxIR/SandboxIRTest.cpp index c9c7fe0ea0cde3..f405bc1fca7444 100644 --- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp +++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp @@ -1499,6 +1499,7 @@ define void @foo(i32 %arg, float %farg, double %darg, ptr %ptr) { EXPECT_EQ(SExt->getDestTy(), Ti64); auto *FPToUI = cast(&*It++); + EXPECT_TRUE(isa(FPToUI)); EXPECT_EQ(FPToUI->getOpcode(), sandboxir::Instruction::Opcode::FPToUI); EXPECT_EQ(FPToUI->getSrcTy(), Tfloat); EXPECT_EQ(FPToUI->getDestTy(), Ti32); @@ -1618,6 +1619,75 @@ define void @foo(i32 %arg, float %farg, double %darg, ptr %ptr) { } } +TEST_F(SandboxIRTest, FPToUIInst) { + parseIR(C, R"IR( +define void @foo(float %arg) { + %fptoui = fptoui float %arg to i32 + ret void +} +)IR"); + Function &LLVMF = *M->getFunction("foo"); + sandboxir::Context Ctx(C); + sandboxir::Function *F = Ctx.createFunction(&LLVMF); + unsigned ArgIdx = 0; + auto *Arg = F->getArg(ArgIdx++); + auto *BB = &*F->begin(); + auto It = BB->begin(); + Type *Ti32 = Type::getInt32Ty(C); + Type *Tfloat = Type::getFloatTy(C); + + auto *FPToUI = cast(&*It++); + EXPECT_EQ(FPToUI->getOpcode(), sandboxir::Instruction::Opcode::FPToUI); + EXPECT_EQ(FPToUI->getSrcTy(), Tfloat); + EXPECT_EQ(FPToUI->getDestTy(), Ti32); + auto *Ret = cast(&*It++); + + { + // Check create() WhereIt, WhereBB + auto *NewI = cast( + sandboxir::FPToUIInst::create(Arg, Ti32, /*WhereIt=*/BB->end(), + /*WhereBB=*/BB, Ctx, "FPToUI")); + // Check getOpcode(). + EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::FPToUI); + // Check getSrcTy(). + EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); + // Check getDestTy(). + EXPECT_EQ(NewI->getDestTy(), Ti32); + // Check instr position. + EXPECT_EQ(NewI->getNextNode(), nullptr); + EXPECT_EQ(NewI->getPrevNode(), Ret); + } + { + // Check create() InsertBefore. + auto *NewI = cast( + sandboxir::FPToUIInst::create(Arg, Ti32, + /*InsertBefore=*/Ret, Ctx, "FPToUI")); + // Check getOpcode(). + EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::FPToUI); + // Check getSrcTy(). + EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); + // Check getDestTy(). + EXPECT_EQ(NewI->getDestTy(), Ti32); + // Check instr position. + EXPECT_EQ(NewI->getNextNode(), Ret); + } + { + // Check create() InsertAtEnd. + auto *NewI = cast( + sandboxir::FPToUIInst::create(Arg, Ti32, + /*InsertAtEnd=*/BB, Ctx, "FPToUI")); + // Check getOpcode(). + EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::FPToUI); + // Check getSrcTy(). + EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); + // Check getDestTy(). + EXPECT_EQ(NewI->getDestTy(), Ti32); + // Check instr position. 
+ EXPECT_EQ(NewI->getNextNode(), nullptr); + EXPECT_EQ(NewI->getParent(), BB); + } +} + TEST_F(SandboxIRTest, FPToSIInst) { parseIR(C, R"IR( define void @foo(float %arg) { From 35a2e6d24bcb94720ec7b3aa00e58a1b7b837fbc Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 31 Jul 2024 10:33:43 -0700 Subject: [PATCH 004/114] [SCEV] Regen a couple auto-gen tests --- .../ScalarEvolution/finite-trip-count.ll | 12 ++++++++++++ .../Analysis/ScalarEvolution/ne-overflow.ll | 3 +++ .../trip-count-implied-addrec.ll | 18 ++++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/llvm/test/Analysis/ScalarEvolution/finite-trip-count.ll b/llvm/test/Analysis/ScalarEvolution/finite-trip-count.ll index b3e3b208bf9082..471954f44311d4 100644 --- a/llvm/test/Analysis/ScalarEvolution/finite-trip-count.ll +++ b/llvm/test/Analysis/ScalarEvolution/finite-trip-count.ll @@ -56,6 +56,12 @@ define void @sle_pre_inc_infinite(i32 %len) { ; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count. ; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count. +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (0 smax (1 + (sext i32 %len to i64))) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: +; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is (0 smax (1 + (sext i32 %len to i64))) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: ; entry: br label %for.body @@ -121,6 +127,12 @@ define void @ule_pre_inc_infinite(i32 %len) { ; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count. ; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count. 
+; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (1 + (zext i32 %len to i64)) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: +; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is (1 + (zext i32 %len to i64)) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: ; entry: br label %for.body diff --git a/llvm/test/Analysis/ScalarEvolution/ne-overflow.ll b/llvm/test/Analysis/ScalarEvolution/ne-overflow.ll index 82b4d0e4fb4837..49288c85897fd9 100644 --- a/llvm/test/Analysis/ScalarEvolution/ne-overflow.ll +++ b/llvm/test/Analysis/ScalarEvolution/ne-overflow.ll @@ -240,6 +240,9 @@ define void @test_zext(i64 %N) mustprogress { ; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (%N /u 2) ; CHECK-NEXT: Predicates: ; CHECK-NEXT: {0,+,2}<%for.body> Added Flags: +; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is (%N /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,2}<%for.body> Added Flags: ; entry: br label %for.body diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll index e9c13f551b4d0d..64306ac28cf275 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll @@ -61,6 +61,9 @@ define void @nw_implies_nsw(i16 %n) mustprogress { ; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (128 + (-128 smax %n)) ; CHECK-NEXT: Predicates: ; CHECK-NEXT: {-128,+,1}<%for.body> Added Flags: +; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is (128 + (-128 smax %n)) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {-128,+,1}<%for.body> Added Flags: ; entry: br label %for.body @@ -107,6 +110,9 @@ define void @actually_infinite() { ; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is i16 257 ; CHECK-NEXT: Predicates: ; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: +; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is i16 257 +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: ; entry: br label %for.body @@ -132,6 +138,9 @@ define void @rhs_mustexit_1(i16 %n.raw) mustprogress { ; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + (1 umax (-1 + (zext i8 (trunc i16 %n.raw to i8) to i16)))) ; CHECK-NEXT: Predicates: ; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: +; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is (-1 + (1 umax (-1 + (zext i8 (trunc i16 %n.raw to i8) to i16)))) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: ; entry: %n.and = and i16 %n.raw, 255 @@ -233,6 +242,9 @@ define void @neg_rhs_wrong_range(i16 %n.raw) mustprogress { ; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is ((-1 + (2 umax (-1 + (zext i8 (trunc i16 %n.raw to i8) to i16)))) /u 2) ; CHECK-NEXT: Predicates: ; CHECK-NEXT: {2,+,2}<%for.body> Added Flags: +; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is ((-1 + (2 umax (-1 + (zext i8 (trunc i16 %n.raw to i8) to i16)))) /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {2,+,2}<%for.body> Added Flags: ; entry: %n.and = and i16 %n.raw, 255 @@ -260,6 +272,9 @@ define void @neg_rhs_maybe_infinite(i16 %n.raw) { ; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + (1 umax (-1 + (zext i8 (trunc i16 %n.raw to i8) to i16)))) ; CHECK-NEXT: Predicates: ; CHECK-NEXT: {1,+,1}<%for.body> 
Added Flags: +; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is (-1 + (1 umax (-1 + (zext i8 (trunc i16 %n.raw to i8) to i16)))) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: ; entry: %n.and = and i16 %n.raw, 255 @@ -382,6 +397,9 @@ define void @ult_constant_rhs_stride2_neg(i16 %n.raw, i8 %start) { ; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is ((256 + (-1 * (zext i8 (2 + %start) to i16))) /u 2) ; CHECK-NEXT: Predicates: ; CHECK-NEXT: {(2 + %start),+,2}<%for.body> Added Flags: +; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is ((256 + (-1 * (zext i8 (2 + %start) to i16))) /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {(2 + %start),+,2}<%for.body> Added Flags: ; entry: br label %for.body From f9827e67ce2ccad863fac9d062eacbd60d829375 Mon Sep 17 00:00:00 2001 From: Volodymyr Sapsai Date: Wed, 31 Jul 2024 10:38:32 -0700 Subject: [PATCH 005/114] [Modules][Diagnostic] Don't claim a METADATA mismatch is always in PCH file. (#101280) You can provide more than one AST file as an input. Emit a path for a file with a problem, so you can disambiguate between multiple files. rdar://65005546 --- .../clang/Basic/DiagnosticSerializationKinds.td | 16 ++++++++-------- clang/lib/Serialization/ASTReader.cpp | 15 ++++++++++----- clang/test/Index/pch-with-errors.c | 2 +- clang/test/Modules/load-module-with-errors.m | 4 ++-- 4 files changed, 21 insertions(+), 16 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSerializationKinds.td b/clang/include/clang/Basic/DiagnosticSerializationKinds.td index eb27de5921d6a1..51d0abbbec252a 100644 --- a/clang/include/clang/Basic/DiagnosticSerializationKinds.td +++ b/clang/include/clang/Basic/DiagnosticSerializationKinds.td @@ -50,14 +50,14 @@ def warn_pch_vfsoverlay_mismatch : Warning< def note_pch_vfsoverlay_files : Note<"%select{PCH|current translation unit}0 has the following VFS overlays:\n%1">; def note_pch_vfsoverlay_empty : Note<"%select{PCH|current translation unit}0 has no VFS overlays">; -def err_pch_version_too_old : Error< - "PCH file uses an older PCH format that is no longer supported">; -def err_pch_version_too_new : Error< - "PCH file uses a newer PCH format that cannot be read">; -def err_pch_different_branch : Error< - "PCH file built from a different branch (%0) than the compiler (%1)">; -def err_pch_with_compiler_errors : Error< - "PCH file contains compiler errors">; +def err_ast_file_version_too_old : Error< + "%select{PCH|module|AST}0 file '%1' uses an older PCH format that is no longer supported">; +def err_ast_file_version_too_new : Error< + "%select{PCH|module|AST}0 file '%1' uses a newer PCH format that cannot be read">; +def err_ast_file_different_branch : Error< + "%select{PCH|module|AST}0 file '%1' built from a different branch (%2) than the compiler (%3)">; +def err_ast_file_with_compiler_errors : Error< + "%select{PCH|module|AST}0 file '%1' contains compiler errors">; def err_module_file_conflict : Error< "module '%0' is defined in both '%1' and '%2'">, DefaultFatal; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 3cb96df12e4da0..86fa96a91932f4 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -3023,8 +3023,9 @@ ASTReader::ReadControlBlock(ModuleFile &F, case METADATA: { if (Record[0] != VERSION_MAJOR && !DisableValidation) { if ((ClientLoadCapabilities & ARR_VersionMismatch) == 0) - Diag(Record[0] < VERSION_MAJOR? 
diag::err_pch_version_too_old - : diag::err_pch_version_too_new); + Diag(Record[0] < VERSION_MAJOR ? diag::err_ast_file_version_too_old + : diag::err_ast_file_version_too_new) + << moduleKindForDiagnostic(F.Kind) << F.FileName; return VersionMismatch; } @@ -3037,7 +3038,8 @@ ASTReader::ReadControlBlock(ModuleFile &F, return OutOfDate; if (!AllowASTWithCompilerErrors) { - Diag(diag::err_pch_with_compiler_errors); + Diag(diag::err_ast_file_with_compiler_errors) + << moduleKindForDiagnostic(F.Kind) << F.FileName; return HadErrors; } } @@ -3060,7 +3062,9 @@ ASTReader::ReadControlBlock(ModuleFile &F, StringRef ASTBranch = Blob; if (StringRef(CurBranch) != ASTBranch && !DisableValidation) { if ((ClientLoadCapabilities & ARR_VersionMismatch) == 0) - Diag(diag::err_pch_different_branch) << ASTBranch << CurBranch; + Diag(diag::err_ast_file_different_branch) + << moduleKindForDiagnostic(F.Kind) << F.FileName << ASTBranch + << CurBranch; return VersionMismatch; } break; @@ -4827,7 +4831,8 @@ ASTReader::ReadASTCore(StringRef FileName, case AST_BLOCK_ID: if (!HaveReadControlBlock) { if ((ClientLoadCapabilities & ARR_VersionMismatch) == 0) - Diag(diag::err_pch_version_too_old); + Diag(diag::err_ast_file_version_too_old) + << moduleKindForDiagnostic(Type) << FileName; return VersionMismatch; } diff --git a/clang/test/Index/pch-with-errors.c b/clang/test/Index/pch-with-errors.c index e8711c8e26a9bc..cfe58c155cd6db 100644 --- a/clang/test/Index/pch-with-errors.c +++ b/clang/test/Index/pch-with-errors.c @@ -38,7 +38,7 @@ void foo(void) { // CHECK-INDEX: [indexEntityReference]: kind: function | name: erroneous // RUN: not %clang -fsyntax-only %s -include %t.h 2>&1 | FileCheck -check-prefix=PCH-ERR %s -// PCH-ERR: error: PCH file contains compiler errors +// PCH-ERR: error: PCH file '{{.*}}' contains compiler errors // RUN: not c-index-test -write-pch %t.pch foobar.c 2>&1 | FileCheck -check-prefix=NONEXISTENT %s // NONEXISTENT: Unable to load translation unit diff --git a/clang/test/Modules/load-module-with-errors.m b/clang/test/Modules/load-module-with-errors.m index 1f8e483a19e928..6e10cb3381be88 100644 --- a/clang/test/Modules/load-module-with-errors.m +++ b/clang/test/Modules/load-module-with-errors.m @@ -1,7 +1,7 @@ // Note: the run lines follow their respective tests, since line/column // matter in this test. -// pcherror-error@* {{PCH file contains compiler errors}} +// pcherror-error-re@* {{module file '{{.*}}use_error_a.pcm' contains compiler errors}} @import use_error_a; // notallowerror-error {{could not build module 'use_error_a'}} @import use_error_b; // expected-no-diagnostics @@ -61,7 +61,7 @@ void test(Error *x) { // RUN: -fmodule-file=%t/prebuilt/use_error_a.pcm \ // RUN: -fmodule-file=%t/prebuilt/use_error_b.pcm \ // RUN: -fmodules-cache-path=%t 2>&1 | \ -// RUN: grep "PCH file contains compiler errors" +// RUN: grep "module file .* contains compiler errors" // Shouldn't build the cached modules (that have errors) when not allowing // errors From 366eade911b54878c9cc1835d2544fb4ba907ef5 Mon Sep 17 00:00:00 2001 From: Leandro Lupori Date: Wed, 31 Jul 2024 14:39:06 -0300 Subject: [PATCH 006/114] [flang][OpenMP] Reland Fix copyprivate semantic checks (#95799) (#101009) There are some cases in which variables used in OpenMP constructs are predetermined as private. The semantic checks for copyprivate were not handling those cases. Besides that, shared symbols were not being properly represented in some cases. 
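As an illustration of the first point, a pattern like the following (condensed
from the new copyprivate04.f90 test added below) is now accepted because the
local variable is predetermined as private, while a SAVE variable is still
diagnosed:

  subroutine demo
    integer :: i          ! local: predetermined private -> accepted
    integer, save :: j    ! SAVE: not private -> still an error
    !$omp single
    i = 123
    !$omp end single copyprivate(i)
    !$omp single
    !$omp end single copyprivate(j)  ! not PRIVATE or THREADPRIVATE in outer context
  end subroutine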
When there was no previously declared private (implicit) symbol, no new association symbols, representing shared ones, were being created. These symbols must always be inserted in constructs that may privatize the original symbol: parallel, teams and task generating constructs. Fixes #87214 and #86907 --- flang/include/flang/Semantics/tools.h | 1 + flang/lib/Semantics/resolve-directives.cpp | 152 ++++++++++++------ flang/test/Lower/OpenMP/associate.f90 | 38 +++++ .../OpenMP/default-clause-implied-do-fix.f90 | 15 +- flang/test/Semantics/OpenMP/copyprivate04.f90 | 112 +++++++++++++ .../Semantics/OpenMP/do05-positivecase.f90 | 4 +- flang/test/Semantics/OpenMP/do20.f90 | 2 +- flang/test/Semantics/OpenMP/implicit-dsa.f90 | 14 +- .../Semantics/OpenMP/parallel-shared05.f90 | 17 ++ flang/test/Semantics/OpenMP/reduction08.f90 | 10 +- flang/test/Semantics/OpenMP/reduction09.f90 | 8 +- flang/test/Semantics/OpenMP/symbol01.f90 | 4 +- flang/test/Semantics/OpenMP/symbol02.f90 | 4 +- flang/test/Semantics/OpenMP/symbol03.f90 | 4 +- flang/test/Semantics/OpenMP/symbol05.f90 | 4 +- flang/test/Semantics/OpenMP/symbol07.f90 | 2 +- flang/test/Semantics/OpenMP/symbol08.f90 | 36 ++--- flang/test/Semantics/OpenMP/symbol09.f90 | 2 +- 18 files changed, 331 insertions(+), 98 deletions(-) create mode 100644 flang/test/Lower/OpenMP/associate.f90 create mode 100644 flang/test/Semantics/OpenMP/copyprivate04.f90 create mode 100644 flang/test/Semantics/OpenMP/parallel-shared05.f90 diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h index 0aee1ef299bc8b..ec275f349e81bf 100644 --- a/flang/include/flang/Semantics/tools.h +++ b/flang/include/flang/Semantics/tools.h @@ -86,6 +86,7 @@ bool IsIntrinsicConcat( bool IsGenericDefinedOp(const Symbol &); bool IsDefinedOperator(SourceName); std::string MakeOpName(SourceName); +bool IsCommonBlockContaining(const Symbol &, const Symbol &); // Returns true if maybeAncestor exists and is a proper ancestor of a // descendent scope (or symbol owner). 
Will be false, unlike Scope::Contains(), diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index fb32ce6837fbfc..d635a7b8b7874f 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -19,6 +19,7 @@ #include "flang/Parser/parse-tree.h" #include "flang/Parser/tools.h" #include "flang/Semantics/expression.h" +#include "flang/Semantics/tools.h" #include #include #include @@ -729,7 +730,6 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { void CheckNameInAllocateStmt(const parser::CharBlock &source, const parser::Name &ompObject, const parser::AllocateStmt &allocate); - bool HasSymbolInEnclosingScope(const Symbol &, Scope &); std::int64_t ordCollapseLevel{0}; void AddOmpRequiresToScope(Scope &, WithOmpDeclarative::RequiresFlags, @@ -2035,6 +2035,14 @@ void OmpAttributeVisitor::Post(const parser::OpenMPAllocatorsConstruct &x) { // and adjust the symbol for each Name if necessary void OmpAttributeVisitor::Post(const parser::Name &name) { auto *symbol{name.symbol}; + auto IsPrivatizable = [](const Symbol *sym) { + return !IsProcedure(*sym) && !IsNamedConstant(*sym) && + !sym->owner().IsDerivedType() && + sym->owner().kind() != Scope::Kind::ImpliedDos && + !sym->detailsIf() && + !sym->detailsIf(); + }; + if (symbol && !dirContext_.empty() && GetContext().withinConstruct) { // Exclude construct-names if (auto *details{symbol->detailsIf()}) { @@ -2042,8 +2050,7 @@ void OmpAttributeVisitor::Post(const parser::Name &name) { return; } } - if (!symbol->owner().IsDerivedType() && !IsProcedure(*symbol) && - !IsObjectWithDSA(*symbol) && !IsNamedConstant(*symbol)) { + if (IsPrivatizable(symbol) && !IsObjectWithDSA(*symbol)) { // TODO: create a separate function to go through the rules for // predetermined, explicitly determined, and implicitly // determined data-sharing attributes (2.15.1.1). @@ -2068,6 +2075,9 @@ void OmpAttributeVisitor::Post(const parser::Name &name) { if (found->test(semantics::Symbol::Flag::OmpThreadprivate)) return; } + if (!IsPrivatizable(symbol)) { + return; + } // Implicitly determined DSAs // OMP 5.2 5.1.1 - Variables Referenced in a Construct @@ -2085,16 +2095,22 @@ void OmpAttributeVisitor::Post(const parser::Name &name) { } } - // When handling each implicit rule, either a new private symbol is - // declared or the last declared symbol is used. - // In the latter case, it's necessary to insert a new symbol in the scope - // being processed, associated with the last declared symbol. - // This captures the fact that, although we are using the last declared - // symbol, its DSA could be different in this scope. - // Also, because of how symbols are collected in lowering, not inserting - // a new symbol in this scope could lead to the conclusion that the - // symbol was declared in this construct, which would result in wrong - // privatization code being generated. + // When handling each implicit rule for a given symbol, one of the + // following 3 actions may be taken: + // 1. Declare a new private symbol. + // 2. Create a new association symbol with no flags, that will represent + // a shared symbol in the current scope. Note that symbols without + // any private flags are considered as shared. + // 3. Use the last declared private symbol, by inserting a new symbol + // in the scope being processed, associated with it. 
+ // If no private symbol was declared previously, then no association + // is needed and the symbol from the enclosing scope will be + // inherited by the current one. + // + // Because of how symbols are collected in lowering, not inserting a new + // symbol in the last case could lead to the conclusion that a symbol + // from an enclosing construct was declared in the current construct, + // which would result in wrong privatization code being generated. // Consider the following example: // // !$omp parallel default(private) ! p1 @@ -2107,48 +2123,56 @@ void OmpAttributeVisitor::Post(const parser::Name &name) { // (p2), it would use the x symbol definition from the enclosing scope. // Then, when p2's default symbols were collected in lowering, the x // symbol from the outer parallel construct (p1) would be collected, as - // it would have the private flag set (note that symbols that don't have - // any private flag are considered as shared). + // it would have the private flag set. // This would make x appear to be defined in p2, causing it to be // privatized in p2 and its privatization in p1 to be skipped. - auto declNewSymbol = [&](Symbol::Flag flag) { + auto makePrivateSymbol = [&](Symbol::Flag flag) { Symbol *hostSymbol = lastDeclSymbol ? lastDeclSymbol : &symbol->GetUltimate(); lastDeclSymbol = DeclarePrivateAccessEntity( *hostSymbol, flag, context_.FindScope(dirContext.directiveSource)); return lastDeclSymbol; }; + auto makeSharedSymbol = [&]() { + Symbol *hostSymbol = + lastDeclSymbol ? lastDeclSymbol : &symbol->GetUltimate(); + MakeAssocSymbol(symbol->name(), *hostSymbol, + context_.FindScope(dirContext.directiveSource)); + }; auto useLastDeclSymbol = [&]() { if (lastDeclSymbol) MakeAssocSymbol(symbol->name(), *lastDeclSymbol, context_.FindScope(dirContext.directiveSource)); }; + bool taskGenDir = llvm::omp::taskGeneratingSet.test(dirContext.directive); + bool targetDir = llvm::omp::allTargetSet.test(dirContext.directive); + bool parallelDir = llvm::omp::allParallelSet.test(dirContext.directive); + bool teamsDir = llvm::omp::allTeamsSet.test(dirContext.directive); + if (dsa.has_value()) { - useLastDeclSymbol(); + if (dsa.value() == Symbol::Flag::OmpShared && + (parallelDir || taskGenDir || teamsDir)) + makeSharedSymbol(); + // Private symbols will have been declared already. prevDSA = dsa; continue; } - bool taskGenDir = llvm::omp::taskGeneratingSet.test(dirContext.directive); - bool targetDir = llvm::omp::allTargetSet.test(dirContext.directive); - bool parallelDir = llvm::omp::allParallelSet.test(dirContext.directive); - if (dirContext.defaultDSA == Symbol::Flag::OmpPrivate || dirContext.defaultDSA == Symbol::Flag::OmpFirstPrivate || dirContext.defaultDSA == Symbol::Flag::OmpShared) { // 1) default // Allowed only with parallel, teams and task generating constructs. 
- assert(parallelDir || taskGenDir || - llvm::omp::allTeamsSet.test(dirContext.directive)); + assert(parallelDir || taskGenDir || teamsDir); if (dirContext.defaultDSA != Symbol::Flag::OmpShared) - declNewSymbol(dirContext.defaultDSA); + makePrivateSymbol(dirContext.defaultDSA); else - useLastDeclSymbol(); + makeSharedSymbol(); dsa = dirContext.defaultDSA; } else if (parallelDir) { // 2) parallel -> shared - useLastDeclSymbol(); + makeSharedSymbol(); dsa = Symbol::Flag::OmpShared; } else if (!taskGenDir && !targetDir) { // 3) enclosing context @@ -2161,12 +2185,12 @@ void OmpAttributeVisitor::Post(const parser::Name &name) { // TODO 5) dummy arg in orphaned taskgen construct -> firstprivate if (prevDSA == Symbol::Flag::OmpShared) { // 6) shared in enclosing context -> shared - useLastDeclSymbol(); + makeSharedSymbol(); dsa = Symbol::Flag::OmpShared; } else { // 7) firstprivate dsa = Symbol::Flag::OmpFirstPrivate; - declNewSymbol(*dsa)->set(Symbol::Flag::OmpImplicit); + makePrivateSymbol(*dsa)->set(Symbol::Flag::OmpImplicit); } } prevDSA = dsa; @@ -2570,20 +2594,59 @@ void ResolveOmpTopLevelParts( }); } -void OmpAttributeVisitor::CheckDataCopyingClause( - const parser::Name &name, const Symbol &symbol, Symbol::Flag ompFlag) { - const auto *checkSymbol{&symbol}; +static bool IsSymbolInCommonBlock(const Symbol &symbol) { + // TODO Improve the performance of this predicate function. + // Going through all symbols sequentially, in all common blocks, can be + // slow when there are many symbols. A possible optimization is to add + // an OmpInCommonBlock flag to Symbol, to make it possible to quickly + // test if a given symbol is in a common block. + for (const auto &cb : symbol.owner().commonBlocks()) { + if (IsCommonBlockContaining(cb.second.get(), symbol)) { + return true; + } + } + return false; +} + +static bool IsSymbolThreadprivate(const Symbol &symbol) { if (const auto *details{symbol.detailsIf()}) { - checkSymbol = &details->symbol(); + return details->symbol().test(Symbol::Flag::OmpThreadprivate); } + return symbol.test(Symbol::Flag::OmpThreadprivate); +} +static bool IsSymbolPrivate(const Symbol &symbol) { + if (symbol.test(Symbol::Flag::OmpPrivate) || + symbol.test(Symbol::Flag::OmpFirstPrivate)) { + return true; + } + // A symbol that has not gone through constructs that may privatize the + // original symbol may be predetermined as private. 
+ // (OMP 5.2 5.1.1 - Variables Referenced in a Construct) + if (symbol == symbol.GetUltimate()) { + switch (symbol.owner().kind()) { + case Scope::Kind::MainProgram: + case Scope::Kind::Subprogram: + case Scope::Kind::BlockConstruct: + return !symbol.attrs().test(Attr::SAVE) && + !symbol.attrs().test(Attr::PARAMETER) && !IsAssumedShape(symbol) && + !IsSymbolInCommonBlock(symbol); + default: + return false; + } + } + return false; +} + +void OmpAttributeVisitor::CheckDataCopyingClause( + const parser::Name &name, const Symbol &symbol, Symbol::Flag ompFlag) { if (ompFlag == Symbol::Flag::OmpCopyIn) { // List of items/objects that can appear in a 'copyin' clause must be // 'threadprivate' - if (!checkSymbol->test(Symbol::Flag::OmpThreadprivate)) { + if (!IsSymbolThreadprivate(symbol)) { context_.Say(name.source, "Non-THREADPRIVATE object '%s' in COPYIN clause"_err_en_US, - checkSymbol->name()); + symbol.name()); } } else if (ompFlag == Symbol::Flag::OmpCopyPrivate && GetContext().directive == llvm::omp::Directive::OMPD_single) { @@ -2596,18 +2659,13 @@ void OmpAttributeVisitor::CheckDataCopyingClause( "COPYPRIVATE variable '%s' may not appear on a PRIVATE or " "FIRSTPRIVATE clause on a SINGLE construct"_err_en_US, symbol.name()); - } else { + } else if (!IsSymbolThreadprivate(symbol) && !IsSymbolPrivate(symbol)) { // List of items/objects that can appear in a 'copyprivate' clause must be // either 'private' or 'threadprivate' in enclosing context. - if (!checkSymbol->test(Symbol::Flag::OmpThreadprivate) && - !(HasSymbolInEnclosingScope(symbol, currScope()) && - (symbol.test(Symbol::Flag::OmpPrivate) || - symbol.test(Symbol::Flag::OmpFirstPrivate)))) { - context_.Say(name.source, - "COPYPRIVATE variable '%s' is not PRIVATE or THREADPRIVATE in " - "outer context"_err_en_US, - symbol.name()); - } + context_.Say(name.source, + "COPYPRIVATE variable '%s' is not PRIVATE or THREADPRIVATE in " + "outer context"_err_en_US, + symbol.name()); } } } @@ -2677,12 +2735,6 @@ void OmpAttributeVisitor::CheckLabelContext(const parser::CharBlock source, } } -bool OmpAttributeVisitor::HasSymbolInEnclosingScope( - const Symbol &symbol, Scope &scope) { - const auto symbols{scope.parent().GetSymbols()}; - return llvm::is_contained(symbols, symbol); -} - // Goes through the names in an OmpObjectList and checks if each name appears // in the given allocate statement void OmpAttributeVisitor::CheckAllNamesInAllocateStmt( diff --git a/flang/test/Lower/OpenMP/associate.f90 b/flang/test/Lower/OpenMP/associate.f90 new file mode 100644 index 00000000000000..c6890f0954a7f8 --- /dev/null +++ b/flang/test/Lower/OpenMP/associate.f90 @@ -0,0 +1,38 @@ +! Check that constructs with associate and variables that have implicitly +! determined DSAs are lowered properly. +! 
RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s + +!CHECK-LABEL: func @_QPtest_parallel_assoc +!CHECK: omp.parallel { +!CHECK-NOT: hlfir.declare {{.*}} {uniq_name = "_QFtest_parallel_assocEa"} +!CHECK-NOT: hlfir.declare {{.*}} {uniq_name = "_QFtest_parallel_assocEb"} +!CHECK: omp.wsloop { +!CHECK: } +!CHECK: } +!CHECK: omp.parallel { +!CHECK-NOT: hlfir.declare {{.*}} {uniq_name = "_QFtest_parallel_assocEb"} +!CHECK: omp.wsloop { +!CHECK: } +!CHECK: } +subroutine test_parallel_assoc() + integer, parameter :: l = 3 + integer :: a(l) + integer :: i + a = 1 + + !$omp parallel do + do i = 1,l + associate (b=>a) + b(i) = b(i) * 2 + end associate + enddo + !$omp end parallel do + + !$omp parallel do default(private) + do i = 1,l + associate (b=>a) + b(i) = b(i) * 2 + end associate + enddo + !$omp end parallel do +end subroutine diff --git a/flang/test/Lower/OpenMP/default-clause-implied-do-fix.f90 b/flang/test/Lower/OpenMP/default-clause-implied-do-fix.f90 index 25579272a6e0bc..21992aa629bafa 100644 --- a/flang/test/Lower/OpenMP/default-clause-implied-do-fix.f90 +++ b/flang/test/Lower/OpenMP/default-clause-implied-do-fix.f90 @@ -1,6 +1,6 @@ !RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s -!CHECK: @_QPsb +!CHECK-LABEL: func @_QPsb subroutine sb(a) integer :: a(:) !CHECK: omp.parallel @@ -9,3 +9,16 @@ subroutine sb(a) if (any(a/=(/(100,i=1,5)/))) print *, "OK" !$omp end parallel end subroutine + +!CHECK-LABEL: func @_QPsb2 +subroutine sb2() + integer, parameter :: SIZE=20 + integer :: i, a(SIZE) + +! Just check that the construct below doesn't hit a TODO in lowering. +!CHECK: omp.parallel + !$omp parallel + a = [ (i, i=1, SIZE) ] + print *, i + !$omp end parallel +end subroutine diff --git a/flang/test/Semantics/OpenMP/copyprivate04.f90 b/flang/test/Semantics/OpenMP/copyprivate04.f90 new file mode 100644 index 00000000000000..291cf1103fb279 --- /dev/null +++ b/flang/test/Semantics/OpenMP/copyprivate04.f90 @@ -0,0 +1,112 @@ +! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp +! OpenMP Version 5.2 +! 5.1.1 - Variables Referenced in a Construct +! Copyprivate must accept variables that are predetermined as private. + +module m1 + integer :: m +end module + +program omp_copyprivate + use m1 + implicit none + integer :: i + integer, save :: j + integer :: k + common /c/ k + real, parameter :: pi = 3.14 + integer :: a1(10) + + ! Local variables are private. + !$omp single + i = 123 + !$omp end single copyprivate(i) + !$omp single + !$omp end single copyprivate(a1) + + ! Variables with the SAVE attribute are not private. + !$omp single + !ERROR: COPYPRIVATE variable 'j' is not PRIVATE or THREADPRIVATE in outer context + !$omp end single copyprivate(j) + + ! Common block variables are not private. + !$omp single + !ERROR: COPYPRIVATE variable 'k' is not PRIVATE or THREADPRIVATE in outer context + !$omp end single copyprivate(/c/) + !$omp single + !ERROR: COPYPRIVATE variable 'k' is not PRIVATE or THREADPRIVATE in outer context + !$omp end single copyprivate(k) + + ! Module variables are not private. + !$omp single + !ERROR: COPYPRIVATE variable 'm' is not PRIVATE or THREADPRIVATE in outer context + !$omp end single copyprivate(m) + + ! Parallel can make a variable shared. 
+ !$omp parallel + !$omp single + i = 456 + !ERROR: COPYPRIVATE variable 'i' is not PRIVATE or THREADPRIVATE in outer context + !$omp end single copyprivate(i) + call sub(j, a1) + !$omp end parallel + + !$omp parallel shared(i) + !$omp single + i = 456 + !ERROR: COPYPRIVATE variable 'i' is not PRIVATE or THREADPRIVATE in outer context + !$omp end single copyprivate(i) + !$omp end parallel + + !FIXME: an error should be emitted in this case. + ! copyprivate(i) should be considered as a reference to i and a new + ! symbol should be created in `parallel` scope, for this case to be + ! handled properly. + !$omp parallel + !$omp single + !$omp end single copyprivate(i) + !$omp end parallel + + ! Named constants are shared. + !$omp single + !ERROR: COPYPRIVATE variable 'pi' is not PRIVATE or THREADPRIVATE in outer context + !$omp end single copyprivate(pi) + + !$omp parallel do + do i = 1, 10 + !$omp parallel + !$omp single + j = i + !ERROR: COPYPRIVATE variable 'i' is not PRIVATE or THREADPRIVATE in outer context + !$omp end single copyprivate(i) + !$omp end parallel + end do + !$omp end parallel do + +contains + subroutine sub(s1, a) + integer :: s1 + integer :: a(:) + + ! Dummy argument. + !$omp single + !$omp end single copyprivate(s1) + + ! Assumed shape arrays are shared. + !$omp single + !ERROR: COPYPRIVATE variable 'a' is not PRIVATE or THREADPRIVATE in outer context + !$omp end single copyprivate(a) + end subroutine + + integer function fun(f1) + integer :: f1 + + ! Dummy argument. + !$omp single + !$omp end single copyprivate(f1) + + ! Function result is private. + !$omp single + !$omp end single copyprivate(fun) + end function +end program diff --git a/flang/test/Semantics/OpenMP/do05-positivecase.f90 b/flang/test/Semantics/OpenMP/do05-positivecase.f90 index 4e02235f58a1a4..3b512a5b4f25eb 100644 --- a/flang/test/Semantics/OpenMP/do05-positivecase.f90 +++ b/flang/test/Semantics/OpenMP/do05-positivecase.f90 @@ -20,12 +20,12 @@ program omp_do !$omp parallel default(shared) !$omp do !DEF: /omp_do/OtherConstruct2/OtherConstruct1/i (OmpPrivate, OmpPreDetermined) HostAssoc INTEGER(4) - !REF: /omp_do/n + !DEF: /omp_do/OtherConstruct2/n HostAssoc INTEGER(4) do i=1,n !$omp parallel !$omp single !DEF: /work EXTERNAL (Subroutine) ProcEntity - !REF: /omp_do/OtherConstruct2/OtherConstruct1/i + !DEF: /omp_do/OtherConstruct2/OtherConstruct1/OtherConstruct1/i HostAssoc INTEGER(4) call work(i, 1) !$omp end single !$omp end parallel diff --git a/flang/test/Semantics/OpenMP/do20.f90 b/flang/test/Semantics/OpenMP/do20.f90 index 915d01e69edd74..0cafae76b86b00 100644 --- a/flang/test/Semantics/OpenMP/do20.f90 +++ b/flang/test/Semantics/OpenMP/do20.f90 @@ -10,7 +10,7 @@ subroutine shared_iv !$omp parallel shared(i) !$omp single - !REF: /shared_iv/i + !DEF: /shared_iv/OtherConstruct1/i HostAssoc INTEGER(4) do i = 0, 1 end do !$omp end single diff --git a/flang/test/Semantics/OpenMP/implicit-dsa.f90 b/flang/test/Semantics/OpenMP/implicit-dsa.f90 index 92d2421d06f971..2abe3a0e16d624 100644 --- a/flang/test/Semantics/OpenMP/implicit-dsa.f90 +++ b/flang/test/Semantics/OpenMP/implicit-dsa.f90 @@ -15,14 +15,14 @@ subroutine implicit_dsa_test1 !$omp task private(y) shared(z) !DEF: /implicit_dsa_test1/OtherConstruct1/x (OmpFirstPrivate, OmpImplicit) HostAssoc INTEGER(4) !DEF: /implicit_dsa_test1/OtherConstruct1/y (OmpPrivate) HostAssoc INTEGER(4) - !REF: /implicit_dsa_test1/z + !DEF: /implicit_dsa_test1/OtherConstruct1/z HostAssoc INTEGER(4) x = y + z !$omp end task !$omp task default(shared) - !REF: 
/implicit_dsa_test1/x - !REF: /implicit_dsa_test1/y - !REF: /implicit_dsa_test1/z + !DEF: /implicit_dsa_test1/OtherConstruct2/x HostAssoc INTEGER(4) + !DEF: /implicit_dsa_test1/OtherConstruct2/y HostAssoc INTEGER(4) + !DEF: /implicit_dsa_test1/OtherConstruct2/z HostAssoc INTEGER(4) x = y + z !$omp end task @@ -61,16 +61,16 @@ subroutine implicit_dsa_test3 !$omp parallel !$omp task - !REF: /implicit_dsa_test3/x + !DEF: /implicit_dsa_test3/OtherConstruct1/OtherConstruct1/x HostAssoc INTEGER(4) x = 1 - !REF: /implicit_dsa_test3/y + !DEF: /implicit_dsa_test3/OtherConstruct1/OtherConstruct1/y HostAssoc INTEGER(4) y = 1 !$omp end task !$omp task firstprivate(x) !DEF: /implicit_dsa_test3/OtherConstruct1/OtherConstruct2/x (OmpFirstPrivate) HostAssoc INTEGER(4) x = 1 - !REF: /implicit_dsa_test3/z + !DEF: /implicit_dsa_test3/OtherConstruct1/OtherConstruct2/z HostAssoc INTEGER(4) z = 1 !$omp end task !$omp end parallel diff --git a/flang/test/Semantics/OpenMP/parallel-shared05.f90 b/flang/test/Semantics/OpenMP/parallel-shared05.f90 new file mode 100644 index 00000000000000..bcc1a9437c11e8 --- /dev/null +++ b/flang/test/Semantics/OpenMP/parallel-shared05.f90 @@ -0,0 +1,17 @@ +!RUN: %python %S/../test_errors.py %s %flang -fopenmp +! OpenMP Version 4.5 +! 2.15.3.2 parallel shared Clause +program omp_parallel_shared + type derived + integer :: field(2, 3) + end type + integer :: field(2) + type(derived) :: y + + ! Check that derived type fields and variables with the same name + ! don't cause errors. + !$omp parallel + y%field(2, 3) = 1 + field(1) = 1 + !$omp end parallel +end program omp_parallel_shared diff --git a/flang/test/Semantics/OpenMP/reduction08.f90 b/flang/test/Semantics/OpenMP/reduction08.f90 index 99163327cdafa6..9442fbd4d5978a 100644 --- a/flang/test/Semantics/OpenMP/reduction08.f90 +++ b/flang/test/Semantics/OpenMP/reduction08.f90 @@ -15,7 +15,7 @@ program omp_reduction do i=1,10 !DEF: /omp_reduction/OtherConstruct1/k (OmpReduction) HostAssoc INTEGER(4) !DEF: /omp_reduction/max ELEMENTAL, INTRINSIC, PURE (Function) ProcEntity - !REF: /omp_reduction/m + !DEF: /omp_reduction/OtherConstruct1/m HostAssoc INTEGER(4) k = max(k, m) end do !$omp end parallel do @@ -25,7 +25,7 @@ program omp_reduction do i=1,10 !DEF: /omp_reduction/OtherConstruct2/k (OmpReduction) HostAssoc INTEGER(4) !DEF: /omp_reduction/min ELEMENTAL, INTRINSIC, PURE (Function) ProcEntity - !REF: /omp_reduction/m + !DEF: /omp_reduction/OtherConstruct2/m HostAssoc INTEGER(4) k = min(k, m) end do !$omp end parallel do @@ -35,7 +35,7 @@ program omp_reduction do i=1,10 !DEF: /omp_reduction/OtherConstruct3/k (OmpReduction) HostAssoc INTEGER(4) !DEF: /omp_reduction/iand ELEMENTAL, INTRINSIC, PURE (Function) ProcEntity - !REF: /omp_reduction/m + !DEF: /omp_reduction/OtherConstruct3/m HostAssoc INTEGER(4) k = iand(k, m) end do !$omp end parallel do @@ -45,7 +45,7 @@ program omp_reduction do i=1,10 !DEF: /omp_reduction/OtherConstruct4/k (OmpReduction) HostAssoc INTEGER(4) !DEF: /omp_reduction/ior ELEMENTAL, INTRINSIC, PURE (Function) ProcEntity - !REF: /omp_reduction/m + !DEF: /omp_reduction/OtherConstruct4/m HostAssoc INTEGER(4) k = ior(k, m) end do !$omp end parallel do @@ -55,7 +55,7 @@ program omp_reduction do i=1,10 !DEF: /omp_reduction/OtherConstruct5/k (OmpReduction) HostAssoc INTEGER(4) !DEF: /omp_reduction/ieor ELEMENTAL, INTRINSIC, PURE (Function) ProcEntity - !REF: /omp_reduction/m + !DEF: /omp_reduction/OtherConstruct5/m HostAssoc INTEGER(4) k = ieor(k,m) end do !$omp end parallel do diff --git 
a/flang/test/Semantics/OpenMP/reduction09.f90 b/flang/test/Semantics/OpenMP/reduction09.f90 index dbc8d1b060e65a..1af2fc4fd9691e 100644 --- a/flang/test/Semantics/OpenMP/reduction09.f90 +++ b/flang/test/Semantics/OpenMP/reduction09.f90 @@ -26,7 +26,7 @@ program omp_reduction !$omp parallel do reduction(+:a(10)) !DEF: /omp_reduction/OtherConstruct2/i (OmpPrivate, OmpPreDetermined) HostAssoc INTEGER(4) do i=1,10 - !REF: /omp_reduction/k + !DEF: /omp_reduction/OtherConstruct2/k HostAssoc INTEGER(4) k = k+1 end do !$omp end parallel do @@ -35,7 +35,7 @@ program omp_reduction !$omp parallel do reduction(+:a(1:10:1)) !DEF: /omp_reduction/OtherConstruct3/i (OmpPrivate, OmpPreDetermined) HostAssoc INTEGER(4) do i=1,10 - !REF: /omp_reduction/k + !DEF: /omp_reduction/OtherConstruct3/k HostAssoc INTEGER(4) k = k+1 end do !$omp end parallel do @@ -43,7 +43,7 @@ program omp_reduction !$omp parallel do reduction(+:b(1:10:1,1:5,2)) !DEF: /omp_reduction/OtherConstruct4/i (OmpPrivate, OmpPreDetermined) HostAssoc INTEGER(4) do i=1,10 - !REF: /omp_reduction/k + !DEF: /omp_reduction/OtherConstruct4/k HostAssoc INTEGER(4) k = k+1 end do !$omp end parallel do @@ -51,7 +51,7 @@ program omp_reduction !$omp parallel do reduction(+:b(1:10:1,1:5,2:5:1)) !DEF: /omp_reduction/OtherConstruct5/i (OmpPrivate, OmpPreDetermined) HostAssoc INTEGER(4) do i=1,10 - !REF: /omp_reduction/k + !DEF: /omp_reduction/OtherConstruct5/k HostAssoc INTEGER(4) k = k+1 end do !$omp end parallel do diff --git a/flang/test/Semantics/OpenMP/symbol01.f90 b/flang/test/Semantics/OpenMP/symbol01.f90 index 0b435a9ab9850b..ecfb8622f81794 100644 --- a/flang/test/Semantics/OpenMP/symbol01.f90 +++ b/flang/test/Semantics/OpenMP/symbol01.f90 @@ -48,7 +48,7 @@ program mm !DEF: /mm/OtherConstruct1/i (OmpPrivate, OmpPreDetermined) HostAssoc INTEGER(4) do i=1,10 !DEF: /mm/OtherConstruct1/a (OmpPrivate) HostAssoc REAL(4) - !REF: /mm/b + !DEF: /mm/OtherConstruct1/b HostAssoc INTEGER(4) !REF: /mm/OtherConstruct1/i a = a+b(i) !DEF: /mm/OtherConstruct1/t (OmpPrivate) HostAssoc TYPE(myty) @@ -62,7 +62,7 @@ program mm !REF: /mm/OtherConstruct1/i !REF: /mm/OtherConstruct1/y x = a+i+y - !REF: /mm/c + !DEF: /mm/OtherConstruct1/c HostAssoc REAL(4) c = 3.0 end do end program diff --git a/flang/test/Semantics/OpenMP/symbol02.f90 b/flang/test/Semantics/OpenMP/symbol02.f90 index f6ffc5500d0a44..c199c526e1fa8c 100644 --- a/flang/test/Semantics/OpenMP/symbol02.f90 +++ b/flang/test/Semantics/OpenMP/symbol02.f90 @@ -15,9 +15,9 @@ a = 3. !DEF: /MainProgram1/OtherConstruct1/b (OmpPrivate) HostAssoc REAL(4) b = 4 - !REF: /MainProgram1/c + !DEF: /MainProgram1/OtherConstruct1/c HostAssoc REAL(4) c = 5 - !DEF: /MainProgram1/d (Implicit) ObjectEntity REAL(4) + !DEF: /MainProgram1/OtherConstruct1/d HostAssoc REAL(4) d = 6 !$omp end parallel !DEF: /MainProgram1/a (Implicit) ObjectEntity REAL(4) diff --git a/flang/test/Semantics/OpenMP/symbol03.f90 b/flang/test/Semantics/OpenMP/symbol03.f90 index 93e9b7a3eae6be..ba941b9c9e7c4e 100644 --- a/flang/test/Semantics/OpenMP/symbol03.f90 +++ b/flang/test/Semantics/OpenMP/symbol03.f90 @@ -9,10 +9,10 @@ !$omp parallel private(a) shared(b) !DEF: /MainProgram1/OtherConstruct1/a (OmpPrivate) HostAssoc REAL(4) a = 3. - !REF: /MainProgram1/b + !DEF: /MainProgram1/OtherConstruct1/b HostAssoc REAL(4) b = 4 !$omp parallel private(b) shared(a) - !REF: /MainProgram1/OtherConstruct1/a + !DEF: /MainProgram1/OtherConstruct1/OtherConstruct1/a HostAssoc REAL(4) a = 5. 
!DEF: /MainProgram1/OtherConstruct1/OtherConstruct1/b (OmpPrivate) HostAssoc REAL(4) b = 6 diff --git a/flang/test/Semantics/OpenMP/symbol05.f90 b/flang/test/Semantics/OpenMP/symbol05.f90 index fa0a8f65a42941..1ad0c10a401350 100644 --- a/flang/test/Semantics/OpenMP/symbol05.f90 +++ b/flang/test/Semantics/OpenMP/symbol05.f90 @@ -15,10 +15,10 @@ subroutine foo !DEF: /mm/foo/a ObjectEntity INTEGER(4) integer :: a = 3 !$omp parallel - !REF: /mm/foo/a + !DEF: /mm/foo/OtherConstruct1/a HostAssoc INTEGER(4) a = 1 !DEF: /mm/i PUBLIC (Implicit, OmpThreadprivate) ObjectEntity INTEGER(4) - !REF: /mm/foo/a + !REF: /mm/foo/OtherConstruct1/a i = a !$omp end parallel !REF: /mm/foo/a diff --git a/flang/test/Semantics/OpenMP/symbol07.f90 b/flang/test/Semantics/OpenMP/symbol07.f90 index e2250f5c7908aa..8b4716999820b1 100644 --- a/flang/test/Semantics/OpenMP/symbol07.f90 +++ b/flang/test/Semantics/OpenMP/symbol07.f90 @@ -23,7 +23,7 @@ subroutine function_call_in_region !$omp parallel default(none) private(a) shared(b) !DEF: /function_call_in_region/OtherConstruct1/a (OmpPrivate) HostAssoc REAL(4) !REF: /function_call_in_region/foo - !REF: /function_call_in_region/b + !DEF: /function_call_in_region/OtherConstruct1/b HostAssoc REAL(4) a = foo(b) !$omp end parallel !REF: /function_call_in_region/a diff --git a/flang/test/Semantics/OpenMP/symbol08.f90 b/flang/test/Semantics/OpenMP/symbol08.f90 index 3af85af74ee97c..69ccd17391b54f 100644 --- a/flang/test/Semantics/OpenMP/symbol08.f90 +++ b/flang/test/Semantics/OpenMP/symbol08.f90 @@ -28,18 +28,18 @@ subroutine test_do !DEF: /test_do/k ObjectEntity INTEGER(4) integer i, j, k !$omp parallel - !REF: /test_do/i + !DEF: /test_do/OtherConstruct1/i HostAssoc INTEGER(4) i = 99 !$omp do collapse(2) !DEF: /test_do/OtherConstruct1/OtherConstruct1/i (OmpPrivate, OmpPreDetermined) HostAssoc INTEGER(4) do i=1,5 !DEF: /test_do/OtherConstruct1/OtherConstruct1/j (OmpPrivate, OmpPreDetermined) HostAssoc INTEGER(4) do j=6,10 - !REF: /test_do/a + !DEF: /test_do/OtherConstruct1/a HostAssoc REAL(4) a(1,1,1) = 0. !DEF: /test_do/OtherConstruct1/k (OmpPrivate, OmpPreDetermined) HostAssoc INTEGER(4) do k=11,15 - !REF: /test_do/a + !REF: /test_do/OtherConstruct1/a !REF: /test_do/OtherConstruct1/k !REF: /test_do/OtherConstruct1/OtherConstruct1/j !REF: /test_do/OtherConstruct1/OtherConstruct1/i @@ -65,11 +65,11 @@ subroutine test_pardo do i=1,5 !DEF: /test_pardo/OtherConstruct1/j (OmpPrivate, OmpPreDetermined) HostAssoc INTEGER(4) do j=6,10 - !REF: /test_pardo/a + !DEF: /test_pardo/OtherConstruct1/a HostAssoc REAL(4) a(1,1,1) = 0. 
!DEF: /test_pardo/OtherConstruct1/k (OmpPrivate) HostAssoc INTEGER(4) do k=11,15 - !REF: /test_pardo/a + !REF: /test_pardo/OtherConstruct1/a !REF: /test_pardo/OtherConstruct1/k !REF: /test_pardo/OtherConstruct1/j !REF: /test_pardo/OtherConstruct1/i @@ -138,15 +138,15 @@ subroutine dotprod (b, c, n, block_size, num_teams, block_threads) do i0=1,n,block_size !$omp parallel do reduction(+:sum) !DEF: /dotprod/OtherConstruct1/OtherConstruct1/OtherConstruct1/OtherConstruct1/i (OmpPrivate, OmpPreDetermined) HostAssoc INTEGER(4) - !REF: /dotprod/OtherConstruct1/OtherConstruct1/OtherConstruct1/i0 + !DEF: /dotprod/OtherConstruct1/OtherConstruct1/OtherConstruct1/OtherConstruct1/i0 HostAssoc INTEGER(4) !DEF: /dotprod/min ELEMENTAL, INTRINSIC, PURE (Function) ProcEntity - !REF: /dotprod/block_size - !REF: /dotprod/n + !DEF: /dotprod/OtherConstruct1/OtherConstruct1/OtherConstruct1/OtherConstruct1/block_size HostAssoc INTEGER(4) + !DEF: /dotprod/OtherConstruct1/OtherConstruct1/OtherConstruct1/OtherConstruct1/n HostAssoc INTEGER(4) do i=i0,min(i0+block_size, n) !DEF: /dotprod/OtherConstruct1/OtherConstruct1/OtherConstruct1/OtherConstruct1/sum (OmpReduction) HostAssoc REAL(4) - !REF: /dotprod/b + !DEF: /dotprod/OtherConstruct1/OtherConstruct1/OtherConstruct1/OtherConstruct1/b HostAssoc REAL(4) !REF: /dotprod/OtherConstruct1/OtherConstruct1/OtherConstruct1/OtherConstruct1/i - !REF: /dotprod/c + !DEF: /dotprod/OtherConstruct1/OtherConstruct1/OtherConstruct1/OtherConstruct1/c HostAssoc REAL(4) sum = sum+b(i)*c(i) end do end do @@ -174,7 +174,7 @@ subroutine test_simd do j=6,10 !DEF: /test_simd/OtherConstruct1/k (OmpPrivate, OmpPreDetermined) HostAssoc INTEGER(4) do k=11,15 - !REF: /test_simd/a + !DEF: /test_simd/OtherConstruct1/a HostAssoc REAL(4) !REF: /test_simd/OtherConstruct1/k !REF: /test_simd/OtherConstruct1/j !REF: /test_simd/OtherConstruct1/i @@ -201,7 +201,7 @@ subroutine test_simd_multi do j=6,10 !DEF: /test_simd_multi/OtherConstruct1/k (OmpLastPrivate, OmpPreDetermined) HostAssoc INTEGER(4) do k=11,15 - !REF: /test_simd_multi/a + !DEF: /test_simd_multi/OtherConstruct1/a HostAssoc REAL(4) !REF: /test_simd_multi/OtherConstruct1/k !REF: /test_simd_multi/OtherConstruct1/j !REF: /test_simd_multi/OtherConstruct1/i @@ -223,11 +223,11 @@ subroutine test_seq_loop !REF: /test_seq_loop/j j = -1 !$omp parallel - !REF: /test_seq_loop/i - !REF: /test_seq_loop/j + !DEF: /test_seq_loop/OtherConstruct1/i HostAssoc INTEGER(4) + !DEF: /test_seq_loop/OtherConstruct1/j HostAssoc INTEGER(4) print *, i, j !$omp parallel - !REF: /test_seq_loop/i + !DEF: /test_seq_loop/OtherConstruct1/OtherConstruct1/i HostAssoc INTEGER(4) !DEF: /test_seq_loop/OtherConstruct1/OtherConstruct1/j (OmpPrivate, OmpPreDetermined) HostAssoc INTEGER(4) print *, i, j !$omp do @@ -237,12 +237,12 @@ subroutine test_seq_loop do j=1,10 end do end do - !REF: /test_seq_loop/i + !REF: /test_seq_loop/OtherConstruct1/OtherConstruct1/i !REF: /test_seq_loop/OtherConstruct1/OtherConstruct1/j print *, i, j !$omp end parallel - !REF: /test_seq_loop/i - !REF: /test_seq_loop/j + !REF: /test_seq_loop/OtherConstruct1/i + !REF: /test_seq_loop/OtherConstruct1/j print *, i, j !$omp end parallel !REF: /test_seq_loop/i diff --git a/flang/test/Semantics/OpenMP/symbol09.f90 b/flang/test/Semantics/OpenMP/symbol09.f90 index e2250f5c7908aa..8b4716999820b1 100644 --- a/flang/test/Semantics/OpenMP/symbol09.f90 +++ b/flang/test/Semantics/OpenMP/symbol09.f90 @@ -23,7 +23,7 @@ subroutine function_call_in_region !$omp parallel default(none) private(a) shared(b) !DEF: 
/function_call_in_region/OtherConstruct1/a (OmpPrivate) HostAssoc REAL(4) !REF: /function_call_in_region/foo - !REF: /function_call_in_region/b + !DEF: /function_call_in_region/OtherConstruct1/b HostAssoc REAL(4) a = foo(b) !$omp end parallel !REF: /function_call_in_region/a From 055893f164f595e5d4115db17dd479ed3401bf00 Mon Sep 17 00:00:00 2001 From: Brox Chen Date: Wed, 31 Jul 2024 13:47:13 -0400 Subject: [PATCH 007/114] [AMDGPU][True16][MC] duplicate vop1 tests to fake16 and update real-true16 flags for GFX12 (#100849) duplicate vop1 tests to fake16 and update real-true16 flags for GFX12 creating duplications here to avoid bulk copy in the following true16 patches --------- Co-authored-by: guochen2 --- llvm/test/MC/AMDGPU/gfx12_asm_vop1-fake16.s | 3597 +++++++++++++++++ llvm/test/MC/AMDGPU/gfx12_asm_vop1.s | 10 +- .../MC/AMDGPU/gfx12_asm_vop1_dpp16-fake16.s | 2828 +++++++++++++ llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s | 6 +- .../MC/AMDGPU/gfx12_asm_vop1_dpp8-fake16.s | 617 +++ llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s | 6 +- .../MC/AMDGPU/gfx12_asm_vop1_t16_err-fake16.s | 505 +++ llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_err.s | 2 +- 8 files changed, 7562 insertions(+), 9 deletions(-) create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vop1-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_err-fake16.s diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1-fake16.s new file mode 100644 index 00000000000000..8fef2ab26dfdd8 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1-fake16.s @@ -0,0 +1,3597 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-ASM %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -disassemble -show-encoding | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-ASM %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s + +v_bfrev_b32_e32 v5, v1 +// GFX12: v_bfrev_b32_e32 v5, v1 ; encoding: [0x01,0x71,0x0a,0x7e] + +v_bfrev_b32 v5, v255 +// GFX12: v_bfrev_b32_e32 v5, v255 ; encoding: [0xff,0x71,0x0a,0x7e] + +v_bfrev_b32 v5, s1 +// GFX12: v_bfrev_b32_e32 v5, s1 ; encoding: [0x01,0x70,0x0a,0x7e] + +v_bfrev_b32 v5, s105 +// GFX12: v_bfrev_b32_e32 v5, s105 ; encoding: [0x69,0x70,0x0a,0x7e] + +v_bfrev_b32 v5, vcc_lo +// GFX12: v_bfrev_b32_e32 v5, vcc_lo ; encoding: [0x6a,0x70,0x0a,0x7e] + +v_bfrev_b32 v5, vcc_hi +// GFX12: v_bfrev_b32_e32 v5, vcc_hi ; encoding: [0x6b,0x70,0x0a,0x7e] + +v_bfrev_b32 v5, ttmp15 +// GFX12: v_bfrev_b32_e32 v5, ttmp15 ; encoding: [0x7b,0x70,0x0a,0x7e] + +v_bfrev_b32 v5, m0 +// GFX12: v_bfrev_b32_e32 v5, m0 ; encoding: [0x7d,0x70,0x0a,0x7e] + +v_bfrev_b32 v5, exec_lo +// GFX12: v_bfrev_b32_e32 v5, exec_lo ; encoding: [0x7e,0x70,0x0a,0x7e] + +v_bfrev_b32 v5, exec_hi +// GFX12: v_bfrev_b32_e32 v5, exec_hi ; encoding: 
[0x7f,0x70,0x0a,0x7e] + +v_bfrev_b32 v5, null +// GFX12: v_bfrev_b32_e32 v5, null ; encoding: [0x7c,0x70,0x0a,0x7e] + +v_bfrev_b32 v5, -1 +// GFX12: v_bfrev_b32_e32 v5, -1 ; encoding: [0xc1,0x70,0x0a,0x7e] + +v_bfrev_b32 v5, 0.5 +// GFX12: v_bfrev_b32_e32 v5, 0.5 ; encoding: [0xf0,0x70,0x0a,0x7e] + +v_bfrev_b32 v5, src_scc +// GFX12: v_bfrev_b32_e32 v5, src_scc ; encoding: [0xfd,0x70,0x0a,0x7e] + +v_bfrev_b32 v255, 0xaf123456 +// GFX12: v_bfrev_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0x70,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_ceil_f16 v5, v1 +// GFX12: v_ceil_f16_e32 v5, v1 ; encoding: [0x01,0xb9,0x0a,0x7e] + +v_ceil_f16 v5, v127 +// GFX12: v_ceil_f16_e32 v5, v127 ; encoding: [0x7f,0xb9,0x0a,0x7e] + +v_ceil_f16 v5, s1 +// GFX12: v_ceil_f16_e32 v5, s1 ; encoding: [0x01,0xb8,0x0a,0x7e] + +v_ceil_f16 v5, s105 +// GFX12: v_ceil_f16_e32 v5, s105 ; encoding: [0x69,0xb8,0x0a,0x7e] + +v_ceil_f16 v5, vcc_lo +// GFX12: v_ceil_f16_e32 v5, vcc_lo ; encoding: [0x6a,0xb8,0x0a,0x7e] + +v_ceil_f16 v5, vcc_hi +// GFX12: v_ceil_f16_e32 v5, vcc_hi ; encoding: [0x6b,0xb8,0x0a,0x7e] + +v_ceil_f16 v5, ttmp15 +// GFX12: v_ceil_f16_e32 v5, ttmp15 ; encoding: [0x7b,0xb8,0x0a,0x7e] + +v_ceil_f16 v5, m0 +// GFX12: v_ceil_f16_e32 v5, m0 ; encoding: [0x7d,0xb8,0x0a,0x7e] + +v_ceil_f16 v5, exec_lo +// GFX12: v_ceil_f16_e32 v5, exec_lo ; encoding: [0x7e,0xb8,0x0a,0x7e] + +v_ceil_f16 v5, exec_hi +// GFX12: v_ceil_f16_e32 v5, exec_hi ; encoding: [0x7f,0xb8,0x0a,0x7e] + +v_ceil_f16 v5, null +// GFX12: v_ceil_f16_e32 v5, null ; encoding: [0x7c,0xb8,0x0a,0x7e] + +v_ceil_f16 v5, -1 +// GFX12: v_ceil_f16_e32 v5, -1 ; encoding: [0xc1,0xb8,0x0a,0x7e] + +v_ceil_f16 v5, 0.5 +// GFX12: v_ceil_f16_e32 v5, 0.5 ; encoding: [0xf0,0xb8,0x0a,0x7e] + +v_ceil_f16 v5, src_scc +// GFX12: v_ceil_f16_e32 v5, src_scc ; encoding: [0xfd,0xb8,0x0a,0x7e] + +v_ceil_f16 v127, 0xfe0b +// GFX12: v_ceil_f16_e32 v127, 0xfe0b ; encoding: [0xff,0xb8,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + +v_ceil_f32 v5, v1 +// GFX12: v_ceil_f32_e32 v5, v1 ; encoding: [0x01,0x45,0x0a,0x7e] + +v_ceil_f32 v5, v255 +// GFX12: v_ceil_f32_e32 v5, v255 ; encoding: [0xff,0x45,0x0a,0x7e] + +v_ceil_f32 v5, s1 +// GFX12: v_ceil_f32_e32 v5, s1 ; encoding: [0x01,0x44,0x0a,0x7e] + +v_ceil_f32 v5, s105 +// GFX12: v_ceil_f32_e32 v5, s105 ; encoding: [0x69,0x44,0x0a,0x7e] + +v_ceil_f32 v5, vcc_lo +// GFX12: v_ceil_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x44,0x0a,0x7e] + +v_ceil_f32 v5, vcc_hi +// GFX12: v_ceil_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x44,0x0a,0x7e] + +v_ceil_f32 v5, ttmp15 +// GFX12: v_ceil_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x44,0x0a,0x7e] + +v_ceil_f32 v5, m0 +// GFX12: v_ceil_f32_e32 v5, m0 ; encoding: [0x7d,0x44,0x0a,0x7e] + +v_ceil_f32 v5, exec_lo +// GFX12: v_ceil_f32_e32 v5, exec_lo ; encoding: [0x7e,0x44,0x0a,0x7e] + +v_ceil_f32 v5, exec_hi +// GFX12: v_ceil_f32_e32 v5, exec_hi ; encoding: [0x7f,0x44,0x0a,0x7e] + +v_ceil_f32 v5, null +// GFX12: v_ceil_f32_e32 v5, null ; encoding: [0x7c,0x44,0x0a,0x7e] + +v_ceil_f32 v5, -1 +// GFX12: v_ceil_f32_e32 v5, -1 ; encoding: [0xc1,0x44,0x0a,0x7e] + +v_ceil_f32 v5, 0.5 +// GFX12: v_ceil_f32_e32 v5, 0.5 ; encoding: [0xf0,0x44,0x0a,0x7e] + +v_ceil_f32 v5, src_scc +// GFX12: v_ceil_f32_e32 v5, src_scc ; encoding: [0xfd,0x44,0x0a,0x7e] + +v_ceil_f32 v255, 0xaf123456 +// GFX12: v_ceil_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x44,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_ceil_f64 v[5:6], v[1:2] +// GFX12: v_ceil_f64_e32 v[5:6], v[1:2] ; encoding: [0x01,0x31,0x0a,0x7e] + +v_ceil_f64 v[5:6], v[254:255] +// GFX12: v_ceil_f64_e32 v[5:6], 
v[254:255] ; encoding: [0xfe,0x31,0x0a,0x7e] + +v_ceil_f64 v[5:6], s[2:3] +// GFX12: v_ceil_f64_e32 v[5:6], s[2:3] ; encoding: [0x02,0x30,0x0a,0x7e] + +v_ceil_f64 v[5:6], s[104:105] +// GFX12: v_ceil_f64_e32 v[5:6], s[104:105] ; encoding: [0x68,0x30,0x0a,0x7e] + +v_ceil_f64 v[5:6], vcc +// GFX12: v_ceil_f64_e32 v[5:6], vcc ; encoding: [0x6a,0x30,0x0a,0x7e] + +v_ceil_f64 v[5:6], ttmp[14:15] +// GFX12: v_ceil_f64_e32 v[5:6], ttmp[14:15] ; encoding: [0x7a,0x30,0x0a,0x7e] + +v_ceil_f64 v[5:6], exec +// GFX12: v_ceil_f64_e32 v[5:6], exec ; encoding: [0x7e,0x30,0x0a,0x7e] + +v_ceil_f64 v[5:6], null +// GFX12: v_ceil_f64_e32 v[5:6], null ; encoding: [0x7c,0x30,0x0a,0x7e] + +v_ceil_f64 v[5:6], -1 +// GFX12: v_ceil_f64_e32 v[5:6], -1 ; encoding: [0xc1,0x30,0x0a,0x7e] + +v_ceil_f64 v[5:6], 0.5 +// GFX12: v_ceil_f64_e32 v[5:6], 0.5 ; encoding: [0xf0,0x30,0x0a,0x7e] + +v_ceil_f64 v[5:6], src_scc +// GFX12: v_ceil_f64_e32 v[5:6], src_scc ; encoding: [0xfd,0x30,0x0a,0x7e] + +v_ceil_f64 v[254:255], 0xaf123456 +// GFX12: v_ceil_f64_e32 v[254:255], 0xaf123456 ; encoding: [0xff,0x30,0xfc,0x7f,0x56,0x34,0x12,0xaf] + +v_cls_i32 v5, v1 +// GFX12: v_cls_i32_e32 v5, v1 ; encoding: [0x01,0x77,0x0a,0x7e] + +v_cls_i32 v5, v255 +// GFX12: v_cls_i32_e32 v5, v255 ; encoding: [0xff,0x77,0x0a,0x7e] + +v_cls_i32 v5, s1 +// GFX12: v_cls_i32_e32 v5, s1 ; encoding: [0x01,0x76,0x0a,0x7e] + +v_cls_i32 v5, s105 +// GFX12: v_cls_i32_e32 v5, s105 ; encoding: [0x69,0x76,0x0a,0x7e] + +v_cls_i32 v5, vcc_lo +// GFX12: v_cls_i32_e32 v5, vcc_lo ; encoding: [0x6a,0x76,0x0a,0x7e] + +v_cls_i32 v5, vcc_hi +// GFX12: v_cls_i32_e32 v5, vcc_hi ; encoding: [0x6b,0x76,0x0a,0x7e] + +v_cls_i32 v5, ttmp15 +// GFX12: v_cls_i32_e32 v5, ttmp15 ; encoding: [0x7b,0x76,0x0a,0x7e] + +v_cls_i32 v5, m0 +// GFX12: v_cls_i32_e32 v5, m0 ; encoding: [0x7d,0x76,0x0a,0x7e] + +v_cls_i32 v5, exec_lo +// GFX12: v_cls_i32_e32 v5, exec_lo ; encoding: [0x7e,0x76,0x0a,0x7e] + +v_cls_i32 v5, exec_hi +// GFX12: v_cls_i32_e32 v5, exec_hi ; encoding: [0x7f,0x76,0x0a,0x7e] + +v_cls_i32 v5, null +// GFX12: v_cls_i32_e32 v5, null ; encoding: [0x7c,0x76,0x0a,0x7e] + +v_cls_i32 v5, -1 +// GFX12: v_cls_i32_e32 v5, -1 ; encoding: [0xc1,0x76,0x0a,0x7e] + +v_cls_i32 v5, 0.5 +// GFX12: v_cls_i32_e32 v5, 0.5 ; encoding: [0xf0,0x76,0x0a,0x7e] + +v_cls_i32 v5, src_scc +// GFX12: v_cls_i32_e32 v5, src_scc ; encoding: [0xfd,0x76,0x0a,0x7e] + +v_cls_i32 v255, 0xaf123456 +// GFX12: v_cls_i32_e32 v255, 0xaf123456 ; encoding: [0xff,0x76,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_clz_i32_u32 v5, v1 +// GFX12: v_clz_i32_u32_e32 v5, v1 ; encoding: [0x01,0x73,0x0a,0x7e] + +v_clz_i32_u32 v5, v255 +// GFX12: v_clz_i32_u32_e32 v5, v255 ; encoding: [0xff,0x73,0x0a,0x7e] + +v_clz_i32_u32 v5, s1 +// GFX12: v_clz_i32_u32_e32 v5, s1 ; encoding: [0x01,0x72,0x0a,0x7e] + +v_clz_i32_u32 v5, s105 +// GFX12: v_clz_i32_u32_e32 v5, s105 ; encoding: [0x69,0x72,0x0a,0x7e] + +v_clz_i32_u32 v5, vcc_lo +// GFX12: v_clz_i32_u32_e32 v5, vcc_lo ; encoding: [0x6a,0x72,0x0a,0x7e] + +v_clz_i32_u32 v5, vcc_hi +// GFX12: v_clz_i32_u32_e32 v5, vcc_hi ; encoding: [0x6b,0x72,0x0a,0x7e] + +v_clz_i32_u32 v5, ttmp15 +// GFX12: v_clz_i32_u32_e32 v5, ttmp15 ; encoding: [0x7b,0x72,0x0a,0x7e] + +v_clz_i32_u32 v5, m0 +// GFX12: v_clz_i32_u32_e32 v5, m0 ; encoding: [0x7d,0x72,0x0a,0x7e] + +v_clz_i32_u32 v5, exec_lo +// GFX12: v_clz_i32_u32_e32 v5, exec_lo ; encoding: [0x7e,0x72,0x0a,0x7e] + +v_clz_i32_u32 v5, exec_hi +// GFX12: v_clz_i32_u32_e32 v5, exec_hi ; encoding: [0x7f,0x72,0x0a,0x7e] + +v_clz_i32_u32 v5, null +// GFX12: 
v_clz_i32_u32_e32 v5, null ; encoding: [0x7c,0x72,0x0a,0x7e] + +v_clz_i32_u32 v5, -1 +// GFX12: v_clz_i32_u32_e32 v5, -1 ; encoding: [0xc1,0x72,0x0a,0x7e] + +v_clz_i32_u32 v5, 0.5 +// GFX12: v_clz_i32_u32_e32 v5, 0.5 ; encoding: [0xf0,0x72,0x0a,0x7e] + +v_clz_i32_u32 v5, src_scc +// GFX12: v_clz_i32_u32_e32 v5, src_scc ; encoding: [0xfd,0x72,0x0a,0x7e] + +v_clz_i32_u32 v255, 0xaf123456 +// GFX12: v_clz_i32_u32_e32 v255, 0xaf123456 ; encoding: [0xff,0x72,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_cos_f16 v5, v1 +// GFX12: v_cos_f16_e32 v5, v1 ; encoding: [0x01,0xc3,0x0a,0x7e] + +v_cos_f16 v5, v127 +// GFX12: v_cos_f16_e32 v5, v127 ; encoding: [0x7f,0xc3,0x0a,0x7e] + +v_cos_f16 v5, s1 +// GFX12: v_cos_f16_e32 v5, s1 ; encoding: [0x01,0xc2,0x0a,0x7e] + +v_cos_f16 v5, s105 +// GFX12: v_cos_f16_e32 v5, s105 ; encoding: [0x69,0xc2,0x0a,0x7e] + +v_cos_f16 v5, vcc_lo +// GFX12: v_cos_f16_e32 v5, vcc_lo ; encoding: [0x6a,0xc2,0x0a,0x7e] + +v_cos_f16 v5, vcc_hi +// GFX12: v_cos_f16_e32 v5, vcc_hi ; encoding: [0x6b,0xc2,0x0a,0x7e] + +v_cos_f16 v5, ttmp15 +// GFX12: v_cos_f16_e32 v5, ttmp15 ; encoding: [0x7b,0xc2,0x0a,0x7e] + +v_cos_f16 v5, m0 +// GFX12: v_cos_f16_e32 v5, m0 ; encoding: [0x7d,0xc2,0x0a,0x7e] + +v_cos_f16 v5, exec_lo +// GFX12: v_cos_f16_e32 v5, exec_lo ; encoding: [0x7e,0xc2,0x0a,0x7e] + +v_cos_f16 v5, exec_hi +// GFX12: v_cos_f16_e32 v5, exec_hi ; encoding: [0x7f,0xc2,0x0a,0x7e] + +v_cos_f16 v5, null +// GFX12: v_cos_f16_e32 v5, null ; encoding: [0x7c,0xc2,0x0a,0x7e] + +v_cos_f16 v5, -1 +// GFX12: v_cos_f16_e32 v5, -1 ; encoding: [0xc1,0xc2,0x0a,0x7e] + +v_cos_f16 v5, 0.5 +// GFX12: v_cos_f16_e32 v5, 0.5 ; encoding: [0xf0,0xc2,0x0a,0x7e] + +v_cos_f16 v5, src_scc +// GFX12: v_cos_f16_e32 v5, src_scc ; encoding: [0xfd,0xc2,0x0a,0x7e] + +v_cos_f16 v127, 0xfe0b +// GFX12: v_cos_f16_e32 v127, 0xfe0b ; encoding: [0xff,0xc2,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + +v_cos_f32 v5, v1 +// GFX12: v_cos_f32_e32 v5, v1 ; encoding: [0x01,0x6d,0x0a,0x7e] + +v_cos_f32 v5, v255 +// GFX12: v_cos_f32_e32 v5, v255 ; encoding: [0xff,0x6d,0x0a,0x7e] + +v_cos_f32 v5, s1 +// GFX12: v_cos_f32_e32 v5, s1 ; encoding: [0x01,0x6c,0x0a,0x7e] + +v_cos_f32 v5, s105 +// GFX12: v_cos_f32_e32 v5, s105 ; encoding: [0x69,0x6c,0x0a,0x7e] + +v_cos_f32 v5, vcc_lo +// GFX12: v_cos_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x6c,0x0a,0x7e] + +v_cos_f32 v5, vcc_hi +// GFX12: v_cos_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x6c,0x0a,0x7e] + +v_cos_f32 v5, ttmp15 +// GFX12: v_cos_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x6c,0x0a,0x7e] + +v_cos_f32 v5, m0 +// GFX12: v_cos_f32_e32 v5, m0 ; encoding: [0x7d,0x6c,0x0a,0x7e] + +v_cos_f32 v5, exec_lo +// GFX12: v_cos_f32_e32 v5, exec_lo ; encoding: [0x7e,0x6c,0x0a,0x7e] + +v_cos_f32 v5, exec_hi +// GFX12: v_cos_f32_e32 v5, exec_hi ; encoding: [0x7f,0x6c,0x0a,0x7e] + +v_cos_f32 v5, null +// GFX12: v_cos_f32_e32 v5, null ; encoding: [0x7c,0x6c,0x0a,0x7e] + +v_cos_f32 v5, -1 +// GFX12: v_cos_f32_e32 v5, -1 ; encoding: [0xc1,0x6c,0x0a,0x7e] + +v_cos_f32 v5, 0.5 +// GFX12: v_cos_f32_e32 v5, 0.5 ; encoding: [0xf0,0x6c,0x0a,0x7e] + +v_cos_f32 v5, src_scc +// GFX12: v_cos_f32_e32 v5, src_scc ; encoding: [0xfd,0x6c,0x0a,0x7e] + +v_cos_f32 v255, 0xaf123456 +// GFX12: v_cos_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x6c,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_ctz_i32_b32 v5, v1 +// GFX12: v_ctz_i32_b32_e32 v5, v1 ; encoding: [0x01,0x75,0x0a,0x7e] + +v_ctz_i32_b32 v5, v255 +// GFX12: v_ctz_i32_b32_e32 v5, v255 ; encoding: [0xff,0x75,0x0a,0x7e] + +v_ctz_i32_b32 v5, s1 +// GFX12: v_ctz_i32_b32_e32 v5, s1 ; encoding: 
[0x01,0x74,0x0a,0x7e] + +v_ctz_i32_b32 v5, s105 +// GFX12: v_ctz_i32_b32_e32 v5, s105 ; encoding: [0x69,0x74,0x0a,0x7e] + +v_ctz_i32_b32 v5, vcc_lo +// GFX12: v_ctz_i32_b32_e32 v5, vcc_lo ; encoding: [0x6a,0x74,0x0a,0x7e] + +v_ctz_i32_b32 v5, vcc_hi +// GFX12: v_ctz_i32_b32_e32 v5, vcc_hi ; encoding: [0x6b,0x74,0x0a,0x7e] + +v_ctz_i32_b32 v5, ttmp15 +// GFX12: v_ctz_i32_b32_e32 v5, ttmp15 ; encoding: [0x7b,0x74,0x0a,0x7e] + +v_ctz_i32_b32 v5, m0 +// GFX12: v_ctz_i32_b32_e32 v5, m0 ; encoding: [0x7d,0x74,0x0a,0x7e] + +v_ctz_i32_b32 v5, exec_lo +// GFX12: v_ctz_i32_b32_e32 v5, exec_lo ; encoding: [0x7e,0x74,0x0a,0x7e] + +v_ctz_i32_b32 v5, exec_hi +// GFX12: v_ctz_i32_b32_e32 v5, exec_hi ; encoding: [0x7f,0x74,0x0a,0x7e] + +v_ctz_i32_b32 v5, null +// GFX12: v_ctz_i32_b32_e32 v5, null ; encoding: [0x7c,0x74,0x0a,0x7e] + +v_ctz_i32_b32 v5, -1 +// GFX12: v_ctz_i32_b32_e32 v5, -1 ; encoding: [0xc1,0x74,0x0a,0x7e] + +v_ctz_i32_b32 v5, 0.5 +// GFX12: v_ctz_i32_b32_e32 v5, 0.5 ; encoding: [0xf0,0x74,0x0a,0x7e] + +v_ctz_i32_b32 v5, src_scc +// GFX12: v_ctz_i32_b32_e32 v5, src_scc ; encoding: [0xfd,0x74,0x0a,0x7e] + +v_ctz_i32_b32 v255, 0xaf123456 +// GFX12: v_ctz_i32_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0x74,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_cvt_f32_bf8_e32 v1, s3 +// GFX12: v_cvt_f32_bf8_e32 v1, s3 ; encoding: [0x03,0xda,0x02,0x7e] + +v_cvt_f32_bf8_e32 v1, 3 +// GFX12: v_cvt_f32_bf8_e32 v1, 3 ; encoding: [0x83,0xda,0x02,0x7e] + +v_cvt_f32_bf8_e32 v1, v3 +// GFX12: v_cvt_f32_bf8_e32 v1, v3 ; encoding: [0x03,0xdb,0x02,0x7e] + +v_cvt_f32_fp8_e32 v1, s3 +// GFX12: v_cvt_f32_fp8_e32 v1, s3 ; encoding: [0x03,0xd8,0x02,0x7e] + +v_cvt_f32_fp8_e32 v1, 3 +// GFX12: v_cvt_f32_fp8_e32 v1, 3 ; encoding: [0x83,0xd8,0x02,0x7e] + +v_cvt_f32_fp8_e32 v1, v3 +// GFX12: v_cvt_f32_fp8_e32 v1, v3 ; encoding: [0x03,0xd9,0x02,0x7e] + +v_cvt_pk_f32_bf8_e32 v[2:3], s3 +// GFX12: v_cvt_pk_f32_bf8_e32 v[2:3], s3 ; encoding: [0x03,0xde,0x04,0x7e] + +v_cvt_pk_f32_bf8_e32 v[3:4], s5 +// GFX12: v_cvt_pk_f32_bf8_e32 v[3:4], s5 ; encoding: [0x05,0xde,0x06,0x7e] + +v_cvt_pk_f32_bf8_e32 v[2:3], 3 +// GFX12: v_cvt_pk_f32_bf8_e32 v[2:3], 3 ; encoding: [0x83,0xde,0x04,0x7e] + +v_cvt_pk_f32_bf8_e32 v[3:4], 3 +// GFX12: v_cvt_pk_f32_bf8_e32 v[3:4], 3 ; encoding: [0x83,0xde,0x06,0x7e] + +v_cvt_pk_f32_bf8_e32 v[2:3], v3 +// GFX12: v_cvt_pk_f32_bf8_e32 v[2:3], v3 ; encoding: [0x03,0xdf,0x04,0x7e] + +v_cvt_pk_f32_bf8_e32 v[3:4], v3 +// GFX12: v_cvt_pk_f32_bf8_e32 v[3:4], v3 ; encoding: [0x03,0xdf,0x06,0x7e] + +v_cvt_pk_f32_fp8_e32 v[2:3], s3 +// GFX12: v_cvt_pk_f32_fp8_e32 v[2:3], s3 ; encoding: [0x03,0xdc,0x04,0x7e] + +v_cvt_pk_f32_fp8_e32 v[2:3], 3 +// GFX12: v_cvt_pk_f32_fp8_e32 v[2:3], 3 ; encoding: [0x83,0xdc,0x04,0x7e] + +v_cvt_pk_f32_fp8_e32 v[2:3], v3 +// GFX12: v_cvt_pk_f32_fp8_e32 v[2:3], v3 ; encoding: [0x03,0xdd,0x04,0x7e] + +v_cvt_f16_f32 v5, v1 +// GFX12: v_cvt_f16_f32_e32 v5, v1 ; encoding: [0x01,0x15,0x0a,0x7e] + +v_cvt_f16_f32 v5, v255 +// GFX12: v_cvt_f16_f32_e32 v5, v255 ; encoding: [0xff,0x15,0x0a,0x7e] + +v_cvt_f16_f32 v5, s1 +// GFX12: v_cvt_f16_f32_e32 v5, s1 ; encoding: [0x01,0x14,0x0a,0x7e] + +v_cvt_f16_f32 v5, s105 +// GFX12: v_cvt_f16_f32_e32 v5, s105 ; encoding: [0x69,0x14,0x0a,0x7e] + +v_cvt_f16_f32 v5, vcc_lo +// GFX12: v_cvt_f16_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x14,0x0a,0x7e] + +v_cvt_f16_f32 v5, vcc_hi +// GFX12: v_cvt_f16_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x14,0x0a,0x7e] + +v_cvt_f16_f32 v5, ttmp15 +// GFX12: v_cvt_f16_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x14,0x0a,0x7e] + +v_cvt_f16_f32 v5, m0 
+// GFX12: v_cvt_f16_f32_e32 v5, m0 ; encoding: [0x7d,0x14,0x0a,0x7e] + +v_cvt_f16_f32 v5, exec_lo +// GFX12: v_cvt_f16_f32_e32 v5, exec_lo ; encoding: [0x7e,0x14,0x0a,0x7e] + +v_cvt_f16_f32 v5, exec_hi +// GFX12: v_cvt_f16_f32_e32 v5, exec_hi ; encoding: [0x7f,0x14,0x0a,0x7e] + +v_cvt_f16_f32 v5, null +// GFX12: v_cvt_f16_f32_e32 v5, null ; encoding: [0x7c,0x14,0x0a,0x7e] + +v_cvt_f16_f32 v5, -1 +// GFX12: v_cvt_f16_f32_e32 v5, -1 ; encoding: [0xc1,0x14,0x0a,0x7e] + +v_cvt_f16_f32 v5, 0.5 +// GFX12: v_cvt_f16_f32_e32 v5, 0.5 ; encoding: [0xf0,0x14,0x0a,0x7e] + +v_cvt_f16_f32 v5, src_scc +// GFX12: v_cvt_f16_f32_e32 v5, src_scc ; encoding: [0xfd,0x14,0x0a,0x7e] + +v_cvt_f16_f32 v127, 0xaf123456 +// GFX12: v_cvt_f16_f32_e32 v127, 0xaf123456 ; encoding: [0xff,0x14,0xfe,0x7e,0x56,0x34,0x12,0xaf] + +v_cvt_f16_i16 v5, v1 +// GFX12: v_cvt_f16_i16_e32 v5, v1 ; encoding: [0x01,0xa3,0x0a,0x7e] + +v_cvt_f16_i16 v5, v127 +// GFX12: v_cvt_f16_i16_e32 v5, v127 ; encoding: [0x7f,0xa3,0x0a,0x7e] + +v_cvt_f16_i16 v5, s1 +// GFX12: v_cvt_f16_i16_e32 v5, s1 ; encoding: [0x01,0xa2,0x0a,0x7e] + +v_cvt_f16_i16 v5, s105 +// GFX12: v_cvt_f16_i16_e32 v5, s105 ; encoding: [0x69,0xa2,0x0a,0x7e] + +v_cvt_f16_i16 v5, vcc_lo +// GFX12: v_cvt_f16_i16_e32 v5, vcc_lo ; encoding: [0x6a,0xa2,0x0a,0x7e] + +v_cvt_f16_i16 v5, vcc_hi +// GFX12: v_cvt_f16_i16_e32 v5, vcc_hi ; encoding: [0x6b,0xa2,0x0a,0x7e] + +v_cvt_f16_i16 v5, ttmp15 +// GFX12: v_cvt_f16_i16_e32 v5, ttmp15 ; encoding: [0x7b,0xa2,0x0a,0x7e] + +v_cvt_f16_i16 v5, m0 +// GFX12: v_cvt_f16_i16_e32 v5, m0 ; encoding: [0x7d,0xa2,0x0a,0x7e] + +v_cvt_f16_i16 v5, exec_lo +// GFX12: v_cvt_f16_i16_e32 v5, exec_lo ; encoding: [0x7e,0xa2,0x0a,0x7e] + +v_cvt_f16_i16 v5, exec_hi +// GFX12: v_cvt_f16_i16_e32 v5, exec_hi ; encoding: [0x7f,0xa2,0x0a,0x7e] + +v_cvt_f16_i16 v5, null +// GFX12: v_cvt_f16_i16_e32 v5, null ; encoding: [0x7c,0xa2,0x0a,0x7e] + +v_cvt_f16_i16 v5, -1 +// GFX12: v_cvt_f16_i16_e32 v5, -1 ; encoding: [0xc1,0xa2,0x0a,0x7e] + +v_cvt_f16_i16 v5, 0.5 +// GFX12-ASM: v_cvt_f16_i16_e32 v5, 0.5 ; encoding: [0xf0,0xa2,0x0a,0x7e] +// GFX12-DIS: v_cvt_f16_i16_e32 v5, 0x3800 ; encoding: [0xff,0xa2,0x0a,0x7e,0x00,0x38,0x00,0x00] + +v_cvt_f16_i16 v5, src_scc +// GFX12: v_cvt_f16_i16_e32 v5, src_scc ; encoding: [0xfd,0xa2,0x0a,0x7e] + +v_cvt_f16_i16 v127, 0xfe0b +// GFX12: v_cvt_f16_i16_e32 v127, 0xfe0b ; encoding: [0xff,0xa2,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + +v_cvt_f16_u16 v5, v1 +// GFX12: v_cvt_f16_u16_e32 v5, v1 ; encoding: [0x01,0xa1,0x0a,0x7e] + +v_cvt_f16_u16 v5, v127 +// GFX12: v_cvt_f16_u16_e32 v5, v127 ; encoding: [0x7f,0xa1,0x0a,0x7e] + +v_cvt_f16_u16 v5, s1 +// GFX12: v_cvt_f16_u16_e32 v5, s1 ; encoding: [0x01,0xa0,0x0a,0x7e] + +v_cvt_f16_u16 v5, s105 +// GFX12: v_cvt_f16_u16_e32 v5, s105 ; encoding: [0x69,0xa0,0x0a,0x7e] + +v_cvt_f16_u16 v5, vcc_lo +// GFX12: v_cvt_f16_u16_e32 v5, vcc_lo ; encoding: [0x6a,0xa0,0x0a,0x7e] + +v_cvt_f16_u16 v5, vcc_hi +// GFX12: v_cvt_f16_u16_e32 v5, vcc_hi ; encoding: [0x6b,0xa0,0x0a,0x7e] + +v_cvt_f16_u16 v5, ttmp15 +// GFX12: v_cvt_f16_u16_e32 v5, ttmp15 ; encoding: [0x7b,0xa0,0x0a,0x7e] + +v_cvt_f16_u16 v5, m0 +// GFX12: v_cvt_f16_u16_e32 v5, m0 ; encoding: [0x7d,0xa0,0x0a,0x7e] + +v_cvt_f16_u16 v5, exec_lo +// GFX12: v_cvt_f16_u16_e32 v5, exec_lo ; encoding: [0x7e,0xa0,0x0a,0x7e] + +v_cvt_f16_u16 v5, exec_hi +// GFX12: v_cvt_f16_u16_e32 v5, exec_hi ; encoding: [0x7f,0xa0,0x0a,0x7e] + +v_cvt_f16_u16 v5, null +// GFX12: v_cvt_f16_u16_e32 v5, null ; encoding: [0x7c,0xa0,0x0a,0x7e] + +v_cvt_f16_u16 v5, -1 +// GFX12: 
v_cvt_f16_u16_e32 v5, -1 ; encoding: [0xc1,0xa0,0x0a,0x7e] + +v_cvt_f16_u16 v5, 0.5 +// GFX12-ASM: v_cvt_f16_u16_e32 v5, 0.5 ; encoding: [0xf0,0xa0,0x0a,0x7e] +// GFX12-DIS: v_cvt_f16_u16_e32 v5, 0x3800 ; encoding: [0xff,0xa0,0x0a,0x7e,0x00,0x38,0x00,0x00] + +v_cvt_f16_u16 v5, src_scc +// GFX12: v_cvt_f16_u16_e32 v5, src_scc ; encoding: [0xfd,0xa0,0x0a,0x7e] + +v_cvt_f16_u16 v127, 0xfe0b +// GFX12: v_cvt_f16_u16_e32 v127, 0xfe0b ; encoding: [0xff,0xa0,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + +v_cvt_f32_f16 v5, v1 +// GFX12: v_cvt_f32_f16_e32 v5, v1 ; encoding: [0x01,0x17,0x0a,0x7e] + +v_cvt_f32_f16 v5, v127 +// GFX12: v_cvt_f32_f16_e32 v5, v127 ; encoding: [0x7f,0x17,0x0a,0x7e] + +v_cvt_f32_f16 v5, s1 +// GFX12: v_cvt_f32_f16_e32 v5, s1 ; encoding: [0x01,0x16,0x0a,0x7e] + +v_cvt_f32_f16 v5, s105 +// GFX12: v_cvt_f32_f16_e32 v5, s105 ; encoding: [0x69,0x16,0x0a,0x7e] + +v_cvt_f32_f16 v5, vcc_lo +// GFX12: v_cvt_f32_f16_e32 v5, vcc_lo ; encoding: [0x6a,0x16,0x0a,0x7e] + +v_cvt_f32_f16 v5, vcc_hi +// GFX12: v_cvt_f32_f16_e32 v5, vcc_hi ; encoding: [0x6b,0x16,0x0a,0x7e] + +v_cvt_f32_f16 v5, ttmp15 +// GFX12: v_cvt_f32_f16_e32 v5, ttmp15 ; encoding: [0x7b,0x16,0x0a,0x7e] + +v_cvt_f32_f16 v5, m0 +// GFX12: v_cvt_f32_f16_e32 v5, m0 ; encoding: [0x7d,0x16,0x0a,0x7e] + +v_cvt_f32_f16 v5, exec_lo +// GFX12: v_cvt_f32_f16_e32 v5, exec_lo ; encoding: [0x7e,0x16,0x0a,0x7e] + +v_cvt_f32_f16 v5, exec_hi +// GFX12: v_cvt_f32_f16_e32 v5, exec_hi ; encoding: [0x7f,0x16,0x0a,0x7e] + +v_cvt_f32_f16 v5, null +// GFX12: v_cvt_f32_f16_e32 v5, null ; encoding: [0x7c,0x16,0x0a,0x7e] + +v_cvt_f32_f16 v5, -1 +// GFX12: v_cvt_f32_f16_e32 v5, -1 ; encoding: [0xc1,0x16,0x0a,0x7e] + +v_cvt_f32_f16 v5, 0.5 +// GFX12: v_cvt_f32_f16_e32 v5, 0.5 ; encoding: [0xf0,0x16,0x0a,0x7e] + +v_cvt_f32_f16 v5, src_scc +// GFX12: v_cvt_f32_f16_e32 v5, src_scc ; encoding: [0xfd,0x16,0x0a,0x7e] + +v_cvt_f32_f16 v255, 0xfe0b +// GFX12: v_cvt_f32_f16_e32 v255, 0xfe0b ; encoding: [0xff,0x16,0xfe,0x7f,0x0b,0xfe,0x00,0x00] + +v_cvt_f32_f64 v5, v[1:2] +// GFX12: v_cvt_f32_f64_e32 v5, v[1:2] ; encoding: [0x01,0x1f,0x0a,0x7e] + +v_cvt_f32_f64 v5, v[254:255] +// GFX12: v_cvt_f32_f64_e32 v5, v[254:255] ; encoding: [0xfe,0x1f,0x0a,0x7e] + +v_cvt_f32_f64 v5, s[2:3] +// GFX12: v_cvt_f32_f64_e32 v5, s[2:3] ; encoding: [0x02,0x1e,0x0a,0x7e] + +v_cvt_f32_f64 v5, s[104:105] +// GFX12: v_cvt_f32_f64_e32 v5, s[104:105] ; encoding: [0x68,0x1e,0x0a,0x7e] + +v_cvt_f32_f64 v5, vcc +// GFX12: v_cvt_f32_f64_e32 v5, vcc ; encoding: [0x6a,0x1e,0x0a,0x7e] + +v_cvt_f32_f64 v5, ttmp[14:15] +// GFX12: v_cvt_f32_f64_e32 v5, ttmp[14:15] ; encoding: [0x7a,0x1e,0x0a,0x7e] + +v_cvt_f32_f64 v5, exec +// GFX12: v_cvt_f32_f64_e32 v5, exec ; encoding: [0x7e,0x1e,0x0a,0x7e] + +v_cvt_f32_f64 v5, null +// GFX12: v_cvt_f32_f64_e32 v5, null ; encoding: [0x7c,0x1e,0x0a,0x7e] + +v_cvt_f32_f64 v5, -1 +// GFX12: v_cvt_f32_f64_e32 v5, -1 ; encoding: [0xc1,0x1e,0x0a,0x7e] + +v_cvt_f32_f64 v5, 0.5 +// GFX12: v_cvt_f32_f64_e32 v5, 0.5 ; encoding: [0xf0,0x1e,0x0a,0x7e] + +v_cvt_f32_f64 v5, src_scc +// GFX12: v_cvt_f32_f64_e32 v5, src_scc ; encoding: [0xfd,0x1e,0x0a,0x7e] + +v_cvt_f32_f64 v255, 0xaf123456 +// GFX12: v_cvt_f32_f64_e32 v255, 0xaf123456 ; encoding: [0xff,0x1e,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_cvt_f32_i32 v5, v1 +// GFX12: v_cvt_f32_i32_e32 v5, v1 ; encoding: [0x01,0x0b,0x0a,0x7e] + +v_cvt_f32_i32 v5, v255 +// GFX12: v_cvt_f32_i32_e32 v5, v255 ; encoding: [0xff,0x0b,0x0a,0x7e] + +v_cvt_f32_i32 v5, s1 +// GFX12: v_cvt_f32_i32_e32 v5, s1 ; encoding: [0x01,0x0a,0x0a,0x7e] + 
+v_cvt_f32_i32 v5, s105 +// GFX12: v_cvt_f32_i32_e32 v5, s105 ; encoding: [0x69,0x0a,0x0a,0x7e] + +v_cvt_f32_i32 v5, vcc_lo +// GFX12: v_cvt_f32_i32_e32 v5, vcc_lo ; encoding: [0x6a,0x0a,0x0a,0x7e] + +v_cvt_f32_i32 v5, vcc_hi +// GFX12: v_cvt_f32_i32_e32 v5, vcc_hi ; encoding: [0x6b,0x0a,0x0a,0x7e] + +v_cvt_f32_i32 v5, ttmp15 +// GFX12: v_cvt_f32_i32_e32 v5, ttmp15 ; encoding: [0x7b,0x0a,0x0a,0x7e] + +v_cvt_f32_i32 v5, m0 +// GFX12: v_cvt_f32_i32_e32 v5, m0 ; encoding: [0x7d,0x0a,0x0a,0x7e] + +v_cvt_f32_i32 v5, exec_lo +// GFX12: v_cvt_f32_i32_e32 v5, exec_lo ; encoding: [0x7e,0x0a,0x0a,0x7e] + +v_cvt_f32_i32 v5, exec_hi +// GFX12: v_cvt_f32_i32_e32 v5, exec_hi ; encoding: [0x7f,0x0a,0x0a,0x7e] + +v_cvt_f32_i32 v5, null +// GFX12: v_cvt_f32_i32_e32 v5, null ; encoding: [0x7c,0x0a,0x0a,0x7e] + +v_cvt_f32_i32 v5, -1 +// GFX12: v_cvt_f32_i32_e32 v5, -1 ; encoding: [0xc1,0x0a,0x0a,0x7e] + +v_cvt_f32_i32 v5, 0.5 +// GFX12: v_cvt_f32_i32_e32 v5, 0.5 ; encoding: [0xf0,0x0a,0x0a,0x7e] + +v_cvt_f32_i32 v5, src_scc +// GFX12: v_cvt_f32_i32_e32 v5, src_scc ; encoding: [0xfd,0x0a,0x0a,0x7e] + +v_cvt_f32_i32 v255, 0xaf123456 +// GFX12: v_cvt_f32_i32_e32 v255, 0xaf123456 ; encoding: [0xff,0x0a,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_cvt_f32_u32 v5, v1 +// GFX12: v_cvt_f32_u32_e32 v5, v1 ; encoding: [0x01,0x0d,0x0a,0x7e] + +v_cvt_f32_u32 v5, v255 +// GFX12: v_cvt_f32_u32_e32 v5, v255 ; encoding: [0xff,0x0d,0x0a,0x7e] + +v_cvt_f32_u32 v5, s1 +// GFX12: v_cvt_f32_u32_e32 v5, s1 ; encoding: [0x01,0x0c,0x0a,0x7e] + +v_cvt_f32_u32 v5, s105 +// GFX12: v_cvt_f32_u32_e32 v5, s105 ; encoding: [0x69,0x0c,0x0a,0x7e] + +v_cvt_f32_u32 v5, vcc_lo +// GFX12: v_cvt_f32_u32_e32 v5, vcc_lo ; encoding: [0x6a,0x0c,0x0a,0x7e] + +v_cvt_f32_u32 v5, vcc_hi +// GFX12: v_cvt_f32_u32_e32 v5, vcc_hi ; encoding: [0x6b,0x0c,0x0a,0x7e] + +v_cvt_f32_u32 v5, ttmp15 +// GFX12: v_cvt_f32_u32_e32 v5, ttmp15 ; encoding: [0x7b,0x0c,0x0a,0x7e] + +v_cvt_f32_u32 v5, m0 +// GFX12: v_cvt_f32_u32_e32 v5, m0 ; encoding: [0x7d,0x0c,0x0a,0x7e] + +v_cvt_f32_u32 v5, exec_lo +// GFX12: v_cvt_f32_u32_e32 v5, exec_lo ; encoding: [0x7e,0x0c,0x0a,0x7e] + +v_cvt_f32_u32 v5, exec_hi +// GFX12: v_cvt_f32_u32_e32 v5, exec_hi ; encoding: [0x7f,0x0c,0x0a,0x7e] + +v_cvt_f32_u32 v5, null +// GFX12: v_cvt_f32_u32_e32 v5, null ; encoding: [0x7c,0x0c,0x0a,0x7e] + +v_cvt_f32_u32 v5, -1 +// GFX12: v_cvt_f32_u32_e32 v5, -1 ; encoding: [0xc1,0x0c,0x0a,0x7e] + +v_cvt_f32_u32 v5, 0.5 +// GFX12: v_cvt_f32_u32_e32 v5, 0.5 ; encoding: [0xf0,0x0c,0x0a,0x7e] + +v_cvt_f32_u32 v5, src_scc +// GFX12: v_cvt_f32_u32_e32 v5, src_scc ; encoding: [0xfd,0x0c,0x0a,0x7e] + +v_cvt_f32_u32 v255, 0xaf123456 +// GFX12: v_cvt_f32_u32_e32 v255, 0xaf123456 ; encoding: [0xff,0x0c,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_cvt_f32_ubyte0 v5, v1 +// GFX12: v_cvt_f32_ubyte0_e32 v5, v1 ; encoding: [0x01,0x23,0x0a,0x7e] + +v_cvt_f32_ubyte0 v5, v255 +// GFX12: v_cvt_f32_ubyte0_e32 v5, v255 ; encoding: [0xff,0x23,0x0a,0x7e] + +v_cvt_f32_ubyte0 v5, s1 +// GFX12: v_cvt_f32_ubyte0_e32 v5, s1 ; encoding: [0x01,0x22,0x0a,0x7e] + +v_cvt_f32_ubyte0 v5, s105 +// GFX12: v_cvt_f32_ubyte0_e32 v5, s105 ; encoding: [0x69,0x22,0x0a,0x7e] + +v_cvt_f32_ubyte0 v5, vcc_lo +// GFX12: v_cvt_f32_ubyte0_e32 v5, vcc_lo ; encoding: [0x6a,0x22,0x0a,0x7e] + +v_cvt_f32_ubyte0 v5, vcc_hi +// GFX12: v_cvt_f32_ubyte0_e32 v5, vcc_hi ; encoding: [0x6b,0x22,0x0a,0x7e] + +v_cvt_f32_ubyte0 v5, ttmp15 +// GFX12: v_cvt_f32_ubyte0_e32 v5, ttmp15 ; encoding: [0x7b,0x22,0x0a,0x7e] + +v_cvt_f32_ubyte0 v5, m0 +// GFX12: v_cvt_f32_ubyte0_e32 v5, m0 ; 
encoding: [0x7d,0x22,0x0a,0x7e] + +v_cvt_f32_ubyte0 v5, exec_lo +// GFX12: v_cvt_f32_ubyte0_e32 v5, exec_lo ; encoding: [0x7e,0x22,0x0a,0x7e] + +v_cvt_f32_ubyte0 v5, exec_hi +// GFX12: v_cvt_f32_ubyte0_e32 v5, exec_hi ; encoding: [0x7f,0x22,0x0a,0x7e] + +v_cvt_f32_ubyte0 v5, null +// GFX12: v_cvt_f32_ubyte0_e32 v5, null ; encoding: [0x7c,0x22,0x0a,0x7e] + +v_cvt_f32_ubyte0 v5, -1 +// GFX12: v_cvt_f32_ubyte0_e32 v5, -1 ; encoding: [0xc1,0x22,0x0a,0x7e] + +v_cvt_f32_ubyte0 v5, 0.5 +// GFX12: v_cvt_f32_ubyte0_e32 v5, 0.5 ; encoding: [0xf0,0x22,0x0a,0x7e] + +v_cvt_f32_ubyte0 v5, src_scc +// GFX12: v_cvt_f32_ubyte0_e32 v5, src_scc ; encoding: [0xfd,0x22,0x0a,0x7e] + +v_cvt_f32_ubyte0 v255, 0xaf123456 +// GFX12: v_cvt_f32_ubyte0_e32 v255, 0xaf123456 ; encoding: [0xff,0x22,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_cvt_f32_ubyte1 v5, v1 +// GFX12: v_cvt_f32_ubyte1_e32 v5, v1 ; encoding: [0x01,0x25,0x0a,0x7e] + +v_cvt_f32_ubyte1 v5, v255 +// GFX12: v_cvt_f32_ubyte1_e32 v5, v255 ; encoding: [0xff,0x25,0x0a,0x7e] + +v_cvt_f32_ubyte1 v5, s1 +// GFX12: v_cvt_f32_ubyte1_e32 v5, s1 ; encoding: [0x01,0x24,0x0a,0x7e] + +v_cvt_f32_ubyte1 v5, s105 +// GFX12: v_cvt_f32_ubyte1_e32 v5, s105 ; encoding: [0x69,0x24,0x0a,0x7e] + +v_cvt_f32_ubyte1 v5, vcc_lo +// GFX12: v_cvt_f32_ubyte1_e32 v5, vcc_lo ; encoding: [0x6a,0x24,0x0a,0x7e] + +v_cvt_f32_ubyte1 v5, vcc_hi +// GFX12: v_cvt_f32_ubyte1_e32 v5, vcc_hi ; encoding: [0x6b,0x24,0x0a,0x7e] + +v_cvt_f32_ubyte1 v5, ttmp15 +// GFX12: v_cvt_f32_ubyte1_e32 v5, ttmp15 ; encoding: [0x7b,0x24,0x0a,0x7e] + +v_cvt_f32_ubyte1 v5, m0 +// GFX12: v_cvt_f32_ubyte1_e32 v5, m0 ; encoding: [0x7d,0x24,0x0a,0x7e] + +v_cvt_f32_ubyte1 v5, exec_lo +// GFX12: v_cvt_f32_ubyte1_e32 v5, exec_lo ; encoding: [0x7e,0x24,0x0a,0x7e] + +v_cvt_f32_ubyte1 v5, exec_hi +// GFX12: v_cvt_f32_ubyte1_e32 v5, exec_hi ; encoding: [0x7f,0x24,0x0a,0x7e] + +v_cvt_f32_ubyte1 v5, null +// GFX12: v_cvt_f32_ubyte1_e32 v5, null ; encoding: [0x7c,0x24,0x0a,0x7e] + +v_cvt_f32_ubyte1 v5, -1 +// GFX12: v_cvt_f32_ubyte1_e32 v5, -1 ; encoding: [0xc1,0x24,0x0a,0x7e] + +v_cvt_f32_ubyte1 v5, 0.5 +// GFX12: v_cvt_f32_ubyte1_e32 v5, 0.5 ; encoding: [0xf0,0x24,0x0a,0x7e] + +v_cvt_f32_ubyte1 v5, src_scc +// GFX12: v_cvt_f32_ubyte1_e32 v5, src_scc ; encoding: [0xfd,0x24,0x0a,0x7e] + +v_cvt_f32_ubyte1 v255, 0xaf123456 +// GFX12: v_cvt_f32_ubyte1_e32 v255, 0xaf123456 ; encoding: [0xff,0x24,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_cvt_f32_ubyte2 v5, v1 +// GFX12: v_cvt_f32_ubyte2_e32 v5, v1 ; encoding: [0x01,0x27,0x0a,0x7e] + +v_cvt_f32_ubyte2 v5, v255 +// GFX12: v_cvt_f32_ubyte2_e32 v5, v255 ; encoding: [0xff,0x27,0x0a,0x7e] + +v_cvt_f32_ubyte2 v5, s1 +// GFX12: v_cvt_f32_ubyte2_e32 v5, s1 ; encoding: [0x01,0x26,0x0a,0x7e] + +v_cvt_f32_ubyte2 v5, s105 +// GFX12: v_cvt_f32_ubyte2_e32 v5, s105 ; encoding: [0x69,0x26,0x0a,0x7e] + +v_cvt_f32_ubyte2 v5, vcc_lo +// GFX12: v_cvt_f32_ubyte2_e32 v5, vcc_lo ; encoding: [0x6a,0x26,0x0a,0x7e] + +v_cvt_f32_ubyte2 v5, vcc_hi +// GFX12: v_cvt_f32_ubyte2_e32 v5, vcc_hi ; encoding: [0x6b,0x26,0x0a,0x7e] + +v_cvt_f32_ubyte2 v5, ttmp15 +// GFX12: v_cvt_f32_ubyte2_e32 v5, ttmp15 ; encoding: [0x7b,0x26,0x0a,0x7e] + +v_cvt_f32_ubyte2 v5, m0 +// GFX12: v_cvt_f32_ubyte2_e32 v5, m0 ; encoding: [0x7d,0x26,0x0a,0x7e] + +v_cvt_f32_ubyte2 v5, exec_lo +// GFX12: v_cvt_f32_ubyte2_e32 v5, exec_lo ; encoding: [0x7e,0x26,0x0a,0x7e] + +v_cvt_f32_ubyte2 v5, exec_hi +// GFX12: v_cvt_f32_ubyte2_e32 v5, exec_hi ; encoding: [0x7f,0x26,0x0a,0x7e] + +v_cvt_f32_ubyte2 v5, null +// GFX12: v_cvt_f32_ubyte2_e32 v5, null ; encoding: 
[0x7c,0x26,0x0a,0x7e] + +v_cvt_f32_ubyte2 v5, -1 +// GFX12: v_cvt_f32_ubyte2_e32 v5, -1 ; encoding: [0xc1,0x26,0x0a,0x7e] + +v_cvt_f32_ubyte2 v5, 0.5 +// GFX12: v_cvt_f32_ubyte2_e32 v5, 0.5 ; encoding: [0xf0,0x26,0x0a,0x7e] + +v_cvt_f32_ubyte2 v5, src_scc +// GFX12: v_cvt_f32_ubyte2_e32 v5, src_scc ; encoding: [0xfd,0x26,0x0a,0x7e] + +v_cvt_f32_ubyte2 v255, 0xaf123456 +// GFX12: v_cvt_f32_ubyte2_e32 v255, 0xaf123456 ; encoding: [0xff,0x26,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_cvt_f32_ubyte3 v5, v1 +// GFX12: v_cvt_f32_ubyte3_e32 v5, v1 ; encoding: [0x01,0x29,0x0a,0x7e] + +v_cvt_f32_ubyte3 v5, v255 +// GFX12: v_cvt_f32_ubyte3_e32 v5, v255 ; encoding: [0xff,0x29,0x0a,0x7e] + +v_cvt_f32_ubyte3 v5, s1 +// GFX12: v_cvt_f32_ubyte3_e32 v5, s1 ; encoding: [0x01,0x28,0x0a,0x7e] + +v_cvt_f32_ubyte3 v5, s105 +// GFX12: v_cvt_f32_ubyte3_e32 v5, s105 ; encoding: [0x69,0x28,0x0a,0x7e] + +v_cvt_f32_ubyte3 v5, vcc_lo +// GFX12: v_cvt_f32_ubyte3_e32 v5, vcc_lo ; encoding: [0x6a,0x28,0x0a,0x7e] + +v_cvt_f32_ubyte3 v5, vcc_hi +// GFX12: v_cvt_f32_ubyte3_e32 v5, vcc_hi ; encoding: [0x6b,0x28,0x0a,0x7e] + +v_cvt_f32_ubyte3 v5, ttmp15 +// GFX12: v_cvt_f32_ubyte3_e32 v5, ttmp15 ; encoding: [0x7b,0x28,0x0a,0x7e] + +v_cvt_f32_ubyte3 v5, m0 +// GFX12: v_cvt_f32_ubyte3_e32 v5, m0 ; encoding: [0x7d,0x28,0x0a,0x7e] + +v_cvt_f32_ubyte3 v5, exec_lo +// GFX12: v_cvt_f32_ubyte3_e32 v5, exec_lo ; encoding: [0x7e,0x28,0x0a,0x7e] + +v_cvt_f32_ubyte3 v5, exec_hi +// GFX12: v_cvt_f32_ubyte3_e32 v5, exec_hi ; encoding: [0x7f,0x28,0x0a,0x7e] + +v_cvt_f32_ubyte3 v5, null +// GFX12: v_cvt_f32_ubyte3_e32 v5, null ; encoding: [0x7c,0x28,0x0a,0x7e] + +v_cvt_f32_ubyte3 v5, -1 +// GFX12: v_cvt_f32_ubyte3_e32 v5, -1 ; encoding: [0xc1,0x28,0x0a,0x7e] + +v_cvt_f32_ubyte3 v5, 0.5 +// GFX12: v_cvt_f32_ubyte3_e32 v5, 0.5 ; encoding: [0xf0,0x28,0x0a,0x7e] + +v_cvt_f32_ubyte3 v5, src_scc +// GFX12: v_cvt_f32_ubyte3_e32 v5, src_scc ; encoding: [0xfd,0x28,0x0a,0x7e] + +v_cvt_f32_ubyte3 v255, 0xaf123456 +// GFX12: v_cvt_f32_ubyte3_e32 v255, 0xaf123456 ; encoding: [0xff,0x28,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_cvt_f64_f32 v[5:6], v1 +// GFX12: v_cvt_f64_f32_e32 v[5:6], v1 ; encoding: [0x01,0x21,0x0a,0x7e] + +v_cvt_f64_f32 v[5:6], v255 +// GFX12: v_cvt_f64_f32_e32 v[5:6], v255 ; encoding: [0xff,0x21,0x0a,0x7e] + +v_cvt_f64_f32 v[5:6], s1 +// GFX12: v_cvt_f64_f32_e32 v[5:6], s1 ; encoding: [0x01,0x20,0x0a,0x7e] + +v_cvt_f64_f32 v[5:6], s105 +// GFX12: v_cvt_f64_f32_e32 v[5:6], s105 ; encoding: [0x69,0x20,0x0a,0x7e] + +v_cvt_f64_f32 v[5:6], vcc_lo +// GFX12: v_cvt_f64_f32_e32 v[5:6], vcc_lo ; encoding: [0x6a,0x20,0x0a,0x7e] + +v_cvt_f64_f32 v[5:6], vcc_hi +// GFX12: v_cvt_f64_f32_e32 v[5:6], vcc_hi ; encoding: [0x6b,0x20,0x0a,0x7e] + +v_cvt_f64_f32 v[5:6], ttmp15 +// GFX12: v_cvt_f64_f32_e32 v[5:6], ttmp15 ; encoding: [0x7b,0x20,0x0a,0x7e] + +v_cvt_f64_f32 v[5:6], m0 +// GFX12: v_cvt_f64_f32_e32 v[5:6], m0 ; encoding: [0x7d,0x20,0x0a,0x7e] + +v_cvt_f64_f32 v[5:6], exec_lo +// GFX12: v_cvt_f64_f32_e32 v[5:6], exec_lo ; encoding: [0x7e,0x20,0x0a,0x7e] + +v_cvt_f64_f32 v[5:6], exec_hi +// GFX12: v_cvt_f64_f32_e32 v[5:6], exec_hi ; encoding: [0x7f,0x20,0x0a,0x7e] + +v_cvt_f64_f32 v[5:6], null +// GFX12: v_cvt_f64_f32_e32 v[5:6], null ; encoding: [0x7c,0x20,0x0a,0x7e] + +v_cvt_f64_f32 v[5:6], -1 +// GFX12: v_cvt_f64_f32_e32 v[5:6], -1 ; encoding: [0xc1,0x20,0x0a,0x7e] + +v_cvt_f64_f32 v[5:6], 0.5 +// GFX12: v_cvt_f64_f32_e32 v[5:6], 0.5 ; encoding: [0xf0,0x20,0x0a,0x7e] + +v_cvt_f64_f32 v[5:6], src_scc +// GFX12: v_cvt_f64_f32_e32 v[5:6], src_scc ; 
encoding: [0xfd,0x20,0x0a,0x7e] + +v_cvt_f64_f32 v[254:255], 0xaf123456 +// GFX12: v_cvt_f64_f32_e32 v[254:255], 0xaf123456 ; encoding: [0xff,0x20,0xfc,0x7f,0x56,0x34,0x12,0xaf] + +v_cvt_f64_i32 v[5:6], v1 +// GFX12: v_cvt_f64_i32_e32 v[5:6], v1 ; encoding: [0x01,0x09,0x0a,0x7e] + +v_cvt_f64_i32 v[5:6], v255 +// GFX12: v_cvt_f64_i32_e32 v[5:6], v255 ; encoding: [0xff,0x09,0x0a,0x7e] + +v_cvt_f64_i32 v[5:6], s1 +// GFX12: v_cvt_f64_i32_e32 v[5:6], s1 ; encoding: [0x01,0x08,0x0a,0x7e] + +v_cvt_f64_i32 v[5:6], s105 +// GFX12: v_cvt_f64_i32_e32 v[5:6], s105 ; encoding: [0x69,0x08,0x0a,0x7e] + +v_cvt_f64_i32 v[5:6], vcc_lo +// GFX12: v_cvt_f64_i32_e32 v[5:6], vcc_lo ; encoding: [0x6a,0x08,0x0a,0x7e] + +v_cvt_f64_i32 v[5:6], vcc_hi +// GFX12: v_cvt_f64_i32_e32 v[5:6], vcc_hi ; encoding: [0x6b,0x08,0x0a,0x7e] + +v_cvt_f64_i32 v[5:6], ttmp15 +// GFX12: v_cvt_f64_i32_e32 v[5:6], ttmp15 ; encoding: [0x7b,0x08,0x0a,0x7e] + +v_cvt_f64_i32 v[5:6], m0 +// GFX12: v_cvt_f64_i32_e32 v[5:6], m0 ; encoding: [0x7d,0x08,0x0a,0x7e] + +v_cvt_f64_i32 v[5:6], exec_lo +// GFX12: v_cvt_f64_i32_e32 v[5:6], exec_lo ; encoding: [0x7e,0x08,0x0a,0x7e] + +v_cvt_f64_i32 v[5:6], exec_hi +// GFX12: v_cvt_f64_i32_e32 v[5:6], exec_hi ; encoding: [0x7f,0x08,0x0a,0x7e] + +v_cvt_f64_i32 v[5:6], null +// GFX12: v_cvt_f64_i32_e32 v[5:6], null ; encoding: [0x7c,0x08,0x0a,0x7e] + +v_cvt_f64_i32 v[5:6], -1 +// GFX12: v_cvt_f64_i32_e32 v[5:6], -1 ; encoding: [0xc1,0x08,0x0a,0x7e] + +v_cvt_f64_i32 v[5:6], 0.5 +// GFX12: v_cvt_f64_i32_e32 v[5:6], 0.5 ; encoding: [0xf0,0x08,0x0a,0x7e] + +v_cvt_f64_i32 v[5:6], src_scc +// GFX12: v_cvt_f64_i32_e32 v[5:6], src_scc ; encoding: [0xfd,0x08,0x0a,0x7e] + +v_cvt_f64_i32 v[254:255], 0xaf123456 +// GFX12: v_cvt_f64_i32_e32 v[254:255], 0xaf123456 ; encoding: [0xff,0x08,0xfc,0x7f,0x56,0x34,0x12,0xaf] + +v_cvt_f64_u32 v[5:6], v1 +// GFX12: v_cvt_f64_u32_e32 v[5:6], v1 ; encoding: [0x01,0x2d,0x0a,0x7e] + +v_cvt_f64_u32 v[5:6], v255 +// GFX12: v_cvt_f64_u32_e32 v[5:6], v255 ; encoding: [0xff,0x2d,0x0a,0x7e] + +v_cvt_f64_u32 v[5:6], s1 +// GFX12: v_cvt_f64_u32_e32 v[5:6], s1 ; encoding: [0x01,0x2c,0x0a,0x7e] + +v_cvt_f64_u32 v[5:6], s105 +// GFX12: v_cvt_f64_u32_e32 v[5:6], s105 ; encoding: [0x69,0x2c,0x0a,0x7e] + +v_cvt_f64_u32 v[5:6], vcc_lo +// GFX12: v_cvt_f64_u32_e32 v[5:6], vcc_lo ; encoding: [0x6a,0x2c,0x0a,0x7e] + +v_cvt_f64_u32 v[5:6], vcc_hi +// GFX12: v_cvt_f64_u32_e32 v[5:6], vcc_hi ; encoding: [0x6b,0x2c,0x0a,0x7e] + +v_cvt_f64_u32 v[5:6], ttmp15 +// GFX12: v_cvt_f64_u32_e32 v[5:6], ttmp15 ; encoding: [0x7b,0x2c,0x0a,0x7e] + +v_cvt_f64_u32 v[5:6], m0 +// GFX12: v_cvt_f64_u32_e32 v[5:6], m0 ; encoding: [0x7d,0x2c,0x0a,0x7e] + +v_cvt_f64_u32 v[5:6], exec_lo +// GFX12: v_cvt_f64_u32_e32 v[5:6], exec_lo ; encoding: [0x7e,0x2c,0x0a,0x7e] + +v_cvt_f64_u32 v[5:6], exec_hi +// GFX12: v_cvt_f64_u32_e32 v[5:6], exec_hi ; encoding: [0x7f,0x2c,0x0a,0x7e] + +v_cvt_f64_u32 v[5:6], null +// GFX12: v_cvt_f64_u32_e32 v[5:6], null ; encoding: [0x7c,0x2c,0x0a,0x7e] + +v_cvt_f64_u32 v[5:6], -1 +// GFX12: v_cvt_f64_u32_e32 v[5:6], -1 ; encoding: [0xc1,0x2c,0x0a,0x7e] + +v_cvt_f64_u32 v[5:6], 0.5 +// GFX12: v_cvt_f64_u32_e32 v[5:6], 0.5 ; encoding: [0xf0,0x2c,0x0a,0x7e] + +v_cvt_f64_u32 v[5:6], src_scc +// GFX12: v_cvt_f64_u32_e32 v[5:6], src_scc ; encoding: [0xfd,0x2c,0x0a,0x7e] + +v_cvt_f64_u32 v[254:255], 0xaf123456 +// GFX12: v_cvt_f64_u32_e32 v[254:255], 0xaf123456 ; encoding: [0xff,0x2c,0xfc,0x7f,0x56,0x34,0x12,0xaf] + +v_cvt_floor_i32_f32 v5, v1 +// GFX12: v_cvt_floor_i32_f32_e32 v5, v1 ; encoding: 
[0x01,0x1b,0x0a,0x7e] + +v_cvt_floor_i32_f32 v5, v255 +// GFX12: v_cvt_floor_i32_f32_e32 v5, v255 ; encoding: [0xff,0x1b,0x0a,0x7e] + +v_cvt_floor_i32_f32 v5, s1 +// GFX12: v_cvt_floor_i32_f32_e32 v5, s1 ; encoding: [0x01,0x1a,0x0a,0x7e] + +v_cvt_floor_i32_f32 v5, s105 +// GFX12: v_cvt_floor_i32_f32_e32 v5, s105 ; encoding: [0x69,0x1a,0x0a,0x7e] + +v_cvt_floor_i32_f32 v5, vcc_lo +// GFX12: v_cvt_floor_i32_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x1a,0x0a,0x7e] + +v_cvt_floor_i32_f32 v5, vcc_hi +// GFX12: v_cvt_floor_i32_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x1a,0x0a,0x7e] + +v_cvt_floor_i32_f32 v5, ttmp15 +// GFX12: v_cvt_floor_i32_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x1a,0x0a,0x7e] + +v_cvt_floor_i32_f32 v5, m0 +// GFX12: v_cvt_floor_i32_f32_e32 v5, m0 ; encoding: [0x7d,0x1a,0x0a,0x7e] + +v_cvt_floor_i32_f32 v5, exec_lo +// GFX12: v_cvt_floor_i32_f32_e32 v5, exec_lo ; encoding: [0x7e,0x1a,0x0a,0x7e] + +v_cvt_floor_i32_f32 v5, exec_hi +// GFX12: v_cvt_floor_i32_f32_e32 v5, exec_hi ; encoding: [0x7f,0x1a,0x0a,0x7e] + +v_cvt_floor_i32_f32 v5, null +// GFX12: v_cvt_floor_i32_f32_e32 v5, null ; encoding: [0x7c,0x1a,0x0a,0x7e] + +v_cvt_floor_i32_f32 v5, -1 +// GFX12: v_cvt_floor_i32_f32_e32 v5, -1 ; encoding: [0xc1,0x1a,0x0a,0x7e] + +v_cvt_floor_i32_f32 v5, 0.5 +// GFX12: v_cvt_floor_i32_f32_e32 v5, 0.5 ; encoding: [0xf0,0x1a,0x0a,0x7e] + +v_cvt_floor_i32_f32 v5, src_scc +// GFX12: v_cvt_floor_i32_f32_e32 v5, src_scc ; encoding: [0xfd,0x1a,0x0a,0x7e] + +v_cvt_floor_i32_f32 v255, 0xaf123456 +// GFX12: v_cvt_floor_i32_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x1a,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_cvt_flr_i32_f32 v5, v1 +// GFX12: v_cvt_floor_i32_f32_e32 v5, v1 ; encoding: [0x01,0x1b,0x0a,0x7e] + +v_cvt_flr_i32_f32 v5, v255 +// GFX12: v_cvt_floor_i32_f32_e32 v5, v255 ; encoding: [0xff,0x1b,0x0a,0x7e] + +v_cvt_flr_i32_f32 v5, s1 +// GFX12: v_cvt_floor_i32_f32_e32 v5, s1 ; encoding: [0x01,0x1a,0x0a,0x7e] + +v_cvt_flr_i32_f32 v5, s105 +// GFX12: v_cvt_floor_i32_f32_e32 v5, s105 ; encoding: [0x69,0x1a,0x0a,0x7e] + +v_cvt_flr_i32_f32 v5, vcc_lo +// GFX12: v_cvt_floor_i32_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x1a,0x0a,0x7e] + +v_cvt_flr_i32_f32 v5, vcc_hi +// GFX12: v_cvt_floor_i32_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x1a,0x0a,0x7e] + +v_cvt_flr_i32_f32 v5, ttmp15 +// GFX12: v_cvt_floor_i32_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x1a,0x0a,0x7e] + +v_cvt_flr_i32_f32 v5, m0 +// GFX12: v_cvt_floor_i32_f32_e32 v5, m0 ; encoding: [0x7d,0x1a,0x0a,0x7e] + +v_cvt_flr_i32_f32 v5, exec_lo +// GFX12: v_cvt_floor_i32_f32_e32 v5, exec_lo ; encoding: [0x7e,0x1a,0x0a,0x7e] + +v_cvt_flr_i32_f32 v5, exec_hi +// GFX12: v_cvt_floor_i32_f32_e32 v5, exec_hi ; encoding: [0x7f,0x1a,0x0a,0x7e] + +v_cvt_flr_i32_f32 v5, null +// GFX12: v_cvt_floor_i32_f32_e32 v5, null ; encoding: [0x7c,0x1a,0x0a,0x7e] + +v_cvt_flr_i32_f32 v5, -1 +// GFX12: v_cvt_floor_i32_f32_e32 v5, -1 ; encoding: [0xc1,0x1a,0x0a,0x7e] + +v_cvt_flr_i32_f32 v5, 0.5 +// GFX12: v_cvt_floor_i32_f32_e32 v5, 0.5 ; encoding: [0xf0,0x1a,0x0a,0x7e] + +v_cvt_flr_i32_f32 v5, src_scc +// GFX12: v_cvt_floor_i32_f32_e32 v5, src_scc ; encoding: [0xfd,0x1a,0x0a,0x7e] + +v_cvt_flr_i32_f32 v255, 0xaf123456 +// GFX12: v_cvt_floor_i32_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x1a,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_cvt_i16_f16 v5, v1 +// GFX12: v_cvt_i16_f16_e32 v5, v1 ; encoding: [0x01,0xa7,0x0a,0x7e] + +v_cvt_i16_f16 v5, v127 +// GFX12: v_cvt_i16_f16_e32 v5, v127 ; encoding: [0x7f,0xa7,0x0a,0x7e] + +v_cvt_i16_f16 v5, s1 +// GFX12: v_cvt_i16_f16_e32 v5, s1 ; encoding: 
[0x01,0xa6,0x0a,0x7e] + +v_cvt_i16_f16 v5, s105 +// GFX12: v_cvt_i16_f16_e32 v5, s105 ; encoding: [0x69,0xa6,0x0a,0x7e] + +v_cvt_i16_f16 v5, vcc_lo +// GFX12: v_cvt_i16_f16_e32 v5, vcc_lo ; encoding: [0x6a,0xa6,0x0a,0x7e] + +v_cvt_i16_f16 v5, vcc_hi +// GFX12: v_cvt_i16_f16_e32 v5, vcc_hi ; encoding: [0x6b,0xa6,0x0a,0x7e] + +v_cvt_i16_f16 v5, ttmp15 +// GFX12: v_cvt_i16_f16_e32 v5, ttmp15 ; encoding: [0x7b,0xa6,0x0a,0x7e] + +v_cvt_i16_f16 v5, m0 +// GFX12: v_cvt_i16_f16_e32 v5, m0 ; encoding: [0x7d,0xa6,0x0a,0x7e] + +v_cvt_i16_f16 v5, exec_lo +// GFX12: v_cvt_i16_f16_e32 v5, exec_lo ; encoding: [0x7e,0xa6,0x0a,0x7e] + +v_cvt_i16_f16 v5, exec_hi +// GFX12: v_cvt_i16_f16_e32 v5, exec_hi ; encoding: [0x7f,0xa6,0x0a,0x7e] + +v_cvt_i16_f16 v5, null +// GFX12: v_cvt_i16_f16_e32 v5, null ; encoding: [0x7c,0xa6,0x0a,0x7e] + +v_cvt_i16_f16 v5, -1 +// GFX12: v_cvt_i16_f16_e32 v5, -1 ; encoding: [0xc1,0xa6,0x0a,0x7e] + +v_cvt_i16_f16 v5, 0.5 +// GFX12: v_cvt_i16_f16_e32 v5, 0.5 ; encoding: [0xf0,0xa6,0x0a,0x7e] + +v_cvt_i16_f16 v5, src_scc +// GFX12: v_cvt_i16_f16_e32 v5, src_scc ; encoding: [0xfd,0xa6,0x0a,0x7e] + +v_cvt_i16_f16 v127, 0xfe0b +// GFX12: v_cvt_i16_f16_e32 v127, 0xfe0b ; encoding: [0xff,0xa6,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + +v_cvt_i32_f32 v5, v1 +// GFX12: v_cvt_i32_f32_e32 v5, v1 ; encoding: [0x01,0x11,0x0a,0x7e] + +v_cvt_i32_f32 v5, v255 +// GFX12: v_cvt_i32_f32_e32 v5, v255 ; encoding: [0xff,0x11,0x0a,0x7e] + +v_cvt_i32_f32 v5, s1 +// GFX12: v_cvt_i32_f32_e32 v5, s1 ; encoding: [0x01,0x10,0x0a,0x7e] + +v_cvt_i32_f32 v5, s105 +// GFX12: v_cvt_i32_f32_e32 v5, s105 ; encoding: [0x69,0x10,0x0a,0x7e] + +v_cvt_i32_f32 v5, vcc_lo +// GFX12: v_cvt_i32_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x10,0x0a,0x7e] + +v_cvt_i32_f32 v5, vcc_hi +// GFX12: v_cvt_i32_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x10,0x0a,0x7e] + +v_cvt_i32_f32 v5, ttmp15 +// GFX12: v_cvt_i32_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x10,0x0a,0x7e] + +v_cvt_i32_f32 v5, m0 +// GFX12: v_cvt_i32_f32_e32 v5, m0 ; encoding: [0x7d,0x10,0x0a,0x7e] + +v_cvt_i32_f32 v5, exec_lo +// GFX12: v_cvt_i32_f32_e32 v5, exec_lo ; encoding: [0x7e,0x10,0x0a,0x7e] + +v_cvt_i32_f32 v5, exec_hi +// GFX12: v_cvt_i32_f32_e32 v5, exec_hi ; encoding: [0x7f,0x10,0x0a,0x7e] + +v_cvt_i32_f32 v5, null +// GFX12: v_cvt_i32_f32_e32 v5, null ; encoding: [0x7c,0x10,0x0a,0x7e] + +v_cvt_i32_f32 v5, -1 +// GFX12: v_cvt_i32_f32_e32 v5, -1 ; encoding: [0xc1,0x10,0x0a,0x7e] + +v_cvt_i32_f32 v5, 0.5 +// GFX12: v_cvt_i32_f32_e32 v5, 0.5 ; encoding: [0xf0,0x10,0x0a,0x7e] + +v_cvt_i32_f32 v5, src_scc +// GFX12: v_cvt_i32_f32_e32 v5, src_scc ; encoding: [0xfd,0x10,0x0a,0x7e] + +v_cvt_i32_f32 v255, 0xaf123456 +// GFX12: v_cvt_i32_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x10,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_cvt_i32_f64 v5, v[1:2] +// GFX12: v_cvt_i32_f64_e32 v5, v[1:2] ; encoding: [0x01,0x07,0x0a,0x7e] + +v_cvt_i32_f64 v5, v[254:255] +// GFX12: v_cvt_i32_f64_e32 v5, v[254:255] ; encoding: [0xfe,0x07,0x0a,0x7e] + +v_cvt_i32_f64 v5, s[2:3] +// GFX12: v_cvt_i32_f64_e32 v5, s[2:3] ; encoding: [0x02,0x06,0x0a,0x7e] + +v_cvt_i32_f64 v5, s[104:105] +// GFX12: v_cvt_i32_f64_e32 v5, s[104:105] ; encoding: [0x68,0x06,0x0a,0x7e] + +v_cvt_i32_f64 v5, vcc +// GFX12: v_cvt_i32_f64_e32 v5, vcc ; encoding: [0x6a,0x06,0x0a,0x7e] + +v_cvt_i32_f64 v5, ttmp[14:15] +// GFX12: v_cvt_i32_f64_e32 v5, ttmp[14:15] ; encoding: [0x7a,0x06,0x0a,0x7e] + +v_cvt_i32_f64 v5, exec +// GFX12: v_cvt_i32_f64_e32 v5, exec ; encoding: [0x7e,0x06,0x0a,0x7e] + +v_cvt_i32_f64 v5, null +// GFX12: v_cvt_i32_f64_e32 v5, 
null ; encoding: [0x7c,0x06,0x0a,0x7e] + +v_cvt_i32_f64 v5, -1 +// GFX12: v_cvt_i32_f64_e32 v5, -1 ; encoding: [0xc1,0x06,0x0a,0x7e] + +v_cvt_i32_f64 v5, 0.5 +// GFX12: v_cvt_i32_f64_e32 v5, 0.5 ; encoding: [0xf0,0x06,0x0a,0x7e] + +v_cvt_i32_f64 v5, src_scc +// GFX12: v_cvt_i32_f64_e32 v5, src_scc ; encoding: [0xfd,0x06,0x0a,0x7e] + +v_cvt_i32_f64 v255, 0xaf123456 +// GFX12: v_cvt_i32_f64_e32 v255, 0xaf123456 ; encoding: [0xff,0x06,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_cvt_i32_i16 v5, v1 +// GFX12: v_cvt_i32_i16_e32 v5, v1 ; encoding: [0x01,0xd5,0x0a,0x7e] + +v_cvt_i32_i16 v5, v127 +// GFX12: v_cvt_i32_i16_e32 v5, v127 ; encoding: [0x7f,0xd5,0x0a,0x7e] + +v_cvt_i32_i16 v5, s1 +// GFX12: v_cvt_i32_i16_e32 v5, s1 ; encoding: [0x01,0xd4,0x0a,0x7e] + +v_cvt_i32_i16 v5, s105 +// GFX12: v_cvt_i32_i16_e32 v5, s105 ; encoding: [0x69,0xd4,0x0a,0x7e] + +v_cvt_i32_i16 v5, vcc_lo +// GFX12: v_cvt_i32_i16_e32 v5, vcc_lo ; encoding: [0x6a,0xd4,0x0a,0x7e] + +v_cvt_i32_i16 v5, vcc_hi +// GFX12: v_cvt_i32_i16_e32 v5, vcc_hi ; encoding: [0x6b,0xd4,0x0a,0x7e] + +v_cvt_i32_i16 v5, ttmp15 +// GFX12: v_cvt_i32_i16_e32 v5, ttmp15 ; encoding: [0x7b,0xd4,0x0a,0x7e] + +v_cvt_i32_i16 v5, m0 +// GFX12: v_cvt_i32_i16_e32 v5, m0 ; encoding: [0x7d,0xd4,0x0a,0x7e] + +v_cvt_i32_i16 v5, exec_lo +// GFX12: v_cvt_i32_i16_e32 v5, exec_lo ; encoding: [0x7e,0xd4,0x0a,0x7e] + +v_cvt_i32_i16 v5, exec_hi +// GFX12: v_cvt_i32_i16_e32 v5, exec_hi ; encoding: [0x7f,0xd4,0x0a,0x7e] + +v_cvt_i32_i16 v5, null +// GFX12: v_cvt_i32_i16_e32 v5, null ; encoding: [0x7c,0xd4,0x0a,0x7e] + +v_cvt_i32_i16 v5, -1 +// GFX12: v_cvt_i32_i16_e32 v5, -1 ; encoding: [0xc1,0xd4,0x0a,0x7e] + +v_cvt_i32_i16 v5, 0.5 +// GFX12-ASM: v_cvt_i32_i16_e32 v5, 0.5 ; encoding: [0xf0,0xd4,0x0a,0x7e] +// GFX12-DIS: v_cvt_i32_i16_e32 v5, 0x3800 ; encoding: [0xff,0xd4,0x0a,0x7e,0x00,0x38,0x00,0x00] + +v_cvt_i32_i16 v5, src_scc +// GFX12: v_cvt_i32_i16_e32 v5, src_scc ; encoding: [0xfd,0xd4,0x0a,0x7e] + +v_cvt_i32_i16 v255, 0xfe0b +// GFX12: v_cvt_i32_i16_e32 v255, 0xfe0b ; encoding: [0xff,0xd4,0xfe,0x7f,0x0b,0xfe,0x00,0x00] + +v_cvt_nearest_i32_f32 v5, v1 +// GFX12: v_cvt_nearest_i32_f32_e32 v5, v1 ; encoding: [0x01,0x19,0x0a,0x7e] + +v_cvt_nearest_i32_f32 v5, v255 +// GFX12: v_cvt_nearest_i32_f32_e32 v5, v255 ; encoding: [0xff,0x19,0x0a,0x7e] + +v_cvt_nearest_i32_f32 v5, s1 +// GFX12: v_cvt_nearest_i32_f32_e32 v5, s1 ; encoding: [0x01,0x18,0x0a,0x7e] + +v_cvt_nearest_i32_f32 v5, s105 +// GFX12: v_cvt_nearest_i32_f32_e32 v5, s105 ; encoding: [0x69,0x18,0x0a,0x7e] + +v_cvt_nearest_i32_f32 v5, vcc_lo +// GFX12: v_cvt_nearest_i32_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x18,0x0a,0x7e] + +v_cvt_nearest_i32_f32 v5, vcc_hi +// GFX12: v_cvt_nearest_i32_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x18,0x0a,0x7e] + +v_cvt_nearest_i32_f32 v5, ttmp15 +// GFX12: v_cvt_nearest_i32_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x18,0x0a,0x7e] + +v_cvt_nearest_i32_f32 v5, m0 +// GFX12: v_cvt_nearest_i32_f32_e32 v5, m0 ; encoding: [0x7d,0x18,0x0a,0x7e] + +v_cvt_nearest_i32_f32 v5, exec_lo +// GFX12: v_cvt_nearest_i32_f32_e32 v5, exec_lo ; encoding: [0x7e,0x18,0x0a,0x7e] + +v_cvt_nearest_i32_f32 v5, exec_hi +// GFX12: v_cvt_nearest_i32_f32_e32 v5, exec_hi ; encoding: [0x7f,0x18,0x0a,0x7e] + +v_cvt_nearest_i32_f32 v5, null +// GFX12: v_cvt_nearest_i32_f32_e32 v5, null ; encoding: [0x7c,0x18,0x0a,0x7e] + +v_cvt_nearest_i32_f32 v5, -1 +// GFX12: v_cvt_nearest_i32_f32_e32 v5, -1 ; encoding: [0xc1,0x18,0x0a,0x7e] + +v_cvt_nearest_i32_f32 v5, 0.5 +// GFX12: v_cvt_nearest_i32_f32_e32 v5, 0.5 ; encoding: 
[0xf0,0x18,0x0a,0x7e] + +v_cvt_nearest_i32_f32 v5, src_scc +// GFX12: v_cvt_nearest_i32_f32_e32 v5, src_scc ; encoding: [0xfd,0x18,0x0a,0x7e] + +v_cvt_nearest_i32_f32 v255, 0xaf123456 +// GFX12: v_cvt_nearest_i32_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x18,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_cvt_norm_i16_f16 v5, v1 +// GFX12: v_cvt_norm_i16_f16_e32 v5, v1 ; encoding: [0x01,0xc7,0x0a,0x7e] + +v_cvt_norm_i16_f16 v5, v127 +// GFX12: v_cvt_norm_i16_f16_e32 v5, v127 ; encoding: [0x7f,0xc7,0x0a,0x7e] + +v_cvt_norm_i16_f16 v5, s1 +// GFX12: v_cvt_norm_i16_f16_e32 v5, s1 ; encoding: [0x01,0xc6,0x0a,0x7e] + +v_cvt_norm_i16_f16 v5, s105 +// GFX12: v_cvt_norm_i16_f16_e32 v5, s105 ; encoding: [0x69,0xc6,0x0a,0x7e] + +v_cvt_norm_i16_f16 v5, vcc_lo +// GFX12: v_cvt_norm_i16_f16_e32 v5, vcc_lo ; encoding: [0x6a,0xc6,0x0a,0x7e] + +v_cvt_norm_i16_f16 v5, vcc_hi +// GFX12: v_cvt_norm_i16_f16_e32 v5, vcc_hi ; encoding: [0x6b,0xc6,0x0a,0x7e] + +v_cvt_norm_i16_f16 v5, ttmp15 +// GFX12: v_cvt_norm_i16_f16_e32 v5, ttmp15 ; encoding: [0x7b,0xc6,0x0a,0x7e] + +v_cvt_norm_i16_f16 v5, m0 +// GFX12: v_cvt_norm_i16_f16_e32 v5, m0 ; encoding: [0x7d,0xc6,0x0a,0x7e] + +v_cvt_norm_i16_f16 v5, exec_lo +// GFX12: v_cvt_norm_i16_f16_e32 v5, exec_lo ; encoding: [0x7e,0xc6,0x0a,0x7e] + +v_cvt_norm_i16_f16 v5, exec_hi +// GFX12: v_cvt_norm_i16_f16_e32 v5, exec_hi ; encoding: [0x7f,0xc6,0x0a,0x7e] + +v_cvt_norm_i16_f16 v5, null +// GFX12: v_cvt_norm_i16_f16_e32 v5, null ; encoding: [0x7c,0xc6,0x0a,0x7e] + +v_cvt_norm_i16_f16 v5, -1 +// GFX12: v_cvt_norm_i16_f16_e32 v5, -1 ; encoding: [0xc1,0xc6,0x0a,0x7e] + +v_cvt_norm_i16_f16 v5, 0.5 +// GFX12: v_cvt_norm_i16_f16_e32 v5, 0.5 ; encoding: [0xf0,0xc6,0x0a,0x7e] + +v_cvt_norm_i16_f16 v5, src_scc +// GFX12: v_cvt_norm_i16_f16_e32 v5, src_scc ; encoding: [0xfd,0xc6,0x0a,0x7e] + +v_cvt_norm_i16_f16 v127, 0xfe0b +// GFX12: v_cvt_norm_i16_f16_e32 v127, 0xfe0b ; encoding: [0xff,0xc6,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + +v_cvt_norm_u16_f16 v5, v1 +// GFX12: v_cvt_norm_u16_f16_e32 v5, v1 ; encoding: [0x01,0xc9,0x0a,0x7e] + +v_cvt_norm_u16_f16 v5, v127 +// GFX12: v_cvt_norm_u16_f16_e32 v5, v127 ; encoding: [0x7f,0xc9,0x0a,0x7e] + +v_cvt_norm_u16_f16 v5, s1 +// GFX12: v_cvt_norm_u16_f16_e32 v5, s1 ; encoding: [0x01,0xc8,0x0a,0x7e] + +v_cvt_norm_u16_f16 v5, s105 +// GFX12: v_cvt_norm_u16_f16_e32 v5, s105 ; encoding: [0x69,0xc8,0x0a,0x7e] + +v_cvt_norm_u16_f16 v5, vcc_lo +// GFX12: v_cvt_norm_u16_f16_e32 v5, vcc_lo ; encoding: [0x6a,0xc8,0x0a,0x7e] + +v_cvt_norm_u16_f16 v5, vcc_hi +// GFX12: v_cvt_norm_u16_f16_e32 v5, vcc_hi ; encoding: [0x6b,0xc8,0x0a,0x7e] + +v_cvt_norm_u16_f16 v5, ttmp15 +// GFX12: v_cvt_norm_u16_f16_e32 v5, ttmp15 ; encoding: [0x7b,0xc8,0x0a,0x7e] + +v_cvt_norm_u16_f16 v5, m0 +// GFX12: v_cvt_norm_u16_f16_e32 v5, m0 ; encoding: [0x7d,0xc8,0x0a,0x7e] + +v_cvt_norm_u16_f16 v5, exec_lo +// GFX12: v_cvt_norm_u16_f16_e32 v5, exec_lo ; encoding: [0x7e,0xc8,0x0a,0x7e] + +v_cvt_norm_u16_f16 v5, exec_hi +// GFX12: v_cvt_norm_u16_f16_e32 v5, exec_hi ; encoding: [0x7f,0xc8,0x0a,0x7e] + +v_cvt_norm_u16_f16 v5, null +// GFX12: v_cvt_norm_u16_f16_e32 v5, null ; encoding: [0x7c,0xc8,0x0a,0x7e] + +v_cvt_norm_u16_f16 v5, -1 +// GFX12: v_cvt_norm_u16_f16_e32 v5, -1 ; encoding: [0xc1,0xc8,0x0a,0x7e] + +v_cvt_norm_u16_f16 v5, 0.5 +// GFX12: v_cvt_norm_u16_f16_e32 v5, 0.5 ; encoding: [0xf0,0xc8,0x0a,0x7e] + +v_cvt_norm_u16_f16 v5, src_scc +// GFX12: v_cvt_norm_u16_f16_e32 v5, src_scc ; encoding: [0xfd,0xc8,0x0a,0x7e] + +v_cvt_norm_u16_f16 v127, 0xfe0b +// GFX12: v_cvt_norm_u16_f16_e32 v127, 
0xfe0b ; encoding: [0xff,0xc8,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + +v_cvt_off_f32_i4 v5, v1 +// GFX12: v_cvt_off_f32_i4_e32 v5, v1 ; encoding: [0x01,0x1d,0x0a,0x7e] + +v_cvt_off_f32_i4 v5, v255 +// GFX12: v_cvt_off_f32_i4_e32 v5, v255 ; encoding: [0xff,0x1d,0x0a,0x7e] + +v_cvt_off_f32_i4 v5, s1 +// GFX12: v_cvt_off_f32_i4_e32 v5, s1 ; encoding: [0x01,0x1c,0x0a,0x7e] + +v_cvt_off_f32_i4 v5, s105 +// GFX12: v_cvt_off_f32_i4_e32 v5, s105 ; encoding: [0x69,0x1c,0x0a,0x7e] + +v_cvt_off_f32_i4 v5, vcc_lo +// GFX12: v_cvt_off_f32_i4_e32 v5, vcc_lo ; encoding: [0x6a,0x1c,0x0a,0x7e] + +v_cvt_off_f32_i4 v5, vcc_hi +// GFX12: v_cvt_off_f32_i4_e32 v5, vcc_hi ; encoding: [0x6b,0x1c,0x0a,0x7e] + +v_cvt_off_f32_i4 v5, ttmp15 +// GFX12: v_cvt_off_f32_i4_e32 v5, ttmp15 ; encoding: [0x7b,0x1c,0x0a,0x7e] + +v_cvt_off_f32_i4 v5, m0 +// GFX12: v_cvt_off_f32_i4_e32 v5, m0 ; encoding: [0x7d,0x1c,0x0a,0x7e] + +v_cvt_off_f32_i4 v5, exec_lo +// GFX12: v_cvt_off_f32_i4_e32 v5, exec_lo ; encoding: [0x7e,0x1c,0x0a,0x7e] + +v_cvt_off_f32_i4 v5, exec_hi +// GFX12: v_cvt_off_f32_i4_e32 v5, exec_hi ; encoding: [0x7f,0x1c,0x0a,0x7e] + +v_cvt_off_f32_i4 v5, null +// GFX12: v_cvt_off_f32_i4_e32 v5, null ; encoding: [0x7c,0x1c,0x0a,0x7e] + +v_cvt_off_f32_i4 v5, -1 +// GFX12: v_cvt_off_f32_i4_e32 v5, -1 ; encoding: [0xc1,0x1c,0x0a,0x7e] + +v_cvt_off_f32_i4 v5, 0.5 +// GFX12: v_cvt_off_f32_i4_e32 v5, 0.5 ; encoding: [0xf0,0x1c,0x0a,0x7e] + +v_cvt_off_f32_i4 v5, src_scc +// GFX12: v_cvt_off_f32_i4_e32 v5, src_scc ; encoding: [0xfd,0x1c,0x0a,0x7e] + +v_cvt_off_f32_i4 v255, 0x4f +// GFX12: v_cvt_off_f32_i4_e32 v255, 0x4f ; encoding: [0xff,0x1c,0xfe,0x7f,0x4f,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32 v5, v1 +// GFX12: v_cvt_nearest_i32_f32_e32 v5, v1 ; encoding: [0x01,0x19,0x0a,0x7e] + +v_cvt_rpi_i32_f32 v5, v255 +// GFX12: v_cvt_nearest_i32_f32_e32 v5, v255 ; encoding: [0xff,0x19,0x0a,0x7e] + +v_cvt_rpi_i32_f32 v5, s1 +// GFX12: v_cvt_nearest_i32_f32_e32 v5, s1 ; encoding: [0x01,0x18,0x0a,0x7e] + +v_cvt_rpi_i32_f32 v5, s105 +// GFX12: v_cvt_nearest_i32_f32_e32 v5, s105 ; encoding: [0x69,0x18,0x0a,0x7e] + +v_cvt_rpi_i32_f32 v5, vcc_lo +// GFX12: v_cvt_nearest_i32_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x18,0x0a,0x7e] + +v_cvt_rpi_i32_f32 v5, vcc_hi +// GFX12: v_cvt_nearest_i32_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x18,0x0a,0x7e] + +v_cvt_rpi_i32_f32 v5, ttmp15 +// GFX12: v_cvt_nearest_i32_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x18,0x0a,0x7e] + +v_cvt_rpi_i32_f32 v5, m0 +// GFX12: v_cvt_nearest_i32_f32_e32 v5, m0 ; encoding: [0x7d,0x18,0x0a,0x7e] + +v_cvt_rpi_i32_f32 v5, exec_lo +// GFX12: v_cvt_nearest_i32_f32_e32 v5, exec_lo ; encoding: [0x7e,0x18,0x0a,0x7e] + +v_cvt_rpi_i32_f32 v5, exec_hi +// GFX12: v_cvt_nearest_i32_f32_e32 v5, exec_hi ; encoding: [0x7f,0x18,0x0a,0x7e] + +v_cvt_rpi_i32_f32 v5, null +// GFX12: v_cvt_nearest_i32_f32_e32 v5, null ; encoding: [0x7c,0x18,0x0a,0x7e] + +v_cvt_rpi_i32_f32 v5, -1 +// GFX12: v_cvt_nearest_i32_f32_e32 v5, -1 ; encoding: [0xc1,0x18,0x0a,0x7e] + +v_cvt_rpi_i32_f32 v5, 0.5 +// GFX12: v_cvt_nearest_i32_f32_e32 v5, 0.5 ; encoding: [0xf0,0x18,0x0a,0x7e] + +v_cvt_rpi_i32_f32 v5, src_scc +// GFX12: v_cvt_nearest_i32_f32_e32 v5, src_scc ; encoding: [0xfd,0x18,0x0a,0x7e] + +v_cvt_rpi_i32_f32 v255, 0xaf123456 +// GFX12: v_cvt_nearest_i32_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x18,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_cvt_u16_f16 v5, v1 +// GFX12: v_cvt_u16_f16_e32 v5, v1 ; encoding: [0x01,0xa5,0x0a,0x7e] + +v_cvt_u16_f16 v5, v127 +// GFX12: v_cvt_u16_f16_e32 v5, v127 ; encoding: [0x7f,0xa5,0x0a,0x7e] + 
+v_cvt_u16_f16 v5, s1 +// GFX12: v_cvt_u16_f16_e32 v5, s1 ; encoding: [0x01,0xa4,0x0a,0x7e] + +v_cvt_u16_f16 v5, s105 +// GFX12: v_cvt_u16_f16_e32 v5, s105 ; encoding: [0x69,0xa4,0x0a,0x7e] + +v_cvt_u16_f16 v5, vcc_lo +// GFX12: v_cvt_u16_f16_e32 v5, vcc_lo ; encoding: [0x6a,0xa4,0x0a,0x7e] + +v_cvt_u16_f16 v5, vcc_hi +// GFX12: v_cvt_u16_f16_e32 v5, vcc_hi ; encoding: [0x6b,0xa4,0x0a,0x7e] + +v_cvt_u16_f16 v5, ttmp15 +// GFX12: v_cvt_u16_f16_e32 v5, ttmp15 ; encoding: [0x7b,0xa4,0x0a,0x7e] + +v_cvt_u16_f16 v5, m0 +// GFX12: v_cvt_u16_f16_e32 v5, m0 ; encoding: [0x7d,0xa4,0x0a,0x7e] + +v_cvt_u16_f16 v5, exec_lo +// GFX12: v_cvt_u16_f16_e32 v5, exec_lo ; encoding: [0x7e,0xa4,0x0a,0x7e] + +v_cvt_u16_f16 v5, exec_hi +// GFX12: v_cvt_u16_f16_e32 v5, exec_hi ; encoding: [0x7f,0xa4,0x0a,0x7e] + +v_cvt_u16_f16 v5, null +// GFX12: v_cvt_u16_f16_e32 v5, null ; encoding: [0x7c,0xa4,0x0a,0x7e] + +v_cvt_u16_f16 v5, -1 +// GFX12: v_cvt_u16_f16_e32 v5, -1 ; encoding: [0xc1,0xa4,0x0a,0x7e] + +v_cvt_u16_f16 v5, 0.5 +// GFX12: v_cvt_u16_f16_e32 v5, 0.5 ; encoding: [0xf0,0xa4,0x0a,0x7e] + +v_cvt_u16_f16 v5, src_scc +// GFX12: v_cvt_u16_f16_e32 v5, src_scc ; encoding: [0xfd,0xa4,0x0a,0x7e] + +v_cvt_u16_f16 v127, 0xfe0b +// GFX12: v_cvt_u16_f16_e32 v127, 0xfe0b ; encoding: [0xff,0xa4,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + +v_cvt_u32_f32 v5, v1 +// GFX12: v_cvt_u32_f32_e32 v5, v1 ; encoding: [0x01,0x0f,0x0a,0x7e] + +v_cvt_u32_f32 v5, v255 +// GFX12: v_cvt_u32_f32_e32 v5, v255 ; encoding: [0xff,0x0f,0x0a,0x7e] + +v_cvt_u32_f32 v5, s1 +// GFX12: v_cvt_u32_f32_e32 v5, s1 ; encoding: [0x01,0x0e,0x0a,0x7e] + +v_cvt_u32_f32 v5, s105 +// GFX12: v_cvt_u32_f32_e32 v5, s105 ; encoding: [0x69,0x0e,0x0a,0x7e] + +v_cvt_u32_f32 v5, vcc_lo +// GFX12: v_cvt_u32_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x0e,0x0a,0x7e] + +v_cvt_u32_f32 v5, vcc_hi +// GFX12: v_cvt_u32_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x0e,0x0a,0x7e] + +v_cvt_u32_f32 v5, ttmp15 +// GFX12: v_cvt_u32_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x0e,0x0a,0x7e] + +v_cvt_u32_f32 v5, m0 +// GFX12: v_cvt_u32_f32_e32 v5, m0 ; encoding: [0x7d,0x0e,0x0a,0x7e] + +v_cvt_u32_f32 v5, exec_lo +// GFX12: v_cvt_u32_f32_e32 v5, exec_lo ; encoding: [0x7e,0x0e,0x0a,0x7e] + +v_cvt_u32_f32 v5, exec_hi +// GFX12: v_cvt_u32_f32_e32 v5, exec_hi ; encoding: [0x7f,0x0e,0x0a,0x7e] + +v_cvt_u32_f32 v5, null +// GFX12: v_cvt_u32_f32_e32 v5, null ; encoding: [0x7c,0x0e,0x0a,0x7e] + +v_cvt_u32_f32 v5, -1 +// GFX12: v_cvt_u32_f32_e32 v5, -1 ; encoding: [0xc1,0x0e,0x0a,0x7e] + +v_cvt_u32_f32 v5, 0.5 +// GFX12: v_cvt_u32_f32_e32 v5, 0.5 ; encoding: [0xf0,0x0e,0x0a,0x7e] + +v_cvt_u32_f32 v5, src_scc +// GFX12: v_cvt_u32_f32_e32 v5, src_scc ; encoding: [0xfd,0x0e,0x0a,0x7e] + +v_cvt_u32_f32 v255, 0xaf123456 +// GFX12: v_cvt_u32_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x0e,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_cvt_u32_f64 v5, v[1:2] +// GFX12: v_cvt_u32_f64_e32 v5, v[1:2] ; encoding: [0x01,0x2b,0x0a,0x7e] + +v_cvt_u32_f64 v5, v[254:255] +// GFX12: v_cvt_u32_f64_e32 v5, v[254:255] ; encoding: [0xfe,0x2b,0x0a,0x7e] + +v_cvt_u32_f64 v5, s[2:3] +// GFX12: v_cvt_u32_f64_e32 v5, s[2:3] ; encoding: [0x02,0x2a,0x0a,0x7e] + +v_cvt_u32_f64 v5, s[104:105] +// GFX12: v_cvt_u32_f64_e32 v5, s[104:105] ; encoding: [0x68,0x2a,0x0a,0x7e] + +v_cvt_u32_f64 v5, vcc +// GFX12: v_cvt_u32_f64_e32 v5, vcc ; encoding: [0x6a,0x2a,0x0a,0x7e] + +v_cvt_u32_f64 v5, ttmp[14:15] +// GFX12: v_cvt_u32_f64_e32 v5, ttmp[14:15] ; encoding: [0x7a,0x2a,0x0a,0x7e] + +v_cvt_u32_f64 v5, exec +// GFX12: v_cvt_u32_f64_e32 v5, exec ; encoding: 
[0x7e,0x2a,0x0a,0x7e] + +v_cvt_u32_f64 v5, null +// GFX12: v_cvt_u32_f64_e32 v5, null ; encoding: [0x7c,0x2a,0x0a,0x7e] + +v_cvt_u32_f64 v5, -1 +// GFX12: v_cvt_u32_f64_e32 v5, -1 ; encoding: [0xc1,0x2a,0x0a,0x7e] + +v_cvt_u32_f64 v5, 0.5 +// GFX12: v_cvt_u32_f64_e32 v5, 0.5 ; encoding: [0xf0,0x2a,0x0a,0x7e] + +v_cvt_u32_f64 v5, src_scc +// GFX12: v_cvt_u32_f64_e32 v5, src_scc ; encoding: [0xfd,0x2a,0x0a,0x7e] + +v_cvt_u32_f64 v255, 0xaf123456 +// GFX12: v_cvt_u32_f64_e32 v255, 0xaf123456 ; encoding: [0xff,0x2a,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_cvt_u32_u16 v5, v1 +// GFX12: v_cvt_u32_u16_e32 v5, v1 ; encoding: [0x01,0xd7,0x0a,0x7e] + +v_cvt_u32_u16 v5, v127 +// GFX12: v_cvt_u32_u16_e32 v5, v127 ; encoding: [0x7f,0xd7,0x0a,0x7e] + +v_cvt_u32_u16 v5, s1 +// GFX12: v_cvt_u32_u16_e32 v5, s1 ; encoding: [0x01,0xd6,0x0a,0x7e] + +v_cvt_u32_u16 v5, s105 +// GFX12: v_cvt_u32_u16_e32 v5, s105 ; encoding: [0x69,0xd6,0x0a,0x7e] + +v_cvt_u32_u16 v5, vcc_lo +// GFX12: v_cvt_u32_u16_e32 v5, vcc_lo ; encoding: [0x6a,0xd6,0x0a,0x7e] + +v_cvt_u32_u16 v5, vcc_hi +// GFX12: v_cvt_u32_u16_e32 v5, vcc_hi ; encoding: [0x6b,0xd6,0x0a,0x7e] + +v_cvt_u32_u16 v5, ttmp15 +// GFX12: v_cvt_u32_u16_e32 v5, ttmp15 ; encoding: [0x7b,0xd6,0x0a,0x7e] + +v_cvt_u32_u16 v5, m0 +// GFX12: v_cvt_u32_u16_e32 v5, m0 ; encoding: [0x7d,0xd6,0x0a,0x7e] + +v_cvt_u32_u16 v5, exec_lo +// GFX12: v_cvt_u32_u16_e32 v5, exec_lo ; encoding: [0x7e,0xd6,0x0a,0x7e] + +v_cvt_u32_u16 v5, exec_hi +// GFX12: v_cvt_u32_u16_e32 v5, exec_hi ; encoding: [0x7f,0xd6,0x0a,0x7e] + +v_cvt_u32_u16 v5, null +// GFX12: v_cvt_u32_u16_e32 v5, null ; encoding: [0x7c,0xd6,0x0a,0x7e] + +v_cvt_u32_u16 v5, -1 +// GFX12: v_cvt_u32_u16_e32 v5, -1 ; encoding: [0xc1,0xd6,0x0a,0x7e] + +v_cvt_u32_u16 v5, 0.5 +// GFX12-ASM: v_cvt_u32_u16_e32 v5, 0.5 ; encoding: [0xf0,0xd6,0x0a,0x7e] +// GFX12-DIS: v_cvt_u32_u16_e32 v5, 0x3800 ; encoding: [0xff,0xd6,0x0a,0x7e,0x00,0x38,0x00,0x00] + +v_cvt_u32_u16 v5, src_scc +// GFX12: v_cvt_u32_u16_e32 v5, src_scc ; encoding: [0xfd,0xd6,0x0a,0x7e] + +v_cvt_u32_u16 v255, 0xfe0b +// GFX12: v_cvt_u32_u16_e32 v255, 0xfe0b ; encoding: [0xff,0xd6,0xfe,0x7f,0x0b,0xfe,0x00,0x00] + +v_exp_f16 v5, v1 +// GFX12: v_exp_f16_e32 v5, v1 ; encoding: [0x01,0xb1,0x0a,0x7e] + +v_exp_f16 v5, v127 +// GFX12: v_exp_f16_e32 v5, v127 ; encoding: [0x7f,0xb1,0x0a,0x7e] + +v_exp_f16 v5, s1 +// GFX12: v_exp_f16_e32 v5, s1 ; encoding: [0x01,0xb0,0x0a,0x7e] + +v_exp_f16 v5, s105 +// GFX12: v_exp_f16_e32 v5, s105 ; encoding: [0x69,0xb0,0x0a,0x7e] + +v_exp_f16 v5, vcc_lo +// GFX12: v_exp_f16_e32 v5, vcc_lo ; encoding: [0x6a,0xb0,0x0a,0x7e] + +v_exp_f16 v5, vcc_hi +// GFX12: v_exp_f16_e32 v5, vcc_hi ; encoding: [0x6b,0xb0,0x0a,0x7e] + +v_exp_f16 v5, ttmp15 +// GFX12: v_exp_f16_e32 v5, ttmp15 ; encoding: [0x7b,0xb0,0x0a,0x7e] + +v_exp_f16 v5, m0 +// GFX12: v_exp_f16_e32 v5, m0 ; encoding: [0x7d,0xb0,0x0a,0x7e] + +v_exp_f16 v5, exec_lo +// GFX12: v_exp_f16_e32 v5, exec_lo ; encoding: [0x7e,0xb0,0x0a,0x7e] + +v_exp_f16 v5, exec_hi +// GFX12: v_exp_f16_e32 v5, exec_hi ; encoding: [0x7f,0xb0,0x0a,0x7e] + +v_exp_f16 v5, null +// GFX12: v_exp_f16_e32 v5, null ; encoding: [0x7c,0xb0,0x0a,0x7e] + +v_exp_f16 v5, -1 +// GFX12: v_exp_f16_e32 v5, -1 ; encoding: [0xc1,0xb0,0x0a,0x7e] + +v_exp_f16 v5, 0.5 +// GFX12: v_exp_f16_e32 v5, 0.5 ; encoding: [0xf0,0xb0,0x0a,0x7e] + +v_exp_f16 v5, src_scc +// GFX12: v_exp_f16_e32 v5, src_scc ; encoding: [0xfd,0xb0,0x0a,0x7e] + +v_exp_f16 v127, 0xfe0b +// GFX12: v_exp_f16_e32 v127, 0xfe0b ; encoding: [0xff,0xb0,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + 
+v_exp_f32 v5, v1 +// GFX12: v_exp_f32_e32 v5, v1 ; encoding: [0x01,0x4b,0x0a,0x7e] + +v_exp_f32 v5, v255 +// GFX12: v_exp_f32_e32 v5, v255 ; encoding: [0xff,0x4b,0x0a,0x7e] + +v_exp_f32 v5, s1 +// GFX12: v_exp_f32_e32 v5, s1 ; encoding: [0x01,0x4a,0x0a,0x7e] + +v_exp_f32 v5, s105 +// GFX12: v_exp_f32_e32 v5, s105 ; encoding: [0x69,0x4a,0x0a,0x7e] + +v_exp_f32 v5, vcc_lo +// GFX12: v_exp_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x4a,0x0a,0x7e] + +v_exp_f32 v5, vcc_hi +// GFX12: v_exp_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x4a,0x0a,0x7e] + +v_exp_f32 v5, ttmp15 +// GFX12: v_exp_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x4a,0x0a,0x7e] + +v_exp_f32 v5, m0 +// GFX12: v_exp_f32_e32 v5, m0 ; encoding: [0x7d,0x4a,0x0a,0x7e] + +v_exp_f32 v5, exec_lo +// GFX12: v_exp_f32_e32 v5, exec_lo ; encoding: [0x7e,0x4a,0x0a,0x7e] + +v_exp_f32 v5, exec_hi +// GFX12: v_exp_f32_e32 v5, exec_hi ; encoding: [0x7f,0x4a,0x0a,0x7e] + +v_exp_f32 v5, null +// GFX12: v_exp_f32_e32 v5, null ; encoding: [0x7c,0x4a,0x0a,0x7e] + +v_exp_f32 v5, -1 +// GFX12: v_exp_f32_e32 v5, -1 ; encoding: [0xc1,0x4a,0x0a,0x7e] + +v_exp_f32 v5, 0.5 +// GFX12: v_exp_f32_e32 v5, 0.5 ; encoding: [0xf0,0x4a,0x0a,0x7e] + +v_exp_f32 v5, src_scc +// GFX12: v_exp_f32_e32 v5, src_scc ; encoding: [0xfd,0x4a,0x0a,0x7e] + +v_exp_f32 v255, 0xaf123456 +// GFX12: v_exp_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x4a,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_ffbh_i32 v5, v1 +// GFX12: v_cls_i32_e32 v5, v1 ; encoding: [0x01,0x77,0x0a,0x7e] + +v_ffbh_i32 v5, v255 +// GFX12: v_cls_i32_e32 v5, v255 ; encoding: [0xff,0x77,0x0a,0x7e] + +v_ffbh_i32 v5, s1 +// GFX12: v_cls_i32_e32 v5, s1 ; encoding: [0x01,0x76,0x0a,0x7e] + +v_ffbh_i32 v5, s105 +// GFX12: v_cls_i32_e32 v5, s105 ; encoding: [0x69,0x76,0x0a,0x7e] + +v_ffbh_i32 v5, vcc_lo +// GFX12: v_cls_i32_e32 v5, vcc_lo ; encoding: [0x6a,0x76,0x0a,0x7e] + +v_ffbh_i32 v5, vcc_hi +// GFX12: v_cls_i32_e32 v5, vcc_hi ; encoding: [0x6b,0x76,0x0a,0x7e] + +v_ffbh_i32 v5, ttmp15 +// GFX12: v_cls_i32_e32 v5, ttmp15 ; encoding: [0x7b,0x76,0x0a,0x7e] + +v_ffbh_i32 v5, m0 +// GFX12: v_cls_i32_e32 v5, m0 ; encoding: [0x7d,0x76,0x0a,0x7e] + +v_ffbh_i32 v5, exec_lo +// GFX12: v_cls_i32_e32 v5, exec_lo ; encoding: [0x7e,0x76,0x0a,0x7e] + +v_ffbh_i32 v5, exec_hi +// GFX12: v_cls_i32_e32 v5, exec_hi ; encoding: [0x7f,0x76,0x0a,0x7e] + +v_ffbh_i32 v5, null +// GFX12: v_cls_i32_e32 v5, null ; encoding: [0x7c,0x76,0x0a,0x7e] + +v_ffbh_i32 v5, -1 +// GFX12: v_cls_i32_e32 v5, -1 ; encoding: [0xc1,0x76,0x0a,0x7e] + +v_ffbh_i32 v5, 0.5 +// GFX12: v_cls_i32_e32 v5, 0.5 ; encoding: [0xf0,0x76,0x0a,0x7e] + +v_ffbh_i32 v5, src_scc +// GFX12: v_cls_i32_e32 v5, src_scc ; encoding: [0xfd,0x76,0x0a,0x7e] + +v_ffbh_i32 v255, 0xaf123456 +// GFX12: v_cls_i32_e32 v255, 0xaf123456 ; encoding: [0xff,0x76,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_ffbh_u32 v5, v1 +// GFX12: v_clz_i32_u32_e32 v5, v1 ; encoding: [0x01,0x73,0x0a,0x7e] + +v_ffbh_u32 v5, v255 +// GFX12: v_clz_i32_u32_e32 v5, v255 ; encoding: [0xff,0x73,0x0a,0x7e] + +v_ffbh_u32 v5, s1 +// GFX12: v_clz_i32_u32_e32 v5, s1 ; encoding: [0x01,0x72,0x0a,0x7e] + +v_ffbh_u32 v5, s105 +// GFX12: v_clz_i32_u32_e32 v5, s105 ; encoding: [0x69,0x72,0x0a,0x7e] + +v_ffbh_u32 v5, vcc_lo +// GFX12: v_clz_i32_u32_e32 v5, vcc_lo ; encoding: [0x6a,0x72,0x0a,0x7e] + +v_ffbh_u32 v5, vcc_hi +// GFX12: v_clz_i32_u32_e32 v5, vcc_hi ; encoding: [0x6b,0x72,0x0a,0x7e] + +v_ffbh_u32 v5, ttmp15 +// GFX12: v_clz_i32_u32_e32 v5, ttmp15 ; encoding: [0x7b,0x72,0x0a,0x7e] + +v_ffbh_u32 v5, m0 +// GFX12: v_clz_i32_u32_e32 v5, m0 ; encoding: 
[0x7d,0x72,0x0a,0x7e] + +v_ffbh_u32 v5, exec_lo +// GFX12: v_clz_i32_u32_e32 v5, exec_lo ; encoding: [0x7e,0x72,0x0a,0x7e] + +v_ffbh_u32 v5, exec_hi +// GFX12: v_clz_i32_u32_e32 v5, exec_hi ; encoding: [0x7f,0x72,0x0a,0x7e] + +v_ffbh_u32 v5, null +// GFX12: v_clz_i32_u32_e32 v5, null ; encoding: [0x7c,0x72,0x0a,0x7e] + +v_ffbh_u32 v5, -1 +// GFX12: v_clz_i32_u32_e32 v5, -1 ; encoding: [0xc1,0x72,0x0a,0x7e] + +v_ffbh_u32 v5, 0.5 +// GFX12: v_clz_i32_u32_e32 v5, 0.5 ; encoding: [0xf0,0x72,0x0a,0x7e] + +v_ffbh_u32 v5, src_scc +// GFX12: v_clz_i32_u32_e32 v5, src_scc ; encoding: [0xfd,0x72,0x0a,0x7e] + +v_ffbh_u32 v255, 0xaf123456 +// GFX12: v_clz_i32_u32_e32 v255, 0xaf123456 ; encoding: [0xff,0x72,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_ffbl_b32 v5, v1 +// GFX12: v_ctz_i32_b32_e32 v5, v1 ; encoding: [0x01,0x75,0x0a,0x7e] + +v_ffbl_b32 v5, v255 +// GFX12: v_ctz_i32_b32_e32 v5, v255 ; encoding: [0xff,0x75,0x0a,0x7e] + +v_ffbl_b32 v5, s1 +// GFX12: v_ctz_i32_b32_e32 v5, s1 ; encoding: [0x01,0x74,0x0a,0x7e] + +v_ffbl_b32 v5, s105 +// GFX12: v_ctz_i32_b32_e32 v5, s105 ; encoding: [0x69,0x74,0x0a,0x7e] + +v_ffbl_b32 v5, vcc_lo +// GFX12: v_ctz_i32_b32_e32 v5, vcc_lo ; encoding: [0x6a,0x74,0x0a,0x7e] + +v_ffbl_b32 v5, vcc_hi +// GFX12: v_ctz_i32_b32_e32 v5, vcc_hi ; encoding: [0x6b,0x74,0x0a,0x7e] + +v_ffbl_b32 v5, ttmp15 +// GFX12: v_ctz_i32_b32_e32 v5, ttmp15 ; encoding: [0x7b,0x74,0x0a,0x7e] + +v_ffbl_b32 v5, m0 +// GFX12: v_ctz_i32_b32_e32 v5, m0 ; encoding: [0x7d,0x74,0x0a,0x7e] + +v_ffbl_b32 v5, exec_lo +// GFX12: v_ctz_i32_b32_e32 v5, exec_lo ; encoding: [0x7e,0x74,0x0a,0x7e] + +v_ffbl_b32 v5, exec_hi +// GFX12: v_ctz_i32_b32_e32 v5, exec_hi ; encoding: [0x7f,0x74,0x0a,0x7e] + +v_ffbl_b32 v5, null +// GFX12: v_ctz_i32_b32_e32 v5, null ; encoding: [0x7c,0x74,0x0a,0x7e] + +v_ffbl_b32 v5, -1 +// GFX12: v_ctz_i32_b32_e32 v5, -1 ; encoding: [0xc1,0x74,0x0a,0x7e] + +v_ffbl_b32 v5, 0.5 +// GFX12: v_ctz_i32_b32_e32 v5, 0.5 ; encoding: [0xf0,0x74,0x0a,0x7e] + +v_ffbl_b32 v5, src_scc +// GFX12: v_ctz_i32_b32_e32 v5, src_scc ; encoding: [0xfd,0x74,0x0a,0x7e] + +v_ffbl_b32 v255, 0xaf123456 +// GFX12: v_ctz_i32_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0x74,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_floor_f16 v5, v1 +// GFX12: v_floor_f16_e32 v5, v1 ; encoding: [0x01,0xb7,0x0a,0x7e] + +v_floor_f16 v5, v127 +// GFX12: v_floor_f16_e32 v5, v127 ; encoding: [0x7f,0xb7,0x0a,0x7e] + +v_floor_f16 v5, s1 +// GFX12: v_floor_f16_e32 v5, s1 ; encoding: [0x01,0xb6,0x0a,0x7e] + +v_floor_f16 v5, s105 +// GFX12: v_floor_f16_e32 v5, s105 ; encoding: [0x69,0xb6,0x0a,0x7e] + +v_floor_f16 v5, vcc_lo +// GFX12: v_floor_f16_e32 v5, vcc_lo ; encoding: [0x6a,0xb6,0x0a,0x7e] + +v_floor_f16 v5, vcc_hi +// GFX12: v_floor_f16_e32 v5, vcc_hi ; encoding: [0x6b,0xb6,0x0a,0x7e] + +v_floor_f16 v5, ttmp15 +// GFX12: v_floor_f16_e32 v5, ttmp15 ; encoding: [0x7b,0xb6,0x0a,0x7e] + +v_floor_f16 v5, m0 +// GFX12: v_floor_f16_e32 v5, m0 ; encoding: [0x7d,0xb6,0x0a,0x7e] + +v_floor_f16 v5, exec_lo +// GFX12: v_floor_f16_e32 v5, exec_lo ; encoding: [0x7e,0xb6,0x0a,0x7e] + +v_floor_f16 v5, exec_hi +// GFX12: v_floor_f16_e32 v5, exec_hi ; encoding: [0x7f,0xb6,0x0a,0x7e] + +v_floor_f16 v5, null +// GFX12: v_floor_f16_e32 v5, null ; encoding: [0x7c,0xb6,0x0a,0x7e] + +v_floor_f16 v5, -1 +// GFX12: v_floor_f16_e32 v5, -1 ; encoding: [0xc1,0xb6,0x0a,0x7e] + +v_floor_f16 v5, 0.5 +// GFX12: v_floor_f16_e32 v5, 0.5 ; encoding: [0xf0,0xb6,0x0a,0x7e] + +v_floor_f16 v5, src_scc +// GFX12: v_floor_f16_e32 v5, src_scc ; encoding: [0xfd,0xb6,0x0a,0x7e] + +v_floor_f16 
v127, 0xfe0b +// GFX12: v_floor_f16_e32 v127, 0xfe0b ; encoding: [0xff,0xb6,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + +v_floor_f32 v5, v1 +// GFX12: v_floor_f32_e32 v5, v1 ; encoding: [0x01,0x49,0x0a,0x7e] + +v_floor_f32 v5, v255 +// GFX12: v_floor_f32_e32 v5, v255 ; encoding: [0xff,0x49,0x0a,0x7e] + +v_floor_f32 v5, s1 +// GFX12: v_floor_f32_e32 v5, s1 ; encoding: [0x01,0x48,0x0a,0x7e] + +v_floor_f32 v5, s105 +// GFX12: v_floor_f32_e32 v5, s105 ; encoding: [0x69,0x48,0x0a,0x7e] + +v_floor_f32 v5, vcc_lo +// GFX12: v_floor_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x48,0x0a,0x7e] + +v_floor_f32 v5, vcc_hi +// GFX12: v_floor_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x48,0x0a,0x7e] + +v_floor_f32 v5, ttmp15 +// GFX12: v_floor_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x48,0x0a,0x7e] + +v_floor_f32 v5, m0 +// GFX12: v_floor_f32_e32 v5, m0 ; encoding: [0x7d,0x48,0x0a,0x7e] + +v_floor_f32 v5, exec_lo +// GFX12: v_floor_f32_e32 v5, exec_lo ; encoding: [0x7e,0x48,0x0a,0x7e] + +v_floor_f32 v5, exec_hi +// GFX12: v_floor_f32_e32 v5, exec_hi ; encoding: [0x7f,0x48,0x0a,0x7e] + +v_floor_f32 v5, null +// GFX12: v_floor_f32_e32 v5, null ; encoding: [0x7c,0x48,0x0a,0x7e] + +v_floor_f32 v5, -1 +// GFX12: v_floor_f32_e32 v5, -1 ; encoding: [0xc1,0x48,0x0a,0x7e] + +v_floor_f32 v5, 0.5 +// GFX12: v_floor_f32_e32 v5, 0.5 ; encoding: [0xf0,0x48,0x0a,0x7e] + +v_floor_f32 v5, src_scc +// GFX12: v_floor_f32_e32 v5, src_scc ; encoding: [0xfd,0x48,0x0a,0x7e] + +v_floor_f32 v255, 0xaf123456 +// GFX12: v_floor_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x48,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_floor_f64 v[5:6], v[1:2] +// GFX12: v_floor_f64_e32 v[5:6], v[1:2] ; encoding: [0x01,0x35,0x0a,0x7e] + +v_floor_f64 v[5:6], v[254:255] +// GFX12: v_floor_f64_e32 v[5:6], v[254:255] ; encoding: [0xfe,0x35,0x0a,0x7e] + +v_floor_f64 v[5:6], s[2:3] +// GFX12: v_floor_f64_e32 v[5:6], s[2:3] ; encoding: [0x02,0x34,0x0a,0x7e] + +v_floor_f64 v[5:6], s[104:105] +// GFX12: v_floor_f64_e32 v[5:6], s[104:105] ; encoding: [0x68,0x34,0x0a,0x7e] + +v_floor_f64 v[5:6], vcc +// GFX12: v_floor_f64_e32 v[5:6], vcc ; encoding: [0x6a,0x34,0x0a,0x7e] + +v_floor_f64 v[5:6], ttmp[14:15] +// GFX12: v_floor_f64_e32 v[5:6], ttmp[14:15] ; encoding: [0x7a,0x34,0x0a,0x7e] + +v_floor_f64 v[5:6], exec +// GFX12: v_floor_f64_e32 v[5:6], exec ; encoding: [0x7e,0x34,0x0a,0x7e] + +v_floor_f64 v[5:6], null +// GFX12: v_floor_f64_e32 v[5:6], null ; encoding: [0x7c,0x34,0x0a,0x7e] + +v_floor_f64 v[5:6], -1 +// GFX12: v_floor_f64_e32 v[5:6], -1 ; encoding: [0xc1,0x34,0x0a,0x7e] + +v_floor_f64 v[5:6], 0.5 +// GFX12: v_floor_f64_e32 v[5:6], 0.5 ; encoding: [0xf0,0x34,0x0a,0x7e] + +v_floor_f64 v[5:6], src_scc +// GFX12: v_floor_f64_e32 v[5:6], src_scc ; encoding: [0xfd,0x34,0x0a,0x7e] + +v_floor_f64 v[254:255], 0xaf123456 +// GFX12: v_floor_f64_e32 v[254:255], 0xaf123456 ; encoding: [0xff,0x34,0xfc,0x7f,0x56,0x34,0x12,0xaf] + +v_fract_f16 v5, v1 +// GFX12: v_fract_f16_e32 v5, v1 ; encoding: [0x01,0xbf,0x0a,0x7e] + +v_fract_f16 v5, v127 +// GFX12: v_fract_f16_e32 v5, v127 ; encoding: [0x7f,0xbf,0x0a,0x7e] + +v_fract_f16 v5, s1 +// GFX12: v_fract_f16_e32 v5, s1 ; encoding: [0x01,0xbe,0x0a,0x7e] + +v_fract_f16 v5, s105 +// GFX12: v_fract_f16_e32 v5, s105 ; encoding: [0x69,0xbe,0x0a,0x7e] + +v_fract_f16 v5, vcc_lo +// GFX12: v_fract_f16_e32 v5, vcc_lo ; encoding: [0x6a,0xbe,0x0a,0x7e] + +v_fract_f16 v5, vcc_hi +// GFX12: v_fract_f16_e32 v5, vcc_hi ; encoding: [0x6b,0xbe,0x0a,0x7e] + +v_fract_f16 v5, ttmp15 +// GFX12: v_fract_f16_e32 v5, ttmp15 ; encoding: [0x7b,0xbe,0x0a,0x7e] + +v_fract_f16 v5, 
m0 +// GFX12: v_fract_f16_e32 v5, m0 ; encoding: [0x7d,0xbe,0x0a,0x7e] + +v_fract_f16 v5, exec_lo +// GFX12: v_fract_f16_e32 v5, exec_lo ; encoding: [0x7e,0xbe,0x0a,0x7e] + +v_fract_f16 v5, exec_hi +// GFX12: v_fract_f16_e32 v5, exec_hi ; encoding: [0x7f,0xbe,0x0a,0x7e] + +v_fract_f16 v5, null +// GFX12: v_fract_f16_e32 v5, null ; encoding: [0x7c,0xbe,0x0a,0x7e] + +v_fract_f16 v5, -1 +// GFX12: v_fract_f16_e32 v5, -1 ; encoding: [0xc1,0xbe,0x0a,0x7e] + +v_fract_f16 v5, 0.5 +// GFX12: v_fract_f16_e32 v5, 0.5 ; encoding: [0xf0,0xbe,0x0a,0x7e] + +v_fract_f16 v5, src_scc +// GFX12: v_fract_f16_e32 v5, src_scc ; encoding: [0xfd,0xbe,0x0a,0x7e] + +v_fract_f16 v127, 0xfe0b +// GFX12: v_fract_f16_e32 v127, 0xfe0b ; encoding: [0xff,0xbe,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + +v_fract_f32 v5, v1 +// GFX12: v_fract_f32_e32 v5, v1 ; encoding: [0x01,0x41,0x0a,0x7e] + +v_fract_f32 v5, v255 +// GFX12: v_fract_f32_e32 v5, v255 ; encoding: [0xff,0x41,0x0a,0x7e] + +v_fract_f32 v5, s1 +// GFX12: v_fract_f32_e32 v5, s1 ; encoding: [0x01,0x40,0x0a,0x7e] + +v_fract_f32 v5, s105 +// GFX12: v_fract_f32_e32 v5, s105 ; encoding: [0x69,0x40,0x0a,0x7e] + +v_fract_f32 v5, vcc_lo +// GFX12: v_fract_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x40,0x0a,0x7e] + +v_fract_f32 v5, vcc_hi +// GFX12: v_fract_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x40,0x0a,0x7e] + +v_fract_f32 v5, ttmp15 +// GFX12: v_fract_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x40,0x0a,0x7e] + +v_fract_f32 v5, m0 +// GFX12: v_fract_f32_e32 v5, m0 ; encoding: [0x7d,0x40,0x0a,0x7e] + +v_fract_f32 v5, exec_lo +// GFX12: v_fract_f32_e32 v5, exec_lo ; encoding: [0x7e,0x40,0x0a,0x7e] + +v_fract_f32 v5, exec_hi +// GFX12: v_fract_f32_e32 v5, exec_hi ; encoding: [0x7f,0x40,0x0a,0x7e] + +v_fract_f32 v5, null +// GFX12: v_fract_f32_e32 v5, null ; encoding: [0x7c,0x40,0x0a,0x7e] + +v_fract_f32 v5, -1 +// GFX12: v_fract_f32_e32 v5, -1 ; encoding: [0xc1,0x40,0x0a,0x7e] + +v_fract_f32 v5, 0.5 +// GFX12: v_fract_f32_e32 v5, 0.5 ; encoding: [0xf0,0x40,0x0a,0x7e] + +v_fract_f32 v5, src_scc +// GFX12: v_fract_f32_e32 v5, src_scc ; encoding: [0xfd,0x40,0x0a,0x7e] + +v_fract_f32 v255, 0xaf123456 +// GFX12: v_fract_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x40,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_fract_f64 v[5:6], v[1:2] +// GFX12: v_fract_f64_e32 v[5:6], v[1:2] ; encoding: [0x01,0x7d,0x0a,0x7e] + +v_fract_f64 v[5:6], v[254:255] +// GFX12: v_fract_f64_e32 v[5:6], v[254:255] ; encoding: [0xfe,0x7d,0x0a,0x7e] + +v_fract_f64 v[5:6], s[2:3] +// GFX12: v_fract_f64_e32 v[5:6], s[2:3] ; encoding: [0x02,0x7c,0x0a,0x7e] + +v_fract_f64 v[5:6], s[104:105] +// GFX12: v_fract_f64_e32 v[5:6], s[104:105] ; encoding: [0x68,0x7c,0x0a,0x7e] + +v_fract_f64 v[5:6], vcc +// GFX12: v_fract_f64_e32 v[5:6], vcc ; encoding: [0x6a,0x7c,0x0a,0x7e] + +v_fract_f64 v[5:6], ttmp[14:15] +// GFX12: v_fract_f64_e32 v[5:6], ttmp[14:15] ; encoding: [0x7a,0x7c,0x0a,0x7e] + +v_fract_f64 v[5:6], exec +// GFX12: v_fract_f64_e32 v[5:6], exec ; encoding: [0x7e,0x7c,0x0a,0x7e] + +v_fract_f64 v[5:6], null +// GFX12: v_fract_f64_e32 v[5:6], null ; encoding: [0x7c,0x7c,0x0a,0x7e] + +v_fract_f64 v[5:6], -1 +// GFX12: v_fract_f64_e32 v[5:6], -1 ; encoding: [0xc1,0x7c,0x0a,0x7e] + +v_fract_f64 v[5:6], 0.5 +// GFX12: v_fract_f64_e32 v[5:6], 0.5 ; encoding: [0xf0,0x7c,0x0a,0x7e] + +v_fract_f64 v[5:6], src_scc +// GFX12: v_fract_f64_e32 v[5:6], src_scc ; encoding: [0xfd,0x7c,0x0a,0x7e] + +v_fract_f64 v[254:255], 0xaf123456 +// GFX12: v_fract_f64_e32 v[254:255], 0xaf123456 ; encoding: [0xff,0x7c,0xfc,0x7f,0x56,0x34,0x12,0xaf] + 
+v_frexp_exp_i16_f16 v5, v1 +// GFX12: v_frexp_exp_i16_f16_e32 v5, v1 ; encoding: [0x01,0xb5,0x0a,0x7e] + +v_frexp_exp_i16_f16 v5, v127 +// GFX12: v_frexp_exp_i16_f16_e32 v5, v127 ; encoding: [0x7f,0xb5,0x0a,0x7e] + +v_frexp_exp_i16_f16 v5, s1 +// GFX12: v_frexp_exp_i16_f16_e32 v5, s1 ; encoding: [0x01,0xb4,0x0a,0x7e] + +v_frexp_exp_i16_f16 v5, s105 +// GFX12: v_frexp_exp_i16_f16_e32 v5, s105 ; encoding: [0x69,0xb4,0x0a,0x7e] + +v_frexp_exp_i16_f16 v5, vcc_lo +// GFX12: v_frexp_exp_i16_f16_e32 v5, vcc_lo ; encoding: [0x6a,0xb4,0x0a,0x7e] + +v_frexp_exp_i16_f16 v5, vcc_hi +// GFX12: v_frexp_exp_i16_f16_e32 v5, vcc_hi ; encoding: [0x6b,0xb4,0x0a,0x7e] + +v_frexp_exp_i16_f16 v5, ttmp15 +// GFX12: v_frexp_exp_i16_f16_e32 v5, ttmp15 ; encoding: [0x7b,0xb4,0x0a,0x7e] + +v_frexp_exp_i16_f16 v5, m0 +// GFX12: v_frexp_exp_i16_f16_e32 v5, m0 ; encoding: [0x7d,0xb4,0x0a,0x7e] + +v_frexp_exp_i16_f16 v5, exec_lo +// GFX12: v_frexp_exp_i16_f16_e32 v5, exec_lo ; encoding: [0x7e,0xb4,0x0a,0x7e] + +v_frexp_exp_i16_f16 v5, exec_hi +// GFX12: v_frexp_exp_i16_f16_e32 v5, exec_hi ; encoding: [0x7f,0xb4,0x0a,0x7e] + +v_frexp_exp_i16_f16 v5, null +// GFX12: v_frexp_exp_i16_f16_e32 v5, null ; encoding: [0x7c,0xb4,0x0a,0x7e] + +v_frexp_exp_i16_f16 v5, -1 +// GFX12: v_frexp_exp_i16_f16_e32 v5, -1 ; encoding: [0xc1,0xb4,0x0a,0x7e] + +v_frexp_exp_i16_f16 v5, 0.5 +// GFX12: v_frexp_exp_i16_f16_e32 v5, 0.5 ; encoding: [0xf0,0xb4,0x0a,0x7e] + +v_frexp_exp_i16_f16 v5, src_scc +// GFX12: v_frexp_exp_i16_f16_e32 v5, src_scc ; encoding: [0xfd,0xb4,0x0a,0x7e] + +v_frexp_exp_i16_f16 v127, 0xfe0b +// GFX12: v_frexp_exp_i16_f16_e32 v127, 0xfe0b ; encoding: [0xff,0xb4,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + +v_frexp_exp_i32_f32 v5, v1 +// GFX12: v_frexp_exp_i32_f32_e32 v5, v1 ; encoding: [0x01,0x7f,0x0a,0x7e] + +v_frexp_exp_i32_f32 v5, v255 +// GFX12: v_frexp_exp_i32_f32_e32 v5, v255 ; encoding: [0xff,0x7f,0x0a,0x7e] + +v_frexp_exp_i32_f32 v5, s1 +// GFX12: v_frexp_exp_i32_f32_e32 v5, s1 ; encoding: [0x01,0x7e,0x0a,0x7e] + +v_frexp_exp_i32_f32 v5, s105 +// GFX12: v_frexp_exp_i32_f32_e32 v5, s105 ; encoding: [0x69,0x7e,0x0a,0x7e] + +v_frexp_exp_i32_f32 v5, vcc_lo +// GFX12: v_frexp_exp_i32_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x7e,0x0a,0x7e] + +v_frexp_exp_i32_f32 v5, vcc_hi +// GFX12: v_frexp_exp_i32_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x7e,0x0a,0x7e] + +v_frexp_exp_i32_f32 v5, ttmp15 +// GFX12: v_frexp_exp_i32_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x7e,0x0a,0x7e] + +v_frexp_exp_i32_f32 v5, m0 +// GFX12: v_frexp_exp_i32_f32_e32 v5, m0 ; encoding: [0x7d,0x7e,0x0a,0x7e] + +v_frexp_exp_i32_f32 v5, exec_lo +// GFX12: v_frexp_exp_i32_f32_e32 v5, exec_lo ; encoding: [0x7e,0x7e,0x0a,0x7e] + +v_frexp_exp_i32_f32 v5, exec_hi +// GFX12: v_frexp_exp_i32_f32_e32 v5, exec_hi ; encoding: [0x7f,0x7e,0x0a,0x7e] + +v_frexp_exp_i32_f32 v5, null +// GFX12: v_frexp_exp_i32_f32_e32 v5, null ; encoding: [0x7c,0x7e,0x0a,0x7e] + +v_frexp_exp_i32_f32 v5, -1 +// GFX12: v_frexp_exp_i32_f32_e32 v5, -1 ; encoding: [0xc1,0x7e,0x0a,0x7e] + +v_frexp_exp_i32_f32 v5, 0.5 +// GFX12: v_frexp_exp_i32_f32_e32 v5, 0.5 ; encoding: [0xf0,0x7e,0x0a,0x7e] + +v_frexp_exp_i32_f32 v5, src_scc +// GFX12: v_frexp_exp_i32_f32_e32 v5, src_scc ; encoding: [0xfd,0x7e,0x0a,0x7e] + +v_frexp_exp_i32_f32 v255, 0xaf123456 +// GFX12: v_frexp_exp_i32_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x7e,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_frexp_exp_i32_f64 v5, v[1:2] +// GFX12: v_frexp_exp_i32_f64_e32 v5, v[1:2] ; encoding: [0x01,0x79,0x0a,0x7e] + +v_frexp_exp_i32_f64 v5, v[254:255] +// GFX12: 
v_frexp_exp_i32_f64_e32 v5, v[254:255] ; encoding: [0xfe,0x79,0x0a,0x7e] + +v_frexp_exp_i32_f64 v5, s[2:3] +// GFX12: v_frexp_exp_i32_f64_e32 v5, s[2:3] ; encoding: [0x02,0x78,0x0a,0x7e] + +v_frexp_exp_i32_f64 v5, s[104:105] +// GFX12: v_frexp_exp_i32_f64_e32 v5, s[104:105] ; encoding: [0x68,0x78,0x0a,0x7e] + +v_frexp_exp_i32_f64 v5, vcc +// GFX12: v_frexp_exp_i32_f64_e32 v5, vcc ; encoding: [0x6a,0x78,0x0a,0x7e] + +v_frexp_exp_i32_f64 v5, ttmp[14:15] +// GFX12: v_frexp_exp_i32_f64_e32 v5, ttmp[14:15] ; encoding: [0x7a,0x78,0x0a,0x7e] + +v_frexp_exp_i32_f64 v5, exec +// GFX12: v_frexp_exp_i32_f64_e32 v5, exec ; encoding: [0x7e,0x78,0x0a,0x7e] + +v_frexp_exp_i32_f64 v5, null +// GFX12: v_frexp_exp_i32_f64_e32 v5, null ; encoding: [0x7c,0x78,0x0a,0x7e] + +v_frexp_exp_i32_f64 v5, -1 +// GFX12: v_frexp_exp_i32_f64_e32 v5, -1 ; encoding: [0xc1,0x78,0x0a,0x7e] + +v_frexp_exp_i32_f64 v5, 0.5 +// GFX12: v_frexp_exp_i32_f64_e32 v5, 0.5 ; encoding: [0xf0,0x78,0x0a,0x7e] + +v_frexp_exp_i32_f64 v5, src_scc +// GFX12: v_frexp_exp_i32_f64_e32 v5, src_scc ; encoding: [0xfd,0x78,0x0a,0x7e] + +v_frexp_exp_i32_f64 v255, 0xaf123456 +// GFX12: v_frexp_exp_i32_f64_e32 v255, 0xaf123456 ; encoding: [0xff,0x78,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_frexp_mant_f16 v5, v1 +// GFX12: v_frexp_mant_f16_e32 v5, v1 ; encoding: [0x01,0xb3,0x0a,0x7e] + +v_frexp_mant_f16 v5, v127 +// GFX12: v_frexp_mant_f16_e32 v5, v127 ; encoding: [0x7f,0xb3,0x0a,0x7e] + +v_frexp_mant_f16 v5, s1 +// GFX12: v_frexp_mant_f16_e32 v5, s1 ; encoding: [0x01,0xb2,0x0a,0x7e] + +v_frexp_mant_f16 v5, s105 +// GFX12: v_frexp_mant_f16_e32 v5, s105 ; encoding: [0x69,0xb2,0x0a,0x7e] + +v_frexp_mant_f16 v5, vcc_lo +// GFX12: v_frexp_mant_f16_e32 v5, vcc_lo ; encoding: [0x6a,0xb2,0x0a,0x7e] + +v_frexp_mant_f16 v5, vcc_hi +// GFX12: v_frexp_mant_f16_e32 v5, vcc_hi ; encoding: [0x6b,0xb2,0x0a,0x7e] + +v_frexp_mant_f16 v5, ttmp15 +// GFX12: v_frexp_mant_f16_e32 v5, ttmp15 ; encoding: [0x7b,0xb2,0x0a,0x7e] + +v_frexp_mant_f16 v5, m0 +// GFX12: v_frexp_mant_f16_e32 v5, m0 ; encoding: [0x7d,0xb2,0x0a,0x7e] + +v_frexp_mant_f16 v5, exec_lo +// GFX12: v_frexp_mant_f16_e32 v5, exec_lo ; encoding: [0x7e,0xb2,0x0a,0x7e] + +v_frexp_mant_f16 v5, exec_hi +// GFX12: v_frexp_mant_f16_e32 v5, exec_hi ; encoding: [0x7f,0xb2,0x0a,0x7e] + +v_frexp_mant_f16 v5, null +// GFX12: v_frexp_mant_f16_e32 v5, null ; encoding: [0x7c,0xb2,0x0a,0x7e] + +v_frexp_mant_f16 v5, -1 +// GFX12: v_frexp_mant_f16_e32 v5, -1 ; encoding: [0xc1,0xb2,0x0a,0x7e] + +v_frexp_mant_f16 v5, 0.5 +// GFX12: v_frexp_mant_f16_e32 v5, 0.5 ; encoding: [0xf0,0xb2,0x0a,0x7e] + +v_frexp_mant_f16 v5, src_scc +// GFX12: v_frexp_mant_f16_e32 v5, src_scc ; encoding: [0xfd,0xb2,0x0a,0x7e] + +v_frexp_mant_f16 v127, 0xfe0b +// GFX12: v_frexp_mant_f16_e32 v127, 0xfe0b ; encoding: [0xff,0xb2,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + +v_frexp_mant_f32 v5, v1 +// GFX12: v_frexp_mant_f32_e32 v5, v1 ; encoding: [0x01,0x81,0x0a,0x7e] + +v_frexp_mant_f32 v5, v255 +// GFX12: v_frexp_mant_f32_e32 v5, v255 ; encoding: [0xff,0x81,0x0a,0x7e] + +v_frexp_mant_f32 v5, s1 +// GFX12: v_frexp_mant_f32_e32 v5, s1 ; encoding: [0x01,0x80,0x0a,0x7e] + +v_frexp_mant_f32 v5, s105 +// GFX12: v_frexp_mant_f32_e32 v5, s105 ; encoding: [0x69,0x80,0x0a,0x7e] + +v_frexp_mant_f32 v5, vcc_lo +// GFX12: v_frexp_mant_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x80,0x0a,0x7e] + +v_frexp_mant_f32 v5, vcc_hi +// GFX12: v_frexp_mant_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x80,0x0a,0x7e] + +v_frexp_mant_f32 v5, ttmp15 +// GFX12: v_frexp_mant_f32_e32 v5, ttmp15 ; encoding: 
[0x7b,0x80,0x0a,0x7e] + +v_frexp_mant_f32 v5, m0 +// GFX12: v_frexp_mant_f32_e32 v5, m0 ; encoding: [0x7d,0x80,0x0a,0x7e] + +v_frexp_mant_f32 v5, exec_lo +// GFX12: v_frexp_mant_f32_e32 v5, exec_lo ; encoding: [0x7e,0x80,0x0a,0x7e] + +v_frexp_mant_f32 v5, exec_hi +// GFX12: v_frexp_mant_f32_e32 v5, exec_hi ; encoding: [0x7f,0x80,0x0a,0x7e] + +v_frexp_mant_f32 v5, null +// GFX12: v_frexp_mant_f32_e32 v5, null ; encoding: [0x7c,0x80,0x0a,0x7e] + +v_frexp_mant_f32 v5, -1 +// GFX12: v_frexp_mant_f32_e32 v5, -1 ; encoding: [0xc1,0x80,0x0a,0x7e] + +v_frexp_mant_f32 v5, 0.5 +// GFX12: v_frexp_mant_f32_e32 v5, 0.5 ; encoding: [0xf0,0x80,0x0a,0x7e] + +v_frexp_mant_f32 v5, src_scc +// GFX12: v_frexp_mant_f32_e32 v5, src_scc ; encoding: [0xfd,0x80,0x0a,0x7e] + +v_frexp_mant_f32 v255, 0xaf123456 +// GFX12: v_frexp_mant_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x80,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_frexp_mant_f64 v[5:6], v[1:2] +// GFX12: v_frexp_mant_f64_e32 v[5:6], v[1:2] ; encoding: [0x01,0x7b,0x0a,0x7e] + +v_frexp_mant_f64 v[5:6], v[254:255] +// GFX12: v_frexp_mant_f64_e32 v[5:6], v[254:255] ; encoding: [0xfe,0x7b,0x0a,0x7e] + +v_frexp_mant_f64 v[5:6], s[2:3] +// GFX12: v_frexp_mant_f64_e32 v[5:6], s[2:3] ; encoding: [0x02,0x7a,0x0a,0x7e] + +v_frexp_mant_f64 v[5:6], s[104:105] +// GFX12: v_frexp_mant_f64_e32 v[5:6], s[104:105] ; encoding: [0x68,0x7a,0x0a,0x7e] + +v_frexp_mant_f64 v[5:6], vcc +// GFX12: v_frexp_mant_f64_e32 v[5:6], vcc ; encoding: [0x6a,0x7a,0x0a,0x7e] + +v_frexp_mant_f64 v[5:6], ttmp[14:15] +// GFX12: v_frexp_mant_f64_e32 v[5:6], ttmp[14:15] ; encoding: [0x7a,0x7a,0x0a,0x7e] + +v_frexp_mant_f64 v[5:6], exec +// GFX12: v_frexp_mant_f64_e32 v[5:6], exec ; encoding: [0x7e,0x7a,0x0a,0x7e] + +v_frexp_mant_f64 v[5:6], null +// GFX12: v_frexp_mant_f64_e32 v[5:6], null ; encoding: [0x7c,0x7a,0x0a,0x7e] + +v_frexp_mant_f64 v[5:6], -1 +// GFX12: v_frexp_mant_f64_e32 v[5:6], -1 ; encoding: [0xc1,0x7a,0x0a,0x7e] + +v_frexp_mant_f64 v[5:6], 0.5 +// GFX12: v_frexp_mant_f64_e32 v[5:6], 0.5 ; encoding: [0xf0,0x7a,0x0a,0x7e] + +v_frexp_mant_f64 v[5:6], src_scc +// GFX12: v_frexp_mant_f64_e32 v[5:6], src_scc ; encoding: [0xfd,0x7a,0x0a,0x7e] + +v_frexp_mant_f64 v[254:255], 0xaf123456 +// GFX12: v_frexp_mant_f64_e32 v[254:255], 0xaf123456 ; encoding: [0xff,0x7a,0xfc,0x7f,0x56,0x34,0x12,0xaf] + +v_log_f16 v5, v1 +// GFX12: v_log_f16_e32 v5, v1 ; encoding: [0x01,0xaf,0x0a,0x7e] + +v_log_f16 v5, v127 +// GFX12: v_log_f16_e32 v5, v127 ; encoding: [0x7f,0xaf,0x0a,0x7e] + +v_log_f16 v5, s1 +// GFX12: v_log_f16_e32 v5, s1 ; encoding: [0x01,0xae,0x0a,0x7e] + +v_log_f16 v5, s105 +// GFX12: v_log_f16_e32 v5, s105 ; encoding: [0x69,0xae,0x0a,0x7e] + +v_log_f16 v5, vcc_lo +// GFX12: v_log_f16_e32 v5, vcc_lo ; encoding: [0x6a,0xae,0x0a,0x7e] + +v_log_f16 v5, vcc_hi +// GFX12: v_log_f16_e32 v5, vcc_hi ; encoding: [0x6b,0xae,0x0a,0x7e] + +v_log_f16 v5, ttmp15 +// GFX12: v_log_f16_e32 v5, ttmp15 ; encoding: [0x7b,0xae,0x0a,0x7e] + +v_log_f16 v5, m0 +// GFX12: v_log_f16_e32 v5, m0 ; encoding: [0x7d,0xae,0x0a,0x7e] + +v_log_f16 v5, exec_lo +// GFX12: v_log_f16_e32 v5, exec_lo ; encoding: [0x7e,0xae,0x0a,0x7e] + +v_log_f16 v5, exec_hi +// GFX12: v_log_f16_e32 v5, exec_hi ; encoding: [0x7f,0xae,0x0a,0x7e] + +v_log_f16 v5, null +// GFX12: v_log_f16_e32 v5, null ; encoding: [0x7c,0xae,0x0a,0x7e] + +v_log_f16 v5, -1 +// GFX12: v_log_f16_e32 v5, -1 ; encoding: [0xc1,0xae,0x0a,0x7e] + +v_log_f16 v5, 0.5 +// GFX12: v_log_f16_e32 v5, 0.5 ; encoding: [0xf0,0xae,0x0a,0x7e] + +v_log_f16 v5, src_scc +// GFX12: v_log_f16_e32 
v5, src_scc ; encoding: [0xfd,0xae,0x0a,0x7e] + +v_log_f16 v127, 0xfe0b +// GFX12: v_log_f16_e32 v127, 0xfe0b ; encoding: [0xff,0xae,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + +v_log_f32 v5, v1 +// GFX12: v_log_f32_e32 v5, v1 ; encoding: [0x01,0x4f,0x0a,0x7e] + +v_log_f32 v5, v255 +// GFX12: v_log_f32_e32 v5, v255 ; encoding: [0xff,0x4f,0x0a,0x7e] + +v_log_f32 v5, s1 +// GFX12: v_log_f32_e32 v5, s1 ; encoding: [0x01,0x4e,0x0a,0x7e] + +v_log_f32 v5, s105 +// GFX12: v_log_f32_e32 v5, s105 ; encoding: [0x69,0x4e,0x0a,0x7e] + +v_log_f32 v5, vcc_lo +// GFX12: v_log_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x4e,0x0a,0x7e] + +v_log_f32 v5, vcc_hi +// GFX12: v_log_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x4e,0x0a,0x7e] + +v_log_f32 v5, ttmp15 +// GFX12: v_log_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x4e,0x0a,0x7e] + +v_log_f32 v5, m0 +// GFX12: v_log_f32_e32 v5, m0 ; encoding: [0x7d,0x4e,0x0a,0x7e] + +v_log_f32 v5, exec_lo +// GFX12: v_log_f32_e32 v5, exec_lo ; encoding: [0x7e,0x4e,0x0a,0x7e] + +v_log_f32 v5, exec_hi +// GFX12: v_log_f32_e32 v5, exec_hi ; encoding: [0x7f,0x4e,0x0a,0x7e] + +v_log_f32 v5, null +// GFX12: v_log_f32_e32 v5, null ; encoding: [0x7c,0x4e,0x0a,0x7e] + +v_log_f32 v5, -1 +// GFX12: v_log_f32_e32 v5, -1 ; encoding: [0xc1,0x4e,0x0a,0x7e] + +v_log_f32 v5, 0.5 +// GFX12: v_log_f32_e32 v5, 0.5 ; encoding: [0xf0,0x4e,0x0a,0x7e] + +v_log_f32 v5, src_scc +// GFX12: v_log_f32_e32 v5, src_scc ; encoding: [0xfd,0x4e,0x0a,0x7e] + +v_log_f32 v255, 0xaf123456 +// GFX12: v_log_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x4e,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_mov_b32 v5, v1 +// GFX12: v_mov_b32_e32 v5, v1 ; encoding: [0x01,0x03,0x0a,0x7e] + +v_mov_b32 v5, v255 +// GFX12: v_mov_b32_e32 v5, v255 ; encoding: [0xff,0x03,0x0a,0x7e] + +v_mov_b32 v5, s1 +// GFX12: v_mov_b32_e32 v5, s1 ; encoding: [0x01,0x02,0x0a,0x7e] + +v_mov_b32 v5, s105 +// GFX12: v_mov_b32_e32 v5, s105 ; encoding: [0x69,0x02,0x0a,0x7e] + +v_mov_b32 v5, vcc_lo +// GFX12: v_mov_b32_e32 v5, vcc_lo ; encoding: [0x6a,0x02,0x0a,0x7e] + +v_mov_b32 v5, vcc_hi +// GFX12: v_mov_b32_e32 v5, vcc_hi ; encoding: [0x6b,0x02,0x0a,0x7e] + +v_mov_b32 v5, ttmp15 +// GFX12: v_mov_b32_e32 v5, ttmp15 ; encoding: [0x7b,0x02,0x0a,0x7e] + +v_mov_b32 v5, m0 +// GFX12: v_mov_b32_e32 v5, m0 ; encoding: [0x7d,0x02,0x0a,0x7e] + +v_mov_b32 v5, exec_lo +// GFX12: v_mov_b32_e32 v5, exec_lo ; encoding: [0x7e,0x02,0x0a,0x7e] + +v_mov_b32 v5, exec_hi +// GFX12: v_mov_b32_e32 v5, exec_hi ; encoding: [0x7f,0x02,0x0a,0x7e] + +v_mov_b32 v5, null +// GFX12: v_mov_b32_e32 v5, null ; encoding: [0x7c,0x02,0x0a,0x7e] + +v_mov_b32 v5, -1 +// GFX12: v_mov_b32_e32 v5, -1 ; encoding: [0xc1,0x02,0x0a,0x7e] + +v_mov_b32 v5, 0.5 +// GFX12: v_mov_b32_e32 v5, 0.5 ; encoding: [0xf0,0x02,0x0a,0x7e] + +v_mov_b32 v5, src_scc +// GFX12: v_mov_b32_e32 v5, src_scc ; encoding: [0xfd,0x02,0x0a,0x7e] + +v_mov_b32 v255, 0xaf123456 +// GFX12: v_mov_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0x02,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_movreld_b32 v5, v1 +// GFX12: v_movreld_b32_e32 v5, v1 ; encoding: [0x01,0x85,0x0a,0x7e] + +v_movreld_b32 v5, v255 +// GFX12: v_movreld_b32_e32 v5, v255 ; encoding: [0xff,0x85,0x0a,0x7e] + +v_movreld_b32 v5, s1 +// GFX12: v_movreld_b32_e32 v5, s1 ; encoding: [0x01,0x84,0x0a,0x7e] + +v_movreld_b32 v5, s105 +// GFX12: v_movreld_b32_e32 v5, s105 ; encoding: [0x69,0x84,0x0a,0x7e] + +v_movreld_b32 v5, vcc_lo +// GFX12: v_movreld_b32_e32 v5, vcc_lo ; encoding: [0x6a,0x84,0x0a,0x7e] + +v_movreld_b32 v5, vcc_hi +// GFX12: v_movreld_b32_e32 v5, vcc_hi ; encoding: [0x6b,0x84,0x0a,0x7e] + 
+v_movreld_b32 v5, ttmp15 +// GFX12: v_movreld_b32_e32 v5, ttmp15 ; encoding: [0x7b,0x84,0x0a,0x7e] + +v_movreld_b32 v5, m0 +// GFX12: v_movreld_b32_e32 v5, m0 ; encoding: [0x7d,0x84,0x0a,0x7e] + +v_movreld_b32 v5, exec_lo +// GFX12: v_movreld_b32_e32 v5, exec_lo ; encoding: [0x7e,0x84,0x0a,0x7e] + +v_movreld_b32 v5, exec_hi +// GFX12: v_movreld_b32_e32 v5, exec_hi ; encoding: [0x7f,0x84,0x0a,0x7e] + +v_movreld_b32 v5, null +// GFX12: v_movreld_b32_e32 v5, null ; encoding: [0x7c,0x84,0x0a,0x7e] + +v_movreld_b32 v5, -1 +// GFX12: v_movreld_b32_e32 v5, -1 ; encoding: [0xc1,0x84,0x0a,0x7e] + +v_movreld_b32 v5, 0.5 +// GFX12: v_movreld_b32_e32 v5, 0.5 ; encoding: [0xf0,0x84,0x0a,0x7e] + +v_movreld_b32 v5, src_scc +// GFX12: v_movreld_b32_e32 v5, src_scc ; encoding: [0xfd,0x84,0x0a,0x7e] + +v_movreld_b32 v255, 0xaf123456 +// GFX12: v_movreld_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0x84,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_movrels_b32 v5, v1 +// GFX12: v_movrels_b32_e32 v5, v1 ; encoding: [0x01,0x87,0x0a,0x7e] + +v_movrels_b32 v255, v255 +// GFX12: v_movrels_b32_e32 v255, v255 ; encoding: [0xff,0x87,0xfe,0x7f] + +v_movrelsd_2_b32 v5, v1 +// GFX12: v_movrelsd_2_b32_e32 v5, v1 ; encoding: [0x01,0x91,0x0a,0x7e] + +v_movrelsd_2_b32 v255, v255 +// GFX12: v_movrelsd_2_b32_e32 v255, v255 ; encoding: [0xff,0x91,0xfe,0x7f] + +v_movrelsd_b32 v5, v1 +// GFX12: v_movrelsd_b32_e32 v5, v1 ; encoding: [0x01,0x89,0x0a,0x7e] + +v_movrelsd_b32 v255, v255 +// GFX12: v_movrelsd_b32_e32 v255, v255 ; encoding: [0xff,0x89,0xfe,0x7f] + +v_nop +// GFX12: v_nop ; encoding: [0x00,0x00,0x00,0x7e] + +v_not_b16 v5, v1 +// GFX12: v_not_b16_e32 v5, v1 ; encoding: [0x01,0xd3,0x0a,0x7e] + +v_not_b16 v5, v127 +// GFX12: v_not_b16_e32 v5, v127 ; encoding: [0x7f,0xd3,0x0a,0x7e] + +v_not_b16 v5, s1 +// GFX12: v_not_b16_e32 v5, s1 ; encoding: [0x01,0xd2,0x0a,0x7e] + +v_not_b16 v5, s105 +// GFX12: v_not_b16_e32 v5, s105 ; encoding: [0x69,0xd2,0x0a,0x7e] + +v_not_b16 v5, vcc_lo +// GFX12: v_not_b16_e32 v5, vcc_lo ; encoding: [0x6a,0xd2,0x0a,0x7e] + +v_not_b16 v5, vcc_hi +// GFX12: v_not_b16_e32 v5, vcc_hi ; encoding: [0x6b,0xd2,0x0a,0x7e] + +v_not_b16 v5, ttmp15 +// GFX12: v_not_b16_e32 v5, ttmp15 ; encoding: [0x7b,0xd2,0x0a,0x7e] + +v_not_b16 v5, m0 +// GFX12: v_not_b16_e32 v5, m0 ; encoding: [0x7d,0xd2,0x0a,0x7e] + +v_not_b16 v5, exec_lo +// GFX12: v_not_b16_e32 v5, exec_lo ; encoding: [0x7e,0xd2,0x0a,0x7e] + +v_not_b16 v5, exec_hi +// GFX12: v_not_b16_e32 v5, exec_hi ; encoding: [0x7f,0xd2,0x0a,0x7e] + +v_not_b16 v5, null +// GFX12: v_not_b16_e32 v5, null ; encoding: [0x7c,0xd2,0x0a,0x7e] + +v_not_b16 v5, -1 +// GFX12: v_not_b16_e32 v5, -1 ; encoding: [0xc1,0xd2,0x0a,0x7e] + +v_not_b16 v5, 0.5 +// GFX12-ASM: v_not_b16_e32 v5, 0.5 ; encoding: [0xf0,0xd2,0x0a,0x7e] +// GFX12-DIS: v_not_b16_e32 v5, 0x3800 ; encoding: [0xff,0xd2,0x0a,0x7e,0x00,0x38,0x00,0x00] + +v_not_b16 v5, src_scc +// GFX12: v_not_b16_e32 v5, src_scc ; encoding: [0xfd,0xd2,0x0a,0x7e] + +v_not_b16 v127, 0xfe0b +// GFX12: v_not_b16_e32 v127, 0xfe0b ; encoding: [0xff,0xd2,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + +v_not_b32 v5, v1 +// GFX12: v_not_b32_e32 v5, v1 ; encoding: [0x01,0x6f,0x0a,0x7e] + +v_not_b32 v5, v255 +// GFX12: v_not_b32_e32 v5, v255 ; encoding: [0xff,0x6f,0x0a,0x7e] + +v_not_b32 v5, s1 +// GFX12: v_not_b32_e32 v5, s1 ; encoding: [0x01,0x6e,0x0a,0x7e] + +v_not_b32 v5, s105 +// GFX12: v_not_b32_e32 v5, s105 ; encoding: [0x69,0x6e,0x0a,0x7e] + +v_not_b32 v5, vcc_lo +// GFX12: v_not_b32_e32 v5, vcc_lo ; encoding: [0x6a,0x6e,0x0a,0x7e] + +v_not_b32 v5, vcc_hi +// 
GFX12: v_not_b32_e32 v5, vcc_hi ; encoding: [0x6b,0x6e,0x0a,0x7e] + +v_not_b32 v5, ttmp15 +// GFX12: v_not_b32_e32 v5, ttmp15 ; encoding: [0x7b,0x6e,0x0a,0x7e] + +v_not_b32 v5, m0 +// GFX12: v_not_b32_e32 v5, m0 ; encoding: [0x7d,0x6e,0x0a,0x7e] + +v_not_b32 v5, exec_lo +// GFX12: v_not_b32_e32 v5, exec_lo ; encoding: [0x7e,0x6e,0x0a,0x7e] + +v_not_b32 v5, exec_hi +// GFX12: v_not_b32_e32 v5, exec_hi ; encoding: [0x7f,0x6e,0x0a,0x7e] + +v_not_b32 v5, null +// GFX12: v_not_b32_e32 v5, null ; encoding: [0x7c,0x6e,0x0a,0x7e] + +v_not_b32 v5, -1 +// GFX12: v_not_b32_e32 v5, -1 ; encoding: [0xc1,0x6e,0x0a,0x7e] + +v_not_b32 v5, 0.5 +// GFX12: v_not_b32_e32 v5, 0.5 ; encoding: [0xf0,0x6e,0x0a,0x7e] + +v_not_b32 v5, src_scc +// GFX12: v_not_b32_e32 v5, src_scc ; encoding: [0xfd,0x6e,0x0a,0x7e] + +v_not_b32 v255, 0xaf123456 +// GFX12: v_not_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0x6e,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_permlane64_b32 v5, v1 +// GFX12: v_permlane64_b32 v5, v1 ; encoding: [0x01,0xcf,0x0a,0x7e] + +v_permlane64_b32 v255, v255 +// GFX12: v_permlane64_b32 v255, v255 ; encoding: [0xff,0xcf,0xfe,0x7f] + +v_pipeflush +// GFX12: v_pipeflush ; encoding: [0x00,0x36,0x00,0x7e] + +v_rcp_f16 v5, v1 +// GFX12: v_rcp_f16_e32 v5, v1 ; encoding: [0x01,0xa9,0x0a,0x7e] + +v_rcp_f16 v5, v127 +// GFX12: v_rcp_f16_e32 v5, v127 ; encoding: [0x7f,0xa9,0x0a,0x7e] + +v_rcp_f16 v5, s1 +// GFX12: v_rcp_f16_e32 v5, s1 ; encoding: [0x01,0xa8,0x0a,0x7e] + +v_rcp_f16 v5, s105 +// GFX12: v_rcp_f16_e32 v5, s105 ; encoding: [0x69,0xa8,0x0a,0x7e] + +v_rcp_f16 v5, vcc_lo +// GFX12: v_rcp_f16_e32 v5, vcc_lo ; encoding: [0x6a,0xa8,0x0a,0x7e] + +v_rcp_f16 v5, vcc_hi +// GFX12: v_rcp_f16_e32 v5, vcc_hi ; encoding: [0x6b,0xa8,0x0a,0x7e] + +v_rcp_f16 v5, ttmp15 +// GFX12: v_rcp_f16_e32 v5, ttmp15 ; encoding: [0x7b,0xa8,0x0a,0x7e] + +v_rcp_f16 v5, m0 +// GFX12: v_rcp_f16_e32 v5, m0 ; encoding: [0x7d,0xa8,0x0a,0x7e] + +v_rcp_f16 v5, exec_lo +// GFX12: v_rcp_f16_e32 v5, exec_lo ; encoding: [0x7e,0xa8,0x0a,0x7e] + +v_rcp_f16 v5, exec_hi +// GFX12: v_rcp_f16_e32 v5, exec_hi ; encoding: [0x7f,0xa8,0x0a,0x7e] + +v_rcp_f16 v5, null +// GFX12: v_rcp_f16_e32 v5, null ; encoding: [0x7c,0xa8,0x0a,0x7e] + +v_rcp_f16 v5, -1 +// GFX12: v_rcp_f16_e32 v5, -1 ; encoding: [0xc1,0xa8,0x0a,0x7e] + +v_rcp_f16 v5, 0.5 +// GFX12: v_rcp_f16_e32 v5, 0.5 ; encoding: [0xf0,0xa8,0x0a,0x7e] + +v_rcp_f16 v5, src_scc +// GFX12: v_rcp_f16_e32 v5, src_scc ; encoding: [0xfd,0xa8,0x0a,0x7e] + +v_rcp_f16 v127, 0xfe0b +// GFX12: v_rcp_f16_e32 v127, 0xfe0b ; encoding: [0xff,0xa8,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + +v_rcp_f32 v5, v1 +// GFX12: v_rcp_f32_e32 v5, v1 ; encoding: [0x01,0x55,0x0a,0x7e] + +v_rcp_f32 v5, v255 +// GFX12: v_rcp_f32_e32 v5, v255 ; encoding: [0xff,0x55,0x0a,0x7e] + +v_rcp_f32 v5, s1 +// GFX12: v_rcp_f32_e32 v5, s1 ; encoding: [0x01,0x54,0x0a,0x7e] + +v_rcp_f32 v5, s105 +// GFX12: v_rcp_f32_e32 v5, s105 ; encoding: [0x69,0x54,0x0a,0x7e] + +v_rcp_f32 v5, vcc_lo +// GFX12: v_rcp_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x54,0x0a,0x7e] + +v_rcp_f32 v5, vcc_hi +// GFX12: v_rcp_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x54,0x0a,0x7e] + +v_rcp_f32 v5, ttmp15 +// GFX12: v_rcp_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x54,0x0a,0x7e] + +v_rcp_f32 v5, m0 +// GFX12: v_rcp_f32_e32 v5, m0 ; encoding: [0x7d,0x54,0x0a,0x7e] + +v_rcp_f32 v5, exec_lo +// GFX12: v_rcp_f32_e32 v5, exec_lo ; encoding: [0x7e,0x54,0x0a,0x7e] + +v_rcp_f32 v5, exec_hi +// GFX12: v_rcp_f32_e32 v5, exec_hi ; encoding: [0x7f,0x54,0x0a,0x7e] + +v_rcp_f32 v5, null +// GFX12: v_rcp_f32_e32 v5, null 
; encoding: [0x7c,0x54,0x0a,0x7e] + +v_rcp_f32 v5, -1 +// GFX12: v_rcp_f32_e32 v5, -1 ; encoding: [0xc1,0x54,0x0a,0x7e] + +v_rcp_f32 v5, 0.5 +// GFX12: v_rcp_f32_e32 v5, 0.5 ; encoding: [0xf0,0x54,0x0a,0x7e] + +v_rcp_f32 v5, src_scc +// GFX12: v_rcp_f32_e32 v5, src_scc ; encoding: [0xfd,0x54,0x0a,0x7e] + +v_rcp_f32 v255, 0xaf123456 +// GFX12: v_rcp_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x54,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_rcp_f64 v[5:6], v[1:2] +// GFX12: v_rcp_f64_e32 v[5:6], v[1:2] ; encoding: [0x01,0x5f,0x0a,0x7e] + +v_rcp_f64 v[5:6], v[254:255] +// GFX12: v_rcp_f64_e32 v[5:6], v[254:255] ; encoding: [0xfe,0x5f,0x0a,0x7e] + +v_rcp_f64 v[5:6], s[2:3] +// GFX12: v_rcp_f64_e32 v[5:6], s[2:3] ; encoding: [0x02,0x5e,0x0a,0x7e] + +v_rcp_f64 v[5:6], s[104:105] +// GFX12: v_rcp_f64_e32 v[5:6], s[104:105] ; encoding: [0x68,0x5e,0x0a,0x7e] + +v_rcp_f64 v[5:6], vcc +// GFX12: v_rcp_f64_e32 v[5:6], vcc ; encoding: [0x6a,0x5e,0x0a,0x7e] + +v_rcp_f64 v[5:6], ttmp[14:15] +// GFX12: v_rcp_f64_e32 v[5:6], ttmp[14:15] ; encoding: [0x7a,0x5e,0x0a,0x7e] + +v_rcp_f64 v[5:6], exec +// GFX12: v_rcp_f64_e32 v[5:6], exec ; encoding: [0x7e,0x5e,0x0a,0x7e] + +v_rcp_f64 v[5:6], null +// GFX12: v_rcp_f64_e32 v[5:6], null ; encoding: [0x7c,0x5e,0x0a,0x7e] + +v_rcp_f64 v[5:6], -1 +// GFX12: v_rcp_f64_e32 v[5:6], -1 ; encoding: [0xc1,0x5e,0x0a,0x7e] + +v_rcp_f64 v[5:6], 0.5 +// GFX12: v_rcp_f64_e32 v[5:6], 0.5 ; encoding: [0xf0,0x5e,0x0a,0x7e] + +v_rcp_f64 v[5:6], src_scc +// GFX12: v_rcp_f64_e32 v[5:6], src_scc ; encoding: [0xfd,0x5e,0x0a,0x7e] + +v_rcp_f64 v[254:255], 0xaf123456 +// GFX12: v_rcp_f64_e32 v[254:255], 0xaf123456 ; encoding: [0xff,0x5e,0xfc,0x7f,0x56,0x34,0x12,0xaf] + +v_rcp_iflag_f32 v5, v1 +// GFX12: v_rcp_iflag_f32_e32 v5, v1 ; encoding: [0x01,0x57,0x0a,0x7e] + +v_rcp_iflag_f32 v5, v255 +// GFX12: v_rcp_iflag_f32_e32 v5, v255 ; encoding: [0xff,0x57,0x0a,0x7e] + +v_rcp_iflag_f32 v5, s1 +// GFX12: v_rcp_iflag_f32_e32 v5, s1 ; encoding: [0x01,0x56,0x0a,0x7e] + +v_rcp_iflag_f32 v5, s105 +// GFX12: v_rcp_iflag_f32_e32 v5, s105 ; encoding: [0x69,0x56,0x0a,0x7e] + +v_rcp_iflag_f32 v5, vcc_lo +// GFX12: v_rcp_iflag_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x56,0x0a,0x7e] + +v_rcp_iflag_f32 v5, vcc_hi +// GFX12: v_rcp_iflag_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x56,0x0a,0x7e] + +v_rcp_iflag_f32 v5, ttmp15 +// GFX12: v_rcp_iflag_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x56,0x0a,0x7e] + +v_rcp_iflag_f32 v5, m0 +// GFX12: v_rcp_iflag_f32_e32 v5, m0 ; encoding: [0x7d,0x56,0x0a,0x7e] + +v_rcp_iflag_f32 v5, exec_lo +// GFX12: v_rcp_iflag_f32_e32 v5, exec_lo ; encoding: [0x7e,0x56,0x0a,0x7e] + +v_rcp_iflag_f32 v5, exec_hi +// GFX12: v_rcp_iflag_f32_e32 v5, exec_hi ; encoding: [0x7f,0x56,0x0a,0x7e] + +v_rcp_iflag_f32 v5, null +// GFX12: v_rcp_iflag_f32_e32 v5, null ; encoding: [0x7c,0x56,0x0a,0x7e] + +v_rcp_iflag_f32 v5, -1 +// GFX12: v_rcp_iflag_f32_e32 v5, -1 ; encoding: [0xc1,0x56,0x0a,0x7e] + +v_rcp_iflag_f32 v5, 0.5 +// GFX12: v_rcp_iflag_f32_e32 v5, 0.5 ; encoding: [0xf0,0x56,0x0a,0x7e] + +v_rcp_iflag_f32 v5, src_scc +// GFX12: v_rcp_iflag_f32_e32 v5, src_scc ; encoding: [0xfd,0x56,0x0a,0x7e] + +v_rcp_iflag_f32 v255, 0xaf123456 +// GFX12: v_rcp_iflag_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x56,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_readfirstlane_b32 s5, v1 +// GFX12: v_readfirstlane_b32 s5, v1 ; encoding: [0x01,0x05,0x0a,0x7e] + +v_readfirstlane_b32 s105, v1 +// GFX12: v_readfirstlane_b32 s105, v1 ; encoding: [0x01,0x05,0xd2,0x7e] + +v_readfirstlane_b32 vcc_lo, v1 +// GFX12: v_readfirstlane_b32 vcc_lo, v1 ; 
encoding: [0x01,0x05,0xd4,0x7e] + +v_readfirstlane_b32 vcc_hi, v1 +// GFX12: v_readfirstlane_b32 vcc_hi, v1 ; encoding: [0x01,0x05,0xd6,0x7e] + +v_readfirstlane_b32 ttmp15, v1 +// GFX12: v_readfirstlane_b32 ttmp15, v1 ; encoding: [0x01,0x05,0xf6,0x7e] + +v_readfirstlane_b32 null, v255 +// GFX12: v_readfirstlane_b32 null, v255 ; encoding: [0xff,0x05,0xf8,0x7e] + +v_rndne_f16 v5, v1 +// GFX12: v_rndne_f16_e32 v5, v1 ; encoding: [0x01,0xbd,0x0a,0x7e] + +v_rndne_f16 v5, v127 +// GFX12: v_rndne_f16_e32 v5, v127 ; encoding: [0x7f,0xbd,0x0a,0x7e] + +v_rndne_f16 v5, s1 +// GFX12: v_rndne_f16_e32 v5, s1 ; encoding: [0x01,0xbc,0x0a,0x7e] + +v_rndne_f16 v5, s105 +// GFX12: v_rndne_f16_e32 v5, s105 ; encoding: [0x69,0xbc,0x0a,0x7e] + +v_rndne_f16 v5, vcc_lo +// GFX12: v_rndne_f16_e32 v5, vcc_lo ; encoding: [0x6a,0xbc,0x0a,0x7e] + +v_rndne_f16 v5, vcc_hi +// GFX12: v_rndne_f16_e32 v5, vcc_hi ; encoding: [0x6b,0xbc,0x0a,0x7e] + +v_rndne_f16 v5, ttmp15 +// GFX12: v_rndne_f16_e32 v5, ttmp15 ; encoding: [0x7b,0xbc,0x0a,0x7e] + +v_rndne_f16 v5, m0 +// GFX12: v_rndne_f16_e32 v5, m0 ; encoding: [0x7d,0xbc,0x0a,0x7e] + +v_rndne_f16 v5, exec_lo +// GFX12: v_rndne_f16_e32 v5, exec_lo ; encoding: [0x7e,0xbc,0x0a,0x7e] + +v_rndne_f16 v5, exec_hi +// GFX12: v_rndne_f16_e32 v5, exec_hi ; encoding: [0x7f,0xbc,0x0a,0x7e] + +v_rndne_f16 v5, null +// GFX12: v_rndne_f16_e32 v5, null ; encoding: [0x7c,0xbc,0x0a,0x7e] + +v_rndne_f16 v5, -1 +// GFX12: v_rndne_f16_e32 v5, -1 ; encoding: [0xc1,0xbc,0x0a,0x7e] + +v_rndne_f16 v5, 0.5 +// GFX12: v_rndne_f16_e32 v5, 0.5 ; encoding: [0xf0,0xbc,0x0a,0x7e] + +v_rndne_f16 v5, src_scc +// GFX12: v_rndne_f16_e32 v5, src_scc ; encoding: [0xfd,0xbc,0x0a,0x7e] + +v_rndne_f16 v127, 0xfe0b +// GFX12: v_rndne_f16_e32 v127, 0xfe0b ; encoding: [0xff,0xbc,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + +v_rndne_f32 v5, v1 +// GFX12: v_rndne_f32_e32 v5, v1 ; encoding: [0x01,0x47,0x0a,0x7e] + +v_rndne_f32 v5, v255 +// GFX12: v_rndne_f32_e32 v5, v255 ; encoding: [0xff,0x47,0x0a,0x7e] + +v_rndne_f32 v5, s1 +// GFX12: v_rndne_f32_e32 v5, s1 ; encoding: [0x01,0x46,0x0a,0x7e] + +v_rndne_f32 v5, s105 +// GFX12: v_rndne_f32_e32 v5, s105 ; encoding: [0x69,0x46,0x0a,0x7e] + +v_rndne_f32 v5, vcc_lo +// GFX12: v_rndne_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x46,0x0a,0x7e] + +v_rndne_f32 v5, vcc_hi +// GFX12: v_rndne_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x46,0x0a,0x7e] + +v_rndne_f32 v5, ttmp15 +// GFX12: v_rndne_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x46,0x0a,0x7e] + +v_rndne_f32 v5, m0 +// GFX12: v_rndne_f32_e32 v5, m0 ; encoding: [0x7d,0x46,0x0a,0x7e] + +v_rndne_f32 v5, exec_lo +// GFX12: v_rndne_f32_e32 v5, exec_lo ; encoding: [0x7e,0x46,0x0a,0x7e] + +v_rndne_f32 v5, exec_hi +// GFX12: v_rndne_f32_e32 v5, exec_hi ; encoding: [0x7f,0x46,0x0a,0x7e] + +v_rndne_f32 v5, null +// GFX12: v_rndne_f32_e32 v5, null ; encoding: [0x7c,0x46,0x0a,0x7e] + +v_rndne_f32 v5, -1 +// GFX12: v_rndne_f32_e32 v5, -1 ; encoding: [0xc1,0x46,0x0a,0x7e] + +v_rndne_f32 v5, 0.5 +// GFX12: v_rndne_f32_e32 v5, 0.5 ; encoding: [0xf0,0x46,0x0a,0x7e] + +v_rndne_f32 v5, src_scc +// GFX12: v_rndne_f32_e32 v5, src_scc ; encoding: [0xfd,0x46,0x0a,0x7e] + +v_rndne_f32 v255, 0xaf123456 +// GFX12: v_rndne_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x46,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_rndne_f64 v[5:6], v[1:2] +// GFX12: v_rndne_f64_e32 v[5:6], v[1:2] ; encoding: [0x01,0x33,0x0a,0x7e] + +v_rndne_f64 v[5:6], v[254:255] +// GFX12: v_rndne_f64_e32 v[5:6], v[254:255] ; encoding: [0xfe,0x33,0x0a,0x7e] + +v_rndne_f64 v[5:6], s[2:3] +// GFX12: v_rndne_f64_e32 v[5:6], 
s[2:3] ; encoding: [0x02,0x32,0x0a,0x7e] + +v_rndne_f64 v[5:6], s[104:105] +// GFX12: v_rndne_f64_e32 v[5:6], s[104:105] ; encoding: [0x68,0x32,0x0a,0x7e] + +v_rndne_f64 v[5:6], vcc +// GFX12: v_rndne_f64_e32 v[5:6], vcc ; encoding: [0x6a,0x32,0x0a,0x7e] + +v_rndne_f64 v[5:6], ttmp[14:15] +// GFX12: v_rndne_f64_e32 v[5:6], ttmp[14:15] ; encoding: [0x7a,0x32,0x0a,0x7e] + +v_rndne_f64 v[5:6], exec +// GFX12: v_rndne_f64_e32 v[5:6], exec ; encoding: [0x7e,0x32,0x0a,0x7e] + +v_rndne_f64 v[5:6], null +// GFX12: v_rndne_f64_e32 v[5:6], null ; encoding: [0x7c,0x32,0x0a,0x7e] + +v_rndne_f64 v[5:6], -1 +// GFX12: v_rndne_f64_e32 v[5:6], -1 ; encoding: [0xc1,0x32,0x0a,0x7e] + +v_rndne_f64 v[5:6], 0.5 +// GFX12: v_rndne_f64_e32 v[5:6], 0.5 ; encoding: [0xf0,0x32,0x0a,0x7e] + +v_rndne_f64 v[5:6], src_scc +// GFX12: v_rndne_f64_e32 v[5:6], src_scc ; encoding: [0xfd,0x32,0x0a,0x7e] + +v_rndne_f64 v[254:255], 0xaf123456 +// GFX12: v_rndne_f64_e32 v[254:255], 0xaf123456 ; encoding: [0xff,0x32,0xfc,0x7f,0x56,0x34,0x12,0xaf] + +v_rsq_f16 v5, v1 +// GFX12: v_rsq_f16_e32 v5, v1 ; encoding: [0x01,0xad,0x0a,0x7e] + +v_rsq_f16 v5, v127 +// GFX12: v_rsq_f16_e32 v5, v127 ; encoding: [0x7f,0xad,0x0a,0x7e] + +v_rsq_f16 v5, s1 +// GFX12: v_rsq_f16_e32 v5, s1 ; encoding: [0x01,0xac,0x0a,0x7e] + +v_rsq_f16 v5, s105 +// GFX12: v_rsq_f16_e32 v5, s105 ; encoding: [0x69,0xac,0x0a,0x7e] + +v_rsq_f16 v5, vcc_lo +// GFX12: v_rsq_f16_e32 v5, vcc_lo ; encoding: [0x6a,0xac,0x0a,0x7e] + +v_rsq_f16 v5, vcc_hi +// GFX12: v_rsq_f16_e32 v5, vcc_hi ; encoding: [0x6b,0xac,0x0a,0x7e] + +v_rsq_f16 v5, ttmp15 +// GFX12: v_rsq_f16_e32 v5, ttmp15 ; encoding: [0x7b,0xac,0x0a,0x7e] + +v_rsq_f16 v5, m0 +// GFX12: v_rsq_f16_e32 v5, m0 ; encoding: [0x7d,0xac,0x0a,0x7e] + +v_rsq_f16 v5, exec_lo +// GFX12: v_rsq_f16_e32 v5, exec_lo ; encoding: [0x7e,0xac,0x0a,0x7e] + +v_rsq_f16 v5, exec_hi +// GFX12: v_rsq_f16_e32 v5, exec_hi ; encoding: [0x7f,0xac,0x0a,0x7e] + +v_rsq_f16 v5, null +// GFX12: v_rsq_f16_e32 v5, null ; encoding: [0x7c,0xac,0x0a,0x7e] + +v_rsq_f16 v5, -1 +// GFX12: v_rsq_f16_e32 v5, -1 ; encoding: [0xc1,0xac,0x0a,0x7e] + +v_rsq_f16 v5, 0.5 +// GFX12: v_rsq_f16_e32 v5, 0.5 ; encoding: [0xf0,0xac,0x0a,0x7e] + +v_rsq_f16 v5, src_scc +// GFX12: v_rsq_f16_e32 v5, src_scc ; encoding: [0xfd,0xac,0x0a,0x7e] + +v_rsq_f16 v127, 0xfe0b +// GFX12: v_rsq_f16_e32 v127, 0xfe0b ; encoding: [0xff,0xac,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + +v_rsq_f32 v5, v1 +// GFX12: v_rsq_f32_e32 v5, v1 ; encoding: [0x01,0x5d,0x0a,0x7e] + +v_rsq_f32 v5, v255 +// GFX12: v_rsq_f32_e32 v5, v255 ; encoding: [0xff,0x5d,0x0a,0x7e] + +v_rsq_f32 v5, s1 +// GFX12: v_rsq_f32_e32 v5, s1 ; encoding: [0x01,0x5c,0x0a,0x7e] + +v_rsq_f32 v5, s105 +// GFX12: v_rsq_f32_e32 v5, s105 ; encoding: [0x69,0x5c,0x0a,0x7e] + +v_rsq_f32 v5, vcc_lo +// GFX12: v_rsq_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x5c,0x0a,0x7e] + +v_rsq_f32 v5, vcc_hi +// GFX12: v_rsq_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x5c,0x0a,0x7e] + +v_rsq_f32 v5, ttmp15 +// GFX12: v_rsq_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x5c,0x0a,0x7e] + +v_rsq_f32 v5, m0 +// GFX12: v_rsq_f32_e32 v5, m0 ; encoding: [0x7d,0x5c,0x0a,0x7e] + +v_rsq_f32 v5, exec_lo +// GFX12: v_rsq_f32_e32 v5, exec_lo ; encoding: [0x7e,0x5c,0x0a,0x7e] + +v_rsq_f32 v5, exec_hi +// GFX12: v_rsq_f32_e32 v5, exec_hi ; encoding: [0x7f,0x5c,0x0a,0x7e] + +v_rsq_f32 v5, null +// GFX12: v_rsq_f32_e32 v5, null ; encoding: [0x7c,0x5c,0x0a,0x7e] + +v_rsq_f32 v5, -1 +// GFX12: v_rsq_f32_e32 v5, -1 ; encoding: [0xc1,0x5c,0x0a,0x7e] + +v_rsq_f32 v5, 0.5 +// GFX12: v_rsq_f32_e32 v5, 0.5 
; encoding: [0xf0,0x5c,0x0a,0x7e] + +v_rsq_f32 v5, src_scc +// GFX12: v_rsq_f32_e32 v5, src_scc ; encoding: [0xfd,0x5c,0x0a,0x7e] + +v_rsq_f32 v255, 0xaf123456 +// GFX12: v_rsq_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x5c,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_rsq_f64 v[5:6], v[1:2] +// GFX12: v_rsq_f64_e32 v[5:6], v[1:2] ; encoding: [0x01,0x63,0x0a,0x7e] + +v_rsq_f64 v[5:6], v[254:255] +// GFX12: v_rsq_f64_e32 v[5:6], v[254:255] ; encoding: [0xfe,0x63,0x0a,0x7e] + +v_rsq_f64 v[5:6], s[2:3] +// GFX12: v_rsq_f64_e32 v[5:6], s[2:3] ; encoding: [0x02,0x62,0x0a,0x7e] + +v_rsq_f64 v[5:6], s[104:105] +// GFX12: v_rsq_f64_e32 v[5:6], s[104:105] ; encoding: [0x68,0x62,0x0a,0x7e] + +v_rsq_f64 v[5:6], vcc +// GFX12: v_rsq_f64_e32 v[5:6], vcc ; encoding: [0x6a,0x62,0x0a,0x7e] + +v_rsq_f64 v[5:6], ttmp[14:15] +// GFX12: v_rsq_f64_e32 v[5:6], ttmp[14:15] ; encoding: [0x7a,0x62,0x0a,0x7e] + +v_rsq_f64 v[5:6], exec +// GFX12: v_rsq_f64_e32 v[5:6], exec ; encoding: [0x7e,0x62,0x0a,0x7e] + +v_rsq_f64 v[5:6], null +// GFX12: v_rsq_f64_e32 v[5:6], null ; encoding: [0x7c,0x62,0x0a,0x7e] + +v_rsq_f64 v[5:6], -1 +// GFX12: v_rsq_f64_e32 v[5:6], -1 ; encoding: [0xc1,0x62,0x0a,0x7e] + +v_rsq_f64 v[5:6], 0.5 +// GFX12: v_rsq_f64_e32 v[5:6], 0.5 ; encoding: [0xf0,0x62,0x0a,0x7e] + +v_rsq_f64 v[5:6], src_scc +// GFX12: v_rsq_f64_e32 v[5:6], src_scc ; encoding: [0xfd,0x62,0x0a,0x7e] + +v_rsq_f64 v[254:255], 0xaf123456 +// GFX12: v_rsq_f64_e32 v[254:255], 0xaf123456 ; encoding: [0xff,0x62,0xfc,0x7f,0x56,0x34,0x12,0xaf] + +v_sat_pk_u8_i16 v5, v1 +// GFX12: v_sat_pk_u8_i16_e32 v5, v1 ; encoding: [0x01,0xc5,0x0a,0x7e] + +v_sat_pk_u8_i16 v5, v255 +// GFX12: v_sat_pk_u8_i16_e32 v5, v255 ; encoding: [0xff,0xc5,0x0a,0x7e] + +v_sat_pk_u8_i16 v5, s1 +// GFX12: v_sat_pk_u8_i16_e32 v5, s1 ; encoding: [0x01,0xc4,0x0a,0x7e] + +v_sat_pk_u8_i16 v5, s105 +// GFX12: v_sat_pk_u8_i16_e32 v5, s105 ; encoding: [0x69,0xc4,0x0a,0x7e] + +v_sat_pk_u8_i16 v5, vcc_lo +// GFX12: v_sat_pk_u8_i16_e32 v5, vcc_lo ; encoding: [0x6a,0xc4,0x0a,0x7e] + +v_sat_pk_u8_i16 v5, vcc_hi +// GFX12: v_sat_pk_u8_i16_e32 v5, vcc_hi ; encoding: [0x6b,0xc4,0x0a,0x7e] + +v_sat_pk_u8_i16 v5, ttmp15 +// GFX12: v_sat_pk_u8_i16_e32 v5, ttmp15 ; encoding: [0x7b,0xc4,0x0a,0x7e] + +v_sat_pk_u8_i16 v5, m0 +// GFX12: v_sat_pk_u8_i16_e32 v5, m0 ; encoding: [0x7d,0xc4,0x0a,0x7e] + +v_sat_pk_u8_i16 v5, exec_lo +// GFX12: v_sat_pk_u8_i16_e32 v5, exec_lo ; encoding: [0x7e,0xc4,0x0a,0x7e] + +v_sat_pk_u8_i16 v5, exec_hi +// GFX12: v_sat_pk_u8_i16_e32 v5, exec_hi ; encoding: [0x7f,0xc4,0x0a,0x7e] + +v_sat_pk_u8_i16 v5, null +// GFX12: v_sat_pk_u8_i16_e32 v5, null ; encoding: [0x7c,0xc4,0x0a,0x7e] + +v_sat_pk_u8_i16 v5, -1 +// GFX12: v_sat_pk_u8_i16_e32 v5, -1 ; encoding: [0xc1,0xc4,0x0a,0x7e] + +v_sat_pk_u8_i16 v5, 0.5 +// GFX12: v_sat_pk_u8_i16_e32 v5, 0.5 ; encoding: [0xf0,0xc4,0x0a,0x7e] + +v_sat_pk_u8_i16 v5, src_scc +// GFX12: v_sat_pk_u8_i16_e32 v5, src_scc ; encoding: [0xfd,0xc4,0x0a,0x7e] + +v_sat_pk_u8_i16 v127, 0xfe0b +// GFX12: v_sat_pk_u8_i16_e32 v127, 0xfe0b ; encoding: [0xff,0xc4,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + +v_sin_f16 v5, v1 +// GFX12: v_sin_f16_e32 v5, v1 ; encoding: [0x01,0xc1,0x0a,0x7e] + +v_sin_f16 v5, v127 +// GFX12: v_sin_f16_e32 v5, v127 ; encoding: [0x7f,0xc1,0x0a,0x7e] + +v_sin_f16 v5, s1 +// GFX12: v_sin_f16_e32 v5, s1 ; encoding: [0x01,0xc0,0x0a,0x7e] + +v_sin_f16 v5, s105 +// GFX12: v_sin_f16_e32 v5, s105 ; encoding: [0x69,0xc0,0x0a,0x7e] + +v_sin_f16 v5, vcc_lo +// GFX12: v_sin_f16_e32 v5, vcc_lo ; encoding: [0x6a,0xc0,0x0a,0x7e] + +v_sin_f16 v5, 
vcc_hi +// GFX12: v_sin_f16_e32 v5, vcc_hi ; encoding: [0x6b,0xc0,0x0a,0x7e] + +v_sin_f16 v5, ttmp15 +// GFX12: v_sin_f16_e32 v5, ttmp15 ; encoding: [0x7b,0xc0,0x0a,0x7e] + +v_sin_f16 v5, m0 +// GFX12: v_sin_f16_e32 v5, m0 ; encoding: [0x7d,0xc0,0x0a,0x7e] + +v_sin_f16 v5, exec_lo +// GFX12: v_sin_f16_e32 v5, exec_lo ; encoding: [0x7e,0xc0,0x0a,0x7e] + +v_sin_f16 v5, exec_hi +// GFX12: v_sin_f16_e32 v5, exec_hi ; encoding: [0x7f,0xc0,0x0a,0x7e] + +v_sin_f16 v5, null +// GFX12: v_sin_f16_e32 v5, null ; encoding: [0x7c,0xc0,0x0a,0x7e] + +v_sin_f16 v5, -1 +// GFX12: v_sin_f16_e32 v5, -1 ; encoding: [0xc1,0xc0,0x0a,0x7e] + +v_sin_f16 v5, 0.5 +// GFX12: v_sin_f16_e32 v5, 0.5 ; encoding: [0xf0,0xc0,0x0a,0x7e] + +v_sin_f16 v5, src_scc +// GFX12: v_sin_f16_e32 v5, src_scc ; encoding: [0xfd,0xc0,0x0a,0x7e] + +v_sin_f16 v127, 0xfe0b +// GFX12: v_sin_f16_e32 v127, 0xfe0b ; encoding: [0xff,0xc0,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + +v_sin_f32 v5, v1 +// GFX12: v_sin_f32_e32 v5, v1 ; encoding: [0x01,0x6b,0x0a,0x7e] + +v_sin_f32 v5, v255 +// GFX12: v_sin_f32_e32 v5, v255 ; encoding: [0xff,0x6b,0x0a,0x7e] + +v_sin_f32 v5, s1 +// GFX12: v_sin_f32_e32 v5, s1 ; encoding: [0x01,0x6a,0x0a,0x7e] + +v_sin_f32 v5, s105 +// GFX12: v_sin_f32_e32 v5, s105 ; encoding: [0x69,0x6a,0x0a,0x7e] + +v_sin_f32 v5, vcc_lo +// GFX12: v_sin_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x6a,0x0a,0x7e] + +v_sin_f32 v5, vcc_hi +// GFX12: v_sin_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x6a,0x0a,0x7e] + +v_sin_f32 v5, ttmp15 +// GFX12: v_sin_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x6a,0x0a,0x7e] + +v_sin_f32 v5, m0 +// GFX12: v_sin_f32_e32 v5, m0 ; encoding: [0x7d,0x6a,0x0a,0x7e] + +v_sin_f32 v5, exec_lo +// GFX12: v_sin_f32_e32 v5, exec_lo ; encoding: [0x7e,0x6a,0x0a,0x7e] + +v_sin_f32 v5, exec_hi +// GFX12: v_sin_f32_e32 v5, exec_hi ; encoding: [0x7f,0x6a,0x0a,0x7e] + +v_sin_f32 v5, null +// GFX12: v_sin_f32_e32 v5, null ; encoding: [0x7c,0x6a,0x0a,0x7e] + +v_sin_f32 v5, -1 +// GFX12: v_sin_f32_e32 v5, -1 ; encoding: [0xc1,0x6a,0x0a,0x7e] + +v_sin_f32 v5, 0.5 +// GFX12: v_sin_f32_e32 v5, 0.5 ; encoding: [0xf0,0x6a,0x0a,0x7e] + +v_sin_f32 v5, src_scc +// GFX12: v_sin_f32_e32 v5, src_scc ; encoding: [0xfd,0x6a,0x0a,0x7e] + +v_sin_f32 v255, 0xaf123456 +// GFX12: v_sin_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x6a,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_sqrt_f16 v5, v1 +// GFX12: v_sqrt_f16_e32 v5, v1 ; encoding: [0x01,0xab,0x0a,0x7e] + +v_sqrt_f16 v5, v127 +// GFX12: v_sqrt_f16_e32 v5, v127 ; encoding: [0x7f,0xab,0x0a,0x7e] + +v_sqrt_f16 v5, s1 +// GFX12: v_sqrt_f16_e32 v5, s1 ; encoding: [0x01,0xaa,0x0a,0x7e] + +v_sqrt_f16 v5, s105 +// GFX12: v_sqrt_f16_e32 v5, s105 ; encoding: [0x69,0xaa,0x0a,0x7e] + +v_sqrt_f16 v5, vcc_lo +// GFX12: v_sqrt_f16_e32 v5, vcc_lo ; encoding: [0x6a,0xaa,0x0a,0x7e] + +v_sqrt_f16 v5, vcc_hi +// GFX12: v_sqrt_f16_e32 v5, vcc_hi ; encoding: [0x6b,0xaa,0x0a,0x7e] + +v_sqrt_f16 v5, ttmp15 +// GFX12: v_sqrt_f16_e32 v5, ttmp15 ; encoding: [0x7b,0xaa,0x0a,0x7e] + +v_sqrt_f16 v5, m0 +// GFX12: v_sqrt_f16_e32 v5, m0 ; encoding: [0x7d,0xaa,0x0a,0x7e] + +v_sqrt_f16 v5, exec_lo +// GFX12: v_sqrt_f16_e32 v5, exec_lo ; encoding: [0x7e,0xaa,0x0a,0x7e] + +v_sqrt_f16 v5, exec_hi +// GFX12: v_sqrt_f16_e32 v5, exec_hi ; encoding: [0x7f,0xaa,0x0a,0x7e] + +v_sqrt_f16 v5, null +// GFX12: v_sqrt_f16_e32 v5, null ; encoding: [0x7c,0xaa,0x0a,0x7e] + +v_sqrt_f16 v5, -1 +// GFX12: v_sqrt_f16_e32 v5, -1 ; encoding: [0xc1,0xaa,0x0a,0x7e] + +v_sqrt_f16 v5, 0.5 +// GFX12: v_sqrt_f16_e32 v5, 0.5 ; encoding: [0xf0,0xaa,0x0a,0x7e] + +v_sqrt_f16 v5, src_scc 
+// GFX12: v_sqrt_f16_e32 v5, src_scc ; encoding: [0xfd,0xaa,0x0a,0x7e] + +v_sqrt_f16 v127, 0xfe0b +// GFX12: v_sqrt_f16_e32 v127, 0xfe0b ; encoding: [0xff,0xaa,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + +v_sqrt_f32 v5, v1 +// GFX12: v_sqrt_f32_e32 v5, v1 ; encoding: [0x01,0x67,0x0a,0x7e] + +v_sqrt_f32 v5, v255 +// GFX12: v_sqrt_f32_e32 v5, v255 ; encoding: [0xff,0x67,0x0a,0x7e] + +v_sqrt_f32 v5, s1 +// GFX12: v_sqrt_f32_e32 v5, s1 ; encoding: [0x01,0x66,0x0a,0x7e] + +v_sqrt_f32 v5, s105 +// GFX12: v_sqrt_f32_e32 v5, s105 ; encoding: [0x69,0x66,0x0a,0x7e] + +v_sqrt_f32 v5, vcc_lo +// GFX12: v_sqrt_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x66,0x0a,0x7e] + +v_sqrt_f32 v5, vcc_hi +// GFX12: v_sqrt_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x66,0x0a,0x7e] + +v_sqrt_f32 v5, ttmp15 +// GFX12: v_sqrt_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x66,0x0a,0x7e] + +v_sqrt_f32 v5, m0 +// GFX12: v_sqrt_f32_e32 v5, m0 ; encoding: [0x7d,0x66,0x0a,0x7e] + +v_sqrt_f32 v5, exec_lo +// GFX12: v_sqrt_f32_e32 v5, exec_lo ; encoding: [0x7e,0x66,0x0a,0x7e] + +v_sqrt_f32 v5, exec_hi +// GFX12: v_sqrt_f32_e32 v5, exec_hi ; encoding: [0x7f,0x66,0x0a,0x7e] + +v_sqrt_f32 v5, null +// GFX12: v_sqrt_f32_e32 v5, null ; encoding: [0x7c,0x66,0x0a,0x7e] + +v_sqrt_f32 v5, -1 +// GFX12: v_sqrt_f32_e32 v5, -1 ; encoding: [0xc1,0x66,0x0a,0x7e] + +v_sqrt_f32 v5, 0.5 +// GFX12: v_sqrt_f32_e32 v5, 0.5 ; encoding: [0xf0,0x66,0x0a,0x7e] + +v_sqrt_f32 v5, src_scc +// GFX12: v_sqrt_f32_e32 v5, src_scc ; encoding: [0xfd,0x66,0x0a,0x7e] + +v_sqrt_f32 v255, 0xaf123456 +// GFX12: v_sqrt_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x66,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_sqrt_f64 v[5:6], v[1:2] +// GFX12: v_sqrt_f64_e32 v[5:6], v[1:2] ; encoding: [0x01,0x69,0x0a,0x7e] + +v_sqrt_f64 v[5:6], v[254:255] +// GFX12: v_sqrt_f64_e32 v[5:6], v[254:255] ; encoding: [0xfe,0x69,0x0a,0x7e] + +v_sqrt_f64 v[5:6], s[2:3] +// GFX12: v_sqrt_f64_e32 v[5:6], s[2:3] ; encoding: [0x02,0x68,0x0a,0x7e] + +v_sqrt_f64 v[5:6], s[104:105] +// GFX12: v_sqrt_f64_e32 v[5:6], s[104:105] ; encoding: [0x68,0x68,0x0a,0x7e] + +v_sqrt_f64 v[5:6], vcc +// GFX12: v_sqrt_f64_e32 v[5:6], vcc ; encoding: [0x6a,0x68,0x0a,0x7e] + +v_sqrt_f64 v[5:6], ttmp[14:15] +// GFX12: v_sqrt_f64_e32 v[5:6], ttmp[14:15] ; encoding: [0x7a,0x68,0x0a,0x7e] + +v_sqrt_f64 v[5:6], exec +// GFX12: v_sqrt_f64_e32 v[5:6], exec ; encoding: [0x7e,0x68,0x0a,0x7e] + +v_sqrt_f64 v[5:6], null +// GFX12: v_sqrt_f64_e32 v[5:6], null ; encoding: [0x7c,0x68,0x0a,0x7e] + +v_sqrt_f64 v[5:6], -1 +// GFX12: v_sqrt_f64_e32 v[5:6], -1 ; encoding: [0xc1,0x68,0x0a,0x7e] + +v_sqrt_f64 v[5:6], 0.5 +// GFX12: v_sqrt_f64_e32 v[5:6], 0.5 ; encoding: [0xf0,0x68,0x0a,0x7e] + +v_sqrt_f64 v[5:6], src_scc +// GFX12: v_sqrt_f64_e32 v[5:6], src_scc ; encoding: [0xfd,0x68,0x0a,0x7e] + +v_sqrt_f64 v[254:255], 0xaf123456 +// GFX12: v_sqrt_f64_e32 v[254:255], 0xaf123456 ; encoding: [0xff,0x68,0xfc,0x7f,0x56,0x34,0x12,0xaf] + +v_swap_b32 v5, v1 +// GFX12: v_swap_b32 v5, v1 ; encoding: [0x01,0xcb,0x0a,0x7e] + +v_swap_b32 v255, v255 +// GFX12: v_swap_b32 v255, v255 ; encoding: [0xff,0xcb,0xfe,0x7f] + +v_swaprel_b32 v5, v1 +// GFX12: v_swaprel_b32 v5, v1 ; encoding: [0x01,0xd1,0x0a,0x7e] + +v_swaprel_b32 v255, v255 +// GFX12: v_swaprel_b32 v255, v255 ; encoding: [0xff,0xd1,0xfe,0x7f] + +v_trunc_f16 v5, v1 +// GFX12: v_trunc_f16_e32 v5, v1 ; encoding: [0x01,0xbb,0x0a,0x7e] + +v_trunc_f16 v5, v127 +// GFX12: v_trunc_f16_e32 v5, v127 ; encoding: [0x7f,0xbb,0x0a,0x7e] + +v_trunc_f16 v5, s1 +// GFX12: v_trunc_f16_e32 v5, s1 ; encoding: [0x01,0xba,0x0a,0x7e] + 
+v_trunc_f16 v5, s105 +// GFX12: v_trunc_f16_e32 v5, s105 ; encoding: [0x69,0xba,0x0a,0x7e] + +v_trunc_f16 v5, vcc_lo +// GFX12: v_trunc_f16_e32 v5, vcc_lo ; encoding: [0x6a,0xba,0x0a,0x7e] + +v_trunc_f16 v5, vcc_hi +// GFX12: v_trunc_f16_e32 v5, vcc_hi ; encoding: [0x6b,0xba,0x0a,0x7e] + +v_trunc_f16 v5, ttmp15 +// GFX12: v_trunc_f16_e32 v5, ttmp15 ; encoding: [0x7b,0xba,0x0a,0x7e] + +v_trunc_f16 v5, m0 +// GFX12: v_trunc_f16_e32 v5, m0 ; encoding: [0x7d,0xba,0x0a,0x7e] + +v_trunc_f16 v5, exec_lo +// GFX12: v_trunc_f16_e32 v5, exec_lo ; encoding: [0x7e,0xba,0x0a,0x7e] + +v_trunc_f16 v5, exec_hi +// GFX12: v_trunc_f16_e32 v5, exec_hi ; encoding: [0x7f,0xba,0x0a,0x7e] + +v_trunc_f16 v5, null +// GFX12: v_trunc_f16_e32 v5, null ; encoding: [0x7c,0xba,0x0a,0x7e] + +v_trunc_f16 v5, -1 +// GFX12: v_trunc_f16_e32 v5, -1 ; encoding: [0xc1,0xba,0x0a,0x7e] + +v_trunc_f16 v5, 0.5 +// GFX12: v_trunc_f16_e32 v5, 0.5 ; encoding: [0xf0,0xba,0x0a,0x7e] + +v_trunc_f16 v5, src_scc +// GFX12: v_trunc_f16_e32 v5, src_scc ; encoding: [0xfd,0xba,0x0a,0x7e] + +v_trunc_f16 v127, 0xfe0b +// GFX12: v_trunc_f16_e32 v127, 0xfe0b ; encoding: [0xff,0xba,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + +v_trunc_f32 v5, v1 +// GFX12: v_trunc_f32_e32 v5, v1 ; encoding: [0x01,0x43,0x0a,0x7e] + +v_trunc_f32 v5, v255 +// GFX12: v_trunc_f32_e32 v5, v255 ; encoding: [0xff,0x43,0x0a,0x7e] + +v_trunc_f32 v5, s1 +// GFX12: v_trunc_f32_e32 v5, s1 ; encoding: [0x01,0x42,0x0a,0x7e] + +v_trunc_f32 v5, s105 +// GFX12: v_trunc_f32_e32 v5, s105 ; encoding: [0x69,0x42,0x0a,0x7e] + +v_trunc_f32 v5, vcc_lo +// GFX12: v_trunc_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x42,0x0a,0x7e] + +v_trunc_f32 v5, vcc_hi +// GFX12: v_trunc_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x42,0x0a,0x7e] + +v_trunc_f32 v5, ttmp15 +// GFX12: v_trunc_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x42,0x0a,0x7e] + +v_trunc_f32 v5, m0 +// GFX12: v_trunc_f32_e32 v5, m0 ; encoding: [0x7d,0x42,0x0a,0x7e] + +v_trunc_f32 v5, exec_lo +// GFX12: v_trunc_f32_e32 v5, exec_lo ; encoding: [0x7e,0x42,0x0a,0x7e] + +v_trunc_f32 v5, exec_hi +// GFX12: v_trunc_f32_e32 v5, exec_hi ; encoding: [0x7f,0x42,0x0a,0x7e] + +v_trunc_f32 v5, null +// GFX12: v_trunc_f32_e32 v5, null ; encoding: [0x7c,0x42,0x0a,0x7e] + +v_trunc_f32 v5, -1 +// GFX12: v_trunc_f32_e32 v5, -1 ; encoding: [0xc1,0x42,0x0a,0x7e] + +v_trunc_f32 v5, 0.5 +// GFX12: v_trunc_f32_e32 v5, 0.5 ; encoding: [0xf0,0x42,0x0a,0x7e] + +v_trunc_f32 v5, src_scc +// GFX12: v_trunc_f32_e32 v5, src_scc ; encoding: [0xfd,0x42,0x0a,0x7e] + +v_trunc_f32 v255, 0xaf123456 +// GFX12: v_trunc_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x42,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_trunc_f64 v[5:6], v[1:2] +// GFX12: v_trunc_f64_e32 v[5:6], v[1:2] ; encoding: [0x01,0x2f,0x0a,0x7e] + +v_trunc_f64 v[5:6], v[254:255] +// GFX12: v_trunc_f64_e32 v[5:6], v[254:255] ; encoding: [0xfe,0x2f,0x0a,0x7e] + +v_trunc_f64 v[5:6], s[2:3] +// GFX12: v_trunc_f64_e32 v[5:6], s[2:3] ; encoding: [0x02,0x2e,0x0a,0x7e] + +v_trunc_f64 v[5:6], s[104:105] +// GFX12: v_trunc_f64_e32 v[5:6], s[104:105] ; encoding: [0x68,0x2e,0x0a,0x7e] + +v_trunc_f64 v[5:6], vcc +// GFX12: v_trunc_f64_e32 v[5:6], vcc ; encoding: [0x6a,0x2e,0x0a,0x7e] + +v_trunc_f64 v[5:6], ttmp[14:15] +// GFX12: v_trunc_f64_e32 v[5:6], ttmp[14:15] ; encoding: [0x7a,0x2e,0x0a,0x7e] + +v_trunc_f64 v[5:6], exec +// GFX12: v_trunc_f64_e32 v[5:6], exec ; encoding: [0x7e,0x2e,0x0a,0x7e] + +v_trunc_f64 v[5:6], null +// GFX12: v_trunc_f64_e32 v[5:6], null ; encoding: [0x7c,0x2e,0x0a,0x7e] + +v_trunc_f64 v[5:6], -1 +// GFX12: v_trunc_f64_e32 v[5:6], 
-1 ; encoding: [0xc1,0x2e,0x0a,0x7e]
+
+v_trunc_f64 v[5:6], 0.5
+// GFX12: v_trunc_f64_e32 v[5:6], 0.5 ; encoding: [0xf0,0x2e,0x0a,0x7e]
+
+v_trunc_f64 v[5:6], src_scc
+// GFX12: v_trunc_f64_e32 v[5:6], src_scc ; encoding: [0xfd,0x2e,0x0a,0x7e]
+
+v_trunc_f64 v[254:255], 0xaf123456
+// GFX12: v_trunc_f64_e32 v[254:255], 0xaf123456 ; encoding: [0xff,0x2e,0xfc,0x7f,0x56,0x34,0x12,0xaf]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s
index 2e83f8ffc51318..e82ccc176d2d14 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s
@@ -1,7 +1,9 @@
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-ASM %s
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -disassemble -show-encoding | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-ASM %s
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-ASM %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -disassemble -show-encoding | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-ASM %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s
+
+// this file will be converted to true16 format when more true16 instructions are supported
 
 v_bfrev_b32_e32 v5, v1
 // GFX12: v_bfrev_b32_e32 v5, v1 ; encoding: [0x01,0x71,0x0a,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16-fake16.s
new file mode 100644
index 00000000000000..9b181e98231aeb
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16-fake16.s
@@ -0,0 +1,2828 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+
+v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+
+v_bfrev_b32 v5, v1 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+
+v_bfrev_b32 v5, v1 row_mirror
+// GFX12: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x40,0x01,0xff]
+
+v_bfrev_b32 v5, v1 row_half_mirror
+// GFX12: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x41,0x01,0xff]
+
+v_bfrev_b32 v5, v1 row_shl:1
+// GFX12: 
encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_bfrev_b32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_bfrev_b32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_bfrev_b32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_bfrev_b32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_bfrev_b32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_bfrev_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_bfrev_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_bfrev_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_bfrev_b32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x70,0xfe,0x7f,0xff,0x6f,0x05,0x30] + +v_ceil_f16 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_ceil_f16 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_ceil_f16 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_ceil_f16 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_ceil_f16 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_ceil_f16 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_ceil_f16 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_ceil_f16 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_ceil_f16 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_ceil_f16 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_ceil_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_ceil_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_ceil_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_ceil_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xb8,0xfe,0x7e,0x7f,0x6f,0x35,0x30] + +v_ceil_f32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_ceil_f32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_ceil_f32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_ceil_f32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_ceil_f32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_ceil_f32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_ceil_f32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_ceil_f32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_ceil_f32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_ceil_f32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x2f,0x01,0xff] + 
+v_ceil_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_ceil_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_ceil_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_ceil_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x44,0xfe,0x7f,0xff,0x6f,0x35,0x30] + +v_cls_i32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cls_i32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cls_i32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cls_i32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cls_i32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cls_i32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cls_i32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cls_i32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_cls_i32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cls_i32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cls_i32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cls_i32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_cls_i32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cls_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x76,0xfe,0x7f,0xff,0x6f,0x05,0x30] + +v_clz_i32_u32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_clz_i32_u32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_clz_i32_u32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_clz_i32_u32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_clz_i32_u32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_clz_i32_u32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_clz_i32_u32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_clz_i32_u32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_clz_i32_u32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_clz_i32_u32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_clz_i32_u32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_clz_i32_u32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_clz_i32_u32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_clz_i32_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: 
[0xfa,0x72,0xfe,0x7f,0xff,0x6f,0x05,0x30] + +v_cos_f16 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cos_f16 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cos_f16 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cos_f16 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cos_f16 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cos_f16 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cos_f16 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cos_f16 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_cos_f16 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cos_f16 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cos_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cos_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_cos_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cos_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xc2,0xfe,0x7e,0x7f,0x6f,0x35,0x30] + +v_cos_f32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cos_f32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cos_f32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cos_f32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cos_f32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cos_f32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cos_f32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cos_f32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_cos_f32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cos_f32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cos_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cos_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_cos_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cos_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x6c,0xfe,0x7f,0xff,0x6f,0x35,0x30] + +v_ctz_i32_b32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_ctz_i32_b32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_ctz_i32_b32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_ctz_i32_b32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_ctz_i32_b32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x01,0x01,0xff] + 
+v_ctz_i32_b32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_ctz_i32_b32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_ctz_i32_b32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_ctz_i32_b32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_ctz_i32_b32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_ctz_i32_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_ctz_i32_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_ctz_i32_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_ctz_i32_b32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x74,0xfe,0x7f,0xff,0x6f,0x05,0x30] + +v_cvt_f32_fp8 v1, v3 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xc +// GFX12: encoding: [0xfa,0xd8,0x02,0x7e,0x03,0xe4,0x00,0xac] + +v_cvt_f32_fp8 v1, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xe +// GFX12: encoding: [0xfa,0xd8,0x02,0x7e,0x03,0x1b,0x00,0x2e] + +v_cvt_f32_bf8 v1, v3 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xc +// GFX12: encoding: [0xfa,0xda,0x02,0x7e,0x03,0xe4,0x00,0xac] + +v_cvt_f32_bf8 v1, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xe +// GFX12: encoding: [0xfa,0xda,0x02,0x7e,0x03,0x1b,0x00,0x2e] + +v_cvt_f16_f32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cvt_f16_f32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cvt_f16_f32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cvt_f16_f32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cvt_f16_f32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cvt_f16_f32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cvt_f16_f32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cvt_f16_f32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_cvt_f16_f32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cvt_f16_f32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cvt_f16_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cvt_f16_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_cvt_f16_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cvt_f16_f32 v127, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x14,0xfe,0x7e,0xff,0x6f,0x35,0x30] + +v_cvt_f16_i16 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cvt_f16_i16 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cvt_f16_i16 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cvt_f16_i16 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cvt_f16_i16 v5, 
v1 row_shl:1 +// GFX12: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cvt_f16_i16 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cvt_f16_i16 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cvt_f16_i16 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_cvt_f16_i16 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cvt_f16_i16 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cvt_f16_i16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cvt_f16_i16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_cvt_f16_i16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cvt_f16_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xa2,0xfe,0x7e,0x7f,0x6f,0x05,0x30] + +v_cvt_f16_u16 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cvt_f16_u16 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cvt_f16_u16 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cvt_f16_u16 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cvt_f16_u16 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cvt_f16_u16 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cvt_f16_u16 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cvt_f16_u16 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_cvt_f16_u16 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cvt_f16_u16 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cvt_f16_u16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cvt_f16_u16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_cvt_f16_u16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cvt_f16_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xa0,0xfe,0x7e,0x7f,0x6f,0x05,0x30] + +v_cvt_f32_f16 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cvt_f32_f16 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cvt_f32_f16 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cvt_f32_f16 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cvt_f32_f16 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cvt_f32_f16 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cvt_f32_f16 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cvt_f32_f16 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_cvt_f32_f16 v5, v1 row_ror:1 +// GFX12: encoding: 
[0xfa,0x16,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cvt_f32_f16 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cvt_f32_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cvt_f32_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_cvt_f32_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cvt_f32_f16 v255, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x16,0xfe,0x7f,0x7f,0x6f,0x35,0x30] + +v_cvt_f32_i32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cvt_f32_i32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cvt_f32_i32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cvt_f32_i32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cvt_f32_i32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cvt_f32_i32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cvt_f32_i32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cvt_f32_i32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_cvt_f32_i32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cvt_f32_i32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cvt_f32_i32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cvt_f32_i32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_cvt_f32_i32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cvt_f32_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x0a,0xfe,0x7f,0xff,0x6f,0x05,0x30] + +v_cvt_f32_u32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cvt_f32_u32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cvt_f32_u32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cvt_f32_u32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cvt_f32_u32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cvt_f32_u32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cvt_f32_u32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cvt_f32_u32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_cvt_f32_u32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cvt_f32_u32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cvt_f32_u32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cvt_f32_u32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_cvt_f32_u32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 
bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cvt_f32_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x0c,0xfe,0x7f,0xff,0x6f,0x05,0x30] + +v_cvt_f32_ubyte0 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cvt_f32_ubyte0 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cvt_f32_ubyte0 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cvt_f32_ubyte0 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cvt_f32_ubyte0 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cvt_f32_ubyte0 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cvt_f32_ubyte0 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cvt_f32_ubyte0 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_cvt_f32_ubyte0 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cvt_f32_ubyte0 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cvt_f32_ubyte0 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cvt_f32_ubyte0 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_cvt_f32_ubyte0 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cvt_f32_ubyte0 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x22,0xfe,0x7f,0xff,0x6f,0x05,0x30] + +v_cvt_f32_ubyte1 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cvt_f32_ubyte1 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cvt_f32_ubyte1 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cvt_f32_ubyte1 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cvt_f32_ubyte1 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cvt_f32_ubyte1 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cvt_f32_ubyte1 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cvt_f32_ubyte1 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_cvt_f32_ubyte1 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cvt_f32_ubyte1 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cvt_f32_ubyte1 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cvt_f32_ubyte1 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_cvt_f32_ubyte1 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cvt_f32_ubyte1 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x24,0xfe,0x7f,0xff,0x6f,0x05,0x30] + +v_cvt_f32_ubyte2 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cvt_f32_ubyte2 v5, v1 
quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cvt_f32_ubyte2 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cvt_f32_ubyte2 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cvt_f32_ubyte2 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cvt_f32_ubyte2 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cvt_f32_ubyte2 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cvt_f32_ubyte2 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_cvt_f32_ubyte2 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cvt_f32_ubyte2 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cvt_f32_ubyte2 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cvt_f32_ubyte2 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_cvt_f32_ubyte2 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cvt_f32_ubyte2 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x26,0xfe,0x7f,0xff,0x6f,0x05,0x30] + +v_cvt_f32_ubyte3 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cvt_f32_ubyte3 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cvt_f32_ubyte3 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cvt_f32_ubyte3 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cvt_f32_ubyte3 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cvt_f32_ubyte3 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cvt_f32_ubyte3 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cvt_f32_ubyte3 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_cvt_f32_ubyte3 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cvt_f32_ubyte3 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cvt_f32_ubyte3 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cvt_f32_ubyte3 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_cvt_f32_ubyte3 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cvt_f32_ubyte3 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x28,0xfe,0x7f,0xff,0x6f,0x05,0x30] + +v_cvt_floor_i32_f32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cvt_floor_i32_f32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cvt_floor_i32_f32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cvt_floor_i32_f32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cvt_floor_i32_f32 v5, v1 row_shl:1 +// GFX12: encoding: 
[0xfa,0x1a,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cvt_floor_i32_f32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cvt_floor_i32_f32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cvt_floor_i32_f32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_cvt_floor_i32_f32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cvt_floor_i32_f32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cvt_floor_i32_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cvt_floor_i32_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_cvt_floor_i32_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cvt_floor_i32_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x1a,0xfe,0x7f,0xff,0x6f,0x35,0x30] + +v_cvt_flr_i32_f32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cvt_flr_i32_f32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cvt_flr_i32_f32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cvt_flr_i32_f32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cvt_flr_i32_f32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cvt_flr_i32_f32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cvt_flr_i32_f32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cvt_flr_i32_f32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_cvt_flr_i32_f32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cvt_flr_i32_f32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cvt_flr_i32_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cvt_flr_i32_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_cvt_flr_i32_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cvt_flr_i32_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x1a,0xfe,0x7f,0xff,0x6f,0x35,0x30] + +v_cvt_i16_f16 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cvt_i16_f16 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cvt_i16_f16 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cvt_i16_f16 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cvt_i16_f16 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cvt_i16_f16 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cvt_i16_f16 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cvt_i16_f16 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x1f,0x01,0xff] + 
+v_cvt_i16_f16 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cvt_i16_f16 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cvt_i16_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cvt_i16_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_cvt_i16_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cvt_i16_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xa6,0xfe,0x7e,0x7f,0x6f,0x35,0x30] + +v_cvt_i32_f32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cvt_i32_f32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cvt_i32_f32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cvt_i32_f32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cvt_i32_f32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cvt_i32_f32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cvt_i32_f32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cvt_i32_f32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_cvt_i32_f32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cvt_i32_f32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cvt_i32_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cvt_i32_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_cvt_i32_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cvt_i32_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x10,0xfe,0x7f,0xff,0x6f,0x35,0x30] + +v_cvt_i32_i16 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0xd4,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cvt_i32_i16 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0xd4,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cvt_i32_i16 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0xd4,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cvt_i32_i16 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0xd4,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cvt_i32_i16 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0xd4,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cvt_i32_i16 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0xd4,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cvt_i32_i16 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0xd4,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cvt_i32_i16 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0xd4,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_cvt_i32_i16 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0xd4,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cvt_i32_i16 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0xd4,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cvt_i32_i16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0xd4,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cvt_i32_i16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0xd4,0x0a,0x7e,0x01,0x5f,0x01,0x01] + 
+v_cvt_i32_i16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0xd4,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cvt_i32_i16 v255, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xd4,0xfe,0x7f,0x7f,0x6f,0x05,0x30] + +v_cvt_nearest_i32_f32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cvt_nearest_i32_f32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cvt_nearest_i32_f32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cvt_nearest_i32_f32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cvt_nearest_i32_f32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cvt_nearest_i32_f32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cvt_nearest_i32_f32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cvt_nearest_i32_f32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_cvt_nearest_i32_f32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cvt_nearest_i32_f32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cvt_nearest_i32_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cvt_nearest_i32_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_cvt_nearest_i32_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cvt_nearest_i32_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x18,0xfe,0x7f,0xff,0x6f,0x35,0x30] + +v_cvt_norm_i16_f16 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cvt_norm_i16_f16 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cvt_norm_i16_f16 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cvt_norm_i16_f16 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cvt_norm_i16_f16 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cvt_norm_i16_f16 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cvt_norm_i16_f16 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cvt_norm_i16_f16 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_cvt_norm_i16_f16 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cvt_norm_i16_f16 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cvt_norm_i16_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cvt_norm_i16_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_cvt_norm_i16_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cvt_norm_i16_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: 
[0xfa,0xc6,0xfe,0x7e,0x7f,0x6f,0x35,0x30] + +v_cvt_norm_u16_f16 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cvt_norm_u16_f16 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cvt_norm_u16_f16 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cvt_norm_u16_f16 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cvt_norm_u16_f16 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cvt_norm_u16_f16 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cvt_norm_u16_f16 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cvt_norm_u16_f16 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_cvt_norm_u16_f16 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cvt_norm_u16_f16 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cvt_norm_u16_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cvt_norm_u16_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_cvt_norm_u16_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cvt_norm_u16_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xc8,0xfe,0x7e,0x7f,0x6f,0x35,0x30] + +v_cvt_off_f32_i4 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cvt_off_f32_i4 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cvt_off_f32_i4 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cvt_off_f32_i4 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cvt_off_f32_i4 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cvt_off_f32_i4 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cvt_off_f32_i4 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cvt_off_f32_i4 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_cvt_off_f32_i4 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cvt_off_f32_i4 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cvt_off_f32_i4 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cvt_off_f32_i4 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_cvt_off_f32_i4 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cvt_off_f32_i4 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x1c,0xfe,0x7f,0xff,0x6f,0x05,0x30] + +v_cvt_rpi_i32_f32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cvt_rpi_i32_f32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cvt_rpi_i32_f32 v5, v1 row_mirror +// GFX12: encoding: 
[0xfa,0x18,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cvt_rpi_i32_f32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cvt_rpi_i32_f32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cvt_rpi_i32_f32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cvt_rpi_i32_f32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cvt_rpi_i32_f32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_cvt_rpi_i32_f32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cvt_rpi_i32_f32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cvt_rpi_i32_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cvt_rpi_i32_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_cvt_rpi_i32_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cvt_rpi_i32_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x18,0xfe,0x7f,0xff,0x6f,0x35,0x30] + +v_cvt_u16_f16 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cvt_u16_f16 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cvt_u16_f16 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cvt_u16_f16 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cvt_u16_f16 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cvt_u16_f16 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cvt_u16_f16 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cvt_u16_f16 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_cvt_u16_f16 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cvt_u16_f16 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cvt_u16_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cvt_u16_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_cvt_u16_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cvt_u16_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xa4,0xfe,0x7e,0x7f,0x6f,0x35,0x30] + +v_cvt_u32_f32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cvt_u32_f32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cvt_u32_f32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cvt_u32_f32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cvt_u32_f32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cvt_u32_f32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cvt_u32_f32 v5, v1 row_shr:1 +// GFX12: encoding: 
[0xfa,0x0e,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cvt_u32_f32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_cvt_u32_f32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cvt_u32_f32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cvt_u32_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cvt_u32_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_cvt_u32_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cvt_u32_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x0e,0xfe,0x7f,0xff,0x6f,0x35,0x30] + +v_cvt_u32_u16 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_cvt_u32_u16 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_cvt_u32_u16 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_cvt_u32_u16 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_cvt_u32_u16 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_cvt_u32_u16 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_cvt_u32_u16 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_cvt_u32_u16 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_cvt_u32_u16 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_cvt_u32_u16 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_cvt_u32_u16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_cvt_u32_u16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_cvt_u32_u16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_cvt_u32_u16 v255, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xd6,0xfe,0x7f,0x7f,0x6f,0x05,0x30] + +v_exp_f16 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_exp_f16 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_exp_f16 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_exp_f16 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_exp_f16 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_exp_f16 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_exp_f16 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_exp_f16 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_exp_f16 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_exp_f16 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_exp_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_exp_f16 v5, v1 row_share:15 
row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_exp_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_exp_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xb0,0xfe,0x7e,0x7f,0x6f,0x35,0x30] + +v_exp_f32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_exp_f32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_exp_f32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_exp_f32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_exp_f32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_exp_f32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_exp_f32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_exp_f32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_exp_f32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_exp_f32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_exp_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_exp_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_exp_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_exp_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x4a,0xfe,0x7f,0xff,0x6f,0x35,0x30] + +v_ffbh_i32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_ffbh_i32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_ffbh_i32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_ffbh_i32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_ffbh_i32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_ffbh_i32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_ffbh_i32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_ffbh_i32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_ffbh_i32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_ffbh_i32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_ffbh_i32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_ffbh_i32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_ffbh_i32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_ffbh_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x76,0xfe,0x7f,0xff,0x6f,0x05,0x30] + +v_ffbh_u32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_ffbh_u32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: 
[0xfa,0x72,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_ffbh_u32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_ffbh_u32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_ffbh_u32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_ffbh_u32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_ffbh_u32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_ffbh_u32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_ffbh_u32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_ffbh_u32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_ffbh_u32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_ffbh_u32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_ffbh_u32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_ffbh_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x72,0xfe,0x7f,0xff,0x6f,0x05,0x30] + +v_ffbl_b32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_ffbl_b32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_ffbl_b32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_ffbl_b32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_ffbl_b32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_ffbl_b32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_ffbl_b32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_ffbl_b32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_ffbl_b32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_ffbl_b32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_ffbl_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_ffbl_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_ffbl_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_ffbl_b32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x74,0xfe,0x7f,0xff,0x6f,0x05,0x30] + +v_floor_f16 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_floor_f16 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_floor_f16 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_floor_f16 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_floor_f16 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_floor_f16 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_floor_f16 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_floor_f16 v5, 
v1 row_shr:15 +// GFX12: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_floor_f16 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_floor_f16 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_floor_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_floor_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_floor_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_floor_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xb6,0xfe,0x7e,0x7f,0x6f,0x35,0x30] + +v_floor_f32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_floor_f32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_floor_f32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_floor_f32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_floor_f32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_floor_f32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_floor_f32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_floor_f32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_floor_f32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_floor_f32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_floor_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_floor_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_floor_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_floor_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x48,0xfe,0x7f,0xff,0x6f,0x35,0x30] + +v_fract_f16 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_fract_f16 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_fract_f16 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_fract_f16 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_fract_f16 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_fract_f16 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_fract_f16 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_fract_f16 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_fract_f16 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_fract_f16 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_fract_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_fract_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: 
[0xfa,0xbe,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_fract_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_fract_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xbe,0xfe,0x7e,0x7f,0x6f,0x35,0x30] + +v_fract_f32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_fract_f32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_fract_f32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_fract_f32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_fract_f32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_fract_f32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_fract_f32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_fract_f32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_fract_f32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_fract_f32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_fract_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_fract_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_fract_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_fract_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x40,0xfe,0x7f,0xff,0x6f,0x35,0x30] + +v_frexp_exp_i16_f16 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_frexp_exp_i16_f16 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_frexp_exp_i16_f16 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_frexp_exp_i16_f16 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_frexp_exp_i16_f16 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_frexp_exp_i16_f16 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_frexp_exp_i16_f16 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_frexp_exp_i16_f16 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_frexp_exp_i16_f16 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_frexp_exp_i16_f16 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_frexp_exp_i16_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_frexp_exp_i16_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_frexp_exp_i16_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_frexp_exp_i16_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xb4,0xfe,0x7e,0x7f,0x6f,0x35,0x30] + +v_frexp_exp_i32_f32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: 
encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_frexp_exp_i32_f32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_frexp_exp_i32_f32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_frexp_exp_i32_f32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_frexp_exp_i32_f32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_frexp_exp_i32_f32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_frexp_exp_i32_f32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_frexp_exp_i32_f32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_frexp_exp_i32_f32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_frexp_exp_i32_f32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_frexp_exp_i32_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_frexp_exp_i32_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_frexp_exp_i32_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_frexp_exp_i32_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x7e,0xfe,0x7f,0xff,0x6f,0x35,0x30] + +v_frexp_mant_f16 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_frexp_mant_f16 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_frexp_mant_f16 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_frexp_mant_f16 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_frexp_mant_f16 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_frexp_mant_f16 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_frexp_mant_f16 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_frexp_mant_f16 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_frexp_mant_f16 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_frexp_mant_f16 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_frexp_mant_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_frexp_mant_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_frexp_mant_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_frexp_mant_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xb2,0xfe,0x7e,0x7f,0x6f,0x35,0x30] + +v_frexp_mant_f32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_frexp_mant_f32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_frexp_mant_f32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_frexp_mant_f32 v5, v1 row_half_mirror +// GFX12: encoding: 
[0xfa,0x80,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_frexp_mant_f32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_frexp_mant_f32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_frexp_mant_f32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_frexp_mant_f32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_frexp_mant_f32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_frexp_mant_f32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_frexp_mant_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_frexp_mant_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_frexp_mant_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_frexp_mant_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x80,0xfe,0x7f,0xff,0x6f,0x35,0x30] + +v_log_f16 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_log_f16 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_log_f16 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_log_f16 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_log_f16 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_log_f16 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_log_f16 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_log_f16 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_log_f16 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_log_f16 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_log_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_log_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_log_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_log_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xae,0xfe,0x7e,0x7f,0x6f,0x35,0x30] + +v_log_f32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_log_f32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_log_f32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_log_f32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_log_f32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_log_f32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_log_f32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_log_f32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_log_f32 v5, v1 row_ror:1 +// GFX12: encoding: 
[0xfa,0x4e,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_log_f32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_log_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_log_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_log_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_log_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x4e,0xfe,0x7f,0xff,0x6f,0x35,0x30] + +v_mov_b32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_mov_b32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_mov_b32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_mov_b32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_mov_b32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_mov_b32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_mov_b32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_mov_b32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_mov_b32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_mov_b32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_mov_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_mov_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_mov_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_mov_b32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x02,0xfe,0x7f,0xff,0x6f,0x05,0x30] + +v_movreld_b32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_movreld_b32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x84,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_movreld_b32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_movreld_b32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_movreld_b32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_movreld_b32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_movreld_b32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_movreld_b32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_movreld_b32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_movreld_b32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_movreld_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_movreld_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_movreld_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: 
[0xfa,0x84,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_movreld_b32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x84,0xfe,0x7f,0xff,0x6f,0x05,0x30] + +v_movrels_b32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_movrels_b32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_movrels_b32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_movrels_b32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_movrels_b32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_movrels_b32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_movrels_b32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_movrels_b32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_movrels_b32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_movrels_b32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_movrels_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_movrels_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_movrels_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_movrels_b32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x86,0xfe,0x7f,0xff,0x6f,0x05,0x30] + +v_movrelsd_2_b32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_movrelsd_2_b32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_movrelsd_2_b32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_movrelsd_2_b32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_movrelsd_2_b32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_movrelsd_2_b32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_movrelsd_2_b32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_movrelsd_2_b32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_movrelsd_2_b32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_movrelsd_2_b32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_movrelsd_2_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_movrelsd_2_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_movrelsd_2_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_movrelsd_2_b32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x90,0xfe,0x7f,0xff,0x6f,0x05,0x30] + +v_movrelsd_b32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_movrelsd_b32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0xe4,0x00,0xff] + 
+v_movrelsd_b32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_movrelsd_b32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_movrelsd_b32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_movrelsd_b32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_movrelsd_b32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_movrelsd_b32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_movrelsd_b32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_movrelsd_b32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_movrelsd_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_movrelsd_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_movrelsd_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_movrelsd_b32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x88,0xfe,0x7f,0xff,0x6f,0x05,0x30] + +v_not_b16 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_not_b16 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_not_b16 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_not_b16 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_not_b16 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_not_b16 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_not_b16 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_not_b16 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_not_b16 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_not_b16 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_not_b16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_not_b16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_not_b16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_not_b16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xd2,0xfe,0x7e,0x7f,0x6f,0x05,0x30] + +v_not_b32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_not_b32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_not_b32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_not_b32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_not_b32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_not_b32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_not_b32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_not_b32 v5, v1 row_shr:15 +// GFX12: 
encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_not_b32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_not_b32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_not_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_not_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_not_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_not_b32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x6e,0xfe,0x7f,0xff,0x6f,0x05,0x30] + +v_rcp_f16 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_rcp_f16 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_rcp_f16 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_rcp_f16 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_rcp_f16 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_rcp_f16 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_rcp_f16 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_rcp_f16 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_rcp_f16 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_rcp_f16 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_rcp_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_rcp_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_rcp_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_rcp_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xa8,0xfe,0x7e,0x7f,0x6f,0x35,0x30] + +v_rcp_f32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_rcp_f32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_rcp_f32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_rcp_f32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_rcp_f32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_rcp_f32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_rcp_f32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_rcp_f32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_rcp_f32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_rcp_f32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_rcp_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_rcp_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_rcp_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: 
encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_rcp_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x54,0xfe,0x7f,0xff,0x6f,0x35,0x30] + +v_rcp_iflag_f32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_rcp_iflag_f32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_rcp_iflag_f32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_rcp_iflag_f32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_rcp_iflag_f32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_rcp_iflag_f32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_rcp_iflag_f32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_rcp_iflag_f32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_rcp_iflag_f32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_rcp_iflag_f32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_rcp_iflag_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_rcp_iflag_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_rcp_iflag_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_rcp_iflag_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x56,0xfe,0x7f,0xff,0x6f,0x35,0x30] + +v_rndne_f16 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_rndne_f16 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_rndne_f16 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_rndne_f16 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_rndne_f16 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_rndne_f16 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_rndne_f16 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_rndne_f16 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_rndne_f16 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_rndne_f16 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_rndne_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_rndne_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_rndne_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_rndne_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xbc,0xfe,0x7e,0x7f,0x6f,0x35,0x30] + +v_rndne_f32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_rndne_f32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_rndne_f32 v5, v1 row_mirror +// GFX12: 
encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_rndne_f32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_rndne_f32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_rndne_f32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_rndne_f32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_rndne_f32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_rndne_f32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_rndne_f32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_rndne_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_rndne_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_rndne_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_rndne_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x46,0xfe,0x7f,0xff,0x6f,0x35,0x30] + +v_rsq_f16 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_rsq_f16 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_rsq_f16 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_rsq_f16 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_rsq_f16 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_rsq_f16 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_rsq_f16 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_rsq_f16 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_rsq_f16 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_rsq_f16 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_rsq_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_rsq_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_rsq_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_rsq_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xac,0xfe,0x7e,0x7f,0x6f,0x35,0x30] + +v_rsq_f32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_rsq_f32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_rsq_f32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_rsq_f32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_rsq_f32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_rsq_f32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_rsq_f32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_rsq_f32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_rsq_f32 v5, v1 
row_ror:1 +// GFX12: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_rsq_f32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_rsq_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_rsq_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_rsq_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_rsq_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x5c,0xfe,0x7f,0xff,0x6f,0x35,0x30] + +v_sat_pk_u8_i16 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_sat_pk_u8_i16 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_sat_pk_u8_i16 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_sat_pk_u8_i16 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_sat_pk_u8_i16 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_sat_pk_u8_i16 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_sat_pk_u8_i16 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_sat_pk_u8_i16 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_sat_pk_u8_i16 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_sat_pk_u8_i16 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_sat_pk_u8_i16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_sat_pk_u8_i16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_sat_pk_u8_i16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_sat_pk_u8_i16 v127, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xc4,0xfe,0x7e,0xff,0x6f,0x05,0x30] + +v_sin_f16 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_sin_f16 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_sin_f16 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_sin_f16 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_sin_f16 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_sin_f16 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_sin_f16 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_sin_f16 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_sin_f16 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_sin_f16 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_sin_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_sin_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_sin_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 
fi:0 +// GFX12: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_sin_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xc0,0xfe,0x7e,0x7f,0x6f,0x35,0x30] + +v_sin_f32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_sin_f32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_sin_f32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_sin_f32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_sin_f32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_sin_f32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_sin_f32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_sin_f32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_sin_f32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_sin_f32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_sin_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_sin_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_sin_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_sin_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x6a,0xfe,0x7f,0xff,0x6f,0x35,0x30] + +v_sqrt_f16 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_sqrt_f16 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_sqrt_f16 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_sqrt_f16 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_sqrt_f16 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_sqrt_f16 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_sqrt_f16 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_sqrt_f16 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_sqrt_f16 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_sqrt_f16 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_sqrt_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_sqrt_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_sqrt_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_sqrt_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xaa,0xfe,0x7e,0x7f,0x6f,0x35,0x30] + +v_sqrt_f32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_sqrt_f32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_sqrt_f32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_sqrt_f32 v5, v1 
row_half_mirror +// GFX12: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_sqrt_f32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_sqrt_f32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_sqrt_f32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_sqrt_f32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_sqrt_f32 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_sqrt_f32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_sqrt_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_sqrt_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_sqrt_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_sqrt_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x66,0xfe,0x7f,0xff,0x6f,0x35,0x30] + +v_trunc_f16 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_trunc_f16 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_trunc_f16 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_trunc_f16 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_trunc_f16 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_trunc_f16 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_trunc_f16 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_trunc_f16 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_trunc_f16 v5, v1 row_ror:1 +// GFX12: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_trunc_f16 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_trunc_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_trunc_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_trunc_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_trunc_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xba,0xfe,0x7e,0x7f,0x6f,0x35,0x30] + +v_trunc_f32 v5, v1 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_trunc_f32 v5, v1 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_trunc_f32 v5, v1 row_mirror +// GFX12: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_trunc_f32 v5, v1 row_half_mirror +// GFX12: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_trunc_f32 v5, v1 row_shl:1 +// GFX12: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_trunc_f32 v5, v1 row_shl:15 +// GFX12: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_trunc_f32 v5, v1 row_shr:1 +// GFX12: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_trunc_f32 v5, v1 row_shr:15 +// GFX12: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_trunc_f32 v5, v1 row_ror:1 +// GFX12: encoding: 
[0xfa,0x42,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_trunc_f32 v5, v1 row_ror:15 +// GFX12: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +v_trunc_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x50,0x01,0xff] + +v_trunc_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +v_trunc_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x60,0x09,0x13] + +v_trunc_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0x42,0xfe,0x7f,0xff,0x6f,0x35,0x30] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s index d5cafcd4c38741..323439b6edd537 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s @@ -1,5 +1,7 @@ -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s + +// this file will be converted to true16 format when more true16 instructions are supported v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0] // GFX12: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8-fake16.s new file mode 100644 index 00000000000000..82acbd421bae29 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8-fake16.s @@ -0,0 +1,617 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s + +v_bfrev_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x70,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_bfrev_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x70,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_bfrev_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x70,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_ceil_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xb8,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_ceil_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0xb8,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_ceil_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xb8,0xfe,0x7e,0x7f,0x00,0x00,0x00] + +v_ceil_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x44,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_ceil_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x44,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_ceil_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x44,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_cls_i32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x76,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cls_i32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x76,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cls_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x76,0xfe,0x7f,0xff,0x00,0x00,0x00] + 
+v_clz_i32_u32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x72,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_clz_i32_u32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x72,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_clz_i32_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x72,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_cos_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xc2,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cos_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0xc2,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cos_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xc2,0xfe,0x7e,0x7f,0x00,0x00,0x00] + +v_cos_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x6c,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cos_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x6c,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cos_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x6c,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_ctz_i32_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x74,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_ctz_i32_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x74,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_ctz_i32_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x74,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_cvt_f32_fp8 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX12: encoding: [0xe9,0xd8,0x0a,0x7e,0x01,0x88,0xc6,0xfa] + +v_cvt_f32_fp8 v1, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xd8,0x02,0x7e,0x03,0x77,0x39,0x05] + +v_cvt_f32_bf8 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX12: encoding: [0xe9,0xda,0x0a,0x7e,0x01,0x88,0xc6,0xfa] + +v_cvt_f32_bf8 v1, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xda,0x02,0x7e,0x03,0x77,0x39,0x05] + +v_cvt_f16_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x14,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_f16_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x14,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_f16_f32 v127, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x14,0xfe,0x7e,0xff,0x00,0x00,0x00] + +v_cvt_f16_i16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xa2,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_f16_i16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0xa2,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_f16_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xa2,0xfe,0x7e,0x7f,0x00,0x00,0x00] + +v_cvt_f16_u16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xa0,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_f16_u16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0xa0,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_f16_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xa0,0xfe,0x7e,0x7f,0x00,0x00,0x00] + +v_cvt_f32_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x16,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_f32_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x16,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_f32_f16 v255, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x16,0xfe,0x7f,0x7f,0x00,0x00,0x00] + +v_cvt_f32_i32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x0a,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_f32_i32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x0a,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_f32_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x0a,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_cvt_f32_u32 v5, v1 
dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x0c,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_f32_u32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x0c,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_f32_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x0c,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_cvt_f32_ubyte0 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x22,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte0 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x22,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte0 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x22,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_cvt_f32_ubyte1 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x24,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte1 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x24,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte1 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x24,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_cvt_f32_ubyte2 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x26,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte2 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x26,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte2 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x26,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_cvt_f32_ubyte3 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x28,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte3 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x28,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte3 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x28,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_cvt_floor_i32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x1a,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_floor_i32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x1a,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_floor_i32_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x1a,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_cvt_flr_i32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x1a,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_flr_i32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x1a,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_flr_i32_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x1a,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_cvt_i16_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xa6,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_i16_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0xa6,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_i16_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xa6,0xfe,0x7e,0x7f,0x00,0x00,0x00] + +v_cvt_i32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x10,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_i32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x10,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_i32_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x10,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_cvt_i32_i16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xd4,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_i32_i16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0xd4,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_i32_i16 v255, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xd4,0xfe,0x7f,0x7f,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
encoding: [0xe9,0x18,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_nearest_i32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x18,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_nearest_i32_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x18,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_cvt_norm_i16_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xc6,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_norm_i16_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0xc6,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_norm_i16_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xc6,0xfe,0x7e,0x7f,0x00,0x00,0x00] + +v_cvt_norm_u16_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xc8,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_norm_u16_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0xc8,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_norm_u16_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xc8,0xfe,0x7e,0x7f,0x00,0x00,0x00] + +v_cvt_off_f32_i4 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x1c,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_off_f32_i4 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x1c,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_off_f32_i4 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x1c,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x18,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_rpi_i32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x18,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_rpi_i32_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x18,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_cvt_u16_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xa4,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_u16_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0xa4,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_u16_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xa4,0xfe,0x7e,0x7f,0x00,0x00,0x00] + +v_cvt_u32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x0e,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_u32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x0e,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_u32_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x0e,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_cvt_u32_u16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xd6,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_u32_u16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0xd6,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_cvt_u32_u16 v255, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xd6,0xfe,0x7f,0x7f,0x00,0x00,0x00] + +v_exp_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xb0,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_exp_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0xb0,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_exp_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xb0,0xfe,0x7e,0x7f,0x00,0x00,0x00] + +v_exp_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x4a,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_exp_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x4a,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_exp_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x4a,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_ffbh_i32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x76,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_ffbh_i32 v5, v1 
dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x76,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_ffbh_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x76,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_ffbh_u32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x72,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_ffbh_u32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x72,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_ffbh_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x72,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_ffbl_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x74,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_ffbl_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x74,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_ffbl_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x74,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_floor_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xb6,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_floor_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0xb6,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_floor_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xb6,0xfe,0x7e,0x7f,0x00,0x00,0x00] + +v_floor_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x48,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_floor_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x48,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_floor_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x48,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_fract_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xbe,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_fract_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0xbe,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_fract_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xbe,0xfe,0x7e,0x7f,0x00,0x00,0x00] + +v_fract_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x40,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_fract_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x40,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_fract_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x40,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_frexp_exp_i16_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xb4,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_frexp_exp_i16_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0xb4,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_frexp_exp_i16_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xb4,0xfe,0x7e,0x7f,0x00,0x00,0x00] + +v_frexp_exp_i32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x7e,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_frexp_exp_i32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x7e,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_frexp_exp_i32_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x7e,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_frexp_mant_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xb2,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_frexp_mant_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0xb2,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_frexp_mant_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xb2,0xfe,0x7e,0x7f,0x00,0x00,0x00] + +v_frexp_mant_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x80,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_frexp_mant_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x80,0x0a,0x7e,0x01,0x77,0x39,0x05] + 
+v_frexp_mant_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x80,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_log_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xae,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_log_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0xae,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_log_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xae,0xfe,0x7e,0x7f,0x00,0x00,0x00] + +v_log_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x4e,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_log_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x4e,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_log_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x4e,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_mov_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x02,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_mov_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x02,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_mov_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x02,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_movreld_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x84,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_movreld_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x84,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_movreld_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x84,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_movrels_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x86,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_movrels_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x86,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_movrels_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x86,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_movrelsd_2_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x90,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_movrelsd_2_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x90,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_movrelsd_2_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x90,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_movrelsd_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x88,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_movrelsd_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x88,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_movrelsd_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x88,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_not_b16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xd2,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_not_b16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0xd2,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_not_b16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xd2,0xfe,0x7e,0x7f,0x00,0x00,0x00] + +v_not_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x6e,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_not_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x6e,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_not_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x6e,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_rcp_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xa8,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_rcp_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0xa8,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_rcp_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xa8,0xfe,0x7e,0x7f,0x00,0x00,0x00] + +v_rcp_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
encoding: [0xe9,0x54,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_rcp_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x54,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_rcp_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x54,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_rcp_iflag_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x56,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_rcp_iflag_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x56,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_rcp_iflag_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x56,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_rndne_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xbc,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_rndne_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0xbc,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_rndne_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xbc,0xfe,0x7e,0x7f,0x00,0x00,0x00] + +v_rndne_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x46,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_rndne_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x46,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_rndne_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x46,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_rsq_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xac,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_rsq_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0xac,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_rsq_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xac,0xfe,0x7e,0x7f,0x00,0x00,0x00] + +v_rsq_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x5c,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_rsq_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x5c,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_rsq_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x5c,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_sat_pk_u8_i16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xc4,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_sat_pk_u8_i16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0xc4,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_sat_pk_u8_i16 v127, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xc4,0xfe,0x7e,0xff,0x00,0x00,0x00] + +v_sin_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xc0,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_sin_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0xc0,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_sin_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xc0,0xfe,0x7e,0x7f,0x00,0x00,0x00] + +v_sin_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x6a,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_sin_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x6a,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_sin_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x6a,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_sqrt_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xaa,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_sqrt_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0xaa,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_sqrt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xaa,0xfe,0x7e,0x7f,0x00,0x00,0x00] + +v_sqrt_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x66,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_sqrt_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x66,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_sqrt_f32 
v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x66,0xfe,0x7f,0xff,0x00,0x00,0x00] + +v_trunc_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0xba,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_trunc_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0xba,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_trunc_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xba,0xfe,0x7e,0x7f,0x00,0x00,0x00] + +v_trunc_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x42,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_trunc_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x42,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_trunc_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0x42,0xfe,0x7f,0xff,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s index 4c884018bc5a86..fa3234d8f9b2cd 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s @@ -1,5 +1,7 @@ -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s + +// this file will be converted to true16 format when more true16 instructions are supported v_bfrev_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX12: encoding: [0xe9,0x70,0x0a,0x7e,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_err-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_err-fake16.s new file mode 100644 index 00000000000000..d3aa94d373e80f --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_err-fake16.s @@ -0,0 +1,505 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12 --implicit-check-not=error %s + +v_ceil_f16_e32 v128, 0xfe0b +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_ceil_f16_e32 v255, v1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_ceil_f16_e32 v5, v199 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cos_f16_e32 v128, 0xfe0b +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cos_f16_e32 v255, v1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cos_f16_e32 v5, v199 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_f16_f32_e32 v128, 0xaf123456 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_f16_f32_e32 v255, v1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_f16_f32_e32 v255, v255 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_f16_i16_e32 v128, 0xfe0b +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_f16_i16_e32 v255, v1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_f16_i16_e32 v5, v199 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cvt_f16_u16_e32 v128, 0xfe0b +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_f16_u16_e32 v255, v1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_f16_u16_e32 v5, v199 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_f32_f16_e32 v5, v199 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_i16_f16_e32 v128, 0xfe0b +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_i16_f16_e32 v255, v1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_i16_f16_e32 v5, v199 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_i32_i16_e32 v5, v199 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_norm_i16_f16_e32 v128, 0xfe0b +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_norm_i16_f16_e32 v255, v1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_norm_i16_f16_e32 v5, v199 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_norm_u16_f16_e32 v128, 0xfe0b +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_norm_u16_f16_e32 v255, v1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_norm_u16_f16_e32 v5, v199 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_u16_f16_e32 v128, 0xfe0b +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_u16_f16_e32 v255, v1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_u16_f16_e32 v5, v199 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_u32_u16_e32 v5, v199 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_exp_f16_e32 v128, 0xfe0b +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_exp_f16_e32 v255, v1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_exp_f16_e32 v5, v199 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_floor_f16_e32 v128, 0xfe0b +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_floor_f16_e32 v255, v1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_floor_f16_e32 v5, v199 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_fract_f16_e32 v128, 0xfe0b +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_fract_f16_e32 v255, v1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_fract_f16_e32 v5, v199 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_frexp_exp_i16_f16_e32 v128, 0xfe0b +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_frexp_exp_i16_f16_e32 v255, v1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_frexp_exp_i16_f16_e32 v5, v199 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands 
are not valid for this GPU or mode + +v_frexp_mant_f16_e32 v128, 0xfe0b +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_frexp_mant_f16_e32 v255, v1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_frexp_mant_f16_e32 v5, v199 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_log_f16_e32 v128, 0xfe0b +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_log_f16_e32 v255, v1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_log_f16_e32 v5, v199 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_not_b16_e32 v128, 0xfe0b +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_not_b16_e32 v255, v1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_not_b16_e32 v5, v199 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_rcp_f16_e32 v128, 0xfe0b +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_rcp_f16_e32 v255, v1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_rcp_f16_e32 v5, v199 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_rndne_f16_e32 v128, 0xfe0b +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_rndne_f16_e32 v255, v1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_rndne_f16_e32 v5, v199 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_rsq_f16_e32 v128, 0xfe0b +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_rsq_f16_e32 v255, v1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_rsq_f16_e32 v5, v199 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sat_pk_u8_i16_e32 v199, v5 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sin_f16_e32 v128, 0xfe0b +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sin_f16_e32 v255, v1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sin_f16_e32 v5, v199 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sqrt_f16_e32 v128, 0xfe0b +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sqrt_f16_e32 v255, v1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sqrt_f16_e32 v5, v199 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_trunc_f16_e32 v128, 0xfe0b +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_trunc_f16_e32 v255, v1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_trunc_f16_e32 v5, v199 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_ceil_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_ceil_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cos_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX12: 
:[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cos_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v128, 0xaf123456 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v255, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_f16_i16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_f16_i16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_f16_u16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_f16_u16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_f32_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_i16_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_i16_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_i32_i16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_norm_i16_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_norm_i16_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_norm_u16_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_norm_u16_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_u16_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_u16_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_u32_u16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_exp_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_exp_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_floor_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_floor_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_fract_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_fract_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_frexp_exp_i16_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_frexp_exp_i16_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_frexp_mant_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: 
error: invalid operand for instruction + +v_frexp_mant_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_log_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_log_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_not_b16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_not_b16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_rcp_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_rcp_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_rndne_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_rndne_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_rsq_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_rsq_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sat_pk_u8_i16_e32 v199, v5 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sin_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sin_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sqrt_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sqrt_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_trunc_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_trunc_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_ceil_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_ceil_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cos_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cos_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v128, 0xaf123456 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v255, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_f16_i16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_f16_i16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_f16_u16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_f16_u16_e32 v5, v199 
dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_f32_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_i16_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_i16_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_i32_i16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_norm_i16_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_norm_i16_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_norm_u16_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_norm_u16_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_u16_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_u16_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_u32_u16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_exp_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_exp_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_floor_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_floor_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_fract_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_fract_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_frexp_exp_i16_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_frexp_exp_i16_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_frexp_mant_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_frexp_mant_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_log_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_log_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_not_b16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_not_b16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_rcp_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_rcp_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_rndne_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_rndne_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_rsq_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_rsq_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sat_pk_u8_i16_e32 v199, v5 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sin_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sin_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sqrt_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sqrt_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_trunc_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_trunc_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_err.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_err.s index 37edf627e3667a..46a865b0b24693 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_err.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_err.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12 --implicit-check-not=error %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12 --implicit-check-not=error %s v_ceil_f16_e32 v128, 0xfe0b // GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode From 6aa723daa9d9c54c597788d384b41dd735359316 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Thu, 1 Aug 2024 01:49:38 +0800 Subject: [PATCH 008/114] [TLI] Add support for nan libfunc (#101356) Reference: https://en.cppreference.com/w/cpp/numeric/math/nan --- .../llvm/Analysis/TargetLibraryInfo.def | 15 ++++++++++++++ llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 3 +++ .../Transforms/InferFunctionAttrs/annotate.ll | 11 +++++++++- .../tools/llvm-tli-checker/ps4-tli-check.yaml | 20 +++++++++++++++---- .../Analysis/TargetLibraryInfoTest.cpp | 3 +++ 5 files changed, 47 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def index e9f3b7fcd99eb5..754f09c19fb357 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def @@ -1807,6 +1807,21 @@ TLI_DEFINE_ENUM_INTERNAL(modfl) TLI_DEFINE_STRING_INTERNAL("modfl") TLI_DEFINE_SIG_INTERNAL(LDbl, LDbl, Ptr) +/// double nan(const char *arg); +TLI_DEFINE_ENUM_INTERNAL(nan) +TLI_DEFINE_STRING_INTERNAL("nan") +TLI_DEFINE_SIG_INTERNAL(Dbl, Ptr) + +/// float nanf(const char *arg); +TLI_DEFINE_ENUM_INTERNAL(nanf) +TLI_DEFINE_STRING_INTERNAL("nanf") +TLI_DEFINE_SIG_INTERNAL(Flt, Ptr) + +/// long double nanl(const char *arg); +TLI_DEFINE_ENUM_INTERNAL(nanl) +TLI_DEFINE_STRING_INTERNAL("nanl") +TLI_DEFINE_SIG_INTERNAL(LDbl, Ptr) + /// double nearbyint(double x); TLI_DEFINE_ENUM_INTERNAL(nearbyint) 
TLI_DEFINE_STRING_INTERNAL("nearbyint") diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 898e5b0f418120..30a343b2c564e8 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -279,6 +279,9 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setNonLazyBind(F); switch (TheLibFunc) { + case LibFunc_nan: + case LibFunc_nanf: + case LibFunc_nanl: case LibFunc_strlen: case LibFunc_strnlen: case LibFunc_wcslen: diff --git a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll index d54290fd3869c8..3b914dc29ca41c 100644 --- a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll +++ b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll @@ -717,6 +717,15 @@ declare float @modff(float, ptr) ; CHECK: declare x86_fp80 @modfl(x86_fp80, ptr nocapture) [[ARGMEMONLY_NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @modfl(x86_fp80, ptr) +; CHECK: declare double @nan(ptr nocapture) [[ARGMEMONLY_NOFREE_NOUNWIND_READONLY_WILLRETURN:#[0-9]+]] +declare double @nan(ptr) + +; CHECK: declare float @nanf(ptr nocapture) [[ARGMEMONLY_NOFREE_NOUNWIND_READONLY_WILLRETURN]] +declare float @nanf(ptr) + +; CHECK: declare x86_fp80 @nanl(ptr nocapture) [[ARGMEMONLY_NOFREE_NOUNWIND_READONLY_WILLRETURN]] +declare x86_fp80 @nanl(ptr) + ; CHECK: declare double @nearbyint(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @nearbyint(double) @@ -956,7 +965,7 @@ declare ptr @strncpy(ptr, ptr, i64) ; CHECK: declare noalias ptr @strndup(ptr nocapture readonly, i64 noundef) [[INACCESSIBLEMEMORARGONLY_NOFREE_NOUNWIND_WILLRETURN_FAMILY_MALLOC]] declare ptr @strndup(ptr, i64) -; CHECK: declare i64 @strnlen(ptr nocapture, i64) [[ARGMEMONLY_NOFREE_NOUNWIND_READONLY_WILLRETURN:#[0-9]+]] +; CHECK: declare i64 @strnlen(ptr nocapture, i64) [[ARGMEMONLY_NOFREE_NOUNWIND_READONLY_WILLRETURN]] declare i64 @strnlen(ptr, i64) ; CHECK: declare ptr @strpbrk(ptr, ptr nocapture) [[ARGMEMONLY_NOFREE_NOUNWIND_READONLY_WILLRETURN]] diff --git a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml index 19e18e09b76d07..81f2c9c55b54dd 100644 --- a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml +++ b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml @@ -34,21 +34,21 @@ # # CHECK: << Total TLI yes SDK no: 8 # CHECK: >> Total TLI no SDK yes: 0 -# CHECK: == Total TLI yes SDK yes: 245 +# CHECK: == Total TLI yes SDK yes: 248 # # WRONG_DETAIL: << TLI yes SDK no : '_ZdaPv' aka operator delete[](void*) # WRONG_DETAIL: >> TLI no SDK yes: '_ZdaPvj' aka operator delete[](void*, unsigned int) # WRONG_DETAIL-COUNT-8: << TLI yes SDK no : {{.*}}__hot_cold_t # WRONG_SUMMARY: << Total TLI yes SDK no: 9{{$}} # WRONG_SUMMARY: >> Total TLI no SDK yes: 1{{$}} -# WRONG_SUMMARY: == Total TLI yes SDK yes: 244 +# WRONG_SUMMARY: == Total TLI yes SDK yes: 247 # ## The -COUNT suffix doesn't care if there are too many matches, so check ## the exact count first; the two directives should add up to that. ## Yes, this means additions to TLI will fail this test, but the argument ## to -COUNT can't be an expression. 
-# AVAIL: TLI knows 486 symbols, 253 available -# AVAIL-COUNT-253: {{^}} available +# AVAIL: TLI knows 489 symbols, 256 available +# AVAIL-COUNT-256: {{^}} available # AVAIL-NOT: {{^}} available # UNAVAIL-COUNT-233: not available # UNAVAIL-NOT: not available @@ -703,6 +703,18 @@ DynamicSymbols: Type: STT_FUNC Section: .text Binding: STB_GLOBAL + - Name: nan + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + - Name: nanf + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + - Name: nanl + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL - Name: nearbyint Type: STT_FUNC Section: .text diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp index b8125b099343b6..d344ebe676799c 100644 --- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp +++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp @@ -255,6 +255,9 @@ TEST_F(TargetLibraryInfoTest, ValidProto) { "declare double @modf(double, double*)\n" "declare float @modff(float, float*)\n" "declare x86_fp80 @modfl(x86_fp80, x86_fp80*)\n" + "declare double @nan(ptr)\n" + "declare float @nanf(ptr)\n" + "declare x86_fp80 @nanl(ptr)\n" "declare double @nearbyint(double)\n" "declare float @nearbyintf(float)\n" "declare x86_fp80 @nearbyintl(x86_fp80)\n" From 0a01e8ff530ab15277aa9fad5361297d7b55e247 Mon Sep 17 00:00:00 2001 From: Alexandre Perez Date: Wed, 31 Jul 2024 10:57:40 -0700 Subject: [PATCH 009/114] [lldb] Allow mapping object file paths (#101361) This introduces a `target.object-map` which allows us to remap module locations, much in the same way as source mapping works today. This is useful, for instance, when debugging coredumps, so we can replace some of the locations where LLDB attempts to load shared libraries and executables from, without having to setup an entire sysroot. --- lldb/include/lldb/Target/Target.h | 2 ++ lldb/source/Target/Target.cpp | 21 +++++++++++++-- lldb/source/Target/TargetProperties.td | 3 +++ .../postmortem/elf-core/TestLinuxCore.py | 26 +++++++++++++++++++ 4 files changed, 50 insertions(+), 2 deletions(-) diff --git a/lldb/include/lldb/Target/Target.h b/lldb/include/lldb/Target/Target.h index 5d5ae1bfcd3bdc..119dff4d498199 100644 --- a/lldb/include/lldb/Target/Target.h +++ b/lldb/include/lldb/Target/Target.h @@ -141,6 +141,8 @@ class TargetProperties : public Properties { PathMappingList &GetSourcePathMap() const; + PathMappingList &GetObjectPathMap() const; + bool GetAutoSourceMapRelative() const; FileSpecList GetExecutableSearchPaths(); diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index ec0da8a1378a84..129683c43f0c1a 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -2155,12 +2155,21 @@ bool Target::ReadPointerFromMemory(const Address &addr, Status &error, return false; } -ModuleSP Target::GetOrCreateModule(const ModuleSpec &module_spec, bool notify, - Status *error_ptr) { +ModuleSP Target::GetOrCreateModule(const ModuleSpec &orig_module_spec, + bool notify, Status *error_ptr) { ModuleSP module_sp; Status error; + // Apply any remappings specified in target.object-map: + ModuleSpec module_spec(orig_module_spec); + PathMappingList &obj_mapping = GetObjectPathMap(); + if (std::optional remapped_obj_file = + obj_mapping.RemapPath(orig_module_spec.GetFileSpec().GetPath(), + true /* only_if_exists */)) { + module_spec.GetFileSpec().SetPath(remapped_obj_file->GetPath()); + } + // First see if we already have this module in our module list. 
If we do, // then we're done, we don't need to consult the shared modules list. But // only do this if we are passed a UUID. @@ -4459,6 +4468,14 @@ PathMappingList &TargetProperties::GetSourcePathMap() const { return option_value->GetCurrentValue(); } +PathMappingList &TargetProperties::GetObjectPathMap() const { + const uint32_t idx = ePropertyObjectMap; + OptionValuePathMappings *option_value = + m_collection_sp->GetPropertyAtIndexAsOptionValuePathMappings(idx); + assert(option_value); + return option_value->GetCurrentValue(); +} + bool TargetProperties::GetAutoSourceMapRelative() const { const uint32_t idx = ePropertyAutoSourceMapRelative; return GetPropertyAtIndexAs( diff --git a/lldb/source/Target/TargetProperties.td b/lldb/source/Target/TargetProperties.td index 7f79218e0a6a4d..4404a454922549 100644 --- a/lldb/source/Target/TargetProperties.td +++ b/lldb/source/Target/TargetProperties.td @@ -46,6 +46,9 @@ let Definition = "target" in { def SourceMap: Property<"source-map", "PathMap">, DefaultStringValue<"">, Desc<"Source path remappings apply substitutions to the paths of source files, typically needed to debug from a different host than the one that built the target. The source-map property consists of an array of pairs, the first element is a path prefix, and the second is its replacement. The syntax is `prefix1 replacement1 prefix2 replacement2...`. The pairs are checked in order, the first prefix that matches is used, and that prefix is substituted with the replacement. A common pattern is to use source-map in conjunction with the clang -fdebug-prefix-map flag. In the build, use `-fdebug-prefix-map=/path/to/build_dir=.` to rewrite the host specific build directory to `.`. Then for debugging, use `settings set target.source-map . /path/to/local_dir` to convert `.` to a valid local path.">; + def ObjectMap: Property<"object-map", "PathMap">, + DefaultStringValue<"">, + Desc<"Object path remappings apply substitutions to the paths of object files, typically needed to debug from a different host than the one that built the target. The object-map property consists of an array of pairs, the first element is a path prefix, and the second is its replacement. The syntax is `prefix1 replacement1 prefix2 replacement2...`. The pairs are checked in order, the first prefix that matches is used, and that prefix is substituted with the replacement.">; def AutoSourceMapRelative: Property<"auto-source-map-relative", "Boolean">, DefaultTrue, Desc<"Automatically deduce source path mappings based on source file breakpoint resolution. 
It only deduces source mapping if source file breakpoint request is using full path and if the debug info contains relative paths.">; diff --git a/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py b/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py index 0afac26367de06..0b9d17bc9f45e6 100644 --- a/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py +++ b/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py @@ -249,6 +249,32 @@ def test_i386_sysroot(self): self.dbg.DeleteTarget(target) + def test_object_map(self): + """Test that lldb can find the exe for an i386 linux core file using the object map.""" + + # Copy linux-i386.out to lldb_i386_object_map/a.out + tmp_object_map_root = os.path.join(self.getBuildDir(), "lldb_i386_object_map") + executable = os.path.join(tmp_object_map_root, "a.out") + lldbutil.mkdir_p(os.path.dirname(executable)) + shutil.copyfile("linux-i386.out", executable) + + # Replace the original module path at /home/labath/test and load the core + self.runCmd( + "settings set target.object-map /home/labath/test {}".format( + tmp_object_map_root + ) + ) + + target = self.dbg.CreateTarget(None) + process = target.LoadCore("linux-i386.core") + + # Check that we did load the mapped executable + exe_module_spec = process.GetTarget().GetModuleAtIndex(0).GetFileSpec() + self.assertTrue(exe_module_spec.fullpath.startswith(tmp_object_map_root)) + + self.check_all(process, self._i386_pid, self._i386_regions, "a.out") + self.dbg.DeleteTarget(target) + @skipIfLLVMTargetMissing("X86") @skipIfWindows def test_x86_64_sysroot(self): From 6d3317e5389d3041c4a0ae588919d7a2fe2764f0 Mon Sep 17 00:00:00 2001 From: vporpo Date: Wed, 31 Jul 2024 10:59:14 -0700 Subject: [PATCH 010/114] [SandboxIR] Implement SIToFPInst (#101374) This patch implements sandboxir::SIToFPInst which mirrors llvm::SIToFPInst. 
--- llvm/include/llvm/SandboxIR/SandboxIR.h | 23 +++++++ llvm/lib/SandboxIR/SandboxIR.cpp | 28 +++++++++ llvm/unittests/SandboxIR/SandboxIRTest.cpp | 70 ++++++++++++++++++++++ 3 files changed, 121 insertions(+) diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h index a0a31e659dcc57..3a23eb761f5cfd 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIR.h +++ b/llvm/include/llvm/SandboxIR/SandboxIR.h @@ -39,6 +39,8 @@ // | +- IntToPtrInst // | | // | +- PtrToIntInst +// | | +// | +- SIToFPInst // | // +- CallBase -----------+- CallBrInst // | | @@ -1378,6 +1380,27 @@ class CastInst : public Instruction { #endif }; +class SIToFPInst final : public CastInst { +public: + static Value *create(Value *Src, Type *DestTy, BBIterator WhereIt, + BasicBlock *WhereBB, Context &Ctx, + const Twine &Name = ""); + static Value *create(Value *Src, Type *DestTy, Instruction *InsertBefore, + Context &Ctx, const Twine &Name = ""); + static Value *create(Value *Src, Type *DestTy, BasicBlock *InsertAtEnd, + Context &Ctx, const Twine &Name = ""); + + static bool classof(const Value *From) { + if (auto *I = dyn_cast(From)) + return I->getOpcode() == Opcode::SIToFP; + return false; + } +#ifndef NDEBUG + void dump(raw_ostream &OS) const final; + LLVM_DUMP_METHOD void dump() const final; +#endif // NDEBUG +}; + class FPToUIInst final : public CastInst { public: static Value *create(Value *Src, Type *DestTy, BBIterator WhereIt, diff --git a/llvm/lib/SandboxIR/SandboxIR.cpp b/llvm/lib/SandboxIR/SandboxIR.cpp index a6de44679d0968..1ea22c3a8b48e5 100644 --- a/llvm/lib/SandboxIR/SandboxIR.cpp +++ b/llvm/lib/SandboxIR/SandboxIR.cpp @@ -1143,6 +1143,34 @@ void CastInst::dump() const { } #endif // NDEBUG +Value *SIToFPInst::create(Value *Src, Type *DestTy, BBIterator WhereIt, + BasicBlock *WhereBB, Context &Ctx, + const Twine &Name) { + return CastInst::create(DestTy, Instruction::Opcode::SIToFP, Src, WhereIt, + WhereBB, Ctx, Name); +} +Value *SIToFPInst::create(Value *Src, Type *DestTy, Instruction *InsertBefore, + Context &Ctx, const Twine &Name) { + return create(Src, DestTy, InsertBefore->getIterator(), + InsertBefore->getParent(), Ctx, Name); +} +Value *SIToFPInst::create(Value *Src, Type *DestTy, BasicBlock *InsertAtEnd, + Context &Ctx, const Twine &Name) { + return create(Src, DestTy, InsertAtEnd->end(), InsertAtEnd, Ctx, Name); +} + +#ifndef NDEBUG +void SIToFPInst::dump(raw_ostream &OS) const { + dumpCommonPrefix(OS); + dumpCommonSuffix(OS); +} + +void SIToFPInst::dump() const { + dump(dbgs()); + dbgs() << "\n"; +} +#endif // NDEBUG + Value *FPToUIInst::create(Value *Src, Type *DestTy, BBIterator WhereIt, BasicBlock *WhereBB, Context &Ctx, const Twine &Name) { diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp index f405bc1fca7444..9d4fba404a43cf 100644 --- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp +++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp @@ -1528,6 +1528,7 @@ define void @foo(i32 %arg, float %farg, double %darg, ptr %ptr) { EXPECT_EQ(IntToPtr->getDestTy(), Tptr); auto *SIToFP = cast(&*It++); + EXPECT_TRUE(isa(SIToFP)); EXPECT_EQ(SIToFP->getOpcode(), sandboxir::Instruction::Opcode::SIToFP); EXPECT_EQ(SIToFP->getSrcTy(), Ti32); EXPECT_EQ(SIToFP->getDestTy(), Tfloat); @@ -1619,6 +1620,75 @@ define void @foo(i32 %arg, float %farg, double %darg, ptr %ptr) { } } +TEST_F(SandboxIRTest, SIToFPInst) { + parseIR(C, R"IR( +define void @foo(i32 %arg) { + %sitofp = sitofp i32 %arg to float + ret void +} +)IR"); + Function &LLVMF 
= *M->getFunction("foo"); + sandboxir::Context Ctx(C); + sandboxir::Function *F = Ctx.createFunction(&LLVMF); + unsigned ArgIdx = 0; + auto *Arg = F->getArg(ArgIdx++); + auto *BB = &*F->begin(); + auto It = BB->begin(); + Type *Ti32 = Type::getInt32Ty(C); + Type *Tfloat = Type::getFloatTy(C); + + auto *SIToFP = cast(&*It++); + EXPECT_EQ(SIToFP->getOpcode(), sandboxir::Instruction::Opcode::SIToFP); + EXPECT_EQ(SIToFP->getSrcTy(), Ti32); + EXPECT_EQ(SIToFP->getDestTy(), Tfloat); + auto *Ret = cast(&*It++); + + { + // Check create() WhereIt, WhereBB + auto *NewI = cast( + sandboxir::SIToFPInst::create(Arg, Tfloat, /*WhereIt=*/BB->end(), + /*WhereBB=*/BB, Ctx, "SIToFP")); + // Check getOpcode(). + EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::SIToFP); + // Check getSrcTy(). + EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); + // Check getDestTy(). + EXPECT_EQ(NewI->getDestTy(), Tfloat); + // Check instr position. + EXPECT_EQ(NewI->getNextNode(), nullptr); + EXPECT_EQ(NewI->getPrevNode(), Ret); + } + { + // Check create() InsertBefore. + auto *NewI = cast( + sandboxir::SIToFPInst::create(Arg, Tfloat, + /*InsertBefore=*/Ret, Ctx, "SIToFP")); + // Check getOpcode(). + EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::SIToFP); + // Check getSrcTy(). + EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); + // Check getDestTy(). + EXPECT_EQ(NewI->getDestTy(), Tfloat); + // Check instr position. + EXPECT_EQ(NewI->getNextNode(), Ret); + } + { + // Check create() InsertAtEnd. + auto *NewI = cast( + sandboxir::SIToFPInst::create(Arg, Tfloat, + /*InsertAtEnd=*/BB, Ctx, "SIToFP")); + // Check getOpcode(). + EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::SIToFP); + // Check getSrcTy(). + EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); + // Check getDestTy(). + EXPECT_EQ(NewI->getDestTy(), Tfloat); + // Check instr position. + EXPECT_EQ(NewI->getNextNode(), nullptr); + EXPECT_EQ(NewI->getParent(), BB); + } +} + TEST_F(SandboxIRTest, FPToUIInst) { parseIR(C, R"IR( define void @foo(float %arg) { From 496fedaccac569438ccb1fd65bf3b30eb5557350 Mon Sep 17 00:00:00 2001 From: ChiaHungDuan Date: Wed, 31 Jul 2024 11:02:48 -0700 Subject: [PATCH 011/114] Revert "[scudo] Separated committed and decommitted entries." 
(#101375) Reverts llvm/llvm-project#100818 --- compiler-rt/lib/scudo/standalone/secondary.h | 249 +++++++------------ 1 file changed, 92 insertions(+), 157 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h index 0f0c4ca3a197b5..d8505742d60544 100644 --- a/compiler-rt/lib/scudo/standalone/secondary.h +++ b/compiler-rt/lib/scudo/standalone/secondary.h @@ -180,14 +180,6 @@ template class NonZeroLengthArray { template class MapAllocatorCache { public: - typedef enum { COMMITTED = 0, DECOMMITTED = 1, NONE } EntryListT; - - // TODO: Refactor the intrusive list to support non-pointer link type - typedef struct { - u16 Head; - u16 Tail; - } ListInfo; - void getStats(ScopedString *Str) { ScopedLock L(Mutex); uptr Integral; @@ -205,18 +197,13 @@ template class MapAllocatorCache { SuccessfulRetrieves, CallsToRetrieve, Integral, Fractional); Str->append("Cache Entry Info (Most Recent -> Least Recent):\n"); - auto printList = [&](EntryListT ListType) REQUIRES(Mutex) { - for (u32 I = EntryLists[ListType].Head; I != CachedBlock::InvalidEntry; - I = Entries[I].Next) { - CachedBlock &Entry = Entries[I]; - Str->append(" StartBlockAddress: 0x%zx, EndBlockAddress: 0x%zx, " - "BlockSize: %zu %s\n", - Entry.CommitBase, Entry.CommitBase + Entry.CommitSize, - Entry.CommitSize, Entry.Time == 0 ? "[R]" : ""); - } - }; - printList(COMMITTED); - printList(DECOMMITTED); + for (u32 I = LRUHead; I != CachedBlock::InvalidEntry; I = Entries[I].Next) { + CachedBlock &Entry = Entries[I]; + Str->append(" StartBlockAddress: 0x%zx, EndBlockAddress: 0x%zx, " + "BlockSize: %zu %s\n", + Entry.CommitBase, Entry.CommitBase + Entry.CommitSize, + Entry.CommitSize, Entry.Time == 0 ? "[R]" : ""); + } } // Ensure the default maximum specified fits the array. @@ -240,10 +227,8 @@ template class MapAllocatorCache { setOption(Option::ReleaseInterval, static_cast(ReleaseToOsInterval)); // The cache is initially empty - EntryLists[COMMITTED].Head = CachedBlock::InvalidEntry; - EntryLists[COMMITTED].Tail = CachedBlock::InvalidEntry; - EntryLists[DECOMMITTED].Head = CachedBlock::InvalidEntry; - EntryLists[DECOMMITTED].Tail = CachedBlock::InvalidEntry; + LRUHead = CachedBlock::InvalidEntry; + LRUTail = CachedBlock::InvalidEntry; // Available entries will be retrieved starting from the beginning of the // Entries array @@ -325,19 +310,15 @@ template class MapAllocatorCache { // All excess entries are evicted from the cache while (needToEvict()) { // Save MemMaps of evicted entries to perform unmap outside of lock - EntryListT EvictionListType; - if (EntryLists[DECOMMITTED].Tail == CachedBlock::InvalidEntry) - EvictionListType = COMMITTED; - else - EvictionListType = DECOMMITTED; - remove(EntryLists[EvictionListType].Tail, EvictionListType); + EvictionMemMaps.push_back(Entries[LRUTail].MemMap); + remove(LRUTail); } - insert(Entry, (Entry.Time == 0) ? DECOMMITTED : COMMITTED); + insert(Entry); if (OldestTime == 0) OldestTime = Entry.Time; - } while (0); // ScopedLock L(Mutex); + } while (0); for (MemMapT &EvictMemMap : EvictionMemMaps) EvictMemMap.unmap(EvictMemMap.getBase(), EvictMemMap.getCapacity()); @@ -354,69 +335,56 @@ template class MapAllocatorCache { // 10% of the requested size proved to be the optimal choice for // retrieving cached blocks after testing several options. 
constexpr u32 FragmentedBytesDivisor = 10; + bool Found = false; CachedBlock Entry; uptr EntryHeaderPos = 0; - uptr OptimalFitIndex = CachedBlock::InvalidEntry; { ScopedLock L(Mutex); CallsToRetrieve++; if (EntriesCount == 0) return false; + u32 OptimalFitIndex = 0; uptr MinDiff = UINTPTR_MAX; - EntryListT OptimalFitListType = NONE; - auto FindAvailableEntry = [&](EntryListT ListType) REQUIRES(Mutex) { - for (uptr I = EntryLists[ListType].Head; I != CachedBlock::InvalidEntry; - I = Entries[I].Next) { - const uptr CommitBase = Entries[I].CommitBase; - const uptr CommitSize = Entries[I].CommitSize; - const uptr AllocPos = - roundDown(CommitBase + CommitSize - Size, Alignment); - const uptr HeaderPos = AllocPos - HeadersSize; - if (HeaderPos > CommitBase + CommitSize) - continue; - if (HeaderPos < CommitBase || - AllocPos > CommitBase + PageSize * MaxUnusedCachePages) - continue; - - const uptr Diff = HeaderPos - CommitBase; - // immediately use a cached block if it's size is close enough to - // the requested size. - const uptr MaxAllowedFragmentedBytes = - (CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor; - if (Diff <= MaxAllowedFragmentedBytes) { - OptimalFitIndex = I; - EntryHeaderPos = HeaderPos; - OptimalFitListType = ListType; - return Entries[OptimalFitIndex]; - } - - // keep track of the smallest cached block - // that is greater than (AllocSize + HeaderSize) - if (Diff > MinDiff) - continue; + for (u32 I = LRUHead; I != CachedBlock::InvalidEntry; + I = Entries[I].Next) { + const uptr CommitBase = Entries[I].CommitBase; + const uptr CommitSize = Entries[I].CommitSize; + const uptr AllocPos = + roundDown(CommitBase + CommitSize - Size, Alignment); + const uptr HeaderPos = AllocPos - HeadersSize; + if (HeaderPos > CommitBase + CommitSize) + continue; + if (HeaderPos < CommitBase || + AllocPos > CommitBase + PageSize * MaxUnusedCachePages) { + continue; + } + Found = true; + const uptr Diff = HeaderPos - CommitBase; + // immediately use a cached block if it's size is close enough to the + // requested size. 
+ const uptr MaxAllowedFragmentedBytes = + (CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor; + if (Diff <= MaxAllowedFragmentedBytes) { OptimalFitIndex = I; - MinDiff = Diff; - OptimalFitListType = ListType; EntryHeaderPos = HeaderPos; + break; } - CachedBlock FoundEntry; - if (OptimalFitIndex != CachedBlock::InvalidEntry) - FoundEntry = Entries[OptimalFitIndex]; - return FoundEntry; - }; - - // Prioritize valid fit from COMMITTED entries over - // optimal fit from DECOMMITTED entries - Entry = FindAvailableEntry(COMMITTED); - if (!Entry.isValid()) - Entry = FindAvailableEntry(DECOMMITTED); - - if (!Entry.isValid()) - return false; - - remove(OptimalFitIndex, OptimalFitListType); - SuccessfulRetrieves++; - } // ScopedLock L(Mutex); + // keep track of the smallest cached block + // that is greater than (AllocSize + HeaderSize) + if (Diff > MinDiff) + continue; + OptimalFitIndex = I; + MinDiff = Diff; + EntryHeaderPos = HeaderPos; + } + if (Found) { + Entry = Entries[OptimalFitIndex]; + remove(OptimalFitIndex); + SuccessfulRetrieves++; + } + } + if (!Found) + return false; *H = reinterpret_cast( LargeBlock::addHeaderTag(EntryHeaderPos)); @@ -480,15 +448,10 @@ template class MapAllocatorCache { Quarantine[I].invalidate(); } } - auto disableLists = [&](EntryListT EntryList) REQUIRES(Mutex) { - for (u32 I = EntryLists[COMMITTED].Head; I != CachedBlock::InvalidEntry; - I = Entries[I].Next) { - Entries[I].MemMap.setMemoryPermission(Entries[I].CommitBase, - Entries[I].CommitSize, 0); - } - }; - disableLists(COMMITTED); - disableLists(DECOMMITTED); + for (u32 I = LRUHead; I != CachedBlock::InvalidEntry; I = Entries[I].Next) { + Entries[I].MemMap.setMemoryPermission(Entries[I].CommitBase, + Entries[I].CommitSize, 0); + } QuarantinePos = -1U; } @@ -503,7 +466,7 @@ template class MapAllocatorCache { return (EntriesCount >= atomic_load_relaxed(&MaxEntriesCount)); } - void insert(const CachedBlock &Entry, EntryListT ListType) REQUIRES(Mutex) { + void insert(const CachedBlock &Entry) REQUIRES(Mutex) { DCHECK_LT(EntriesCount, atomic_load_relaxed(&MaxEntriesCount)); // Cache should be populated with valid entries when not empty @@ -512,92 +475,71 @@ template class MapAllocatorCache { u32 FreeIndex = AvailableHead; AvailableHead = Entries[AvailableHead].Next; + if (EntriesCount == 0) { + LRUTail = static_cast(FreeIndex); + } else { + // Check list order + if (EntriesCount > 1) + DCHECK_GE(Entries[LRUHead].Time, Entries[Entries[LRUHead].Next].Time); + Entries[LRUHead].Prev = static_cast(FreeIndex); + } + Entries[FreeIndex] = Entry; - pushFront(FreeIndex, ListType); + Entries[FreeIndex].Next = LRUHead; + Entries[FreeIndex].Prev = CachedBlock::InvalidEntry; + LRUHead = static_cast(FreeIndex); EntriesCount++; - if (Entries[EntryLists[ListType].Head].Next != CachedBlock::InvalidEntry) { - DCHECK_GE(Entries[EntryLists[ListType].Head].Time, - Entries[Entries[EntryLists[ListType].Head].Next].Time); - } // Availability stack should not have available entries when all entries // are in use if (EntriesCount == Config::getEntriesArraySize()) DCHECK_EQ(AvailableHead, CachedBlock::InvalidEntry); } - // Joins the entries adjacent to Entries[I], effectively - // unlinking Entries[I] from the list - void unlink(uptr I, EntryListT ListType) REQUIRES(Mutex) { - if (I == EntryLists[ListType].Head) - EntryLists[ListType].Head = Entries[I].Next; + void remove(uptr I) REQUIRES(Mutex) { + DCHECK(Entries[I].isValid()); + + Entries[I].invalidate(); + + if (I == LRUHead) + LRUHead = Entries[I].Next; else 
Entries[Entries[I].Prev].Next = Entries[I].Next; - if (I == EntryLists[ListType].Tail) - EntryLists[ListType].Tail = Entries[I].Prev; + if (I == LRUTail) + LRUTail = Entries[I].Prev; else Entries[Entries[I].Next].Prev = Entries[I].Prev; - } - // Invalidates Entries[I], removes Entries[I] from list, and pushes - // Entries[I] onto the stack of available entries - void remove(uptr I, EntryListT ListType) REQUIRES(Mutex) { - DCHECK(Entries[I].isValid()); - - Entries[I].invalidate(); - - unlink(I, ListType); Entries[I].Next = AvailableHead; AvailableHead = static_cast(I); EntriesCount--; // Cache should not have valid entries when not empty if (EntriesCount == 0) { - DCHECK_EQ(EntryLists[COMMITTED].Head, CachedBlock::InvalidEntry); - DCHECK_EQ(EntryLists[COMMITTED].Tail, CachedBlock::InvalidEntry); - DCHECK_EQ(EntryLists[DECOMMITTED].Head, CachedBlock::InvalidEntry); - DCHECK_EQ(EntryLists[DECOMMITTED].Tail, CachedBlock::InvalidEntry); + DCHECK_EQ(LRUHead, CachedBlock::InvalidEntry); + DCHECK_EQ(LRUTail, CachedBlock::InvalidEntry); } } - inline void pushFront(uptr I, EntryListT ListType) REQUIRES(Mutex) { - if (EntryLists[ListType].Tail == CachedBlock::InvalidEntry) - EntryLists[ListType].Tail = static_cast(I); - else - Entries[EntryLists[ListType].Head].Prev = static_cast(I); - - Entries[I].Next = EntryLists[ListType].Head; - Entries[I].Prev = CachedBlock::InvalidEntry; - EntryLists[ListType].Head = static_cast(I); - } - void empty() { MemMapT MapInfo[Config::getEntriesArraySize()]; uptr N = 0; { ScopedLock L(Mutex); - auto emptyList = [&](EntryListT ListType) REQUIRES(Mutex) { - for (uptr I = EntryLists[ListType].Head; - I != CachedBlock::InvalidEntry;) { - uptr ToRemove = I; - I = Entries[I].Next; - MapInfo[N] = Entries[ToRemove].MemMap; - remove(ToRemove, ListType); - N++; - } - }; - emptyList(COMMITTED); - emptyList(DECOMMITTED); + for (uptr I = 0; I < Config::getEntriesArraySize(); I++) { + if (!Entries[I].isValid()) + continue; + MapInfo[N] = Entries[I].MemMap; + remove(I); + N++; + } EntriesCount = 0; } for (uptr I = 0; I < N; I++) { MemMapT &MemMap = MapInfo[I]; MemMap.unmap(MemMap.getBase(), MemMap.getCapacity()); } - - for (uptr I = 0; I < Config::getEntriesArraySize(); I++) - DCHECK(!Entries[I].isValid()); } void releaseIfOlderThan(CachedBlock &Entry, u64 Time) REQUIRES(Mutex) { @@ -619,13 +561,8 @@ template class MapAllocatorCache { OldestTime = 0; for (uptr I = 0; I < Config::getQuarantineSize(); I++) releaseIfOlderThan(Quarantine[I], Time); - for (uptr I = 0; I < Config::getEntriesArraySize(); I++) { - if (Entries[I].isValid() && Entries[I].Time && Entries[I].Time <= Time) { - unlink(I, COMMITTED); - pushFront(I, DECOMMITTED); - } + for (uptr I = 0; I < Config::getEntriesArraySize(); I++) releaseIfOlderThan(Entries[I], Time); - } } HybridMutex Mutex; @@ -642,12 +579,10 @@ template class MapAllocatorCache { NonZeroLengthArray Quarantine GUARDED_BY(Mutex) = {}; - // EntryLists stores the head and tail indices of all - // lists being used to store valid cache entries. - // Currently there are lists storing COMMITTED and DECOMMITTED entries. 
- // COMMITTED entries are those that are not madvise()'d - // DECOMMITTED entries are those that are madvise()'d - ListInfo EntryLists[2] GUARDED_BY(Mutex) = {}; + // The LRUHead of the cache is the most recently used cache entry + u16 LRUHead GUARDED_BY(Mutex) = 0; + // The LRUTail of the cache is the least recently used cache entry + u16 LRUTail GUARDED_BY(Mutex) = 0; // The AvailableHead is the top of the stack of available entries u16 AvailableHead GUARDED_BY(Mutex) = 0; }; From d0b4b6b12dea662a9316e2f83277288be37be666 Mon Sep 17 00:00:00 2001 From: Paul T Robinson Date: Wed, 31 Jul 2024 11:15:30 -0700 Subject: [PATCH 012/114] [Driver] Correct comment on default for -falign-functions (#101257) --- clang/lib/Driver/ToolChains/CommonArgs.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 1e37d9d3488180..3d0714286139d2 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -1957,8 +1957,8 @@ tools::ParsePICArgs(const ToolChain &ToolChain, const ArgList &Args) { return std::make_tuple(RelocM, 0U, false); } -// `-falign-functions` indicates that the functions should be aligned to a -// 16-byte boundary. +// `-falign-functions` indicates that the functions should be aligned to the +// backend's preferred alignment. // // `-falign-functions=1` is the same as `-fno-align-functions`. // From 33960ce5a8e26baf05521fd7f8be5c5abb6bb0ff Mon Sep 17 00:00:00 2001 From: Sayhaan Siddiqui <49014204+sayhaan@users.noreply.github.com> Date: Wed, 31 Jul 2024 11:35:38 -0700 Subject: [PATCH 013/114] [BOLT][DWARF] Sort GDBIndexTUEntryVector (#101264) Sorts GDBIndexTUEntryVector in decreasing order by hash to ensure determinism when parallelized. --- bolt/include/bolt/Core/GDBIndex.h | 8 ++++++++ bolt/lib/Core/GDBIndex.cpp | 3 +-- .../X86/dwarf4-split-gdb-index-types-gdb-generated.test | 8 ++++---- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/bolt/include/bolt/Core/GDBIndex.h b/bolt/include/bolt/Core/GDBIndex.h index 6604c2a11472d4..0ebcf4ecfe99e2 100644 --- a/bolt/include/bolt/Core/GDBIndex.h +++ b/bolt/include/bolt/Core/GDBIndex.h @@ -53,6 +53,14 @@ class GDBIndex { const GDBIndexTUEntryType &getGDBIndexTUEntryVector() const { return GDBIndexTUEntryVector; } + + /// Sorts entries in GDBIndexTUEntryVector according to the TypeHash. + void sortGDBIndexTUEntryVector() { + llvm::stable_sort(GDBIndexTUEntryVector, [](const GDBIndexTUEntry &LHS, + const GDBIndexTUEntry &RHS) { + return LHS.TypeHash > RHS.TypeHash; + }); + } }; } // namespace bolt diff --git a/bolt/lib/Core/GDBIndex.cpp b/bolt/lib/Core/GDBIndex.cpp index 9e6d24167d559e..c7fb4889646b4a 100644 --- a/bolt/lib/Core/GDBIndex.cpp +++ b/bolt/lib/Core/GDBIndex.cpp @@ -23,7 +23,6 @@ void GDBIndex::updateGdbIndexSection( DebugARangesSectionWriter &ARangesSectionWriter) { if (!BC.getGdbIndexSection()) return; - // See https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html // for .gdb_index section format. @@ -141,7 +140,7 @@ void GDBIndex::updateGdbIndexSection( write64le(Buffer + 8, CUInfo.second.Length + 4); Buffer += 16; } - + sortGDBIndexTUEntryVector(); // Rewrite TU CU List, since abbrevs can be different. 
 // Entry example:
 // 0: offset = 0x00000000, type_offset = 0x0000001e, type_signature =
diff --git a/bolt/test/X86/dwarf4-split-gdb-index-types-gdb-generated.test b/bolt/test/X86/dwarf4-split-gdb-index-types-gdb-generated.test
index c9b12574caa3ac..6caf5870fca021 100644
--- a/bolt/test/X86/dwarf4-split-gdb-index-types-gdb-generated.test
+++ b/bolt/test/X86/dwarf4-split-gdb-index-types-gdb-generated.test
@@ -17,10 +17,10 @@
 # POSTCHECK-NEXT: 0: Offset = 0x0, Length = 0x34
 # POSTCHECK-NEXT: 1: Offset = 0x34, Length = 0x34
 # POSTCHECK: Types CU list offset = 0x38, has 4 entries
-# POSTCHECK-NEXT: 0: offset = 0x00000000, type_offset = 0x0000001e, type_signature = 0x675d23e4f33235f2
-# POSTCHECK-NEXT: 1: offset = 0x0000004a, type_offset = 0x0000001e, type_signature = 0x49dc260088be7e56
-# POSTCHECK-NEXT: 2: offset = 0x00000000, type_offset = 0x0000001e, type_signature = 0x104ec427d2ebea6f
-# POSTCHECK-NEXT: 3: offset = 0x0000004a, type_offset = 0x0000001e, type_signature = 0xb4580bc1535df1e4
+# POSTCHECK-NEXT: 0: offset = 0x0000004a, type_offset = 0x0000001e, type_signature = 0xb4580bc1535df1e4
+# POSTCHECK-NEXT: 1: offset = 0x00000000, type_offset = 0x0000001e, type_signature = 0x675d23e4f33235f2
+# POSTCHECK-NEXT: 2: offset = 0x0000004a, type_offset = 0x0000001e, type_signature = 0x49dc260088be7e56
+# POSTCHECK-NEXT: 3: offset = 0x00000000, type_offset = 0x0000001e, type_signature = 0x104ec427d2ebea6f
 # POSTCHECK: Address area offset = 0x98, has 2 entries
 # POSTCHECK-NEXT: Low/High address = [0x[[#%.4x,ADDR:]],
 # POSTCHECK-SAME: 0x[[#ADDR + 0x7a]]) (Size: 0x7a), CU id = 0

From 9a1013220b668d846e63f241203b80515dee0a03 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert
Date: Wed, 31 Jul 2024 11:49:50 -0700
Subject: [PATCH 014/114] [Offload] Allow to record kernel launch stack traces
 (#100472)

Similar to (de)allocation traces, we can record kernel launch stack traces and
display them in case of an error. However, the AMD GPU plugin signal handler,
which is invoked on memory faults, cannot pinpoint the offending kernel.
Instead print ``, set via `OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=`, many
traces.

The recording uses a ring buffer of fixed size (for now 8).

For `trap` errors, we print the actual kernel name, and trace if recorded.
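A minimal way to exercise this, assuming an already-built offload program
`./app` (the binary name is a placeholder), is to set the new environment
variable before running:

  # Record kernel-launch stack traces and show up to 8 of them on a GPU error.
  OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=8 ./app

A non-zero value enables recording of a stack trace per kernel launch and
bounds how many traces are displayed when an error is reported; the ring
buffer keeps only the last 8 launches regardless.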
--- offload/include/Shared/EnvironmentVar.h | 6 +- .../plugins-nextgen/amdgpu/dynamic_hsa/hsa.h | 1 + offload/plugins-nextgen/amdgpu/src/rtl.cpp | 72 +++++++++++++--- .../common/include/ErrorReporting.h | 85 +++++++++++++++++++ .../common/include/PluginInterface.h | 46 ++++++++++ .../common/src/PluginInterface.cpp | 12 +++ offload/test/sanitizer/kernel_crash.c | 47 ++++++++++ offload/test/sanitizer/kernel_crash_async.c | 40 +++++++++ offload/test/sanitizer/kernel_crash_many.c | 73 ++++++++++++++++ offload/test/sanitizer/kernel_crash_single.c | 36 ++++++++ offload/test/sanitizer/kernel_trap.c | 42 +++++++++ offload/test/sanitizer/kernel_trap_async.c | 40 +++++++++ offload/test/sanitizer/kernel_trap_many.c | 36 ++++++++ openmp/docs/design/Runtimes.rst | 7 ++ 14 files changed, 531 insertions(+), 12 deletions(-) create mode 100644 offload/test/sanitizer/kernel_crash.c create mode 100644 offload/test/sanitizer/kernel_crash_async.c create mode 100644 offload/test/sanitizer/kernel_crash_many.c create mode 100644 offload/test/sanitizer/kernel_crash_single.c create mode 100644 offload/test/sanitizer/kernel_trap.c create mode 100644 offload/test/sanitizer/kernel_trap_async.c create mode 100644 offload/test/sanitizer/kernel_trap_many.c diff --git a/offload/include/Shared/EnvironmentVar.h b/offload/include/Shared/EnvironmentVar.h index 4cbdad695a0ee1..82f434e91a85b8 100644 --- a/offload/include/Shared/EnvironmentVar.h +++ b/offload/include/Shared/EnvironmentVar.h @@ -28,6 +28,7 @@ struct StringParser { /// Class for reading and checking environment variables. Currently working with /// integer, floats, std::string and bool types. template class Envar { + llvm::StringRef Name; Ty Data; bool IsPresent; bool Initialized; @@ -53,7 +54,7 @@ template class Envar { /// take the value read from the environment variable, or the default if it /// was not set or not correct. This constructor is not fallible. Envar(llvm::StringRef Name, Ty Default = Ty()) - : Data(Default), IsPresent(false), Initialized(true) { + : Name(Name), Data(Default), IsPresent(false), Initialized(true) { if (const char *EnvStr = getenv(Name.data())) { // Check whether the envar is defined and valid. @@ -84,6 +85,9 @@ template class Envar { /// Get the definitive value. operator Ty() const { return get(); } + /// Return the environment variable name. + llvm::StringRef getName() const { return Name; } + /// Indicate whether the environment variable was defined and valid. 
bool isPresent() const { return IsPresent; } diff --git a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.h b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.h index 64a1d3308aed0b..5d9fb5d7dc7cdd 100644 --- a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.h +++ b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.h @@ -31,6 +31,7 @@ typedef enum { HSA_STATUS_ERROR = 0x1000, HSA_STATUS_ERROR_INVALID_CODE_OBJECT = 0x1010, HSA_STATUS_ERROR_NOT_INITIALIZED = 0x100B, + HSA_STATUS_ERROR_EXCEPTION = 0x1016, } hsa_status_t; hsa_status_t hsa_status_string(hsa_status_t status, const char **status_string); diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index e6643d3260eb43..604683370cd27d 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -13,13 +13,16 @@ #include #include #include +#include #include +#include #include #include #include #include #include +#include "ErrorReporting.h" #include "Shared/APITypes.h" #include "Shared/Debug.h" #include "Shared/Environment.h" @@ -43,6 +46,7 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Program.h" +#include "llvm/Support/Signals.h" #include "llvm/Support/raw_ostream.h" #if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) || \ @@ -685,12 +689,12 @@ struct AMDGPUQueueTy { AMDGPUQueueTy() : Queue(nullptr), Mutex(), NumUsers(0) {} /// Lazily initialize a new queue belonging to a specific agent. - Error init(hsa_agent_t Agent, int32_t QueueSize) { + Error init(GenericDeviceTy &Device, hsa_agent_t Agent, int32_t QueueSize) { if (Queue) return Plugin::success(); hsa_status_t Status = hsa_queue_create(Agent, QueueSize, HSA_QUEUE_TYPE_MULTI, callbackError, - nullptr, UINT32_MAX, UINT32_MAX, &Queue); + &Device, UINT32_MAX, UINT32_MAX, &Queue); return Plugin::check(Status, "Error in hsa_queue_create: %s"); } @@ -875,10 +879,8 @@ struct AMDGPUQueueTy { } /// Callack that will be called when an error is detected on the HSA queue. - static void callbackError(hsa_status_t Status, hsa_queue_t *Source, void *) { - auto Err = Plugin::check(Status, "Received error in queue %p: %s", Source); - FATAL_MESSAGE(1, "%s", toString(std::move(Err)).data()); - } + static void callbackError(hsa_status_t Status, hsa_queue_t *Source, + void *Data); /// The HSA queue. hsa_queue_t *Queue; @@ -1484,6 +1486,8 @@ struct AMDGPUStreamTy { return true; } + const AMDGPUQueueTy *getQueue() const { return Queue; } + /// Record the state of the stream on an event. Error recordEvent(AMDGPUEventTy &Event) const; @@ -1594,7 +1598,7 @@ struct AMDGPUStreamManagerTy final using ResourcePoolTy = GenericDeviceResourceManagerTy; AMDGPUStreamManagerTy(GenericDeviceTy &Device, hsa_agent_t HSAAgent) - : GenericDeviceResourceManagerTy(Device), + : GenericDeviceResourceManagerTy(Device), Device(Device), OMPX_QueueTracking("LIBOMPTARGET_AMDGPU_HSA_QUEUE_BUSY_TRACKING", true), NextQueue(0), Agent(HSAAgent) {} @@ -1603,7 +1607,7 @@ struct AMDGPUStreamManagerTy final QueueSize = HSAQueueSize; MaxNumQueues = NumHSAQueues; // Initialize one queue eagerly - if (auto Err = Queues.front().init(Agent, QueueSize)) + if (auto Err = Queues.front().init(Device, Agent, QueueSize)) return Err; return GenericDeviceResourceManagerTy::init(InitialSize); @@ -1660,7 +1664,7 @@ struct AMDGPUStreamManagerTy final } // Make sure the queue is initialized, then add user & assign. 
- if (auto Err = Queues[Index].init(Agent, QueueSize)) + if (auto Err = Queues[Index].init(Device, Agent, QueueSize)) return Err; Queues[Index].addUser(); Stream->Queue = &Queues[Index]; @@ -1668,6 +1672,9 @@ struct AMDGPUStreamManagerTy final return Plugin::success(); } + /// The device associated with this stream. + GenericDeviceTy &Device; + /// Envar for controlling the tracking of busy HSA queues. BoolEnvar OMPX_QueueTracking; @@ -3074,7 +3081,7 @@ struct AMDGPUPluginTy final : public GenericPluginTy { Initialized = true; // Register event handler to detect memory errors on the devices. - Status = hsa_amd_register_system_event_handler(eventHandler, nullptr); + Status = hsa_amd_register_system_event_handler(eventHandler, this); if (auto Err = Plugin::check( Status, "Error in hsa_amd_register_system_event_handler: %s")) return std::move(Err); @@ -3209,7 +3216,8 @@ struct AMDGPUPluginTy final : public GenericPluginTy { private: /// Event handler that will be called by ROCr if an event is detected. - static hsa_status_t eventHandler(const hsa_amd_event_t *Event, void *) { + static hsa_status_t eventHandler(const hsa_amd_event_t *Event, + void *PluginPtr) { if (Event->event_type != HSA_AMD_GPU_MEMORY_FAULT_EVENT) return HSA_STATUS_SUCCESS; @@ -3240,6 +3248,26 @@ struct AMDGPUPluginTy final : public GenericPluginTy { uint32_t Node = -1; hsa_agent_get_info(Event->memory_fault.agent, HSA_AGENT_INFO_NODE, &Node); + AMDGPUPluginTy &Plugin = *reinterpret_cast(PluginPtr); + for (uint32_t I = 0, E = Plugin.getNumDevices(); + Node != uint32_t(-1) && I < E; ++I) { + AMDGPUDeviceTy &AMDGPUDevice = + reinterpret_cast(Plugin.getDevice(I)); + auto KernelTraceInfoRecord = + AMDGPUDevice.KernelLaunchTraces.getExclusiveAccessor(); + + uint32_t DeviceNode = -1; + if (auto Err = + AMDGPUDevice.getDeviceAttr(HSA_AGENT_INFO_NODE, DeviceNode)) { + consumeError(std::move(Err)); + continue; + } + if (DeviceNode != Node) + continue; + + ErrorReporter::reportKernelTraces(AMDGPUDevice, *KernelTraceInfoRecord); + } + // Abort the execution since we do not recover from this error. 
FATAL_MESSAGE(1, "Memory access fault by GPU %" PRIu32 " (agent 0x%" PRIx64 @@ -3480,6 +3508,28 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) { return Alloc; } +void AMDGPUQueueTy::callbackError(hsa_status_t Status, hsa_queue_t *Source, + void *Data) { + auto &AMDGPUDevice = *reinterpret_cast(Data); + + if (Status == HSA_STATUS_ERROR_EXCEPTION) { + auto KernelTraceInfoRecord = + AMDGPUDevice.KernelLaunchTraces.getExclusiveAccessor(); + std::function AsyncInfoWrapperMatcher = + [=](__tgt_async_info &AsyncInfo) { + auto *Stream = reinterpret_cast(AsyncInfo.Queue); + if (!Stream || !Stream->getQueue()) + return false; + return Stream->getQueue()->Queue == Source; + }; + ErrorReporter::reportTrapInKernel(AMDGPUDevice, *KernelTraceInfoRecord, + AsyncInfoWrapperMatcher); + } + + auto Err = Plugin::check(Status, "Received error in queue %p: %s", Source); + FATAL_MESSAGE(1, "%s", toString(std::move(Err)).data()); +} + } // namespace plugin } // namespace target } // namespace omp diff --git a/offload/plugins-nextgen/common/include/ErrorReporting.h b/offload/plugins-nextgen/common/include/ErrorReporting.h index a8efaad35bb10a..72cfb5273ae3c6 100644 --- a/offload/plugins-nextgen/common/include/ErrorReporting.h +++ b/offload/plugins-nextgen/common/include/ErrorReporting.h @@ -14,6 +14,7 @@ #include "PluginInterface.h" #include "Shared/EnvironmentVar.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/ErrorHandling.h" @@ -216,6 +217,90 @@ class ErrorReporter { getAllocTyName(ATI->Kind).data(), DevicePtr); #undef DEALLOCATION_ERROR } + + /// Report that a kernel encountered a trap instruction. + static void reportTrapInKernel( + GenericDeviceTy &Device, KernelTraceInfoRecordTy &KTIR, + std::function AsyncInfoWrapperMatcher) { + assert(AsyncInfoWrapperMatcher && "A matcher is required"); + + uint32_t Idx = 0; + for (uint32_t I = 0, E = KTIR.size(); I < E; ++I) { + auto KTI = KTIR.getKernelTraceInfo(I); + if (KTI.Kernel == nullptr) + break; + // Skip kernels issued in other queues. + if (KTI.AsyncInfo && !(AsyncInfoWrapperMatcher(*KTI.AsyncInfo))) + continue; + Idx = I; + break; + } + + auto KTI = KTIR.getKernelTraceInfo(Idx); + if (KTI.AsyncInfo && (AsyncInfoWrapperMatcher(*KTI.AsyncInfo))) + reportError("Kernel '%s'", KTI.Kernel->getName()); + reportError("execution interrupted by hardware trap instruction"); + if (KTI.AsyncInfo && (AsyncInfoWrapperMatcher(*KTI.AsyncInfo))) { + if (!KTI.LaunchTrace.empty()) + reportStackTrace(KTI.LaunchTrace); + else + print(Yellow, "Use '%s=1' to show the stack trace of the kernel\n", + Device.OMPX_TrackNumKernelLaunches.getName().data()); + } + abort(); + } + + /// Report the kernel traces taken from \p KTIR, up to + /// OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES many. 
+ static void reportKernelTraces(GenericDeviceTy &Device, + KernelTraceInfoRecordTy &KTIR) { + uint32_t NumKTIs = 0; + for (uint32_t I = 0, E = KTIR.size(); I < E; ++I) { + auto KTI = KTIR.getKernelTraceInfo(I); + if (KTI.Kernel == nullptr) + break; + ++NumKTIs; + } + if (NumKTIs == 0) { + print(BoldRed, "No kernel launches known\n"); + return; + } + + uint32_t TracesToShow = + std::min(Device.OMPX_TrackNumKernelLaunches.get(), NumKTIs); + if (TracesToShow == 0) { + if (NumKTIs == 1) + print(BoldLightPurple, "Display only launched kernel:\n"); + else + print(BoldLightPurple, "Display last %u kernels launched:\n", NumKTIs); + } else { + if (NumKTIs == 1) + print(BoldLightPurple, "Display kernel launch trace:\n"); + else + print(BoldLightPurple, + "Display %u of the %u last kernel launch traces:\n", TracesToShow, + NumKTIs); + } + + for (uint32_t Idx = 0, I = 0; I < NumKTIs; ++Idx) { + auto KTI = KTIR.getKernelTraceInfo(Idx); + if (NumKTIs == 1) + print(BoldLightPurple, "Kernel '%s'\n", KTI.Kernel->getName()); + else + print(BoldLightPurple, "Kernel %d: '%s'\n", I, KTI.Kernel->getName()); + reportStackTrace(KTI.LaunchTrace); + ++I; + } + + if (NumKTIs != 1) { + print(Yellow, + "Use '%s=' to adjust the number of shown stack traces (%u " + "now, up to %zu)\n", + Device.OMPX_TrackNumKernelLaunches.getName().data(), + Device.OMPX_TrackNumKernelLaunches.get(), KTIR.size()); + } + // TODO: Let users know how to serialize kernels + } }; } // namespace plugin diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index 49a3943a27429f..81823338fe2112 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -412,6 +412,44 @@ struct AllocationTraceInfoTy { std::mutex Lock; }; +/// Information about an allocation, when it has been allocated, and when/if it +/// has been deallocated, for error reporting purposes. +struct KernelTraceInfoTy { + + /// The launched kernel. + GenericKernelTy *Kernel; + + /// The stack trace of the launch itself. + std::string LaunchTrace; + + /// The async info the kernel was launched in. + __tgt_async_info *AsyncInfo; +}; + +struct KernelTraceInfoRecordTy { + KernelTraceInfoRecordTy() { KTIs.fill({}); } + + /// Return the (maximal) record size. + auto size() const { return KTIs.size(); } + + /// Create a new kernel trace info and add it into the record. + void emplace(GenericKernelTy *Kernel, const std::string &&StackTrace, + __tgt_async_info *AsyncInfo) { + KTIs[Idx] = {Kernel, std::move(StackTrace), AsyncInfo}; + Idx = (Idx + 1) % size(); + } + + /// Return the \p I'th last kernel trace info. + auto getKernelTraceInfo(int32_t I) const { + // Note that kernel trace infos "grow forward", so lookup is backwards. + return KTIs[(Idx - I - 1 + size()) % size()]; + } + +private: + std::array KTIs; + unsigned Idx = 0; +}; + /// Class representing a map of host pinned allocations. We track these pinned /// allocations, so memory tranfers invloving these buffers can be optimized. class PinnedAllocationMapTy { @@ -900,6 +938,14 @@ struct GenericDeviceTy : public DeviceAllocatorTy { /// been deallocated, both for error reporting purposes. ProtectedObj> AllocationTraces; + /// Map to record kernel have been launchedl, for error reporting purposes. + ProtectedObj KernelLaunchTraces; + + /// Environment variable to determine if stack traces for kernel launches are + /// tracked. 
+ UInt32Envar OMPX_TrackNumKernelLaunches = + UInt32Envar("OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES", 0); + private: /// Get and set the stack size and heap size for the device. If not used, the /// plugin can implement the setters as no-op and setting the output diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index 3b6f545fe91fa5..c3ecbcc62f71f1 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -1468,6 +1468,18 @@ Error GenericDeviceTy::launchKernel(void *EntryPtr, void **ArgPtrs, GenericKernelTy &GenericKernel = *reinterpret_cast(EntryPtr); + { + std::string StackTrace; + if (OMPX_TrackNumKernelLaunches) { + llvm::raw_string_ostream OS(StackTrace); + llvm::sys::PrintStackTrace(OS); + } + + auto KernelTraceInfoRecord = KernelLaunchTraces.getExclusiveAccessor(); + (*KernelTraceInfoRecord) + .emplace(&GenericKernel, std::move(StackTrace), AsyncInfo); + } + auto Err = GenericKernel.launch(*this, ArgPtrs, ArgOffsets, KernelArgs, AsyncInfoWrapper); diff --git a/offload/test/sanitizer/kernel_crash.c b/offload/test/sanitizer/kernel_crash.c new file mode 100644 index 00000000000000..457d953a33a059 --- /dev/null +++ b/offload/test/sanitizer/kernel_crash.c @@ -0,0 +1,47 @@ + +// clang-format off +// RUN: %libomptarget-compile-generic +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,NDEBG +// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK +// RUN: %libomptarget-compile-generic -g +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,DEBUG +// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK +// clang-format on + +// UNSUPPORTED: nvptx64-nvidia-cuda +// UNSUPPORTED: nvptx64-nvidia-cuda-LTO +// UNSUPPORTED: aarch64-unknown-linux-gnu +// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +// UNSUPPORTED: x86_64-pc-linux-gnu +// UNSUPPORTED: x86_64-pc-linux-gnu-LTO +// UNSUPPORTED: s390x-ibm-linux-gnu +// UNSUPPORTED: s390x-ibm-linux-gnu-LTO + +#include + +int main(void) { + int *A = 0; +#pragma omp target + { + } +#pragma omp target + { + } +#pragma omp target + { + *A = 42; + } +#pragma omp target + { + } +} +// TRACE: Display 1 of the 3 last kernel launch traces +// TRACE: Kernel 0: '__omp_offloading_{{.*}}_main_l30' +// TRACE: launchKernel +// NDEBG: main +// DEBUG: main {{.*}}kernel_crash.c:30 +// +// CHECK: Display last 3 kernels launched: +// CHECK: Kernel 0: '__omp_offloading_{{.*}}_main_l30' +// CHECK: Kernel 1: '__omp_offloading_{{.*}}_main_l27' +// CHECK: Kernel 2: '__omp_offloading_{{.*}}_main_l24' diff --git a/offload/test/sanitizer/kernel_crash_async.c b/offload/test/sanitizer/kernel_crash_async.c new file mode 100644 index 00000000000000..6aebf1b42a5357 --- /dev/null +++ b/offload/test/sanitizer/kernel_crash_async.c @@ -0,0 +1,40 @@ + +// clang-format off +// RUN: %libomptarget-compileopt-generic +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE +// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK +// RUN: %libomptarget-compileopt-generic -g +// RUN: %not --crash env -u 
LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE +// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK +// clang-format on + +// UNSUPPORTED: nvptx64-nvidia-cuda +// UNSUPPORTED: nvptx64-nvidia-cuda-LTO +// UNSUPPORTED: aarch64-unknown-linux-gnu +// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +// UNSUPPORTED: x86_64-pc-linux-gnu +// UNSUPPORTED: x86_64-pc-linux-gnu-LTO +// UNSUPPORTED: s390x-ibm-linux-gnu +// UNSUPPORTED: s390x-ibm-linux-gnu-LTO + +#include + +int main(void) { + int *A = 0; +#pragma omp target nowait + { + } +#pragma omp target nowait + { + } +#pragma omp target nowait + { + *A = 42; + } +#pragma omp taskwait +} + +// TRACE: Kernel {{.*}}'__omp_offloading_{{.*}}_main_ +// TRACE: launchKernel +// +// CHECK-DAG: Kernel {{[0-9]}}: '__omp_offloading_{{.*}}_main_l30' diff --git a/offload/test/sanitizer/kernel_crash_many.c b/offload/test/sanitizer/kernel_crash_many.c new file mode 100644 index 00000000000000..9e3f4f1630acd8 --- /dev/null +++ b/offload/test/sanitizer/kernel_crash_many.c @@ -0,0 +1,73 @@ + +// clang-format off +// RUN: %libomptarget-compile-generic +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=24 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,NDEBG +// RUN: %libomptarget-compile-generic -g +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=16 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,DEBUG +// clang-format on + +// UNSUPPORTED: nvptx64-nvidia-cuda +// UNSUPPORTED: nvptx64-nvidia-cuda-LTO +// UNSUPPORTED: aarch64-unknown-linux-gnu +// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +// UNSUPPORTED: x86_64-pc-linux-gnu +// UNSUPPORTED: x86_64-pc-linux-gnu-LTO +// UNSUPPORTED: s390x-ibm-linux-gnu +// UNSUPPORTED: s390x-ibm-linux-gnu-LTO + +#include + +int main(void) { + int *A = 0; + for (int i = 0; i < 10; ++i) { +#pragma omp target + { + } + } +#pragma omp target + { + *A = 42; + } +} +// CHECK: Display 8 of the 8 last kernel launch traces +// CHECK: Kernel 0: '__omp_offloading_{{.*}}_main_l27' +// CHECK: launchKernel +// NDEBG: main +// DEBUG: main {{.*}}kernel_crash_many.c:27 +// +// CHECK: Kernel 1: '__omp_offloading_{{.*}}_main_l23' +// CHECK: launchKernel +// NDEBG: main +// DEBUG: main {{.*}}kernel_crash_many.c: +// +// CHECK: Kernel 2: '__omp_offloading_{{.*}}_main_l23' +// CHECK: launchKernel +// NDEBG: main +// DEBUG: main {{.*}}kernel_crash_many.c: +// +// CHECK: Kernel 3: '__omp_offloading_{{.*}}_main_l23' +// CHECK: launchKernel +// NDEBG: main +// DEBUG: main {{.*}}kernel_crash_many.c: +// +// CHECK: Kernel 4: '__omp_offloading_{{.*}}_main_l23' +// CHECK: launchKernel +// NDEBG: main +// DEBUG: main {{.*}}kernel_crash_many.c: +// +// CHECK: Kernel 5: '__omp_offloading_{{.*}}_main_l23' +// CHECK: launchKernel +// NDEBG: main +// DEBUG: main {{.*}}kernel_crash_many.c: +// +// CHECK: Kernel 6: '__omp_offloading_{{.*}}_main_l23' +// CHECK: launchKernel +// NDEBG: main +// DEBUG: main {{.*}}kernel_crash_many.c: +// +// CHECK: Kernel 7: '__omp_offloading_{{.*}}_main_l23' +// CHECK: launchKernel +// NDEBG: main +// DEBUG: main {{.*}}kernel_crash_many.c: +// +// CHECK-NOT: Kernel {{[[0-9]]+}}: diff --git a/offload/test/sanitizer/kernel_crash_single.c b/offload/test/sanitizer/kernel_crash_single.c new file mode 100644 index 00000000000000..16a8159f074e5a --- /dev/null +++ 
b/offload/test/sanitizer/kernel_crash_single.c @@ -0,0 +1,36 @@ + +// clang-format off +// RUN: %libomptarget-compile-generic +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,NDEBG +// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK +// RUN: %libomptarget-compile-generic -g +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,DEBUG +// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK +// clang-format on + +// UNSUPPORTED: nvptx64-nvidia-cuda +// UNSUPPORTED: nvptx64-nvidia-cuda-LTO +// UNSUPPORTED: aarch64-unknown-linux-gnu +// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +// UNSUPPORTED: x86_64-pc-linux-gnu +// UNSUPPORTED: x86_64-pc-linux-gnu-LTO +// UNSUPPORTED: s390x-ibm-linux-gnu +// UNSUPPORTED: s390x-ibm-linux-gnu-LTO + +#include + +int main(void) { + int *A = 0; +#pragma omp target + { + *A = 42; + } +} +// TRACE: Display kernel launch trace +// TRACE: Kernel '__omp_offloading_{{.*}}_main_l24' +// TRACE: launchKernel +// NDEBG: main +// DEBUG: main {{.*}}kernel_crash_single.c:24 +// +// CHECK: Display only launched kernel: +// CHECK: Kernel '__omp_offloading_{{.*}}_main_l24' diff --git a/offload/test/sanitizer/kernel_trap.c b/offload/test/sanitizer/kernel_trap.c new file mode 100644 index 00000000000000..13fe6f2fb71e84 --- /dev/null +++ b/offload/test/sanitizer/kernel_trap.c @@ -0,0 +1,42 @@ + +// clang-format off +// RUN: %libomptarget-compile-generic +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,NDEBG +// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK +// RUN: %libomptarget-compile-generic -g +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG +// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK +// clang-format on + +// UNSUPPORTED: nvptx64-nvidia-cuda +// UNSUPPORTED: nvptx64-nvidia-cuda-LTO +// UNSUPPORTED: aarch64-unknown-linux-gnu +// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +// UNSUPPORTED: x86_64-pc-linux-gnu +// UNSUPPORTED: x86_64-pc-linux-gnu-LTO +// UNSUPPORTED: s390x-ibm-linux-gnu +// UNSUPPORTED: s390x-ibm-linux-gnu-LTO + +#include + +int main(void) { + +#pragma omp target + { + } +#pragma omp target + { + } +#pragma omp target + { + __builtin_trap(); + } +#pragma omp target + { + } +} +// CHECK: OFFLOAD ERROR: Kernel '__omp_offloading_{{.*}}_main_l30' +// CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction +// TRACE: launchKernel +// NDEBG: main +// DEBUG: main {{.*}}kernel_trap.c: diff --git a/offload/test/sanitizer/kernel_trap_async.c b/offload/test/sanitizer/kernel_trap_async.c new file mode 100644 index 00000000000000..65e88807983439 --- /dev/null +++ b/offload/test/sanitizer/kernel_trap_async.c @@ -0,0 +1,40 @@ + +// clang-format off +// RUN: %libomptarget-compileopt-generic +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE +// RUN: %not --crash 
%libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK +// RUN: %libomptarget-compileopt-generic -g +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG +// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK +// clang-format on + +// UNSUPPORTED: nvptx64-nvidia-cuda +// UNSUPPORTED: nvptx64-nvidia-cuda-LTO +// UNSUPPORTED: aarch64-unknown-linux-gnu +// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +// UNSUPPORTED: x86_64-pc-linux-gnu +// UNSUPPORTED: x86_64-pc-linux-gnu-LTO +// UNSUPPORTED: s390x-ibm-linux-gnu +// UNSUPPORTED: s390x-ibm-linux-gnu-LTO + +#include + +int main(void) { + +#pragma omp target nowait + { + } +#pragma omp target nowait + { + } +#pragma omp target nowait + { + __builtin_trap(); + } +#pragma omp taskwait +} + +// CHECK: OFFLOAD ERROR: Kernel '__omp_offloading_{{.*}}_main_l30' +// CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction +// TRACE: launchKernel +// DEBUG: kernel_trap_async.c: diff --git a/offload/test/sanitizer/kernel_trap_many.c b/offload/test/sanitizer/kernel_trap_many.c new file mode 100644 index 00000000000000..3f1796e8913ea6 --- /dev/null +++ b/offload/test/sanitizer/kernel_trap_many.c @@ -0,0 +1,36 @@ + +// clang-format off +// RUN: %libomptarget-compile-generic +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=24 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,NDEBG +// RUN: %libomptarget-compile-generic -g +// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=16 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,DEBUG +// clang-format on + +// UNSUPPORTED: nvptx64-nvidia-cuda +// UNSUPPORTED: nvptx64-nvidia-cuda-LTO +// UNSUPPORTED: aarch64-unknown-linux-gnu +// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +// UNSUPPORTED: x86_64-pc-linux-gnu +// UNSUPPORTED: x86_64-pc-linux-gnu-LTO +// UNSUPPORTED: s390x-ibm-linux-gnu +// UNSUPPORTED: s390x-ibm-linux-gnu-LTO + +#include + +int main(void) { + + for (int i = 0; i < 10; ++i) { +#pragma omp target + { + } + } +#pragma omp target + { + __builtin_trap(); + } +} +// TRACE: OFFLOAD ERROR: Kernel '__omp_offloading_{{.*}}_main_l27' +// TRACE: OFFLOAD ERROR: execution interrupted by hardware trap instruction +// TRACE: launchKernel +// NDEBG: main +// DEBUG: main {{.*}}kernel_trap_many.c: diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst index 20e2fa715eaacd..ed002c8cf0f807 100644 --- a/openmp/docs/design/Runtimes.rst +++ b/openmp/docs/design/Runtimes.rst @@ -744,6 +744,7 @@ variables is defined below. * ``LIBOMPTARGET_MIN_THREADS_FOR_LOW_TRIP_COUNT= (default: 32)`` * ``LIBOMPTARGET_REUSE_BLOCKS_FOR_HIGH_TRIP_COUNT=[TRUE/FALSE] (default TRUE)`` * ``OFFLOAD_TRACK_ALLOCATION_TRACES=[TRUE/FALSE] (default FALSE)`` + * ``OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES= (default 0)`` LIBOMPTARGET_DEBUG """""""""""""""""" @@ -1178,6 +1179,12 @@ This environment variable determines if the stack traces of allocations and deallocations are tracked to aid in error reporting, e.g., in case of double-free. +OFFLOAD_TRACK_KERNEL_LAUNCH_TRACES +"""""""""""""""""""""""""""""""""" + +This environment variable determines how manytstack traces of kernel launches +are tracked to aid in error reporting, e.g., what asynchronous kernel failed. + .. 
_libomptarget_plugin: LLVM/OpenMP Target Host Runtime Plugins (``libomptarget.rtl.XXXX``) From b66aa3bfff442a5eb67f1bfcfaa148e42e49b787 Mon Sep 17 00:00:00 2001 From: OverMighty Date: Wed, 31 Jul 2024 21:01:07 +0200 Subject: [PATCH 015/114] [libc][math][c23] Refactor expf16 (#101373) Also updates and sorts CMake target dependencies, and corrects the smoke test that expected expf16(sNaN) to return sNaN instead of aNaN, although the test still passed, as FPMatcher only checks whether both sides are NaN, not whether they're the same NaN value. --- libc/src/math/generic/CMakeLists.txt | 3 ++- libc/src/math/generic/expf16.cpp | 7 +++---- libc/test/src/math/performance_testing/expf16_perf.cpp | 2 +- libc/test/src/math/smoke/expf16_test.cpp | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index a4c1318f8a168a..e707615e69b971 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -1236,11 +1236,12 @@ add_entrypoint_object( libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.__support.CPP.array + libc.src.__support.FPUtil.except_value_utils libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.polyeval libc.src.__support.FPUtil.multiply_add libc.src.__support.FPUtil.nearest_integer + libc.src.__support.FPUtil.polyeval libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization COMPILE_OPTIONS diff --git a/libc/src/math/generic/expf16.cpp b/libc/src/math/generic/expf16.cpp index b618edc36a046b..b198c559dfedb9 100644 --- a/libc/src/math/generic/expf16.cpp +++ b/libc/src/math/generic/expf16.cpp @@ -127,9 +127,8 @@ LLVM_LIBC_FUNCTION(float16, expf16, (float16 x)) { // > display = hexadecimal; // > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-5, 2^-5]); // > 1 + x * P; - float r = - fputil::polyeval(xf, 0x1p+0f, 0x1p+0f, 0x1.0004p-1f, 0x1.555778p-3f); - return static_cast(r); + return static_cast( + fputil::polyeval(xf, 0x1p+0f, 0x1p+0f, 0x1.0004p-1f, 0x1.555778p-3f)); } } @@ -150,7 +149,7 @@ LLVM_LIBC_FUNCTION(float16, expf16, (float16 x)) { // respectively. exp(lo) is computed using a degree-3 minimax polynomial // generated by Sollya. - float xf = static_cast(x); + float xf = x; float kf = fputil::nearest_integer(xf * 0x1.0p+3f); int x_hi_mid = static_cast(kf); int x_hi = x_hi_mid >> 3; diff --git a/libc/test/src/math/performance_testing/expf16_perf.cpp b/libc/test/src/math/performance_testing/expf16_perf.cpp index c1213689ff5e7d..bc9d9f05559a38 100644 --- a/libc/test/src/math/performance_testing/expf16_perf.cpp +++ b/libc/test/src/math/performance_testing/expf16_perf.cpp @@ -1,4 +1,4 @@ -//===-- Performancel test for expf16 --------------------------------------===// +//===-- Performance test for expf16 ---------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/libc/test/src/math/smoke/expf16_test.cpp b/libc/test/src/math/smoke/expf16_test.cpp index f05ecd0dc4d0ed..969870fe247bc2 100644 --- a/libc/test/src/math/smoke/expf16_test.cpp +++ b/libc/test/src/math/smoke/expf16_test.cpp @@ -21,7 +21,7 @@ TEST_F(LlvmLibcExpf16Test, SpecialNumbers) { EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::expf16(aNaN)); EXPECT_MATH_ERRNO(0); - EXPECT_FP_EQ_WITH_EXCEPTION(sNaN, LIBC_NAMESPACE::expf16(sNaN), FE_INVALID); + EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::expf16(sNaN), FE_INVALID); EXPECT_MATH_ERRNO(0); EXPECT_FP_EQ_ALL_ROUNDING(inf, LIBC_NAMESPACE::expf16(inf)); From ef67664d81b395e6dea9586a1a508323ad2a9de7 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 31 Jul 2024 23:10:15 +0400 Subject: [PATCH 016/114] AMDGPU: Add testcase for materializing sgpr frame indexes (#101306) These add some IR tests for 57d10b4fc9142d12fbdec578a0cc6f78deb67ef4. These do rely on some lucky MIR placement to test the scc input, but I haven't found a better way to do it. Also, scc handling in inline asm is extremely buggy. --- .../materialize-frame-index-sgpr.gfx10.ll | 1836 +++++++++++++ .../AMDGPU/materialize-frame-index-sgpr.ll | 2323 +++++++++++++++++ 2 files changed, 4159 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll create mode 100644 llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll new file mode 100644 index 00000000000000..94d1eca05ed0ec --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll @@ -0,0 +1,1836 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck -check-prefix=GFX10_1 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -mattr=+cumode < %s | FileCheck -check-prefix=GFX10_3 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck -check-prefix=GFX11 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck -check-prefix=GFX12 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX940 %s + +; We aren't pressuring the SGPRs, so this can use the add with carry out pre-gfx9. 
+define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { +; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: +; GFX10_1: ; %bb.0: +; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 +; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 +; GFX10_1-NEXT: v_writelane_b32 v1, s59, 0 +; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo +; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 +; GFX10_1-NEXT: ;;#ASMSTART +; GFX10_1-NEXT: ; use alloca0 v0 +; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 +; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0 +; GFX10_1-NEXT: v_readfirstlane_b32 s59, v0 +; GFX10_1-NEXT: ;;#ASMSTART +; GFX10_1-NEXT: ; use s59, scc +; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_readlane_b32 s59, v1, 0 +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 +; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: s_waitcnt vmcnt(0) +; GFX10_1-NEXT: s_setpc_b64 s[30:31] +; +; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: +; GFX10_3: ; %bb.0: +; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 +; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 +; GFX10_3-NEXT: v_writelane_b32 v1, s59, 0 +; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo +; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 +; GFX10_3-NEXT: ;;#ASMSTART +; GFX10_3-NEXT: ; use alloca0 v0 +; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 +; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0 +; GFX10_3-NEXT: v_readfirstlane_b32 s59, v0 +; GFX10_3-NEXT: ;;#ASMSTART +; GFX10_3-NEXT: ; use s59, scc +; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_readlane_b32 s59, v1, 0 +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 +; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: s_waitcnt vmcnt(0) +; GFX10_3-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 +; GFX11-NEXT: scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_add_i32 s0, s32, 64 +; GFX11-NEXT: v_writelane_b32 v1, s59, 0 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_and_b32 s0, 0, exec_lo +; GFX11-NEXT: s_addc_u32 s0, s32, 0x4040 +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use alloca0 v0 +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: s_bitcmp1_b32 s0, 0 +; GFX11-NEXT: s_bitset0_b32 s0, 0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_mov_b32 s59, s0 +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use s59, scc +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_readlane_b32 s59, v1, 0 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 +; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload +; GFX11-NEXT: 
s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:16388 ; 4-byte Folded Spill +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: s_and_b32 s0, 0, exec_lo +; GFX12-NEXT: v_writelane_b32 v1, s59, 0 +; GFX12-NEXT: s_add_co_ci_u32 s0, s32, 0x4000 +; GFX12-NEXT: v_mov_b32_e32 v0, s32 +; GFX12-NEXT: s_bitcmp1_b32 s0, 0 +; GFX12-NEXT: s_bitset0_b32 s0, 0 +; GFX12-NEXT: ;;#ASMSTART +; GFX12-NEXT: ; use alloca0 v0 +; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: s_mov_b32 s59, s0 +; GFX12-NEXT: ;;#ASMSTART +; GFX12-NEXT: ; use s59, scc +; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: v_readlane_b32 s59, v1, 0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:16388 ; 4-byte Folded Reload +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 +; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 +; GFX8-NEXT: v_writelane_b32 v1, s59, 0 +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use alloca0 v0 +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 +; GFX8-NEXT: s_movk_i32 s59, 0x4040 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, s59, v0 +; GFX8-NEXT: v_readfirstlane_b32 s59, v0 +; GFX8-NEXT: s_and_b64 s[4:5], 0, exec +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use s59, scc +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_readlane_b32 s59, v1, 0 +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 +; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 +; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 +; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use alloca0 v0 +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 +; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0 +; GFX900-NEXT: v_writelane_b32 v1, s59, 0 +; GFX900-NEXT: v_readfirstlane_b32 s59, v0 +; GFX900-NEXT: s_and_b64 s[4:5], 0, exec +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s59, scc +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s59, v1, 0 +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 +; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: s_setpc_b64 
s[30:31] +; +; GFX940-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX940-NEXT: s_add_i32 s2, s32, 0x4044 +; GFX940-NEXT: scratch_store_dword off, v1, s2 sc0 sc1 ; 4-byte Folded Spill +; GFX940-NEXT: s_mov_b64 exec, s[0:1] +; GFX940-NEXT: s_add_i32 s0, s32, 64 +; GFX940-NEXT: v_mov_b32_e32 v0, s0 +; GFX940-NEXT: s_and_b64 s[0:1], 0, exec +; GFX940-NEXT: s_addc_u32 s0, s32, 0x4040 +; GFX940-NEXT: s_bitcmp1_b32 s0, 0 +; GFX940-NEXT: s_bitset0_b32 s0, 0 +; GFX940-NEXT: v_writelane_b32 v1, s59, 0 +; GFX940-NEXT: s_mov_b32 s59, s0 +; GFX940-NEXT: ;;#ASMSTART +; GFX940-NEXT: ; use alloca0 v0 +; GFX940-NEXT: ;;#ASMEND +; GFX940-NEXT: ;;#ASMSTART +; GFX940-NEXT: ; use s59, scc +; GFX940-NEXT: ;;#ASMEND +; GFX940-NEXT: v_readlane_b32 s59, v1, 0 +; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX940-NEXT: s_add_i32 s2, s32, 0x4044 +; GFX940-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload +; GFX940-NEXT: s_mov_b64 exec, s[0:1] +; GFX940-NEXT: s_waitcnt vmcnt(0) +; GFX940-NEXT: s_setpc_b64 s[30:31] + %alloca0 = alloca [4096 x i32], align 64, addrspace(5) + %alloca1 = alloca i32, align 4, addrspace(5) + call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) + call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca1, i32 0) + ret void +} + +; %alloca1 should end up materializing with s_mov_b32, and scc is +; available. +define void @scalar_mov_materializes_frame_index_dead_scc() #0 { +; GFX10_1-LABEL: scalar_mov_materializes_frame_index_dead_scc: +; GFX10_1: ; %bb.0: +; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 +; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: v_writelane_b32 v1, s59, 0 +; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 +; GFX10_1-NEXT: s_lshr_b32 s59, s32, 5 +; GFX10_1-NEXT: s_addk_i32 s59, 0x4040 +; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 +; GFX10_1-NEXT: ;;#ASMSTART +; GFX10_1-NEXT: ; use alloca0 v0 +; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: ;;#ASMSTART +; GFX10_1-NEXT: ; use s59 +; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_readlane_b32 s59, v1, 0 +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 +; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: s_waitcnt vmcnt(0) +; GFX10_1-NEXT: s_setpc_b64 s[30:31] +; +; GFX10_3-LABEL: scalar_mov_materializes_frame_index_dead_scc: +; GFX10_3: ; %bb.0: +; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 +; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: v_writelane_b32 v1, s59, 0 +; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 +; GFX10_3-NEXT: s_lshr_b32 s59, s32, 5 +; GFX10_3-NEXT: s_addk_i32 s59, 0x4040 +; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 +; GFX10_3-NEXT: ;;#ASMSTART +; GFX10_3-NEXT: ; use alloca0 v0 +; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: ;;#ASMSTART +; GFX10_3-NEXT: ; use s59 +; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_readlane_b32 s59, v1, 0 +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; 
GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 +; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: s_waitcnt vmcnt(0) +; GFX10_3-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: scalar_mov_materializes_frame_index_dead_scc: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 +; GFX11-NEXT: scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: v_writelane_b32 v1, s59, 0 +; GFX11-NEXT: s_add_i32 s0, s32, 64 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_add_i32 s0, s32, 0x4040 +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use alloca0 v0 +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: s_mov_b32 s59, s0 +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use s59 +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_readlane_b32 s59, v1, 0 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 +; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: scalar_mov_materializes_frame_index_dead_scc: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:16388 ; 4-byte Folded Spill +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: v_writelane_b32 v1, s59, 0 +; GFX12-NEXT: s_add_co_i32 s0, s32, 0x4000 +; GFX12-NEXT: v_mov_b32_e32 v0, s32 +; GFX12-NEXT: s_mov_b32 s59, s0 +; GFX12-NEXT: ;;#ASMSTART +; GFX12-NEXT: ; use alloca0 v0 +; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: ;;#ASMSTART +; GFX12-NEXT: ; use s59 +; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: v_readlane_b32 s59, v1, 0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:16388 ; 4-byte Folded Reload +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: scalar_mov_materializes_frame_index_dead_scc: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 +; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: v_writelane_b32 v1, s59, 0 +; GFX8-NEXT: s_lshr_b32 s59, s32, 6 +; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 +; GFX8-NEXT: s_addk_i32 s59, 0x4040 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use alloca0 v0 +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use s59 +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_readlane_b32 s59, v1, 0 +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 +; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-LABEL: scalar_mov_materializes_frame_index_dead_scc: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 +; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte 
Folded Spill +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: v_writelane_b32 v1, s59, 0 +; GFX900-NEXT: s_lshr_b32 s59, s32, 6 +; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 +; GFX900-NEXT: s_addk_i32 s59, 0x4040 +; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use alloca0 v0 +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s59 +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s59, v1, 0 +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 +; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX940-LABEL: scalar_mov_materializes_frame_index_dead_scc: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX940-NEXT: s_add_i32 s2, s32, 0x4044 +; GFX940-NEXT: scratch_store_dword off, v1, s2 sc0 sc1 ; 4-byte Folded Spill +; GFX940-NEXT: s_mov_b64 exec, s[0:1] +; GFX940-NEXT: s_add_i32 s0, s32, 64 +; GFX940-NEXT: v_mov_b32_e32 v0, s0 +; GFX940-NEXT: s_add_i32 s0, s32, 0x4040 +; GFX940-NEXT: v_writelane_b32 v1, s59, 0 +; GFX940-NEXT: s_mov_b32 s59, s0 +; GFX940-NEXT: ;;#ASMSTART +; GFX940-NEXT: ; use alloca0 v0 +; GFX940-NEXT: ;;#ASMEND +; GFX940-NEXT: ;;#ASMSTART +; GFX940-NEXT: ; use s59 +; GFX940-NEXT: ;;#ASMEND +; GFX940-NEXT: v_readlane_b32 s59, v1, 0 +; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX940-NEXT: s_add_i32 s2, s32, 0x4044 +; GFX940-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload +; GFX940-NEXT: s_mov_b64 exec, s[0:1] +; GFX940-NEXT: s_waitcnt vmcnt(0) +; GFX940-NEXT: s_setpc_b64 s[30:31] + %alloca0 = alloca [4096 x i32], align 64, addrspace(5) + %alloca1 = alloca i32, align 4, addrspace(5) + call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) + call void asm sideeffect "; use $0", "{s59}"(ptr addrspace(5) %alloca1) + ret void +} + +define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { +; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: +; GFX10_1: ; %bb.0: +; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_1-NEXT: s_mov_b32 s5, s33 +; GFX10_1-NEXT: s_mov_b32 s33, s32 +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80880 +; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33 +; GFX10_1-NEXT: v_writelane_b32 v1, s59, 0 +; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo +; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 +; GFX10_1-NEXT: ;;#ASMSTART +; GFX10_1-NEXT: ; use alloca0 v0 +; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33 +; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0 +; GFX10_1-NEXT: v_readfirstlane_b32 s59, v0 +; GFX10_1-NEXT: ;;#ASMSTART +; GFX10_1-NEXT: ; use s59, scc +; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_readlane_b32 s59, v1, 0 +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80880 +; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: s_add_i32 s32, s32, 0xfff7f000 +; GFX10_1-NEXT: s_mov_b32 s33, s5 +; GFX10_1-NEXT: s_waitcnt vmcnt(0) +; GFX10_1-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: +; GFX10_3: ; %bb.0: +; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_3-NEXT: s_mov_b32 s5, s33 +; GFX10_3-NEXT: s_mov_b32 s33, s32 +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80880 +; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33 +; GFX10_3-NEXT: v_writelane_b32 v1, s59, 0 +; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo +; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 +; GFX10_3-NEXT: ;;#ASMSTART +; GFX10_3-NEXT: ; use alloca0 v0 +; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33 +; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0 +; GFX10_3-NEXT: v_readfirstlane_b32 s59, v0 +; GFX10_3-NEXT: ;;#ASMSTART +; GFX10_3-NEXT: ; use s59, scc +; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_readlane_b32 s59, v1, 0 +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80880 +; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: s_add_i32 s32, s32, 0xfff7f000 +; GFX10_3-NEXT: s_mov_b32 s33, s5 +; GFX10_3-NEXT: s_waitcnt vmcnt(0) +; GFX10_3-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_mov_b32 s1, s33 +; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s2, s33, 0x4044 +; GFX11-NEXT: scratch_store_b32 off, v1, s2 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_addk_i32 s32, 0x4080 +; GFX11-NEXT: s_add_i32 s0, s33, 64 +; GFX11-NEXT: v_writelane_b32 v1, s59, 0 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_and_b32 s0, 0, exec_lo +; GFX11-NEXT: s_addc_u32 s0, s33, 0x4040 +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use alloca0 v0 +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: s_bitcmp1_b32 s0, 0 +; GFX11-NEXT: s_bitset0_b32 s0, 0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_mov_b32 s59, s0 +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use s59, scc +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_readlane_b32 s59, v1, 0 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s2, s33, 0x4044 +; GFX11-NEXT: scratch_load_b32 v1, off, s2 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_addk_i32 s32, 0xbf80 +; GFX11-NEXT: s_mov_b32 s33, s1 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_mov_b32 s1, s33 +; GFX12-NEXT: s_mov_b32 s33, s32 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_store_b32 off, v1, s33 offset:16388 ; 4-byte Folded Spill +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: s_addk_co_i32 s32, 0x4040 +; GFX12-NEXT: s_and_b32 s0, 0, exec_lo +; GFX12-NEXT: v_writelane_b32 v1, s59, 0 +; GFX12-NEXT: s_add_co_ci_u32 s0, s33, 0x4000 +; GFX12-NEXT: v_mov_b32_e32 v0, s33 +; GFX12-NEXT: s_bitcmp1_b32 s0, 0 +; GFX12-NEXT: s_bitset0_b32 s0, 0 +; GFX12-NEXT: ;;#ASMSTART +; GFX12-NEXT: ; use alloca0 v0 +; 
GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: s_mov_b32 s59, s0 +; GFX12-NEXT: ;;#ASMSTART +; GFX12-NEXT: ; use s59, scc +; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: v_readlane_b32 s59, v1, 0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_load_b32 v1, off, s33 offset:16388 ; 4-byte Folded Reload +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: s_addk_co_i32 s32, 0xbfc0 +; GFX12-NEXT: s_mov_b32 s33, s1 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 s6, s33 +; GFX8-NEXT: s_mov_b32 s33, s32 +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s7, s33, 0x101100 +; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s7 ; 4-byte Folded Spill +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 +; GFX8-NEXT: v_writelane_b32 v1, s59, 0 +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use alloca0 v0 +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33 +; GFX8-NEXT: s_movk_i32 s59, 0x4040 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, s59, v0 +; GFX8-NEXT: s_add_i32 s32, s32, 0x102000 +; GFX8-NEXT: v_readfirstlane_b32 s59, v0 +; GFX8-NEXT: s_and_b64 s[4:5], 0, exec +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use s59, scc +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_readlane_b32 s59, v1, 0 +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s7, s33, 0x101100 +; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s7 ; 4-byte Folded Reload +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: s_add_i32 s32, s32, 0xffefe000 +; GFX8-NEXT: s_mov_b32 s33, s6 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s6, s33 +; GFX900-NEXT: s_mov_b32 s33, s32 +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s7, s33, 0x101100 +; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s7 ; 4-byte Folded Spill +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33 +; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use alloca0 v0 +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33 +; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0 +; GFX900-NEXT: s_add_i32 s32, s32, 0x102000 +; GFX900-NEXT: v_writelane_b32 v1, s59, 0 +; GFX900-NEXT: v_readfirstlane_b32 s59, v0 +; GFX900-NEXT: s_and_b64 s[4:5], 0, exec +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s59, scc +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s59, v1, 0 +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s7, s33, 0x101100 +; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s7 ; 4-byte Folded Reload +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: s_add_i32 s32, s32, 0xffefe000 +; GFX900-NEXT: s_mov_b32 s33, s6 +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX940-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_mov_b32 s2, s33 +; GFX940-NEXT: s_mov_b32 s33, s32 +; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX940-NEXT: s_add_i32 s3, s33, 0x4044 +; GFX940-NEXT: scratch_store_dword off, v1, s3 sc0 sc1 ; 4-byte Folded Spill +; 
GFX940-NEXT: s_mov_b64 exec, s[0:1] +; GFX940-NEXT: s_addk_i32 s32, 0x4080 +; GFX940-NEXT: s_add_i32 s0, s33, 64 +; GFX940-NEXT: v_mov_b32_e32 v0, s0 +; GFX940-NEXT: s_and_b64 s[0:1], 0, exec +; GFX940-NEXT: s_addc_u32 s0, s33, 0x4040 +; GFX940-NEXT: s_bitcmp1_b32 s0, 0 +; GFX940-NEXT: s_bitset0_b32 s0, 0 +; GFX940-NEXT: v_writelane_b32 v1, s59, 0 +; GFX940-NEXT: s_mov_b32 s59, s0 +; GFX940-NEXT: ;;#ASMSTART +; GFX940-NEXT: ; use alloca0 v0 +; GFX940-NEXT: ;;#ASMEND +; GFX940-NEXT: ;;#ASMSTART +; GFX940-NEXT: ; use s59, scc +; GFX940-NEXT: ;;#ASMEND +; GFX940-NEXT: v_readlane_b32 s59, v1, 0 +; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX940-NEXT: s_add_i32 s3, s33, 0x4044 +; GFX940-NEXT: scratch_load_dword v1, off, s3 ; 4-byte Folded Reload +; GFX940-NEXT: s_mov_b64 exec, s[0:1] +; GFX940-NEXT: s_addk_i32 s32, 0xbf80 +; GFX940-NEXT: s_mov_b32 s33, s2 +; GFX940-NEXT: s_waitcnt vmcnt(0) +; GFX940-NEXT: s_setpc_b64 s[30:31] + %alloca0 = alloca [4096 x i32], align 64, addrspace(5) + %alloca1 = alloca i32, align 4, addrspace(5) + call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) + call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca1, i32 0) + ret void +} + +define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset() #0 { +; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: +; GFX10_1: ; %bb.0: +; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800 +; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s32 +; GFX10_1-NEXT: v_writelane_b32 v0, s59, 0 +; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo +; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 64, v1 +; GFX10_1-NEXT: v_readfirstlane_b32 s59, v1 +; GFX10_1-NEXT: ;;#ASMSTART +; GFX10_1-NEXT: ; use s59, scc +; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_readlane_b32 s59, v0, 0 +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800 +; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: s_waitcnt vmcnt(0) +; GFX10_1-NEXT: s_setpc_b64 s[30:31] +; +; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: +; GFX10_3: ; %bb.0: +; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800 +; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s32 +; GFX10_3-NEXT: v_writelane_b32 v0, s59, 0 +; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo +; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 64, v1 +; GFX10_3-NEXT: v_readfirstlane_b32 s59, v1 +; GFX10_3-NEXT: ;;#ASMSTART +; GFX10_3-NEXT: ; use s59, scc +; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_readlane_b32 s59, v0, 0 +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800 +; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: s_waitcnt vmcnt(0) +; GFX10_3-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
+; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 +; GFX11-NEXT: scratch_store_b32 off, v0, s1 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_and_b32 s0, 0, exec_lo +; GFX11-NEXT: v_writelane_b32 v0, s59, 0 +; GFX11-NEXT: s_addc_u32 s0, s32, 64 +; GFX11-NEXT: s_bitcmp1_b32 s0, 0 +; GFX11-NEXT: s_bitset0_b32 s0, 0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_mov_b32 s59, s0 +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use s59, scc +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_readlane_b32 s59, v0, 0 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 +; GFX11-NEXT: scratch_load_b32 v0, off, s1 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_store_b32 off, v0, s32 offset:16384 ; 4-byte Folded Spill +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: v_writelane_b32 v0, s59, 0 +; GFX12-NEXT: s_mov_b32 s59, s32 +; GFX12-NEXT: s_and_b32 s0, 0, exec_lo +; GFX12-NEXT: ;;#ASMSTART +; GFX12-NEXT: ; use s59, scc +; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_readlane_b32 s59, v0, 0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_load_b32 v0, off, s32 offset:16384 ; 4-byte Folded Reload +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x101000 +; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: v_writelane_b32 v0, s59, 0 +; GFX8-NEXT: v_lshrrev_b32_e64 v1, 6, s32 +; GFX8-NEXT: s_mov_b32 s59, 64 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, s59, v1 +; GFX8-NEXT: v_readfirstlane_b32 s59, v1 +; GFX8-NEXT: s_and_b64 s[4:5], 0, exec +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use s59, scc +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_readlane_b32 s59, v0, 0 +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x101000 +; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x101000 +; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: v_lshrrev_b32_e64 v1, 6, s32 +; GFX900-NEXT: v_add_u32_e32 v1, 64, v1 +; GFX900-NEXT: v_writelane_b32 v0, s59, 0 +; GFX900-NEXT: v_readfirstlane_b32 s59, v1 +; GFX900-NEXT: s_and_b64 s[4:5], 0, exec +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s59, scc +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: 
v_readlane_b32 s59, v0, 0 +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x101000 +; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX940-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX940-NEXT: s_add_i32 s2, s32, 0x4040 +; GFX940-NEXT: scratch_store_dword off, v0, s2 sc0 sc1 ; 4-byte Folded Spill +; GFX940-NEXT: s_mov_b64 exec, s[0:1] +; GFX940-NEXT: s_and_b64 s[0:1], 0, exec +; GFX940-NEXT: s_addc_u32 s0, s32, 64 +; GFX940-NEXT: s_bitcmp1_b32 s0, 0 +; GFX940-NEXT: s_bitset0_b32 s0, 0 +; GFX940-NEXT: v_writelane_b32 v0, s59, 0 +; GFX940-NEXT: s_mov_b32 s59, s0 +; GFX940-NEXT: ;;#ASMSTART +; GFX940-NEXT: ; use s59, scc +; GFX940-NEXT: ;;#ASMEND +; GFX940-NEXT: v_readlane_b32 s59, v0, 0 +; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX940-NEXT: s_add_i32 s2, s32, 0x4040 +; GFX940-NEXT: scratch_load_dword v0, off, s2 ; 4-byte Folded Reload +; GFX940-NEXT: s_mov_b64 exec, s[0:1] +; GFX940-NEXT: s_waitcnt vmcnt(0) +; GFX940-NEXT: s_setpc_b64 s[30:31] + %alloca0 = alloca [4096 x i32], align 64, addrspace(5) + call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca0, i32 0) + ret void +} + +define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0 { +; GFX10_1-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: +; GFX10_1: ; %bb.0: +; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800 +; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: v_writelane_b32 v0, s59, 0 +; GFX10_1-NEXT: s_lshr_b32 s59, s32, 5 +; GFX10_1-NEXT: s_add_i32 s59, s59, 64 +; GFX10_1-NEXT: ;;#ASMSTART +; GFX10_1-NEXT: ; use s59 +; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_readlane_b32 s59, v0, 0 +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800 +; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: s_waitcnt vmcnt(0) +; GFX10_1-NEXT: s_setpc_b64 s[30:31] +; +; GFX10_3-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: +; GFX10_3: ; %bb.0: +; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800 +; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: v_writelane_b32 v0, s59, 0 +; GFX10_3-NEXT: s_lshr_b32 s59, s32, 5 +; GFX10_3-NEXT: s_add_i32 s59, s59, 64 +; GFX10_3-NEXT: ;;#ASMSTART +; GFX10_3-NEXT: ; use s59 +; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_readlane_b32 s59, v0, 0 +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800 +; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: s_waitcnt vmcnt(0) +; GFX10_3-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 +; GFX11-NEXT: scratch_store_b32 off, v0, s1 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: v_writelane_b32 v0, s59, 0 +; GFX11-NEXT: s_add_i32 s0, s32, 64 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-NEXT: s_mov_b32 s59, s0 +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use s59 +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_readlane_b32 s59, v0, 0 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 +; GFX11-NEXT: scratch_load_b32 v0, off, s1 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_store_b32 off, v0, s32 offset:16384 ; 4-byte Folded Spill +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: v_writelane_b32 v0, s59, 0 +; GFX12-NEXT: s_mov_b32 s59, s32 +; GFX12-NEXT: ;;#ASMSTART +; GFX12-NEXT: ; use s59 +; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_readlane_b32 s59, v0, 0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_load_b32 v0, off, s32 offset:16384 ; 4-byte Folded Reload +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x101000 +; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: v_writelane_b32 v0, s59, 0 +; GFX8-NEXT: s_lshr_b32 s59, s32, 6 +; GFX8-NEXT: s_add_i32 s59, s59, 64 +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use s59 +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_readlane_b32 s59, v0, 0 +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x101000 +; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x101000 +; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: v_writelane_b32 v0, s59, 0 +; GFX900-NEXT: s_lshr_b32 s59, s32, 6 +; GFX900-NEXT: s_add_i32 s59, s59, 64 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s59 +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s59, v0, 0 +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x101000 +; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX940-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX940-NEXT: s_add_i32 s2, s32, 0x4040 +; GFX940-NEXT: scratch_store_dword off, v0, s2 sc0 sc1 ; 4-byte Folded Spill +; GFX940-NEXT: s_mov_b64 exec, s[0:1] +; GFX940-NEXT: s_add_i32 s0, s32, 64 +; GFX940-NEXT: v_writelane_b32 v0, s59, 0 +; GFX940-NEXT: s_mov_b32 s59, s0 +; GFX940-NEXT: ;;#ASMSTART +; GFX940-NEXT: ; use s59 +; GFX940-NEXT: ;;#ASMEND +; GFX940-NEXT: v_readlane_b32 s59, v0, 0 +; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX940-NEXT: s_add_i32 s2, s32, 0x4040 +; GFX940-NEXT: scratch_load_dword v0, off, s2 ; 4-byte Folded Reload +; GFX940-NEXT: s_mov_b64 exec, s[0:1] +; GFX940-NEXT: s_waitcnt vmcnt(0) +; GFX940-NEXT: s_setpc_b64 s[30:31] + %alloca0 = alloca [4096 x i32], align 64, addrspace(5) + call void asm sideeffect "; use $0", "{s59}"(ptr addrspace(5) %alloca0) + ret void +} + +define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp() #1 { +; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: +; GFX10_1: ; %bb.0: +; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_1-NEXT: s_mov_b32 s5, s33 +; GFX10_1-NEXT: s_mov_b32 s33, s32 +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800 +; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s33 +; GFX10_1-NEXT: v_writelane_b32 v0, s59, 0 +; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo +; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 64, v1 +; GFX10_1-NEXT: v_readfirstlane_b32 s59, v1 +; GFX10_1-NEXT: ;;#ASMSTART +; GFX10_1-NEXT: ; use s59, scc +; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_readlane_b32 s59, v0, 0 +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800 +; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: s_add_i32 s32, s32, 0xfff7f000 +; GFX10_1-NEXT: s_mov_b32 s33, s5 +; GFX10_1-NEXT: s_waitcnt vmcnt(0) +; GFX10_1-NEXT: s_setpc_b64 s[30:31] +; +; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: +; GFX10_3: ; %bb.0: +; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_3-NEXT: s_mov_b32 s5, s33 +; GFX10_3-NEXT: s_mov_b32 s33, s32 +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 +; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s33 +; GFX10_3-NEXT: v_writelane_b32 v0, s59, 0 +; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo +; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 64, v1 +; GFX10_3-NEXT: v_readfirstlane_b32 s59, v1 +; GFX10_3-NEXT: ;;#ASMSTART +; GFX10_3-NEXT: ; use s59, scc +; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_readlane_b32 s59, v0, 0 +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 +; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: s_add_i32 s32, s32, 0xfff7f000 +; GFX10_3-NEXT: s_mov_b32 s33, s5 +; GFX10_3-NEXT: s_waitcnt vmcnt(0) +; GFX10_3-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: +; 
GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_mov_b32 s1, s33 +; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s2, s33, 0x4040 +; GFX11-NEXT: scratch_store_b32 off, v0, s2 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_addk_i32 s32, 0x4080 +; GFX11-NEXT: s_and_b32 s0, 0, exec_lo +; GFX11-NEXT: v_writelane_b32 v0, s59, 0 +; GFX11-NEXT: s_addc_u32 s0, s33, 64 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_bitcmp1_b32 s0, 0 +; GFX11-NEXT: s_bitset0_b32 s0, 0 +; GFX11-NEXT: s_mov_b32 s59, s0 +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use s59, scc +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_readlane_b32 s59, v0, 0 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s2, s33, 0x4040 +; GFX11-NEXT: scratch_load_b32 v0, off, s2 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_addk_i32 s32, 0xbf80 +; GFX11-NEXT: s_mov_b32 s33, s1 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_mov_b32 s1, s33 +; GFX12-NEXT: s_mov_b32 s33, s32 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_store_b32 off, v0, s33 offset:16384 ; 4-byte Folded Spill +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: v_writelane_b32 v0, s59, 0 +; GFX12-NEXT: s_addk_co_i32 s32, 0x4040 +; GFX12-NEXT: s_mov_b32 s59, s33 +; GFX12-NEXT: s_and_b32 s0, 0, exec_lo +; GFX12-NEXT: ;;#ASMSTART +; GFX12-NEXT: ; use s59, scc +; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: v_readlane_b32 s59, v0, 0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_load_b32 v0, off, s33 offset:16384 ; 4-byte Folded Reload +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: s_addk_co_i32 s32, 0xbfc0 +; GFX12-NEXT: s_mov_b32 s33, s1 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 s6, s33 +; GFX8-NEXT: s_mov_b32 s33, s32 +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s7, s33, 0x101000 +; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s7 ; 4-byte Folded Spill +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: v_writelane_b32 v0, s59, 0 +; GFX8-NEXT: v_lshrrev_b32_e64 v1, 6, s33 +; GFX8-NEXT: s_mov_b32 s59, 64 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, s59, v1 +; GFX8-NEXT: s_add_i32 s32, s32, 0x102000 +; GFX8-NEXT: v_readfirstlane_b32 s59, v1 +; GFX8-NEXT: s_and_b64 s[4:5], 0, exec +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use s59, scc +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_readlane_b32 s59, v0, 0 +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s7, s33, 0x101000 +; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s7 ; 4-byte Folded Reload +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: s_add_i32 s32, s32, 0xffefe000 +; GFX8-NEXT: s_mov_b32 s33, s6 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX900-NEXT: s_mov_b32 s6, s33 +; GFX900-NEXT: s_mov_b32 s33, s32 +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s7, s33, 0x101000 +; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s7 ; 4-byte Folded Spill +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: v_lshrrev_b32_e64 v1, 6, s33 +; GFX900-NEXT: v_add_u32_e32 v1, 64, v1 +; GFX900-NEXT: s_add_i32 s32, s32, 0x102000 +; GFX900-NEXT: v_writelane_b32 v0, s59, 0 +; GFX900-NEXT: v_readfirstlane_b32 s59, v1 +; GFX900-NEXT: s_and_b64 s[4:5], 0, exec +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s59, scc +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s59, v0, 0 +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s7, s33, 0x101000 +; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s7 ; 4-byte Folded Reload +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: s_add_i32 s32, s32, 0xffefe000 +; GFX900-NEXT: s_mov_b32 s33, s6 +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX940-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_mov_b32 s2, s33 +; GFX940-NEXT: s_mov_b32 s33, s32 +; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX940-NEXT: s_add_i32 s3, s33, 0x4040 +; GFX940-NEXT: scratch_store_dword off, v0, s3 sc0 sc1 ; 4-byte Folded Spill +; GFX940-NEXT: s_mov_b64 exec, s[0:1] +; GFX940-NEXT: s_addk_i32 s32, 0x4080 +; GFX940-NEXT: s_and_b64 s[0:1], 0, exec +; GFX940-NEXT: s_addc_u32 s0, s33, 64 +; GFX940-NEXT: s_bitcmp1_b32 s0, 0 +; GFX940-NEXT: s_bitset0_b32 s0, 0 +; GFX940-NEXT: v_writelane_b32 v0, s59, 0 +; GFX940-NEXT: s_mov_b32 s59, s0 +; GFX940-NEXT: ;;#ASMSTART +; GFX940-NEXT: ; use s59, scc +; GFX940-NEXT: ;;#ASMEND +; GFX940-NEXT: v_readlane_b32 s59, v0, 0 +; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX940-NEXT: s_add_i32 s3, s33, 0x4040 +; GFX940-NEXT: scratch_load_dword v0, off, s3 ; 4-byte Folded Reload +; GFX940-NEXT: s_mov_b64 exec, s[0:1] +; GFX940-NEXT: s_addk_i32 s32, 0xbf80 +; GFX940-NEXT: s_mov_b32 s33, s2 +; GFX940-NEXT: s_waitcnt vmcnt(0) +; GFX940-NEXT: s_setpc_b64 s[30:31] + %alloca0 = alloca [4096 x i32], align 64, addrspace(5) + call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca0, i32 0) + ret void +} + +define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() #1 { +; GFX10_1-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: +; GFX10_1: ; %bb.0: +; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_1-NEXT: s_mov_b32 s4, s33 +; GFX10_1-NEXT: s_mov_b32 s33, s32 +; GFX10_1-NEXT: s_xor_saveexec_b32 s5, -1 +; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800 +; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s5 +; GFX10_1-NEXT: v_writelane_b32 v0, s59, 0 +; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_1-NEXT: s_lshr_b32 s59, s33, 5 +; GFX10_1-NEXT: s_add_i32 s59, s59, 64 +; GFX10_1-NEXT: ;;#ASMSTART +; GFX10_1-NEXT: ; use s59 +; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_readlane_b32 s59, v0, 0 +; GFX10_1-NEXT: s_xor_saveexec_b32 s5, -1 +; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800 +; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s5 +; GFX10_1-NEXT: s_add_i32 s32, s32, 0xfff7f000 +; GFX10_1-NEXT: s_mov_b32 s33, s4 +; 
GFX10_1-NEXT: s_waitcnt vmcnt(0) +; GFX10_1-NEXT: s_setpc_b64 s[30:31] +; +; GFX10_3-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: +; GFX10_3: ; %bb.0: +; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_3-NEXT: s_mov_b32 s4, s33 +; GFX10_3-NEXT: s_mov_b32 s33, s32 +; GFX10_3-NEXT: s_xor_saveexec_b32 s5, -1 +; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 +; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX10_3-NEXT: s_mov_b32 exec_lo, s5 +; GFX10_3-NEXT: v_writelane_b32 v0, s59, 0 +; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_3-NEXT: s_lshr_b32 s59, s33, 5 +; GFX10_3-NEXT: s_add_i32 s59, s59, 64 +; GFX10_3-NEXT: ;;#ASMSTART +; GFX10_3-NEXT: ; use s59 +; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_readlane_b32 s59, v0, 0 +; GFX10_3-NEXT: s_xor_saveexec_b32 s5, -1 +; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 +; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX10_3-NEXT: s_mov_b32 exec_lo, s5 +; GFX10_3-NEXT: s_add_i32 s32, s32, 0xfff7f000 +; GFX10_3-NEXT: s_mov_b32 s33, s4 +; GFX10_3-NEXT: s_waitcnt vmcnt(0) +; GFX10_3-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_mov_b32 s0, s33 +; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_xor_saveexec_b32 s1, -1 +; GFX11-NEXT: s_add_i32 s2, s33, 0x4040 +; GFX11-NEXT: scratch_store_b32 off, v0, s2 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v0, s59, 0 +; GFX11-NEXT: s_addk_i32 s32, 0x4080 +; GFX11-NEXT: s_add_i32 s1, s33, 64 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_mov_b32 s59, s1 +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use s59 +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_readlane_b32 s59, v0, 0 +; GFX11-NEXT: s_xor_saveexec_b32 s1, -1 +; GFX11-NEXT: s_add_i32 s2, s33, 0x4040 +; GFX11-NEXT: scratch_load_b32 v0, off, s2 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: s_addk_i32 s32, 0xbf80 +; GFX11-NEXT: s_mov_b32 s33, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_mov_b32 s0, s33 +; GFX12-NEXT: s_mov_b32 s33, s32 +; GFX12-NEXT: s_xor_saveexec_b32 s1, -1 +; GFX12-NEXT: scratch_store_b32 off, v0, s33 offset:16384 ; 4-byte Folded Spill +; GFX12-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-NEXT: v_writelane_b32 v0, s59, 0 +; GFX12-NEXT: s_mov_b32 s59, s33 +; GFX12-NEXT: s_addk_co_i32 s32, 0x4040 +; GFX12-NEXT: ;;#ASMSTART +; GFX12-NEXT: ; use s59 +; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_readlane_b32 s59, v0, 0 +; GFX12-NEXT: s_xor_saveexec_b32 s1, -1 +; GFX12-NEXT: scratch_load_b32 v0, off, s33 offset:16384 ; 4-byte Folded Reload +; GFX12-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-NEXT: s_addk_co_i32 s32, 0xbfc0 +; GFX12-NEXT: s_mov_b32 s33, s0 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 s4, s33 +; GFX8-NEXT: s_mov_b32 s33, s32 +; GFX8-NEXT: 
s_xor_saveexec_b64 s[6:7], -1 +; GFX8-NEXT: s_add_i32 s5, s33, 0x101000 +; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX8-NEXT: s_mov_b64 exec, s[6:7] +; GFX8-NEXT: s_add_i32 s32, s32, 0x102000 +; GFX8-NEXT: v_writelane_b32 v0, s59, 0 +; GFX8-NEXT: s_lshr_b32 s59, s33, 6 +; GFX8-NEXT: s_add_i32 s59, s59, 64 +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use s59 +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_readlane_b32 s59, v0, 0 +; GFX8-NEXT: s_xor_saveexec_b64 s[6:7], -1 +; GFX8-NEXT: s_add_i32 s5, s33, 0x101000 +; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX8-NEXT: s_mov_b64 exec, s[6:7] +; GFX8-NEXT: s_add_i32 s32, s32, 0xffefe000 +; GFX8-NEXT: s_mov_b32 s33, s4 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, s33 +; GFX900-NEXT: s_mov_b32 s33, s32 +; GFX900-NEXT: s_xor_saveexec_b64 s[6:7], -1 +; GFX900-NEXT: s_add_i32 s5, s33, 0x101000 +; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX900-NEXT: s_mov_b64 exec, s[6:7] +; GFX900-NEXT: s_add_i32 s32, s32, 0x102000 +; GFX900-NEXT: v_writelane_b32 v0, s59, 0 +; GFX900-NEXT: s_lshr_b32 s59, s33, 6 +; GFX900-NEXT: s_add_i32 s59, s59, 64 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s59 +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s59, v0, 0 +; GFX900-NEXT: s_xor_saveexec_b64 s[6:7], -1 +; GFX900-NEXT: s_add_i32 s5, s33, 0x101000 +; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX900-NEXT: s_mov_b64 exec, s[6:7] +; GFX900-NEXT: s_add_i32 s32, s32, 0xffefe000 +; GFX900-NEXT: s_mov_b32 s33, s4 +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX940-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_mov_b32 s0, s33 +; GFX940-NEXT: s_mov_b32 s33, s32 +; GFX940-NEXT: s_xor_saveexec_b64 s[2:3], -1 +; GFX940-NEXT: s_add_i32 s1, s33, 0x4040 +; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 ; 4-byte Folded Spill +; GFX940-NEXT: s_mov_b64 exec, s[2:3] +; GFX940-NEXT: s_addk_i32 s32, 0x4080 +; GFX940-NEXT: s_add_i32 s1, s33, 64 +; GFX940-NEXT: v_writelane_b32 v0, s59, 0 +; GFX940-NEXT: s_mov_b32 s59, s1 +; GFX940-NEXT: ;;#ASMSTART +; GFX940-NEXT: ; use s59 +; GFX940-NEXT: ;;#ASMEND +; GFX940-NEXT: v_readlane_b32 s59, v0, 0 +; GFX940-NEXT: s_xor_saveexec_b64 s[2:3], -1 +; GFX940-NEXT: s_add_i32 s1, s33, 0x4040 +; GFX940-NEXT: scratch_load_dword v0, off, s1 ; 4-byte Folded Reload +; GFX940-NEXT: s_mov_b64 exec, s[2:3] +; GFX940-NEXT: s_addk_i32 s32, 0xbf80 +; GFX940-NEXT: s_mov_b32 s33, s0 +; GFX940-NEXT: s_waitcnt vmcnt(0) +; GFX940-NEXT: s_setpc_b64 s[30:31] + %alloca0 = alloca [4096 x i32], align 64, addrspace(5) + call void asm sideeffect "; use $0", "{s59}"(ptr addrspace(5) %alloca0) + ret void +} + +define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset() #0 { +; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: +; GFX10_1: ; %bb.0: +; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 +; GFX10_1-NEXT: buffer_store_dword v2, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; 
GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 +; GFX10_1-NEXT: v_writelane_b32 v2, s59, 0 +; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s32 +; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo +; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0 +; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 64, v1 +; GFX10_1-NEXT: ;;#ASMSTART +; GFX10_1-NEXT: ; use alloca0 v1 +; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 0x3ec, v0 +; GFX10_1-NEXT: v_readfirstlane_b32 s59, v0 +; GFX10_1-NEXT: ;;#ASMSTART +; GFX10_1-NEXT: ; use s59, scc +; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_readlane_b32 s59, v2, 0 +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 +; GFX10_1-NEXT: buffer_load_dword v2, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: s_waitcnt vmcnt(0) +; GFX10_1-NEXT: s_setpc_b64 s[30:31] +; +; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: +; GFX10_3: ; %bb.0: +; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 +; GFX10_3-NEXT: buffer_store_dword v2, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 +; GFX10_3-NEXT: v_writelane_b32 v2, s59, 0 +; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s32 +; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo +; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0 +; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 64, v1 +; GFX10_3-NEXT: ;;#ASMSTART +; GFX10_3-NEXT: ; use alloca0 v1 +; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 0x3ec, v0 +; GFX10_3-NEXT: v_readfirstlane_b32 s59, v0 +; GFX10_3-NEXT: ;;#ASMSTART +; GFX10_3-NEXT: ; use s59, scc +; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_readlane_b32 s59, v2, 0 +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 +; GFX10_3-NEXT: buffer_load_dword v2, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: s_waitcnt vmcnt(0) +; GFX10_3-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 +; GFX11-NEXT: scratch_store_b32 off, v2, s1 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_add_i32 s0, s32, 0x4040 +; GFX11-NEXT: v_writelane_b32 v2, s59, 0 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_add_i32 s0, s32, 64 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_mov_b32_e32 v1, s0 +; GFX11-NEXT: s_and_b32 s0, 0, exec_lo +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x3ec, v0 +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use alloca0 v1 +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_readfirstlane_b32 s59, v0 +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use s59, scc +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_readlane_b32 s59, v2, 0 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 +; GFX11-NEXT: scratch_load_b32 v2, off, s1 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: +; GFX12: ; %bb.0: 
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_store_b32 off, v2, s32 offset:32768 ; 4-byte Folded Spill +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: s_add_co_i32 s0, s32, 0x4000 +; GFX12-NEXT: v_writelane_b32 v2, s59, 0 +; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s32 +; GFX12-NEXT: s_and_b32 s0, 0, exec_lo +; GFX12-NEXT: ;;#ASMSTART +; GFX12-NEXT: ; use alloca0 v1 +; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_add_nc_u32_e32 v0, 0x3ec, v0 +; GFX12-NEXT: v_readfirstlane_b32 s59, v0 +; GFX12-NEXT: ;;#ASMSTART +; GFX12-NEXT: ; use s59, scc +; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: v_readlane_b32 s59, v2, 0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_load_b32 v2, off, s32 offset:32768 ; 4-byte Folded Reload +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX8-NEXT: buffer_store_dword v2, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 +; GFX8-NEXT: s_movk_i32 vcc_lo, 0x4040 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, vcc_lo, v0 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x3ec, v0 +; GFX8-NEXT: v_writelane_b32 v2, s59, 0 +; GFX8-NEXT: v_lshrrev_b32_e64 v1, 6, s32 +; GFX8-NEXT: v_readfirstlane_b32 s59, v0 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, 64, v1 +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use alloca0 v1 +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: s_and_b64 s[4:5], 0, exec +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use s59, scc +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_readlane_b32 s59, v2, 0 +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX8-NEXT: buffer_load_dword v2, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX900-NEXT: buffer_store_dword v2, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 +; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0 +; GFX900-NEXT: v_add_u32_e32 v0, 0x3ec, v0 +; GFX900-NEXT: v_writelane_b32 v2, s59, 0 +; GFX900-NEXT: v_lshrrev_b32_e64 v1, 6, s32 +; GFX900-NEXT: v_readfirstlane_b32 s59, v0 +; GFX900-NEXT: v_add_u32_e32 v1, 64, v1 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use alloca0 v1 +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_and_b64 s[4:5], 0, exec +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s59, scc +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s59, v2, 0 +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX900-NEXT: buffer_load_dword v2, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: s_setpc_b64 s[30:31] 
+; +; GFX940-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX940-NEXT: s_add_i32 s2, s32, 0x8040 +; GFX940-NEXT: scratch_store_dword off, v2, s2 sc0 sc1 ; 4-byte Folded Spill +; GFX940-NEXT: s_mov_b64 exec, s[0:1] +; GFX940-NEXT: s_add_i32 s0, s32, 0x4040 +; GFX940-NEXT: v_mov_b32_e32 v0, s0 +; GFX940-NEXT: v_add_u32_e32 v0, 0x3ec, v0 +; GFX940-NEXT: v_writelane_b32 v2, s59, 0 +; GFX940-NEXT: s_add_i32 s0, s32, 64 +; GFX940-NEXT: v_readfirstlane_b32 s59, v0 +; GFX940-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-NEXT: ;;#ASMSTART +; GFX940-NEXT: ; use alloca0 v1 +; GFX940-NEXT: ;;#ASMEND +; GFX940-NEXT: s_and_b64 s[0:1], 0, exec +; GFX940-NEXT: ;;#ASMSTART +; GFX940-NEXT: ; use s59, scc +; GFX940-NEXT: ;;#ASMEND +; GFX940-NEXT: v_readlane_b32 s59, v2, 0 +; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX940-NEXT: s_add_i32 s2, s32, 0x8040 +; GFX940-NEXT: scratch_load_dword v2, off, s2 ; 4-byte Folded Reload +; GFX940-NEXT: s_mov_b64 exec, s[0:1] +; GFX940-NEXT: s_waitcnt vmcnt(0) +; GFX940-NEXT: s_setpc_b64 s[30:31] + %alloca0 = alloca [4096 x i32], align 64, addrspace(5) + %alloca1 = alloca [4096 x i32], align 4, addrspace(5) + %alloca1.offset = getelementptr [4096 x i32], ptr addrspace(5) %alloca1, i32 0, i32 251 + call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) + call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca1.offset, i32 0) + ret void +} + +define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset(i32 inreg %soffset) #0 { +; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: +; GFX10_1: ; %bb.0: +; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 +; GFX10_1-NEXT: buffer_store_dword v2, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: v_lshrrev_b32_e64 v3, 5, s32 +; GFX10_1-NEXT: s_lshl_b32 s4, s6, 2 +; GFX10_1-NEXT: v_writelane_b32 v2, s59, 0 +; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 +; GFX10_1-NEXT: v_add_nc_u32_e32 v3, 0x4040, v3 +; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 +; GFX10_1-NEXT: ;;#ASMSTART +; GFX10_1-NEXT: ; use alloca0 v0 +; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_add_nc_u32_e32 v1, s4, v3 +; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo +; GFX10_1-NEXT: v_readfirstlane_b32 s59, v1 +; GFX10_1-NEXT: ;;#ASMSTART +; GFX10_1-NEXT: ; use s59, scc +; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_readlane_b32 s59, v2, 0 +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 +; GFX10_1-NEXT: buffer_load_dword v2, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: s_waitcnt vmcnt(0) +; GFX10_1-NEXT: s_setpc_b64 s[30:31] +; +; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: +; GFX10_3: ; %bb.0: +; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 +; GFX10_3-NEXT: buffer_store_dword v2, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: v_lshrrev_b32_e64 v3, 5, s32 +; GFX10_3-NEXT: s_lshl_b32 s4, s6, 2 +; GFX10_3-NEXT: v_writelane_b32 v2, s59, 0 +; GFX10_3-NEXT: 
v_lshrrev_b32_e64 v0, 5, s32 +; GFX10_3-NEXT: v_add_nc_u32_e32 v3, 0x4040, v3 +; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 +; GFX10_3-NEXT: ;;#ASMSTART +; GFX10_3-NEXT: ; use alloca0 v0 +; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_add_nc_u32_e32 v1, s4, v3 +; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo +; GFX10_3-NEXT: v_readfirstlane_b32 s59, v1 +; GFX10_3-NEXT: ;;#ASMSTART +; GFX10_3-NEXT: ; use s59, scc +; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_readlane_b32 s59, v2, 0 +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 +; GFX10_3-NEXT: buffer_load_dword v2, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: s_waitcnt vmcnt(0) +; GFX10_3-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_xor_saveexec_b32 s1, -1 +; GFX11-NEXT: s_add_i32 s2, s32, 0x8040 +; GFX11-NEXT: scratch_store_b32 off, v2, s2 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: s_lshl_b32 s0, s0, 2 +; GFX11-NEXT: s_add_i32 s1, s32, 64 +; GFX11-NEXT: v_writelane_b32 v2, s59, 0 +; GFX11-NEXT: v_mov_b32_e32 v0, s1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use alloca0 v0 +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_add_nc_u32_e64 v1, s0, s1 +; GFX11-NEXT: s_and_b32 s0, 0, exec_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_readfirstlane_b32 s59, v1 +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use s59, scc +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_readlane_b32 s59, v2, 0 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 +; GFX11-NEXT: scratch_load_b32 v2, off, s1 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_xor_saveexec_b32 s1, -1 +; GFX12-NEXT: scratch_store_b32 off, v2, s32 offset:32768 ; 4-byte Folded Spill +; GFX12-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-NEXT: s_lshl_b32 s0, s0, 2 +; GFX12-NEXT: s_add_co_i32 s1, s32, 0x4000 +; GFX12-NEXT: v_writelane_b32 v2, s59, 0 +; GFX12-NEXT: v_add_nc_u32_e64 v1, s0, s1 +; GFX12-NEXT: v_mov_b32_e32 v0, s32 +; GFX12-NEXT: s_and_b32 s0, 0, exec_lo +; GFX12-NEXT: ;;#ASMSTART +; GFX12-NEXT: ; use alloca0 v0 +; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX12-NEXT: v_readfirstlane_b32 s59, v1 +; GFX12-NEXT: ;;#ASMSTART +; GFX12-NEXT: ; use s59, scc +; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: v_readlane_b32 s59, v2, 0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_load_b32 v2, off, s32 offset:32768 ; 4-byte Folded Reload +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s7, s32, 0x201000 +; GFX8-NEXT: buffer_store_dword v2, off, s[0:3], s7 ; 4-byte Folded Spill +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 +; 
GFX8-NEXT: s_movk_i32 vcc_lo, 0x4040 +; GFX8-NEXT: s_lshl_b32 s4, s6, 2 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, vcc_lo, v0 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, s4, v0 +; GFX8-NEXT: v_writelane_b32 v2, s59, 0 +; GFX8-NEXT: v_lshrrev_b32_e64 v1, 6, s32 +; GFX8-NEXT: v_readfirstlane_b32 s59, v0 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, 64, v1 +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use alloca0 v1 +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: s_and_b64 s[4:5], 0, exec +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use s59, scc +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_readlane_b32 s59, v2, 0 +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX8-NEXT: buffer_load_dword v2, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s7, s32, 0x201000 +; GFX900-NEXT: buffer_store_dword v2, off, s[0:3], s7 ; 4-byte Folded Spill +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 +; GFX900-NEXT: s_lshl_b32 s4, s6, 2 +; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0 +; GFX900-NEXT: v_add_u32_e32 v0, s4, v0 +; GFX900-NEXT: v_writelane_b32 v2, s59, 0 +; GFX900-NEXT: v_lshrrev_b32_e64 v1, 6, s32 +; GFX900-NEXT: v_readfirstlane_b32 s59, v0 +; GFX900-NEXT: v_add_u32_e32 v1, 64, v1 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use alloca0 v1 +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_and_b64 s[4:5], 0, exec +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s59, scc +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s59, v2, 0 +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX900-NEXT: buffer_load_dword v2, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX940-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_xor_saveexec_b64 s[2:3], -1 +; GFX940-NEXT: s_add_i32 s1, s32, 0x8040 +; GFX940-NEXT: scratch_store_dword off, v2, s1 sc0 sc1 ; 4-byte Folded Spill +; GFX940-NEXT: s_mov_b64 exec, s[2:3] +; GFX940-NEXT: s_lshl_b32 s0, s0, 2 +; GFX940-NEXT: s_add_i32 s1, s32, 0x4040 +; GFX940-NEXT: v_mov_b32_e32 v0, s1 +; GFX940-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX940-NEXT: v_writelane_b32 v2, s59, 0 +; GFX940-NEXT: s_add_i32 s0, s32, 64 +; GFX940-NEXT: v_readfirstlane_b32 s59, v0 +; GFX940-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-NEXT: ;;#ASMSTART +; GFX940-NEXT: ; use alloca0 v1 +; GFX940-NEXT: ;;#ASMEND +; GFX940-NEXT: s_and_b64 s[0:1], 0, exec +; GFX940-NEXT: ;;#ASMSTART +; GFX940-NEXT: ; use s59, scc +; GFX940-NEXT: ;;#ASMEND +; GFX940-NEXT: v_readlane_b32 s59, v2, 0 +; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX940-NEXT: s_add_i32 s2, s32, 0x8040 +; GFX940-NEXT: scratch_load_dword v2, off, s2 ; 4-byte Folded Reload +; GFX940-NEXT: s_mov_b64 exec, s[0:1] +; GFX940-NEXT: s_waitcnt vmcnt(0) +; GFX940-NEXT: s_setpc_b64 s[30:31] + %alloca0 = alloca [4096 x i32], align 64, addrspace(5) + %alloca1 = alloca [4096 x i32], align 4, addrspace(5) + %alloca1.offset = getelementptr [4096 x i32], ptr addrspace(5) %alloca1, i32 0, i32 %soffset + call void asm sideeffect "; use alloca0 
$0", "v"(ptr addrspace(5) %alloca0) + call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca1.offset, i32 0) + ret void +} + +attributes #0 = { nounwind alignstack=64 "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="10,10" "no-realign-stack" } +attributes #1 = { nounwind alignstack=64 "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="10,10" "no-realign-stack" "frame-pointer"="all" } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX9: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll new file mode 100644 index 00000000000000..9cd92dcd5c94d1 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll @@ -0,0 +1,2323 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack < %s | FileCheck -check-prefix=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack < %s | FileCheck -check-prefixes=GFX9,GFX900 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+xnack < %s | FileCheck -check-prefixes=GFX9,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10_1 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10_3 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s + +%asm.output = type { <16 x i32>, <16 x i32>, <16 x i32>, <8 x i32>, <2 x i32>, i32, ; sgprs + <16 x i32>, <7 x i32>, ; vgprs + i64 ; vcc + } + +%asm.output2 = type { <16 x i32>, <16 x i32>, <16 x i32>, <8 x i32>, <2 x i32>, i32, ; sgprs + <16 x i32>, <5 x i32>, ; vgprs + i64 ; vcc + } + +%asm.output3 = type { <16 x i32>, <16 x i32>, <16 x i32>, <8 x i32>, <2 x i32>, ; sgprs + <16 x i32>, <6 x i32>, ; vgprs + i64 ; vcc + } + +; %alloca1 should end up materializing with s_mov_b32, but scc is +; unavailable. +; +; This is primarily to test gfx7 and gfx8, which do not have vector +; add with no carry. 
+; +define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 { +; GFX7-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX7-NEXT: s_add_i32 s6, s32, 0x101100 +; GFX7-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX7-NEXT: s_mov_b64 exec, s[4:5] +; GFX7-NEXT: v_writelane_b32 v23, s30, 0 +; GFX7-NEXT: v_writelane_b32 v23, s31, 1 +; GFX7-NEXT: v_writelane_b32 v23, s33, 2 +; GFX7-NEXT: v_writelane_b32 v23, s34, 3 +; GFX7-NEXT: v_writelane_b32 v23, s35, 4 +; GFX7-NEXT: v_writelane_b32 v23, s36, 5 +; GFX7-NEXT: v_writelane_b32 v23, s37, 6 +; GFX7-NEXT: v_writelane_b32 v23, s38, 7 +; GFX7-NEXT: v_writelane_b32 v23, s39, 8 +; GFX7-NEXT: v_writelane_b32 v23, s40, 9 +; GFX7-NEXT: v_writelane_b32 v23, s41, 10 +; GFX7-NEXT: v_writelane_b32 v23, s42, 11 +; GFX7-NEXT: v_writelane_b32 v23, s43, 12 +; GFX7-NEXT: v_writelane_b32 v23, s44, 13 +; GFX7-NEXT: v_writelane_b32 v23, s45, 14 +; GFX7-NEXT: v_writelane_b32 v23, s46, 15 +; GFX7-NEXT: v_writelane_b32 v23, s47, 16 +; GFX7-NEXT: v_writelane_b32 v23, s48, 17 +; GFX7-NEXT: v_writelane_b32 v23, s49, 18 +; GFX7-NEXT: v_writelane_b32 v23, s50, 19 +; GFX7-NEXT: v_writelane_b32 v23, s51, 20 +; GFX7-NEXT: v_writelane_b32 v23, s52, 21 +; GFX7-NEXT: v_writelane_b32 v23, s53, 22 +; GFX7-NEXT: v_writelane_b32 v23, s54, 23 +; GFX7-NEXT: v_writelane_b32 v23, s55, 24 +; GFX7-NEXT: v_writelane_b32 v23, s56, 25 +; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6 +; GFX7-NEXT: v_writelane_b32 v23, s57, 26 +; GFX7-NEXT: v_add_i32_e32 v0, vcc, 64, v0 +; GFX7-NEXT: s_and_b64 s[4:5], 0, exec +; GFX7-NEXT: v_writelane_b32 v23, s58, 27 +; GFX7-NEXT: ;;#ASMSTART +; GFX7-NEXT: ; use alloca0 v0 +; GFX7-NEXT: ;;#ASMEND +; GFX7-NEXT: ;;#ASMSTART +; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc +; GFX7-NEXT: ;;#ASMEND +; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], s32 +; GFX7-NEXT: v_mov_b32_e32 v0, 0x4040 +; GFX7-NEXT: v_mad_u32_u24 v0, v0, 64, s32 +; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6 +; GFX7-NEXT: v_writelane_b32 v23, s59, 28 +; GFX7-NEXT: v_readfirstlane_b32 s59, v0 +; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s32 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: ;;#ASMSTART +; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc +; GFX7-NEXT: ;;#ASMEND +; GFX7-NEXT: v_readlane_b32 s59, v23, 28 +; GFX7-NEXT: v_readlane_b32 s58, v23, 27 +; GFX7-NEXT: v_readlane_b32 s57, v23, 26 +; GFX7-NEXT: v_readlane_b32 s56, v23, 25 +; GFX7-NEXT: v_readlane_b32 s55, v23, 24 +; GFX7-NEXT: v_readlane_b32 s54, v23, 23 +; GFX7-NEXT: v_readlane_b32 s53, v23, 22 +; GFX7-NEXT: v_readlane_b32 s52, v23, 21 +; GFX7-NEXT: v_readlane_b32 s51, v23, 20 +; GFX7-NEXT: v_readlane_b32 s50, v23, 19 +; GFX7-NEXT: v_readlane_b32 s49, v23, 18 +; GFX7-NEXT: v_readlane_b32 s48, v23, 17 +; GFX7-NEXT: v_readlane_b32 s47, v23, 16 +; GFX7-NEXT: v_readlane_b32 s46, v23, 15 +; GFX7-NEXT: v_readlane_b32 s45, v23, 14 +; GFX7-NEXT: v_readlane_b32 s44, v23, 13 +; GFX7-NEXT: v_readlane_b32 s43, v23, 12 +; GFX7-NEXT: v_readlane_b32 s42, v23, 11 +; GFX7-NEXT: v_readlane_b32 s41, v23, 10 +; GFX7-NEXT: v_readlane_b32 s40, v23, 9 +; GFX7-NEXT: v_readlane_b32 s39, v23, 8 +; GFX7-NEXT: v_readlane_b32 s38, v23, 7 +; GFX7-NEXT: v_readlane_b32 s37, v23, 6 +; GFX7-NEXT: v_readlane_b32 s36, v23, 5 +; GFX7-NEXT: v_readlane_b32 s35, v23, 4 +; GFX7-NEXT: 
v_readlane_b32 s34, v23, 3 +; GFX7-NEXT: v_readlane_b32 s33, v23, 2 +; GFX7-NEXT: v_readlane_b32 s31, v23, 1 +; GFX7-NEXT: v_readlane_b32 s30, v23, 0 +; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX7-NEXT: s_add_i32 s6, s32, 0x101100 +; GFX7-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX7-NEXT: s_mov_b64 exec, s[4:5] +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 +; GFX8-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: v_writelane_b32 v23, s30, 0 +; GFX8-NEXT: v_writelane_b32 v23, s31, 1 +; GFX8-NEXT: v_writelane_b32 v23, s33, 2 +; GFX8-NEXT: v_writelane_b32 v23, s34, 3 +; GFX8-NEXT: v_writelane_b32 v23, s35, 4 +; GFX8-NEXT: v_writelane_b32 v23, s36, 5 +; GFX8-NEXT: v_writelane_b32 v23, s37, 6 +; GFX8-NEXT: v_writelane_b32 v23, s38, 7 +; GFX8-NEXT: v_writelane_b32 v23, s39, 8 +; GFX8-NEXT: v_writelane_b32 v23, s40, 9 +; GFX8-NEXT: v_writelane_b32 v23, s41, 10 +; GFX8-NEXT: v_writelane_b32 v23, s42, 11 +; GFX8-NEXT: v_writelane_b32 v23, s43, 12 +; GFX8-NEXT: v_writelane_b32 v23, s44, 13 +; GFX8-NEXT: v_writelane_b32 v23, s45, 14 +; GFX8-NEXT: v_writelane_b32 v23, s46, 15 +; GFX8-NEXT: v_writelane_b32 v23, s47, 16 +; GFX8-NEXT: v_writelane_b32 v23, s48, 17 +; GFX8-NEXT: v_writelane_b32 v23, s49, 18 +; GFX8-NEXT: v_writelane_b32 v23, s50, 19 +; GFX8-NEXT: v_writelane_b32 v23, s51, 20 +; GFX8-NEXT: v_writelane_b32 v23, s52, 21 +; GFX8-NEXT: v_writelane_b32 v23, s53, 22 +; GFX8-NEXT: v_writelane_b32 v23, s54, 23 +; GFX8-NEXT: v_writelane_b32 v23, s55, 24 +; GFX8-NEXT: v_writelane_b32 v23, s56, 25 +; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 +; GFX8-NEXT: v_writelane_b32 v23, s57, 26 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 +; GFX8-NEXT: s_and_b64 s[4:5], 0, exec +; GFX8-NEXT: v_writelane_b32 v23, s58, 27 +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use alloca0 v0 +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s32 +; GFX8-NEXT: v_mov_b32_e32 v0, 0x4040 +; GFX8-NEXT: v_mad_u32_u24 v0, v0, 64, s32 +; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 +; GFX8-NEXT: v_writelane_b32 v23, s59, 28 +; GFX8-NEXT: v_readfirstlane_b32 s59, v0 +; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s32 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_readlane_b32 s59, v23, 28 +; GFX8-NEXT: v_readlane_b32 s58, v23, 27 +; GFX8-NEXT: v_readlane_b32 s57, v23, 26 +; GFX8-NEXT: v_readlane_b32 s56, v23, 25 +; GFX8-NEXT: v_readlane_b32 s55, v23, 24 +; GFX8-NEXT: v_readlane_b32 s54, v23, 23 +; GFX8-NEXT: v_readlane_b32 s53, v23, 22 +; GFX8-NEXT: v_readlane_b32 s52, v23, 21 +; GFX8-NEXT: v_readlane_b32 s51, v23, 20 +; GFX8-NEXT: v_readlane_b32 s50, v23, 19 +; GFX8-NEXT: v_readlane_b32 s49, v23, 18 +; GFX8-NEXT: v_readlane_b32 s48, v23, 17 +; GFX8-NEXT: v_readlane_b32 s47, v23, 16 +; GFX8-NEXT: v_readlane_b32 s46, v23, 15 +; GFX8-NEXT: v_readlane_b32 s45, v23, 14 +; GFX8-NEXT: v_readlane_b32 s44, v23, 13 +; GFX8-NEXT: v_readlane_b32 s43, v23, 12 +; GFX8-NEXT: 
v_readlane_b32 s42, v23, 11 +; GFX8-NEXT: v_readlane_b32 s41, v23, 10 +; GFX8-NEXT: v_readlane_b32 s40, v23, 9 +; GFX8-NEXT: v_readlane_b32 s39, v23, 8 +; GFX8-NEXT: v_readlane_b32 s38, v23, 7 +; GFX8-NEXT: v_readlane_b32 s37, v23, 6 +; GFX8-NEXT: v_readlane_b32 s36, v23, 5 +; GFX8-NEXT: v_readlane_b32 s35, v23, 4 +; GFX8-NEXT: v_readlane_b32 s34, v23, 3 +; GFX8-NEXT: v_readlane_b32 s33, v23, 2 +; GFX8-NEXT: v_readlane_b32 s31, v23, 1 +; GFX8-NEXT: v_readlane_b32 s30, v23, 0 +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 +; GFX8-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 +; GFX900-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: v_writelane_b32 v23, s30, 0 +; GFX900-NEXT: v_writelane_b32 v23, s31, 1 +; GFX900-NEXT: v_writelane_b32 v23, s33, 2 +; GFX900-NEXT: v_writelane_b32 v23, s34, 3 +; GFX900-NEXT: v_writelane_b32 v23, s35, 4 +; GFX900-NEXT: v_writelane_b32 v23, s36, 5 +; GFX900-NEXT: v_writelane_b32 v23, s37, 6 +; GFX900-NEXT: v_writelane_b32 v23, s38, 7 +; GFX900-NEXT: v_writelane_b32 v23, s39, 8 +; GFX900-NEXT: v_writelane_b32 v23, s40, 9 +; GFX900-NEXT: v_writelane_b32 v23, s41, 10 +; GFX900-NEXT: v_writelane_b32 v23, s42, 11 +; GFX900-NEXT: v_writelane_b32 v23, s43, 12 +; GFX900-NEXT: v_writelane_b32 v23, s44, 13 +; GFX900-NEXT: v_writelane_b32 v23, s45, 14 +; GFX900-NEXT: v_writelane_b32 v23, s46, 15 +; GFX900-NEXT: v_writelane_b32 v23, s47, 16 +; GFX900-NEXT: v_writelane_b32 v23, s48, 17 +; GFX900-NEXT: v_writelane_b32 v23, s49, 18 +; GFX900-NEXT: v_writelane_b32 v23, s50, 19 +; GFX900-NEXT: v_writelane_b32 v23, s51, 20 +; GFX900-NEXT: v_writelane_b32 v23, s52, 21 +; GFX900-NEXT: v_writelane_b32 v23, s53, 22 +; GFX900-NEXT: v_writelane_b32 v23, s54, 23 +; GFX900-NEXT: v_writelane_b32 v23, s55, 24 +; GFX900-NEXT: v_writelane_b32 v23, s56, 25 +; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 +; GFX900-NEXT: v_writelane_b32 v23, s57, 26 +; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 +; GFX900-NEXT: s_and_b64 s[4:5], 0, exec +; GFX900-NEXT: v_writelane_b32 v23, s58, 27 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use alloca0 v0 +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 +; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 +; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0 +; GFX900-NEXT: v_writelane_b32 v23, s59, 28 +; GFX900-NEXT: v_readfirstlane_b32 s59, v0 +; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s59, v23, 28 +; GFX900-NEXT: v_readlane_b32 s58, v23, 27 +; GFX900-NEXT: v_readlane_b32 s57, v23, 26 +; GFX900-NEXT: v_readlane_b32 s56, v23, 25 +; GFX900-NEXT: v_readlane_b32 s55, v23, 24 +; GFX900-NEXT: v_readlane_b32 s54, v23, 23 +; GFX900-NEXT: v_readlane_b32 s53, v23, 22 +; GFX900-NEXT: 
v_readlane_b32 s52, v23, 21 +; GFX900-NEXT: v_readlane_b32 s51, v23, 20 +; GFX900-NEXT: v_readlane_b32 s50, v23, 19 +; GFX900-NEXT: v_readlane_b32 s49, v23, 18 +; GFX900-NEXT: v_readlane_b32 s48, v23, 17 +; GFX900-NEXT: v_readlane_b32 s47, v23, 16 +; GFX900-NEXT: v_readlane_b32 s46, v23, 15 +; GFX900-NEXT: v_readlane_b32 s45, v23, 14 +; GFX900-NEXT: v_readlane_b32 s44, v23, 13 +; GFX900-NEXT: v_readlane_b32 s43, v23, 12 +; GFX900-NEXT: v_readlane_b32 s42, v23, 11 +; GFX900-NEXT: v_readlane_b32 s41, v23, 10 +; GFX900-NEXT: v_readlane_b32 s40, v23, 9 +; GFX900-NEXT: v_readlane_b32 s39, v23, 8 +; GFX900-NEXT: v_readlane_b32 s38, v23, 7 +; GFX900-NEXT: v_readlane_b32 s37, v23, 6 +; GFX900-NEXT: v_readlane_b32 s36, v23, 5 +; GFX900-NEXT: v_readlane_b32 s35, v23, 4 +; GFX900-NEXT: v_readlane_b32 s34, v23, 3 +; GFX900-NEXT: v_readlane_b32 s33, v23, 2 +; GFX900-NEXT: v_readlane_b32 s31, v23, 1 +; GFX900-NEXT: v_readlane_b32 s30, v23, 0 +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 +; GFX900-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX940-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX940-NEXT: s_add_i32 s2, s32, 0x4044 +; GFX940-NEXT: scratch_store_dword off, v23, s2 sc0 sc1 ; 4-byte Folded Spill +; GFX940-NEXT: s_mov_b64 exec, s[0:1] +; GFX940-NEXT: v_writelane_b32 v23, s30, 0 +; GFX940-NEXT: v_writelane_b32 v23, s31, 1 +; GFX940-NEXT: v_writelane_b32 v23, s33, 2 +; GFX940-NEXT: v_writelane_b32 v23, s34, 3 +; GFX940-NEXT: v_writelane_b32 v23, s35, 4 +; GFX940-NEXT: v_writelane_b32 v23, s36, 5 +; GFX940-NEXT: v_writelane_b32 v23, s37, 6 +; GFX940-NEXT: v_writelane_b32 v23, s38, 7 +; GFX940-NEXT: v_writelane_b32 v23, s39, 8 +; GFX940-NEXT: v_writelane_b32 v23, s40, 9 +; GFX940-NEXT: v_writelane_b32 v23, s41, 10 +; GFX940-NEXT: v_writelane_b32 v23, s42, 11 +; GFX940-NEXT: v_writelane_b32 v23, s43, 12 +; GFX940-NEXT: v_writelane_b32 v23, s44, 13 +; GFX940-NEXT: v_writelane_b32 v23, s45, 14 +; GFX940-NEXT: v_writelane_b32 v23, s46, 15 +; GFX940-NEXT: v_writelane_b32 v23, s47, 16 +; GFX940-NEXT: v_writelane_b32 v23, s48, 17 +; GFX940-NEXT: v_writelane_b32 v23, s49, 18 +; GFX940-NEXT: v_writelane_b32 v23, s50, 19 +; GFX940-NEXT: v_writelane_b32 v23, s51, 20 +; GFX940-NEXT: v_writelane_b32 v23, s52, 21 +; GFX940-NEXT: v_writelane_b32 v23, s53, 22 +; GFX940-NEXT: v_writelane_b32 v23, s54, 23 +; GFX940-NEXT: v_writelane_b32 v23, s55, 24 +; GFX940-NEXT: v_writelane_b32 v23, s56, 25 +; GFX940-NEXT: v_writelane_b32 v23, s57, 26 +; GFX940-NEXT: v_writelane_b32 v23, s58, 27 +; GFX940-NEXT: v_writelane_b32 v23, s59, 28 +; GFX940-NEXT: v_writelane_b32 v23, s60, 29 +; GFX940-NEXT: s_add_i32 s0, s32, 64 +; GFX940-NEXT: v_writelane_b32 v23, s61, 30 +; GFX940-NEXT: v_mov_b32_e32 v0, s0 +; GFX940-NEXT: s_and_b64 s[60:61], 0, exec +; GFX940-NEXT: ;;#ASMSTART +; GFX940-NEXT: ; use alloca0 v0 +; GFX940-NEXT: ;;#ASMEND +; GFX940-NEXT: ;;#ASMSTART +; GFX940-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc +; GFX940-NEXT: ;;#ASMEND +; GFX940-NEXT: s_addc_u32 s60, s32, 0x4040 +; GFX940-NEXT: s_bitcmp1_b32 s60, 0 +; GFX940-NEXT: s_bitset0_b32 s60, 0 +; GFX940-NEXT: s_mov_b32 s59, s60 +; GFX940-NEXT: ;;#ASMSTART +; GFX940-NEXT: ; use s[0:15], s[16:31], 
s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc +; GFX940-NEXT: ;;#ASMEND +; GFX940-NEXT: v_readlane_b32 s61, v23, 30 +; GFX940-NEXT: v_readlane_b32 s60, v23, 29 +; GFX940-NEXT: v_readlane_b32 s59, v23, 28 +; GFX940-NEXT: v_readlane_b32 s58, v23, 27 +; GFX940-NEXT: v_readlane_b32 s57, v23, 26 +; GFX940-NEXT: v_readlane_b32 s56, v23, 25 +; GFX940-NEXT: v_readlane_b32 s55, v23, 24 +; GFX940-NEXT: v_readlane_b32 s54, v23, 23 +; GFX940-NEXT: v_readlane_b32 s53, v23, 22 +; GFX940-NEXT: v_readlane_b32 s52, v23, 21 +; GFX940-NEXT: v_readlane_b32 s51, v23, 20 +; GFX940-NEXT: v_readlane_b32 s50, v23, 19 +; GFX940-NEXT: v_readlane_b32 s49, v23, 18 +; GFX940-NEXT: v_readlane_b32 s48, v23, 17 +; GFX940-NEXT: v_readlane_b32 s47, v23, 16 +; GFX940-NEXT: v_readlane_b32 s46, v23, 15 +; GFX940-NEXT: v_readlane_b32 s45, v23, 14 +; GFX940-NEXT: v_readlane_b32 s44, v23, 13 +; GFX940-NEXT: v_readlane_b32 s43, v23, 12 +; GFX940-NEXT: v_readlane_b32 s42, v23, 11 +; GFX940-NEXT: v_readlane_b32 s41, v23, 10 +; GFX940-NEXT: v_readlane_b32 s40, v23, 9 +; GFX940-NEXT: v_readlane_b32 s39, v23, 8 +; GFX940-NEXT: v_readlane_b32 s38, v23, 7 +; GFX940-NEXT: v_readlane_b32 s37, v23, 6 +; GFX940-NEXT: v_readlane_b32 s36, v23, 5 +; GFX940-NEXT: v_readlane_b32 s35, v23, 4 +; GFX940-NEXT: v_readlane_b32 s34, v23, 3 +; GFX940-NEXT: v_readlane_b32 s33, v23, 2 +; GFX940-NEXT: v_readlane_b32 s31, v23, 1 +; GFX940-NEXT: v_readlane_b32 s30, v23, 0 +; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX940-NEXT: s_add_i32 s2, s32, 0x4044 +; GFX940-NEXT: scratch_load_dword v23, off, s2 ; 4-byte Folded Reload +; GFX940-NEXT: s_mov_b64 exec, s[0:1] +; GFX940-NEXT: s_waitcnt vmcnt(0) +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX10_1-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs: +; GFX10_1: ; %bb.0: +; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 +; GFX10_1-NEXT: buffer_store_dword v23, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: v_writelane_b32 v23, s30, 0 +; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 +; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo +; GFX10_1-NEXT: v_writelane_b32 v23, s31, 1 +; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 +; GFX10_1-NEXT: ;;#ASMSTART +; GFX10_1-NEXT: ; use alloca0 v0 +; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_writelane_b32 v23, s33, 2 +; GFX10_1-NEXT: v_writelane_b32 v23, s34, 3 +; GFX10_1-NEXT: v_writelane_b32 v23, s35, 4 +; GFX10_1-NEXT: v_writelane_b32 v23, s36, 5 +; GFX10_1-NEXT: v_writelane_b32 v23, s37, 6 +; GFX10_1-NEXT: v_writelane_b32 v23, s38, 7 +; GFX10_1-NEXT: v_writelane_b32 v23, s39, 8 +; GFX10_1-NEXT: v_writelane_b32 v23, s40, 9 +; GFX10_1-NEXT: v_writelane_b32 v23, s41, 10 +; GFX10_1-NEXT: v_writelane_b32 v23, s42, 11 +; GFX10_1-NEXT: v_writelane_b32 v23, s43, 12 +; GFX10_1-NEXT: v_writelane_b32 v23, s44, 13 +; GFX10_1-NEXT: v_writelane_b32 v23, s45, 14 +; GFX10_1-NEXT: v_writelane_b32 v23, s46, 15 +; GFX10_1-NEXT: v_writelane_b32 v23, s47, 16 +; GFX10_1-NEXT: v_writelane_b32 v23, s48, 17 +; GFX10_1-NEXT: v_writelane_b32 v23, s49, 18 +; GFX10_1-NEXT: v_writelane_b32 v23, s50, 19 +; GFX10_1-NEXT: v_writelane_b32 v23, s51, 20 +; GFX10_1-NEXT: v_writelane_b32 v23, s52, 21 +; GFX10_1-NEXT: v_writelane_b32 v23, s53, 22 +; GFX10_1-NEXT: v_writelane_b32 v23, s54, 23 +; GFX10_1-NEXT: v_writelane_b32 v23, s55, 24 +; GFX10_1-NEXT: v_writelane_b32 v23, s56, 25 +; GFX10_1-NEXT: 
v_writelane_b32 v23, s57, 26 +; GFX10_1-NEXT: v_writelane_b32 v23, s58, 27 +; GFX10_1-NEXT: ;;#ASMSTART +; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc +; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_lshrrev_b32_e64 v24, 5, s32 +; GFX10_1-NEXT: v_writelane_b32 v23, s59, 28 +; GFX10_1-NEXT: v_add_nc_u32_e32 v24, 0x4040, v24 +; GFX10_1-NEXT: v_readfirstlane_b32 s59, v24 +; GFX10_1-NEXT: ;;#ASMSTART +; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc +; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_readlane_b32 s59, v23, 28 +; GFX10_1-NEXT: v_readlane_b32 s58, v23, 27 +; GFX10_1-NEXT: v_readlane_b32 s57, v23, 26 +; GFX10_1-NEXT: v_readlane_b32 s56, v23, 25 +; GFX10_1-NEXT: v_readlane_b32 s55, v23, 24 +; GFX10_1-NEXT: v_readlane_b32 s54, v23, 23 +; GFX10_1-NEXT: v_readlane_b32 s53, v23, 22 +; GFX10_1-NEXT: v_readlane_b32 s52, v23, 21 +; GFX10_1-NEXT: v_readlane_b32 s51, v23, 20 +; GFX10_1-NEXT: v_readlane_b32 s50, v23, 19 +; GFX10_1-NEXT: v_readlane_b32 s49, v23, 18 +; GFX10_1-NEXT: v_readlane_b32 s48, v23, 17 +; GFX10_1-NEXT: v_readlane_b32 s47, v23, 16 +; GFX10_1-NEXT: v_readlane_b32 s46, v23, 15 +; GFX10_1-NEXT: v_readlane_b32 s45, v23, 14 +; GFX10_1-NEXT: v_readlane_b32 s44, v23, 13 +; GFX10_1-NEXT: v_readlane_b32 s43, v23, 12 +; GFX10_1-NEXT: v_readlane_b32 s42, v23, 11 +; GFX10_1-NEXT: v_readlane_b32 s41, v23, 10 +; GFX10_1-NEXT: v_readlane_b32 s40, v23, 9 +; GFX10_1-NEXT: v_readlane_b32 s39, v23, 8 +; GFX10_1-NEXT: v_readlane_b32 s38, v23, 7 +; GFX10_1-NEXT: v_readlane_b32 s37, v23, 6 +; GFX10_1-NEXT: v_readlane_b32 s36, v23, 5 +; GFX10_1-NEXT: v_readlane_b32 s35, v23, 4 +; GFX10_1-NEXT: v_readlane_b32 s34, v23, 3 +; GFX10_1-NEXT: v_readlane_b32 s33, v23, 2 +; GFX10_1-NEXT: v_readlane_b32 s31, v23, 1 +; GFX10_1-NEXT: v_readlane_b32 s30, v23, 0 +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 +; GFX10_1-NEXT: buffer_load_dword v23, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: s_waitcnt vmcnt(0) +; GFX10_1-NEXT: s_setpc_b64 s[30:31] +; +; GFX10_3-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs: +; GFX10_3: ; %bb.0: +; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 +; GFX10_3-NEXT: buffer_store_dword v23, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: v_writelane_b32 v23, s30, 0 +; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 +; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo +; GFX10_3-NEXT: v_writelane_b32 v23, s31, 1 +; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 +; GFX10_3-NEXT: ;;#ASMSTART +; GFX10_3-NEXT: ; use alloca0 v0 +; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_writelane_b32 v23, s33, 2 +; GFX10_3-NEXT: v_writelane_b32 v23, s34, 3 +; GFX10_3-NEXT: v_writelane_b32 v23, s35, 4 +; GFX10_3-NEXT: v_writelane_b32 v23, s36, 5 +; GFX10_3-NEXT: v_writelane_b32 v23, s37, 6 +; GFX10_3-NEXT: v_writelane_b32 v23, s38, 7 +; GFX10_3-NEXT: v_writelane_b32 v23, s39, 8 +; GFX10_3-NEXT: v_writelane_b32 v23, s40, 9 +; GFX10_3-NEXT: v_writelane_b32 v23, s41, 10 +; GFX10_3-NEXT: v_writelane_b32 v23, s42, 11 +; GFX10_3-NEXT: v_writelane_b32 v23, s43, 12 +; GFX10_3-NEXT: v_writelane_b32 v23, s44, 13 +; GFX10_3-NEXT: v_writelane_b32 v23, s45, 14 +; GFX10_3-NEXT: v_writelane_b32 v23, s46, 15 +; GFX10_3-NEXT: v_writelane_b32 
v23, s47, 16 +; GFX10_3-NEXT: v_writelane_b32 v23, s48, 17 +; GFX10_3-NEXT: v_writelane_b32 v23, s49, 18 +; GFX10_3-NEXT: v_writelane_b32 v23, s50, 19 +; GFX10_3-NEXT: v_writelane_b32 v23, s51, 20 +; GFX10_3-NEXT: v_writelane_b32 v23, s52, 21 +; GFX10_3-NEXT: v_writelane_b32 v23, s53, 22 +; GFX10_3-NEXT: v_writelane_b32 v23, s54, 23 +; GFX10_3-NEXT: v_writelane_b32 v23, s55, 24 +; GFX10_3-NEXT: v_writelane_b32 v23, s56, 25 +; GFX10_3-NEXT: v_writelane_b32 v23, s57, 26 +; GFX10_3-NEXT: v_writelane_b32 v23, s58, 27 +; GFX10_3-NEXT: ;;#ASMSTART +; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc +; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_lshrrev_b32_e64 v24, 5, s32 +; GFX10_3-NEXT: v_writelane_b32 v23, s59, 28 +; GFX10_3-NEXT: v_add_nc_u32_e32 v24, 0x4040, v24 +; GFX10_3-NEXT: v_readfirstlane_b32 s59, v24 +; GFX10_3-NEXT: ;;#ASMSTART +; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc +; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_readlane_b32 s59, v23, 28 +; GFX10_3-NEXT: v_readlane_b32 s58, v23, 27 +; GFX10_3-NEXT: v_readlane_b32 s57, v23, 26 +; GFX10_3-NEXT: v_readlane_b32 s56, v23, 25 +; GFX10_3-NEXT: v_readlane_b32 s55, v23, 24 +; GFX10_3-NEXT: v_readlane_b32 s54, v23, 23 +; GFX10_3-NEXT: v_readlane_b32 s53, v23, 22 +; GFX10_3-NEXT: v_readlane_b32 s52, v23, 21 +; GFX10_3-NEXT: v_readlane_b32 s51, v23, 20 +; GFX10_3-NEXT: v_readlane_b32 s50, v23, 19 +; GFX10_3-NEXT: v_readlane_b32 s49, v23, 18 +; GFX10_3-NEXT: v_readlane_b32 s48, v23, 17 +; GFX10_3-NEXT: v_readlane_b32 s47, v23, 16 +; GFX10_3-NEXT: v_readlane_b32 s46, v23, 15 +; GFX10_3-NEXT: v_readlane_b32 s45, v23, 14 +; GFX10_3-NEXT: v_readlane_b32 s44, v23, 13 +; GFX10_3-NEXT: v_readlane_b32 s43, v23, 12 +; GFX10_3-NEXT: v_readlane_b32 s42, v23, 11 +; GFX10_3-NEXT: v_readlane_b32 s41, v23, 10 +; GFX10_3-NEXT: v_readlane_b32 s40, v23, 9 +; GFX10_3-NEXT: v_readlane_b32 s39, v23, 8 +; GFX10_3-NEXT: v_readlane_b32 s38, v23, 7 +; GFX10_3-NEXT: v_readlane_b32 s37, v23, 6 +; GFX10_3-NEXT: v_readlane_b32 s36, v23, 5 +; GFX10_3-NEXT: v_readlane_b32 s35, v23, 4 +; GFX10_3-NEXT: v_readlane_b32 s34, v23, 3 +; GFX10_3-NEXT: v_readlane_b32 s33, v23, 2 +; GFX10_3-NEXT: v_readlane_b32 s31, v23, 1 +; GFX10_3-NEXT: v_readlane_b32 s30, v23, 0 +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 +; GFX10_3-NEXT: buffer_load_dword v23, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: s_waitcnt vmcnt(0) +; GFX10_3-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 +; GFX11-NEXT: scratch_store_b32 off, v23, s1 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: v_writelane_b32 v23, s30, 0 +; GFX11-NEXT: s_add_i32 s0, s32, 64 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_and_b32 s0, 0, exec_lo +; GFX11-NEXT: v_writelane_b32 v23, s31, 1 +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use alloca0 v0 +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_writelane_b32 v23, s33, 2 +; GFX11-NEXT: v_writelane_b32 v23, s34, 3 +; GFX11-NEXT: v_writelane_b32 v23, s35, 4 +; GFX11-NEXT: v_writelane_b32 v23, s36, 5 +; GFX11-NEXT: v_writelane_b32 v23, s37, 6 +; GFX11-NEXT: v_writelane_b32 v23, s38, 7 +; GFX11-NEXT: 
v_writelane_b32 v23, s39, 8 +; GFX11-NEXT: v_writelane_b32 v23, s40, 9 +; GFX11-NEXT: v_writelane_b32 v23, s41, 10 +; GFX11-NEXT: v_writelane_b32 v23, s42, 11 +; GFX11-NEXT: v_writelane_b32 v23, s43, 12 +; GFX11-NEXT: v_writelane_b32 v23, s44, 13 +; GFX11-NEXT: v_writelane_b32 v23, s45, 14 +; GFX11-NEXT: v_writelane_b32 v23, s46, 15 +; GFX11-NEXT: v_writelane_b32 v23, s47, 16 +; GFX11-NEXT: v_writelane_b32 v23, s48, 17 +; GFX11-NEXT: v_writelane_b32 v23, s49, 18 +; GFX11-NEXT: v_writelane_b32 v23, s50, 19 +; GFX11-NEXT: v_writelane_b32 v23, s51, 20 +; GFX11-NEXT: v_writelane_b32 v23, s52, 21 +; GFX11-NEXT: v_writelane_b32 v23, s53, 22 +; GFX11-NEXT: v_writelane_b32 v23, s54, 23 +; GFX11-NEXT: v_writelane_b32 v23, s55, 24 +; GFX11-NEXT: v_writelane_b32 v23, s56, 25 +; GFX11-NEXT: v_writelane_b32 v23, s57, 26 +; GFX11-NEXT: v_writelane_b32 v23, s58, 27 +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: s_addc_u32 s32, s32, 0x4040 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_bitcmp1_b32 s32, 0 +; GFX11-NEXT: v_writelane_b32 v23, s59, 28 +; GFX11-NEXT: s_bitset0_b32 s32, 0 +; GFX11-NEXT: s_mov_b32 s59, s32 +; GFX11-NEXT: s_addc_u32 s32, s32, 0xffffbfc0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_bitcmp1_b32 s32, 0 +; GFX11-NEXT: s_bitset0_b32 s32, 0 +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_readlane_b32 s59, v23, 28 +; GFX11-NEXT: v_readlane_b32 s58, v23, 27 +; GFX11-NEXT: v_readlane_b32 s57, v23, 26 +; GFX11-NEXT: v_readlane_b32 s56, v23, 25 +; GFX11-NEXT: v_readlane_b32 s55, v23, 24 +; GFX11-NEXT: v_readlane_b32 s54, v23, 23 +; GFX11-NEXT: v_readlane_b32 s53, v23, 22 +; GFX11-NEXT: v_readlane_b32 s52, v23, 21 +; GFX11-NEXT: v_readlane_b32 s51, v23, 20 +; GFX11-NEXT: v_readlane_b32 s50, v23, 19 +; GFX11-NEXT: v_readlane_b32 s49, v23, 18 +; GFX11-NEXT: v_readlane_b32 s48, v23, 17 +; GFX11-NEXT: v_readlane_b32 s47, v23, 16 +; GFX11-NEXT: v_readlane_b32 s46, v23, 15 +; GFX11-NEXT: v_readlane_b32 s45, v23, 14 +; GFX11-NEXT: v_readlane_b32 s44, v23, 13 +; GFX11-NEXT: v_readlane_b32 s43, v23, 12 +; GFX11-NEXT: v_readlane_b32 s42, v23, 11 +; GFX11-NEXT: v_readlane_b32 s41, v23, 10 +; GFX11-NEXT: v_readlane_b32 s40, v23, 9 +; GFX11-NEXT: v_readlane_b32 s39, v23, 8 +; GFX11-NEXT: v_readlane_b32 s38, v23, 7 +; GFX11-NEXT: v_readlane_b32 s37, v23, 6 +; GFX11-NEXT: v_readlane_b32 s36, v23, 5 +; GFX11-NEXT: v_readlane_b32 s35, v23, 4 +; GFX11-NEXT: v_readlane_b32 s34, v23, 3 +; GFX11-NEXT: v_readlane_b32 s33, v23, 2 +; GFX11-NEXT: v_readlane_b32 s31, v23, 1 +; GFX11-NEXT: v_readlane_b32 s30, v23, 0 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 +; GFX11-NEXT: scratch_load_b32 v23, off, s1 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_store_b32 off, v23, s32 offset:16388 ; 4-byte Folded Spill +; GFX12-NEXT: s_mov_b32 exec_lo, s0 
+; GFX12-NEXT: v_writelane_b32 v23, s30, 0 +; GFX12-NEXT: v_mov_b32_e32 v0, s32 +; GFX12-NEXT: s_and_b32 s0, 0, exec_lo +; GFX12-NEXT: ;;#ASMSTART +; GFX12-NEXT: ; use alloca0 v0 +; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: v_writelane_b32 v23, s31, 1 +; GFX12-NEXT: v_writelane_b32 v23, s33, 2 +; GFX12-NEXT: v_writelane_b32 v23, s34, 3 +; GFX12-NEXT: v_writelane_b32 v23, s35, 4 +; GFX12-NEXT: v_writelane_b32 v23, s36, 5 +; GFX12-NEXT: v_writelane_b32 v23, s37, 6 +; GFX12-NEXT: v_writelane_b32 v23, s38, 7 +; GFX12-NEXT: v_writelane_b32 v23, s39, 8 +; GFX12-NEXT: v_writelane_b32 v23, s40, 9 +; GFX12-NEXT: v_writelane_b32 v23, s41, 10 +; GFX12-NEXT: v_writelane_b32 v23, s42, 11 +; GFX12-NEXT: v_writelane_b32 v23, s43, 12 +; GFX12-NEXT: v_writelane_b32 v23, s44, 13 +; GFX12-NEXT: v_writelane_b32 v23, s45, 14 +; GFX12-NEXT: v_writelane_b32 v23, s46, 15 +; GFX12-NEXT: v_writelane_b32 v23, s47, 16 +; GFX12-NEXT: v_writelane_b32 v23, s48, 17 +; GFX12-NEXT: v_writelane_b32 v23, s49, 18 +; GFX12-NEXT: v_writelane_b32 v23, s50, 19 +; GFX12-NEXT: v_writelane_b32 v23, s51, 20 +; GFX12-NEXT: v_writelane_b32 v23, s52, 21 +; GFX12-NEXT: v_writelane_b32 v23, s53, 22 +; GFX12-NEXT: v_writelane_b32 v23, s54, 23 +; GFX12-NEXT: v_writelane_b32 v23, s55, 24 +; GFX12-NEXT: v_writelane_b32 v23, s56, 25 +; GFX12-NEXT: v_writelane_b32 v23, s57, 26 +; GFX12-NEXT: v_writelane_b32 v23, s58, 27 +; GFX12-NEXT: ;;#ASMSTART +; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc +; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: s_add_co_ci_u32 s32, s32, 0x4000 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_bitcmp1_b32 s32, 0 +; GFX12-NEXT: v_writelane_b32 v23, s59, 28 +; GFX12-NEXT: s_bitset0_b32 s32, 0 +; GFX12-NEXT: s_mov_b32 s59, s32 +; GFX12-NEXT: s_add_co_ci_u32 s32, s32, 0xffffc000 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: s_bitcmp1_b32 s32, 0 +; GFX12-NEXT: s_bitset0_b32 s32, 0 +; GFX12-NEXT: ;;#ASMSTART +; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc +; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: v_readlane_b32 s59, v23, 28 +; GFX12-NEXT: v_readlane_b32 s58, v23, 27 +; GFX12-NEXT: v_readlane_b32 s57, v23, 26 +; GFX12-NEXT: v_readlane_b32 s56, v23, 25 +; GFX12-NEXT: v_readlane_b32 s55, v23, 24 +; GFX12-NEXT: v_readlane_b32 s54, v23, 23 +; GFX12-NEXT: v_readlane_b32 s53, v23, 22 +; GFX12-NEXT: v_readlane_b32 s52, v23, 21 +; GFX12-NEXT: v_readlane_b32 s51, v23, 20 +; GFX12-NEXT: v_readlane_b32 s50, v23, 19 +; GFX12-NEXT: v_readlane_b32 s49, v23, 18 +; GFX12-NEXT: v_readlane_b32 s48, v23, 17 +; GFX12-NEXT: v_readlane_b32 s47, v23, 16 +; GFX12-NEXT: v_readlane_b32 s46, v23, 15 +; GFX12-NEXT: v_readlane_b32 s45, v23, 14 +; GFX12-NEXT: v_readlane_b32 s44, v23, 13 +; GFX12-NEXT: v_readlane_b32 s43, v23, 12 +; GFX12-NEXT: v_readlane_b32 s42, v23, 11 +; GFX12-NEXT: v_readlane_b32 s41, v23, 10 +; GFX12-NEXT: v_readlane_b32 s40, v23, 9 +; GFX12-NEXT: v_readlane_b32 s39, v23, 8 +; GFX12-NEXT: v_readlane_b32 s38, v23, 7 +; GFX12-NEXT: v_readlane_b32 s37, v23, 6 +; GFX12-NEXT: v_readlane_b32 s36, v23, 5 +; GFX12-NEXT: v_readlane_b32 s35, v23, 4 +; GFX12-NEXT: v_readlane_b32 s34, v23, 3 +; GFX12-NEXT: v_readlane_b32 s33, v23, 2 +; GFX12-NEXT: v_readlane_b32 s31, v23, 1 +; GFX12-NEXT: v_readlane_b32 s30, v23, 0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_load_b32 v23, off, s32 offset:16388 ; 4-byte Folded Reload +; GFX12-NEXT: s_mov_b32 exec_lo, s0 
+; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %alloca0 = alloca [4096 x i32], align 64, addrspace(5) + %alloca1 = alloca i32, align 4, addrspace(5) + call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) + + ; Force no SGPRs to be available for the carry-out of the vector add. + %asm = call %asm.output asm sideeffect + "; def $0, $1, $2, $3, $4, $5, $6, $7, $8", + "={s[0:15]},={s[16:31]},={s[32:47]},={s[48:55]},={s[56:57]},={s58},={v[0:15]},={v[16:22]},={vcc}"() + + %s0 = extractvalue %asm.output %asm, 0 + %s1 = extractvalue %asm.output %asm, 1 + %s2 = extractvalue %asm.output %asm, 2 + %s3 = extractvalue %asm.output %asm, 3 + %s4 = extractvalue %asm.output %asm, 4 + %s5 = extractvalue %asm.output %asm, 5 + + %v0 = extractvalue %asm.output %asm, 6 + %v1 = extractvalue %asm.output %asm, 7 + + %vcc = extractvalue %asm.output %asm, 8 + + ; scc is unavailable since it is live in + call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10", + "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:22]},{vcc},{s59},{scc}"( + <16 x i32> %s0, + <16 x i32> %s1, + <16 x i32> %s2, + <8 x i32> %s3, + <2 x i32> %s4, + i32 %s5, + <16 x i32> %v0, + <7 x i32> %v1, + i64 %vcc, + ptr addrspace(5) %alloca1, + i32 0) ; use of scc + + ret void +} + +; FIXME: This would have test FI at offset 0, but other objects get +; assigned there. This shows a non-0, but inline immediate that can +; fold directly into the address computation. +define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset() #1 { +; GFX7-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX7-NEXT: s_add_i32 s6, s32, 0x100400 +; GFX7-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX7-NEXT: s_mov_b64 exec, s[4:5] +; GFX7-NEXT: v_writelane_b32 v21, s30, 0 +; GFX7-NEXT: v_writelane_b32 v21, s31, 1 +; GFX7-NEXT: v_writelane_b32 v21, s33, 2 +; GFX7-NEXT: v_writelane_b32 v21, s34, 3 +; GFX7-NEXT: v_writelane_b32 v21, s35, 4 +; GFX7-NEXT: v_writelane_b32 v21, s36, 5 +; GFX7-NEXT: v_writelane_b32 v21, s37, 6 +; GFX7-NEXT: v_writelane_b32 v21, s38, 7 +; GFX7-NEXT: v_writelane_b32 v21, s39, 8 +; GFX7-NEXT: v_writelane_b32 v21, s40, 9 +; GFX7-NEXT: v_writelane_b32 v21, s41, 10 +; GFX7-NEXT: v_writelane_b32 v21, s42, 11 +; GFX7-NEXT: v_writelane_b32 v21, s43, 12 +; GFX7-NEXT: v_writelane_b32 v21, s44, 13 +; GFX7-NEXT: v_writelane_b32 v21, s45, 14 +; GFX7-NEXT: v_writelane_b32 v21, s46, 15 +; GFX7-NEXT: v_writelane_b32 v21, s47, 16 +; GFX7-NEXT: v_writelane_b32 v21, s48, 17 +; GFX7-NEXT: v_writelane_b32 v21, s49, 18 +; GFX7-NEXT: v_writelane_b32 v21, s50, 19 +; GFX7-NEXT: v_writelane_b32 v21, s51, 20 +; GFX7-NEXT: v_writelane_b32 v21, s52, 21 +; GFX7-NEXT: v_writelane_b32 v21, s53, 22 +; GFX7-NEXT: v_writelane_b32 v21, s54, 23 +; GFX7-NEXT: v_writelane_b32 v21, s55, 24 +; GFX7-NEXT: v_writelane_b32 v21, s56, 25 +; GFX7-NEXT: v_writelane_b32 v21, s57, 26 +; GFX7-NEXT: s_and_b64 s[4:5], 0, exec +; GFX7-NEXT: v_mov_b32_e32 v22, 16 +; GFX7-NEXT: v_writelane_b32 v21, s58, 27 +; GFX7-NEXT: ;;#ASMSTART +; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc +; GFX7-NEXT: ;;#ASMEND +; GFX7-NEXT: v_mad_u32_u24 v22, v22, 64, s32 +; GFX7-NEXT: v_lshr_b32_e64 v22, s32, 6 +; GFX7-NEXT: v_writelane_b32 v21, s59, 28 +; GFX7-NEXT: 
v_readfirstlane_b32 s59, v22 +; GFX7-NEXT: ;;#ASMSTART +; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc +; GFX7-NEXT: ;;#ASMEND +; GFX7-NEXT: v_readlane_b32 s59, v21, 28 +; GFX7-NEXT: v_readlane_b32 s58, v21, 27 +; GFX7-NEXT: v_readlane_b32 s57, v21, 26 +; GFX7-NEXT: v_readlane_b32 s56, v21, 25 +; GFX7-NEXT: v_readlane_b32 s55, v21, 24 +; GFX7-NEXT: v_readlane_b32 s54, v21, 23 +; GFX7-NEXT: v_readlane_b32 s53, v21, 22 +; GFX7-NEXT: v_readlane_b32 s52, v21, 21 +; GFX7-NEXT: v_readlane_b32 s51, v21, 20 +; GFX7-NEXT: v_readlane_b32 s50, v21, 19 +; GFX7-NEXT: v_readlane_b32 s49, v21, 18 +; GFX7-NEXT: v_readlane_b32 s48, v21, 17 +; GFX7-NEXT: v_readlane_b32 s47, v21, 16 +; GFX7-NEXT: v_readlane_b32 s46, v21, 15 +; GFX7-NEXT: v_readlane_b32 s45, v21, 14 +; GFX7-NEXT: v_readlane_b32 s44, v21, 13 +; GFX7-NEXT: v_readlane_b32 s43, v21, 12 +; GFX7-NEXT: v_readlane_b32 s42, v21, 11 +; GFX7-NEXT: v_readlane_b32 s41, v21, 10 +; GFX7-NEXT: v_readlane_b32 s40, v21, 9 +; GFX7-NEXT: v_readlane_b32 s39, v21, 8 +; GFX7-NEXT: v_readlane_b32 s38, v21, 7 +; GFX7-NEXT: v_readlane_b32 s37, v21, 6 +; GFX7-NEXT: v_readlane_b32 s36, v21, 5 +; GFX7-NEXT: v_readlane_b32 s35, v21, 4 +; GFX7-NEXT: v_readlane_b32 s34, v21, 3 +; GFX7-NEXT: v_readlane_b32 s33, v21, 2 +; GFX7-NEXT: v_readlane_b32 s31, v21, 1 +; GFX7-NEXT: v_readlane_b32 s30, v21, 0 +; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX7-NEXT: s_add_i32 s6, s32, 0x100400 +; GFX7-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX7-NEXT: s_mov_b64 exec, s[4:5] +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x100400 +; GFX8-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: v_writelane_b32 v21, s30, 0 +; GFX8-NEXT: v_writelane_b32 v21, s31, 1 +; GFX8-NEXT: v_writelane_b32 v21, s33, 2 +; GFX8-NEXT: v_writelane_b32 v21, s34, 3 +; GFX8-NEXT: v_writelane_b32 v21, s35, 4 +; GFX8-NEXT: v_writelane_b32 v21, s36, 5 +; GFX8-NEXT: v_writelane_b32 v21, s37, 6 +; GFX8-NEXT: v_writelane_b32 v21, s38, 7 +; GFX8-NEXT: v_writelane_b32 v21, s39, 8 +; GFX8-NEXT: v_writelane_b32 v21, s40, 9 +; GFX8-NEXT: v_writelane_b32 v21, s41, 10 +; GFX8-NEXT: v_writelane_b32 v21, s42, 11 +; GFX8-NEXT: v_writelane_b32 v21, s43, 12 +; GFX8-NEXT: v_writelane_b32 v21, s44, 13 +; GFX8-NEXT: v_writelane_b32 v21, s45, 14 +; GFX8-NEXT: v_writelane_b32 v21, s46, 15 +; GFX8-NEXT: v_writelane_b32 v21, s47, 16 +; GFX8-NEXT: v_writelane_b32 v21, s48, 17 +; GFX8-NEXT: v_writelane_b32 v21, s49, 18 +; GFX8-NEXT: v_writelane_b32 v21, s50, 19 +; GFX8-NEXT: v_writelane_b32 v21, s51, 20 +; GFX8-NEXT: v_writelane_b32 v21, s52, 21 +; GFX8-NEXT: v_writelane_b32 v21, s53, 22 +; GFX8-NEXT: v_writelane_b32 v21, s54, 23 +; GFX8-NEXT: v_writelane_b32 v21, s55, 24 +; GFX8-NEXT: v_writelane_b32 v21, s56, 25 +; GFX8-NEXT: v_writelane_b32 v21, s57, 26 +; GFX8-NEXT: s_and_b64 s[4:5], 0, exec +; GFX8-NEXT: v_mov_b32_e32 v22, 16 +; GFX8-NEXT: v_writelane_b32 v21, s58, 27 +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_mad_u32_u24 v22, v22, 64, s32 +; GFX8-NEXT: v_lshrrev_b32_e64 v22, 6, s32 +; GFX8-NEXT: 
v_writelane_b32 v21, s59, 28 +; GFX8-NEXT: v_readfirstlane_b32 s59, v22 +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_readlane_b32 s59, v21, 28 +; GFX8-NEXT: v_readlane_b32 s58, v21, 27 +; GFX8-NEXT: v_readlane_b32 s57, v21, 26 +; GFX8-NEXT: v_readlane_b32 s56, v21, 25 +; GFX8-NEXT: v_readlane_b32 s55, v21, 24 +; GFX8-NEXT: v_readlane_b32 s54, v21, 23 +; GFX8-NEXT: v_readlane_b32 s53, v21, 22 +; GFX8-NEXT: v_readlane_b32 s52, v21, 21 +; GFX8-NEXT: v_readlane_b32 s51, v21, 20 +; GFX8-NEXT: v_readlane_b32 s50, v21, 19 +; GFX8-NEXT: v_readlane_b32 s49, v21, 18 +; GFX8-NEXT: v_readlane_b32 s48, v21, 17 +; GFX8-NEXT: v_readlane_b32 s47, v21, 16 +; GFX8-NEXT: v_readlane_b32 s46, v21, 15 +; GFX8-NEXT: v_readlane_b32 s45, v21, 14 +; GFX8-NEXT: v_readlane_b32 s44, v21, 13 +; GFX8-NEXT: v_readlane_b32 s43, v21, 12 +; GFX8-NEXT: v_readlane_b32 s42, v21, 11 +; GFX8-NEXT: v_readlane_b32 s41, v21, 10 +; GFX8-NEXT: v_readlane_b32 s40, v21, 9 +; GFX8-NEXT: v_readlane_b32 s39, v21, 8 +; GFX8-NEXT: v_readlane_b32 s38, v21, 7 +; GFX8-NEXT: v_readlane_b32 s37, v21, 6 +; GFX8-NEXT: v_readlane_b32 s36, v21, 5 +; GFX8-NEXT: v_readlane_b32 s35, v21, 4 +; GFX8-NEXT: v_readlane_b32 s34, v21, 3 +; GFX8-NEXT: v_readlane_b32 s33, v21, 2 +; GFX8-NEXT: v_readlane_b32 s31, v21, 1 +; GFX8-NEXT: v_readlane_b32 s30, v21, 0 +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x100400 +; GFX8-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x100400 +; GFX900-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: v_writelane_b32 v21, s30, 0 +; GFX900-NEXT: v_writelane_b32 v21, s31, 1 +; GFX900-NEXT: v_writelane_b32 v21, s33, 2 +; GFX900-NEXT: v_writelane_b32 v21, s34, 3 +; GFX900-NEXT: v_writelane_b32 v21, s35, 4 +; GFX900-NEXT: v_writelane_b32 v21, s36, 5 +; GFX900-NEXT: v_writelane_b32 v21, s37, 6 +; GFX900-NEXT: v_writelane_b32 v21, s38, 7 +; GFX900-NEXT: v_writelane_b32 v21, s39, 8 +; GFX900-NEXT: v_writelane_b32 v21, s40, 9 +; GFX900-NEXT: v_writelane_b32 v21, s41, 10 +; GFX900-NEXT: v_writelane_b32 v21, s42, 11 +; GFX900-NEXT: v_writelane_b32 v21, s43, 12 +; GFX900-NEXT: v_writelane_b32 v21, s44, 13 +; GFX900-NEXT: v_writelane_b32 v21, s45, 14 +; GFX900-NEXT: v_writelane_b32 v21, s46, 15 +; GFX900-NEXT: v_writelane_b32 v21, s47, 16 +; GFX900-NEXT: v_writelane_b32 v21, s48, 17 +; GFX900-NEXT: v_writelane_b32 v21, s49, 18 +; GFX900-NEXT: v_writelane_b32 v21, s50, 19 +; GFX900-NEXT: v_writelane_b32 v21, s51, 20 +; GFX900-NEXT: v_writelane_b32 v21, s52, 21 +; GFX900-NEXT: v_writelane_b32 v21, s53, 22 +; GFX900-NEXT: v_writelane_b32 v21, s54, 23 +; GFX900-NEXT: v_writelane_b32 v21, s55, 24 +; GFX900-NEXT: v_writelane_b32 v21, s56, 25 +; GFX900-NEXT: v_writelane_b32 v21, s57, 26 +; GFX900-NEXT: s_and_b64 s[4:5], 0, exec +; GFX900-NEXT: v_writelane_b32 v21, s58, 27 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: 
v_lshrrev_b32_e64 v22, 6, s32 +; GFX900-NEXT: v_add_u32_e32 v22, 16, v22 +; GFX900-NEXT: v_writelane_b32 v21, s59, 28 +; GFX900-NEXT: v_readfirstlane_b32 s59, v22 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s59, v21, 28 +; GFX900-NEXT: v_readlane_b32 s58, v21, 27 +; GFX900-NEXT: v_readlane_b32 s57, v21, 26 +; GFX900-NEXT: v_readlane_b32 s56, v21, 25 +; GFX900-NEXT: v_readlane_b32 s55, v21, 24 +; GFX900-NEXT: v_readlane_b32 s54, v21, 23 +; GFX900-NEXT: v_readlane_b32 s53, v21, 22 +; GFX900-NEXT: v_readlane_b32 s52, v21, 21 +; GFX900-NEXT: v_readlane_b32 s51, v21, 20 +; GFX900-NEXT: v_readlane_b32 s50, v21, 19 +; GFX900-NEXT: v_readlane_b32 s49, v21, 18 +; GFX900-NEXT: v_readlane_b32 s48, v21, 17 +; GFX900-NEXT: v_readlane_b32 s47, v21, 16 +; GFX900-NEXT: v_readlane_b32 s46, v21, 15 +; GFX900-NEXT: v_readlane_b32 s45, v21, 14 +; GFX900-NEXT: v_readlane_b32 s44, v21, 13 +; GFX900-NEXT: v_readlane_b32 s43, v21, 12 +; GFX900-NEXT: v_readlane_b32 s42, v21, 11 +; GFX900-NEXT: v_readlane_b32 s41, v21, 10 +; GFX900-NEXT: v_readlane_b32 s40, v21, 9 +; GFX900-NEXT: v_readlane_b32 s39, v21, 8 +; GFX900-NEXT: v_readlane_b32 s38, v21, 7 +; GFX900-NEXT: v_readlane_b32 s37, v21, 6 +; GFX900-NEXT: v_readlane_b32 s36, v21, 5 +; GFX900-NEXT: v_readlane_b32 s35, v21, 4 +; GFX900-NEXT: v_readlane_b32 s34, v21, 3 +; GFX900-NEXT: v_readlane_b32 s33, v21, 2 +; GFX900-NEXT: v_readlane_b32 s31, v21, 1 +; GFX900-NEXT: v_readlane_b32 s30, v21, 0 +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x100400 +; GFX900-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX940-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX940-NEXT: s_add_i32 s2, s32, 0x4010 +; GFX940-NEXT: scratch_store_dword off, v21, s2 sc0 sc1 ; 4-byte Folded Spill +; GFX940-NEXT: s_mov_b64 exec, s[0:1] +; GFX940-NEXT: v_writelane_b32 v21, s30, 0 +; GFX940-NEXT: v_writelane_b32 v21, s31, 1 +; GFX940-NEXT: v_writelane_b32 v21, s33, 2 +; GFX940-NEXT: v_writelane_b32 v21, s34, 3 +; GFX940-NEXT: v_writelane_b32 v21, s35, 4 +; GFX940-NEXT: v_writelane_b32 v21, s36, 5 +; GFX940-NEXT: v_writelane_b32 v21, s37, 6 +; GFX940-NEXT: v_writelane_b32 v21, s38, 7 +; GFX940-NEXT: v_writelane_b32 v21, s39, 8 +; GFX940-NEXT: v_writelane_b32 v21, s40, 9 +; GFX940-NEXT: v_writelane_b32 v21, s41, 10 +; GFX940-NEXT: v_writelane_b32 v21, s42, 11 +; GFX940-NEXT: v_writelane_b32 v21, s43, 12 +; GFX940-NEXT: v_writelane_b32 v21, s44, 13 +; GFX940-NEXT: v_writelane_b32 v21, s45, 14 +; GFX940-NEXT: v_writelane_b32 v21, s46, 15 +; GFX940-NEXT: v_writelane_b32 v21, s47, 16 +; GFX940-NEXT: v_writelane_b32 v21, s48, 17 +; GFX940-NEXT: v_writelane_b32 v21, s49, 18 +; GFX940-NEXT: v_writelane_b32 v21, s50, 19 +; GFX940-NEXT: v_writelane_b32 v21, s51, 20 +; GFX940-NEXT: v_writelane_b32 v21, s52, 21 +; GFX940-NEXT: v_writelane_b32 v21, s53, 22 +; GFX940-NEXT: v_writelane_b32 v21, s54, 23 +; GFX940-NEXT: v_writelane_b32 v21, s55, 24 +; GFX940-NEXT: v_writelane_b32 v21, s56, 25 +; GFX940-NEXT: v_writelane_b32 v21, s57, 26 +; GFX940-NEXT: v_writelane_b32 v21, s58, 27 +; GFX940-NEXT: v_writelane_b32 v21, s59, 28 +; 
GFX940-NEXT: v_writelane_b32 v21, s60, 29 +; GFX940-NEXT: v_writelane_b32 v21, s61, 30 +; GFX940-NEXT: s_and_b64 s[60:61], 0, exec +; GFX940-NEXT: ;;#ASMSTART +; GFX940-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc +; GFX940-NEXT: ;;#ASMEND +; GFX940-NEXT: s_addc_u32 s60, s32, 16 +; GFX940-NEXT: s_bitcmp1_b32 s60, 0 +; GFX940-NEXT: s_bitset0_b32 s60, 0 +; GFX940-NEXT: s_mov_b32 s59, s60 +; GFX940-NEXT: ;;#ASMSTART +; GFX940-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc +; GFX940-NEXT: ;;#ASMEND +; GFX940-NEXT: v_readlane_b32 s61, v21, 30 +; GFX940-NEXT: v_readlane_b32 s60, v21, 29 +; GFX940-NEXT: v_readlane_b32 s59, v21, 28 +; GFX940-NEXT: v_readlane_b32 s58, v21, 27 +; GFX940-NEXT: v_readlane_b32 s57, v21, 26 +; GFX940-NEXT: v_readlane_b32 s56, v21, 25 +; GFX940-NEXT: v_readlane_b32 s55, v21, 24 +; GFX940-NEXT: v_readlane_b32 s54, v21, 23 +; GFX940-NEXT: v_readlane_b32 s53, v21, 22 +; GFX940-NEXT: v_readlane_b32 s52, v21, 21 +; GFX940-NEXT: v_readlane_b32 s51, v21, 20 +; GFX940-NEXT: v_readlane_b32 s50, v21, 19 +; GFX940-NEXT: v_readlane_b32 s49, v21, 18 +; GFX940-NEXT: v_readlane_b32 s48, v21, 17 +; GFX940-NEXT: v_readlane_b32 s47, v21, 16 +; GFX940-NEXT: v_readlane_b32 s46, v21, 15 +; GFX940-NEXT: v_readlane_b32 s45, v21, 14 +; GFX940-NEXT: v_readlane_b32 s44, v21, 13 +; GFX940-NEXT: v_readlane_b32 s43, v21, 12 +; GFX940-NEXT: v_readlane_b32 s42, v21, 11 +; GFX940-NEXT: v_readlane_b32 s41, v21, 10 +; GFX940-NEXT: v_readlane_b32 s40, v21, 9 +; GFX940-NEXT: v_readlane_b32 s39, v21, 8 +; GFX940-NEXT: v_readlane_b32 s38, v21, 7 +; GFX940-NEXT: v_readlane_b32 s37, v21, 6 +; GFX940-NEXT: v_readlane_b32 s36, v21, 5 +; GFX940-NEXT: v_readlane_b32 s35, v21, 4 +; GFX940-NEXT: v_readlane_b32 s34, v21, 3 +; GFX940-NEXT: v_readlane_b32 s33, v21, 2 +; GFX940-NEXT: v_readlane_b32 s31, v21, 1 +; GFX940-NEXT: v_readlane_b32 s30, v21, 0 +; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX940-NEXT: s_add_i32 s2, s32, 0x4010 +; GFX940-NEXT: scratch_load_dword v21, off, s2 ; 4-byte Folded Reload +; GFX940-NEXT: s_mov_b64 exec, s[0:1] +; GFX940-NEXT: s_waitcnt vmcnt(0) +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX10_1-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: +; GFX10_1: ; %bb.0: +; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80200 +; GFX10_1-NEXT: buffer_store_dword v21, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: v_writelane_b32 v21, s30, 0 +; GFX10_1-NEXT: v_writelane_b32 v21, s31, 1 +; GFX10_1-NEXT: v_writelane_b32 v21, s33, 2 +; GFX10_1-NEXT: v_writelane_b32 v21, s34, 3 +; GFX10_1-NEXT: v_writelane_b32 v21, s35, 4 +; GFX10_1-NEXT: v_writelane_b32 v21, s36, 5 +; GFX10_1-NEXT: v_writelane_b32 v21, s37, 6 +; GFX10_1-NEXT: v_writelane_b32 v21, s38, 7 +; GFX10_1-NEXT: v_writelane_b32 v21, s39, 8 +; GFX10_1-NEXT: v_writelane_b32 v21, s40, 9 +; GFX10_1-NEXT: v_writelane_b32 v21, s41, 10 +; GFX10_1-NEXT: v_writelane_b32 v21, s42, 11 +; GFX10_1-NEXT: v_writelane_b32 v21, s43, 12 +; GFX10_1-NEXT: v_writelane_b32 v21, s44, 13 +; GFX10_1-NEXT: v_writelane_b32 v21, s45, 14 +; GFX10_1-NEXT: v_writelane_b32 v21, s46, 15 +; GFX10_1-NEXT: v_writelane_b32 v21, s47, 16 +; GFX10_1-NEXT: v_writelane_b32 v21, s48, 17 +; GFX10_1-NEXT: v_writelane_b32 v21, s49, 18 +; GFX10_1-NEXT: v_writelane_b32 v21, s50, 19 
+; GFX10_1-NEXT: v_writelane_b32 v21, s51, 20 +; GFX10_1-NEXT: v_writelane_b32 v21, s52, 21 +; GFX10_1-NEXT: v_writelane_b32 v21, s53, 22 +; GFX10_1-NEXT: v_writelane_b32 v21, s54, 23 +; GFX10_1-NEXT: v_writelane_b32 v21, s55, 24 +; GFX10_1-NEXT: v_writelane_b32 v21, s56, 25 +; GFX10_1-NEXT: v_writelane_b32 v21, s57, 26 +; GFX10_1-NEXT: v_writelane_b32 v21, s58, 27 +; GFX10_1-NEXT: ;;#ASMSTART +; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc +; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_lshrrev_b32_e64 v22, 5, s32 +; GFX10_1-NEXT: v_writelane_b32 v21, s59, 28 +; GFX10_1-NEXT: s_and_b32 s59, 0, exec_lo +; GFX10_1-NEXT: v_add_nc_u32_e32 v22, 16, v22 +; GFX10_1-NEXT: v_readfirstlane_b32 s59, v22 +; GFX10_1-NEXT: ;;#ASMSTART +; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc +; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_readlane_b32 s59, v21, 28 +; GFX10_1-NEXT: v_readlane_b32 s58, v21, 27 +; GFX10_1-NEXT: v_readlane_b32 s57, v21, 26 +; GFX10_1-NEXT: v_readlane_b32 s56, v21, 25 +; GFX10_1-NEXT: v_readlane_b32 s55, v21, 24 +; GFX10_1-NEXT: v_readlane_b32 s54, v21, 23 +; GFX10_1-NEXT: v_readlane_b32 s53, v21, 22 +; GFX10_1-NEXT: v_readlane_b32 s52, v21, 21 +; GFX10_1-NEXT: v_readlane_b32 s51, v21, 20 +; GFX10_1-NEXT: v_readlane_b32 s50, v21, 19 +; GFX10_1-NEXT: v_readlane_b32 s49, v21, 18 +; GFX10_1-NEXT: v_readlane_b32 s48, v21, 17 +; GFX10_1-NEXT: v_readlane_b32 s47, v21, 16 +; GFX10_1-NEXT: v_readlane_b32 s46, v21, 15 +; GFX10_1-NEXT: v_readlane_b32 s45, v21, 14 +; GFX10_1-NEXT: v_readlane_b32 s44, v21, 13 +; GFX10_1-NEXT: v_readlane_b32 s43, v21, 12 +; GFX10_1-NEXT: v_readlane_b32 s42, v21, 11 +; GFX10_1-NEXT: v_readlane_b32 s41, v21, 10 +; GFX10_1-NEXT: v_readlane_b32 s40, v21, 9 +; GFX10_1-NEXT: v_readlane_b32 s39, v21, 8 +; GFX10_1-NEXT: v_readlane_b32 s38, v21, 7 +; GFX10_1-NEXT: v_readlane_b32 s37, v21, 6 +; GFX10_1-NEXT: v_readlane_b32 s36, v21, 5 +; GFX10_1-NEXT: v_readlane_b32 s35, v21, 4 +; GFX10_1-NEXT: v_readlane_b32 s34, v21, 3 +; GFX10_1-NEXT: v_readlane_b32 s33, v21, 2 +; GFX10_1-NEXT: v_readlane_b32 s31, v21, 1 +; GFX10_1-NEXT: v_readlane_b32 s30, v21, 0 +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80200 +; GFX10_1-NEXT: buffer_load_dword v21, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: s_waitcnt vmcnt(0) +; GFX10_1-NEXT: s_setpc_b64 s[30:31] +; +; GFX10_3-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: +; GFX10_3: ; %bb.0: +; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80200 +; GFX10_3-NEXT: buffer_store_dword v21, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: v_writelane_b32 v21, s30, 0 +; GFX10_3-NEXT: v_writelane_b32 v21, s31, 1 +; GFX10_3-NEXT: v_writelane_b32 v21, s33, 2 +; GFX10_3-NEXT: v_writelane_b32 v21, s34, 3 +; GFX10_3-NEXT: v_writelane_b32 v21, s35, 4 +; GFX10_3-NEXT: v_writelane_b32 v21, s36, 5 +; GFX10_3-NEXT: v_writelane_b32 v21, s37, 6 +; GFX10_3-NEXT: v_writelane_b32 v21, s38, 7 +; GFX10_3-NEXT: v_writelane_b32 v21, s39, 8 +; GFX10_3-NEXT: v_writelane_b32 v21, s40, 9 +; GFX10_3-NEXT: v_writelane_b32 v21, s41, 10 +; GFX10_3-NEXT: v_writelane_b32 v21, s42, 11 +; GFX10_3-NEXT: v_writelane_b32 v21, s43, 12 +; GFX10_3-NEXT: v_writelane_b32 v21, s44, 13 +; 
GFX10_3-NEXT: v_writelane_b32 v21, s45, 14 +; GFX10_3-NEXT: v_writelane_b32 v21, s46, 15 +; GFX10_3-NEXT: v_writelane_b32 v21, s47, 16 +; GFX10_3-NEXT: v_writelane_b32 v21, s48, 17 +; GFX10_3-NEXT: v_writelane_b32 v21, s49, 18 +; GFX10_3-NEXT: v_writelane_b32 v21, s50, 19 +; GFX10_3-NEXT: v_writelane_b32 v21, s51, 20 +; GFX10_3-NEXT: v_writelane_b32 v21, s52, 21 +; GFX10_3-NEXT: v_writelane_b32 v21, s53, 22 +; GFX10_3-NEXT: v_writelane_b32 v21, s54, 23 +; GFX10_3-NEXT: v_writelane_b32 v21, s55, 24 +; GFX10_3-NEXT: v_writelane_b32 v21, s56, 25 +; GFX10_3-NEXT: v_writelane_b32 v21, s57, 26 +; GFX10_3-NEXT: v_writelane_b32 v21, s58, 27 +; GFX10_3-NEXT: ;;#ASMSTART +; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc +; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_lshrrev_b32_e64 v22, 5, s32 +; GFX10_3-NEXT: v_writelane_b32 v21, s59, 28 +; GFX10_3-NEXT: s_and_b32 s59, 0, exec_lo +; GFX10_3-NEXT: v_add_nc_u32_e32 v22, 16, v22 +; GFX10_3-NEXT: v_readfirstlane_b32 s59, v22 +; GFX10_3-NEXT: ;;#ASMSTART +; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc +; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_readlane_b32 s59, v21, 28 +; GFX10_3-NEXT: v_readlane_b32 s58, v21, 27 +; GFX10_3-NEXT: v_readlane_b32 s57, v21, 26 +; GFX10_3-NEXT: v_readlane_b32 s56, v21, 25 +; GFX10_3-NEXT: v_readlane_b32 s55, v21, 24 +; GFX10_3-NEXT: v_readlane_b32 s54, v21, 23 +; GFX10_3-NEXT: v_readlane_b32 s53, v21, 22 +; GFX10_3-NEXT: v_readlane_b32 s52, v21, 21 +; GFX10_3-NEXT: v_readlane_b32 s51, v21, 20 +; GFX10_3-NEXT: v_readlane_b32 s50, v21, 19 +; GFX10_3-NEXT: v_readlane_b32 s49, v21, 18 +; GFX10_3-NEXT: v_readlane_b32 s48, v21, 17 +; GFX10_3-NEXT: v_readlane_b32 s47, v21, 16 +; GFX10_3-NEXT: v_readlane_b32 s46, v21, 15 +; GFX10_3-NEXT: v_readlane_b32 s45, v21, 14 +; GFX10_3-NEXT: v_readlane_b32 s44, v21, 13 +; GFX10_3-NEXT: v_readlane_b32 s43, v21, 12 +; GFX10_3-NEXT: v_readlane_b32 s42, v21, 11 +; GFX10_3-NEXT: v_readlane_b32 s41, v21, 10 +; GFX10_3-NEXT: v_readlane_b32 s40, v21, 9 +; GFX10_3-NEXT: v_readlane_b32 s39, v21, 8 +; GFX10_3-NEXT: v_readlane_b32 s38, v21, 7 +; GFX10_3-NEXT: v_readlane_b32 s37, v21, 6 +; GFX10_3-NEXT: v_readlane_b32 s36, v21, 5 +; GFX10_3-NEXT: v_readlane_b32 s35, v21, 4 +; GFX10_3-NEXT: v_readlane_b32 s34, v21, 3 +; GFX10_3-NEXT: v_readlane_b32 s33, v21, 2 +; GFX10_3-NEXT: v_readlane_b32 s31, v21, 1 +; GFX10_3-NEXT: v_readlane_b32 s30, v21, 0 +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80200 +; GFX10_3-NEXT: buffer_load_dword v21, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: s_waitcnt vmcnt(0) +; GFX10_3-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x4010 +; GFX11-NEXT: scratch_store_b32 off, v21, s1 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: v_writelane_b32 v21, s30, 0 +; GFX11-NEXT: v_writelane_b32 v21, s31, 1 +; GFX11-NEXT: v_writelane_b32 v21, s33, 2 +; GFX11-NEXT: v_writelane_b32 v21, s34, 3 +; GFX11-NEXT: v_writelane_b32 v21, s35, 4 +; GFX11-NEXT: v_writelane_b32 v21, s36, 5 +; GFX11-NEXT: v_writelane_b32 v21, s37, 6 +; GFX11-NEXT: v_writelane_b32 v21, s38, 7 +; GFX11-NEXT: v_writelane_b32 v21, s39, 8 +; GFX11-NEXT: v_writelane_b32 v21, s40, 9 
+; GFX11-NEXT: v_writelane_b32 v21, s41, 10 +; GFX11-NEXT: v_writelane_b32 v21, s42, 11 +; GFX11-NEXT: v_writelane_b32 v21, s43, 12 +; GFX11-NEXT: v_writelane_b32 v21, s44, 13 +; GFX11-NEXT: v_writelane_b32 v21, s45, 14 +; GFX11-NEXT: v_writelane_b32 v21, s46, 15 +; GFX11-NEXT: v_writelane_b32 v21, s47, 16 +; GFX11-NEXT: v_writelane_b32 v21, s48, 17 +; GFX11-NEXT: v_writelane_b32 v21, s49, 18 +; GFX11-NEXT: v_writelane_b32 v21, s50, 19 +; GFX11-NEXT: v_writelane_b32 v21, s51, 20 +; GFX11-NEXT: v_writelane_b32 v21, s52, 21 +; GFX11-NEXT: v_writelane_b32 v21, s53, 22 +; GFX11-NEXT: v_writelane_b32 v21, s54, 23 +; GFX11-NEXT: v_writelane_b32 v21, s55, 24 +; GFX11-NEXT: v_writelane_b32 v21, s56, 25 +; GFX11-NEXT: v_writelane_b32 v21, s57, 26 +; GFX11-NEXT: v_writelane_b32 v21, s58, 27 +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_writelane_b32 v21, s59, 28 +; GFX11-NEXT: s_and_b32 s59, 0, exec_lo +; GFX11-NEXT: s_addc_u32 s32, s32, 16 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_bitcmp1_b32 s32, 0 +; GFX11-NEXT: s_bitset0_b32 s32, 0 +; GFX11-NEXT: s_mov_b32 s59, s32 +; GFX11-NEXT: s_addc_u32 s32, s32, -16 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_bitcmp1_b32 s32, 0 +; GFX11-NEXT: s_bitset0_b32 s32, 0 +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_readlane_b32 s59, v21, 28 +; GFX11-NEXT: v_readlane_b32 s58, v21, 27 +; GFX11-NEXT: v_readlane_b32 s57, v21, 26 +; GFX11-NEXT: v_readlane_b32 s56, v21, 25 +; GFX11-NEXT: v_readlane_b32 s55, v21, 24 +; GFX11-NEXT: v_readlane_b32 s54, v21, 23 +; GFX11-NEXT: v_readlane_b32 s53, v21, 22 +; GFX11-NEXT: v_readlane_b32 s52, v21, 21 +; GFX11-NEXT: v_readlane_b32 s51, v21, 20 +; GFX11-NEXT: v_readlane_b32 s50, v21, 19 +; GFX11-NEXT: v_readlane_b32 s49, v21, 18 +; GFX11-NEXT: v_readlane_b32 s48, v21, 17 +; GFX11-NEXT: v_readlane_b32 s47, v21, 16 +; GFX11-NEXT: v_readlane_b32 s46, v21, 15 +; GFX11-NEXT: v_readlane_b32 s45, v21, 14 +; GFX11-NEXT: v_readlane_b32 s44, v21, 13 +; GFX11-NEXT: v_readlane_b32 s43, v21, 12 +; GFX11-NEXT: v_readlane_b32 s42, v21, 11 +; GFX11-NEXT: v_readlane_b32 s41, v21, 10 +; GFX11-NEXT: v_readlane_b32 s40, v21, 9 +; GFX11-NEXT: v_readlane_b32 s39, v21, 8 +; GFX11-NEXT: v_readlane_b32 s38, v21, 7 +; GFX11-NEXT: v_readlane_b32 s37, v21, 6 +; GFX11-NEXT: v_readlane_b32 s36, v21, 5 +; GFX11-NEXT: v_readlane_b32 s35, v21, 4 +; GFX11-NEXT: v_readlane_b32 s34, v21, 3 +; GFX11-NEXT: v_readlane_b32 s33, v21, 2 +; GFX11-NEXT: v_readlane_b32 s31, v21, 1 +; GFX11-NEXT: v_readlane_b32 s30, v21, 0 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x4010 +; GFX11-NEXT: scratch_load_b32 v21, off, s1 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_store_b32 off, v21, s32 offset:16384 ; 4-byte Folded Spill +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: 
v_writelane_b32 v21, s30, 0 +; GFX12-NEXT: v_writelane_b32 v21, s31, 1 +; GFX12-NEXT: v_writelane_b32 v21, s33, 2 +; GFX12-NEXT: v_writelane_b32 v21, s34, 3 +; GFX12-NEXT: v_writelane_b32 v21, s35, 4 +; GFX12-NEXT: v_writelane_b32 v21, s36, 5 +; GFX12-NEXT: v_writelane_b32 v21, s37, 6 +; GFX12-NEXT: v_writelane_b32 v21, s38, 7 +; GFX12-NEXT: v_writelane_b32 v21, s39, 8 +; GFX12-NEXT: v_writelane_b32 v21, s40, 9 +; GFX12-NEXT: v_writelane_b32 v21, s41, 10 +; GFX12-NEXT: v_writelane_b32 v21, s42, 11 +; GFX12-NEXT: v_writelane_b32 v21, s43, 12 +; GFX12-NEXT: v_writelane_b32 v21, s44, 13 +; GFX12-NEXT: v_writelane_b32 v21, s45, 14 +; GFX12-NEXT: v_writelane_b32 v21, s46, 15 +; GFX12-NEXT: v_writelane_b32 v21, s47, 16 +; GFX12-NEXT: v_writelane_b32 v21, s48, 17 +; GFX12-NEXT: v_writelane_b32 v21, s49, 18 +; GFX12-NEXT: v_writelane_b32 v21, s50, 19 +; GFX12-NEXT: v_writelane_b32 v21, s51, 20 +; GFX12-NEXT: v_writelane_b32 v21, s52, 21 +; GFX12-NEXT: v_writelane_b32 v21, s53, 22 +; GFX12-NEXT: v_writelane_b32 v21, s54, 23 +; GFX12-NEXT: v_writelane_b32 v21, s55, 24 +; GFX12-NEXT: v_writelane_b32 v21, s56, 25 +; GFX12-NEXT: v_writelane_b32 v21, s57, 26 +; GFX12-NEXT: v_writelane_b32 v21, s58, 27 +; GFX12-NEXT: ;;#ASMSTART +; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc +; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: v_writelane_b32 v21, s59, 28 +; GFX12-NEXT: s_and_b32 s59, 0, exec_lo +; GFX12-NEXT: s_mov_b32 s59, s32 +; GFX12-NEXT: ;;#ASMSTART +; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc +; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_readlane_b32 s59, v21, 28 +; GFX12-NEXT: v_readlane_b32 s58, v21, 27 +; GFX12-NEXT: v_readlane_b32 s57, v21, 26 +; GFX12-NEXT: v_readlane_b32 s56, v21, 25 +; GFX12-NEXT: v_readlane_b32 s55, v21, 24 +; GFX12-NEXT: v_readlane_b32 s54, v21, 23 +; GFX12-NEXT: v_readlane_b32 s53, v21, 22 +; GFX12-NEXT: v_readlane_b32 s52, v21, 21 +; GFX12-NEXT: v_readlane_b32 s51, v21, 20 +; GFX12-NEXT: v_readlane_b32 s50, v21, 19 +; GFX12-NEXT: v_readlane_b32 s49, v21, 18 +; GFX12-NEXT: v_readlane_b32 s48, v21, 17 +; GFX12-NEXT: v_readlane_b32 s47, v21, 16 +; GFX12-NEXT: v_readlane_b32 s46, v21, 15 +; GFX12-NEXT: v_readlane_b32 s45, v21, 14 +; GFX12-NEXT: v_readlane_b32 s44, v21, 13 +; GFX12-NEXT: v_readlane_b32 s43, v21, 12 +; GFX12-NEXT: v_readlane_b32 s42, v21, 11 +; GFX12-NEXT: v_readlane_b32 s41, v21, 10 +; GFX12-NEXT: v_readlane_b32 s40, v21, 9 +; GFX12-NEXT: v_readlane_b32 s39, v21, 8 +; GFX12-NEXT: v_readlane_b32 s38, v21, 7 +; GFX12-NEXT: v_readlane_b32 s37, v21, 6 +; GFX12-NEXT: v_readlane_b32 s36, v21, 5 +; GFX12-NEXT: v_readlane_b32 s35, v21, 4 +; GFX12-NEXT: v_readlane_b32 s34, v21, 3 +; GFX12-NEXT: v_readlane_b32 s33, v21, 2 +; GFX12-NEXT: v_readlane_b32 s31, v21, 1 +; GFX12-NEXT: v_readlane_b32 s30, v21, 0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_load_b32 v21, off, s32 offset:16384 ; 4-byte Folded Reload +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %alloca0 = alloca [4096 x i32], align 16, addrspace(5) + + ; Force no SGPRs to be available for the carry-out of the vector add. 
+ %asm = call %asm.output2 asm sideeffect + "; def $0, $1, $2, $3, $4, $5, $6, $7, $8", + "={s[0:15]},={s[16:31]},={s[32:47]},={s[48:55]},={s[56:57]},={s58},={v[0:15]},={v[16:20]},={vcc}"() + + %s0 = extractvalue %asm.output2 %asm, 0 + %s1 = extractvalue %asm.output2 %asm, 1 + %s2 = extractvalue %asm.output2 %asm, 2 + %s3 = extractvalue %asm.output2 %asm, 3 + %s4 = extractvalue %asm.output2 %asm, 4 + %s5 = extractvalue %asm.output2 %asm, 5 + + %v0 = extractvalue %asm.output2 %asm, 6 + %v1 = extractvalue %asm.output2 %asm, 7 + + %vcc = extractvalue %asm.output2 %asm, 8 + + ; scc is unavailable since it is live in + call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10", + "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:20]},{vcc},{s59},{scc}"( + <16 x i32> %s0, + <16 x i32> %s1, + <16 x i32> %s2, + <8 x i32> %s3, + <2 x i32> %s4, + i32 %s5, + <16 x i32> %v0, + <5 x i32> %v1, + i64 %vcc, + ptr addrspace(5) %alloca0, + i32 0) ; use of scc + + ret void +} + +; This case isn't using SGPRs yet. +; FIXME: Should also use one more VGPR, but currently fails to allocate on gfx8. +define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset() #0 { +; GFX7-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX7-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX7-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX7-NEXT: s_add_i32 s6, s32, 0x201200 +; GFX7-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX7-NEXT: s_mov_b64 exec, s[4:5] +; GFX7-NEXT: v_writelane_b32 v23, s28, 28 +; GFX7-NEXT: v_writelane_b32 v23, s29, 29 +; GFX7-NEXT: v_writelane_b32 v23, s30, 0 +; GFX7-NEXT: v_writelane_b32 v23, s31, 1 +; GFX7-NEXT: v_writelane_b32 v23, s33, 2 +; GFX7-NEXT: v_writelane_b32 v23, s34, 3 +; GFX7-NEXT: v_writelane_b32 v23, s35, 4 +; GFX7-NEXT: v_writelane_b32 v23, s36, 5 +; GFX7-NEXT: v_writelane_b32 v23, s37, 6 +; GFX7-NEXT: v_writelane_b32 v23, s38, 7 +; GFX7-NEXT: v_writelane_b32 v23, s39, 8 +; GFX7-NEXT: v_writelane_b32 v23, s40, 9 +; GFX7-NEXT: v_writelane_b32 v23, s41, 10 +; GFX7-NEXT: v_writelane_b32 v23, s42, 11 +; GFX7-NEXT: v_writelane_b32 v23, s43, 12 +; GFX7-NEXT: v_writelane_b32 v23, s44, 13 +; GFX7-NEXT: v_writelane_b32 v23, s45, 14 +; GFX7-NEXT: v_writelane_b32 v23, s46, 15 +; GFX7-NEXT: v_writelane_b32 v23, s47, 16 +; GFX7-NEXT: v_writelane_b32 v23, s48, 17 +; GFX7-NEXT: v_writelane_b32 v23, s49, 18 +; GFX7-NEXT: v_writelane_b32 v23, s50, 19 +; GFX7-NEXT: v_writelane_b32 v23, s51, 20 +; GFX7-NEXT: v_writelane_b32 v23, s52, 21 +; GFX7-NEXT: v_writelane_b32 v23, s53, 22 +; GFX7-NEXT: v_writelane_b32 v23, s54, 23 +; GFX7-NEXT: v_writelane_b32 v23, s55, 24 +; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6 +; GFX7-NEXT: v_writelane_b32 v23, s56, 25 +; GFX7-NEXT: v_add_i32_e32 v0, vcc, 64, v0 +; GFX7-NEXT: v_writelane_b32 v23, s57, 26 +; GFX7-NEXT: ;;#ASMSTART +; GFX7-NEXT: ; use alloca0 v0 +; GFX7-NEXT: ;;#ASMEND +; GFX7-NEXT: ;;#ASMSTART +; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc +; GFX7-NEXT: ;;#ASMEND +; GFX7-NEXT: ; implicit-def: $vgpr22 +; GFX7-NEXT: v_writelane_b32 v23, s59, 27 +; GFX7-NEXT: v_writelane_b32 v22, vcc_lo, 0 +; GFX7-NEXT: v_writelane_b32 v22, vcc_hi, 1 +; GFX7-NEXT: s_or_saveexec_b64 s[28:29], -1 +; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], s32 +; GFX7-NEXT: 
v_mov_b32_e32 v0, 0x8044 +; GFX7-NEXT: buffer_store_dword v22, v0, s[0:3], s32 offen ; 4-byte Folded Spill +; GFX7-NEXT: s_mov_b64 exec, s[28:29] +; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s32 +; GFX7-NEXT: v_lshr_b32_e64 v22, s32, 6 +; GFX7-NEXT: s_movk_i32 vcc_lo, 0x4040 +; GFX7-NEXT: v_add_i32_e32 v22, vcc, vcc_lo, v22 +; GFX7-NEXT: v_add_i32_e32 v22, vcc, 0x200, v22 +; GFX7-NEXT: v_readfirstlane_b32 s59, v22 +; GFX7-NEXT: s_and_b64 vcc, 0, exec +; GFX7-NEXT: s_mov_b64 s[28:29], exec +; GFX7-NEXT: s_mov_b64 exec, -1 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], s32 +; GFX7-NEXT: v_mov_b32_e32 v0, 0x8044 +; GFX7-NEXT: buffer_load_dword v22, v0, s[0:3], s32 offen ; 4-byte Folded Reload +; GFX7-NEXT: s_mov_b64 exec, s[28:29] +; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s32 +; GFX7-NEXT: s_waitcnt vmcnt(1) +; GFX7-NEXT: v_readlane_b32 vcc_lo, v22, 0 +; GFX7-NEXT: v_readlane_b32 vcc_hi, v22, 1 +; GFX7-NEXT: s_mov_b64 s[28:29], exec +; GFX7-NEXT: s_mov_b64 exec, -1 +; GFX7-NEXT: s_mov_b64 exec, s[28:29] +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: ;;#ASMSTART +; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc +; GFX7-NEXT: ;;#ASMEND +; GFX7-NEXT: v_readlane_b32 s59, v23, 27 +; GFX7-NEXT: v_readlane_b32 s57, v23, 26 +; GFX7-NEXT: v_readlane_b32 s56, v23, 25 +; GFX7-NEXT: v_readlane_b32 s55, v23, 24 +; GFX7-NEXT: v_readlane_b32 s54, v23, 23 +; GFX7-NEXT: v_readlane_b32 s53, v23, 22 +; GFX7-NEXT: v_readlane_b32 s52, v23, 21 +; GFX7-NEXT: v_readlane_b32 s51, v23, 20 +; GFX7-NEXT: v_readlane_b32 s50, v23, 19 +; GFX7-NEXT: v_readlane_b32 s49, v23, 18 +; GFX7-NEXT: v_readlane_b32 s48, v23, 17 +; GFX7-NEXT: v_readlane_b32 s47, v23, 16 +; GFX7-NEXT: v_readlane_b32 s46, v23, 15 +; GFX7-NEXT: v_readlane_b32 s45, v23, 14 +; GFX7-NEXT: v_readlane_b32 s44, v23, 13 +; GFX7-NEXT: v_readlane_b32 s43, v23, 12 +; GFX7-NEXT: v_readlane_b32 s42, v23, 11 +; GFX7-NEXT: v_readlane_b32 s41, v23, 10 +; GFX7-NEXT: v_readlane_b32 s40, v23, 9 +; GFX7-NEXT: v_readlane_b32 s39, v23, 8 +; GFX7-NEXT: v_readlane_b32 s38, v23, 7 +; GFX7-NEXT: v_readlane_b32 s37, v23, 6 +; GFX7-NEXT: v_readlane_b32 s36, v23, 5 +; GFX7-NEXT: v_readlane_b32 s35, v23, 4 +; GFX7-NEXT: v_readlane_b32 s34, v23, 3 +; GFX7-NEXT: v_readlane_b32 s33, v23, 2 +; GFX7-NEXT: v_readlane_b32 s31, v23, 1 +; GFX7-NEXT: v_readlane_b32 s30, v23, 0 +; GFX7-NEXT: ; kill: killed $vgpr22 +; GFX7-NEXT: v_readlane_b32 s28, v23, 28 +; GFX7-NEXT: v_readlane_b32 s29, v23, 29 +; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX7-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX7-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX7-NEXT: s_add_i32 s6, s32, 0x201200 +; GFX7-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX7-NEXT: s_mov_b64 exec, s[4:5] +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX8-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX8-NEXT: s_add_i32 s6, s32, 0x201200 +; GFX8-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: v_writelane_b32 v23, s58, 28 +; GFX8-NEXT: v_writelane_b32 v23, s59, 29 +; GFX8-NEXT: v_writelane_b32 v23, s30, 0 +; GFX8-NEXT: 
v_writelane_b32 v23, s31, 1 +; GFX8-NEXT: v_writelane_b32 v23, s33, 2 +; GFX8-NEXT: v_writelane_b32 v23, s34, 3 +; GFX8-NEXT: v_writelane_b32 v23, s35, 4 +; GFX8-NEXT: v_writelane_b32 v23, s36, 5 +; GFX8-NEXT: v_writelane_b32 v23, s37, 6 +; GFX8-NEXT: v_writelane_b32 v23, s38, 7 +; GFX8-NEXT: v_writelane_b32 v23, s39, 8 +; GFX8-NEXT: v_writelane_b32 v23, s40, 9 +; GFX8-NEXT: v_writelane_b32 v23, s41, 10 +; GFX8-NEXT: v_writelane_b32 v23, s42, 11 +; GFX8-NEXT: v_writelane_b32 v23, s43, 12 +; GFX8-NEXT: v_writelane_b32 v23, s44, 13 +; GFX8-NEXT: v_writelane_b32 v23, s45, 14 +; GFX8-NEXT: v_writelane_b32 v23, s46, 15 +; GFX8-NEXT: v_writelane_b32 v23, s47, 16 +; GFX8-NEXT: v_writelane_b32 v23, s48, 17 +; GFX8-NEXT: v_writelane_b32 v23, s49, 18 +; GFX8-NEXT: v_writelane_b32 v23, s50, 19 +; GFX8-NEXT: v_writelane_b32 v23, s51, 20 +; GFX8-NEXT: v_writelane_b32 v23, s52, 21 +; GFX8-NEXT: v_writelane_b32 v23, s53, 22 +; GFX8-NEXT: v_writelane_b32 v23, s54, 23 +; GFX8-NEXT: v_writelane_b32 v23, s55, 24 +; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 +; GFX8-NEXT: v_writelane_b32 v23, s56, 25 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 +; GFX8-NEXT: v_writelane_b32 v23, s57, 26 +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use alloca0 v0 +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: ; implicit-def: $vgpr22 +; GFX8-NEXT: v_writelane_b32 v23, s59, 27 +; GFX8-NEXT: v_writelane_b32 v22, vcc_lo, 0 +; GFX8-NEXT: v_writelane_b32 v22, vcc_hi, 1 +; GFX8-NEXT: s_or_saveexec_b64 s[58:59], -1 +; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s32 +; GFX8-NEXT: v_mov_b32_e32 v0, 0x8044 +; GFX8-NEXT: buffer_store_dword v22, v0, s[0:3], s32 offen ; 4-byte Folded Spill +; GFX8-NEXT: s_mov_b64 exec, s[58:59] +; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s32 +; GFX8-NEXT: v_lshrrev_b32_e64 v22, 6, s32 +; GFX8-NEXT: s_movk_i32 vcc_lo, 0x4040 +; GFX8-NEXT: v_add_u32_e32 v22, vcc, vcc_lo, v22 +; GFX8-NEXT: v_add_u32_e32 v22, vcc, 0x200, v22 +; GFX8-NEXT: v_readfirstlane_b32 s59, v22 +; GFX8-NEXT: s_and_b64 vcc, 0, exec +; GFX8-NEXT: s_mov_b64 s[58:59], exec +; GFX8-NEXT: s_mov_b64 exec, -1 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s32 +; GFX8-NEXT: v_mov_b32_e32 v0, 0x8044 +; GFX8-NEXT: buffer_load_dword v22, v0, s[0:3], s32 offen ; 4-byte Folded Reload +; GFX8-NEXT: s_mov_b64 exec, s[58:59] +; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s32 +; GFX8-NEXT: s_waitcnt vmcnt(1) +; GFX8-NEXT: v_readlane_b32 vcc_lo, v22, 0 +; GFX8-NEXT: v_readlane_b32 vcc_hi, v22, 1 +; GFX8-NEXT: s_mov_b64 s[58:59], exec +; GFX8-NEXT: s_mov_b64 exec, -1 +; GFX8-NEXT: s_mov_b64 exec, s[58:59] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_readlane_b32 s59, v23, 27 +; GFX8-NEXT: v_readlane_b32 s57, v23, 26 +; GFX8-NEXT: v_readlane_b32 s56, v23, 25 +; GFX8-NEXT: v_readlane_b32 s55, v23, 24 +; GFX8-NEXT: v_readlane_b32 s54, v23, 23 +; GFX8-NEXT: v_readlane_b32 s53, v23, 22 +; GFX8-NEXT: v_readlane_b32 s52, v23, 21 +; GFX8-NEXT: v_readlane_b32 s51, v23, 20 +; GFX8-NEXT: v_readlane_b32 s50, v23, 19 +; GFX8-NEXT: v_readlane_b32 s49, v23, 18 +; GFX8-NEXT: v_readlane_b32 s48, v23, 17 +; GFX8-NEXT: v_readlane_b32 s47, v23, 16 +; GFX8-NEXT: v_readlane_b32 s46, v23, 15 +; GFX8-NEXT: v_readlane_b32 s45, v23, 14 +; GFX8-NEXT: 
v_readlane_b32 s44, v23, 13 +; GFX8-NEXT: v_readlane_b32 s43, v23, 12 +; GFX8-NEXT: v_readlane_b32 s42, v23, 11 +; GFX8-NEXT: v_readlane_b32 s41, v23, 10 +; GFX8-NEXT: v_readlane_b32 s40, v23, 9 +; GFX8-NEXT: v_readlane_b32 s39, v23, 8 +; GFX8-NEXT: v_readlane_b32 s38, v23, 7 +; GFX8-NEXT: v_readlane_b32 s37, v23, 6 +; GFX8-NEXT: v_readlane_b32 s36, v23, 5 +; GFX8-NEXT: v_readlane_b32 s35, v23, 4 +; GFX8-NEXT: v_readlane_b32 s34, v23, 3 +; GFX8-NEXT: v_readlane_b32 s33, v23, 2 +; GFX8-NEXT: v_readlane_b32 s31, v23, 1 +; GFX8-NEXT: v_readlane_b32 s30, v23, 0 +; GFX8-NEXT: ; kill: killed $vgpr22 +; GFX8-NEXT: v_readlane_b32 s58, v23, 28 +; GFX8-NEXT: v_readlane_b32 s59, v23, 29 +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX8-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX8-NEXT: s_add_i32 s6, s32, 0x201200 +; GFX8-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX900-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: v_writelane_b32 v22, s30, 0 +; GFX900-NEXT: v_writelane_b32 v22, s31, 1 +; GFX900-NEXT: v_writelane_b32 v22, s33, 2 +; GFX900-NEXT: v_writelane_b32 v22, s34, 3 +; GFX900-NEXT: v_writelane_b32 v22, s35, 4 +; GFX900-NEXT: v_writelane_b32 v22, s36, 5 +; GFX900-NEXT: v_writelane_b32 v22, s37, 6 +; GFX900-NEXT: v_writelane_b32 v22, s38, 7 +; GFX900-NEXT: v_writelane_b32 v22, s39, 8 +; GFX900-NEXT: v_writelane_b32 v22, s40, 9 +; GFX900-NEXT: v_writelane_b32 v22, s41, 10 +; GFX900-NEXT: v_writelane_b32 v22, s42, 11 +; GFX900-NEXT: v_writelane_b32 v22, s43, 12 +; GFX900-NEXT: v_writelane_b32 v22, s44, 13 +; GFX900-NEXT: v_writelane_b32 v22, s45, 14 +; GFX900-NEXT: v_writelane_b32 v22, s46, 15 +; GFX900-NEXT: v_writelane_b32 v22, s47, 16 +; GFX900-NEXT: v_writelane_b32 v22, s48, 17 +; GFX900-NEXT: v_writelane_b32 v22, s49, 18 +; GFX900-NEXT: v_writelane_b32 v22, s50, 19 +; GFX900-NEXT: v_writelane_b32 v22, s51, 20 +; GFX900-NEXT: v_writelane_b32 v22, s52, 21 +; GFX900-NEXT: v_writelane_b32 v22, s53, 22 +; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 +; GFX900-NEXT: v_writelane_b32 v22, s54, 23 +; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 +; GFX900-NEXT: v_writelane_b32 v22, s55, 24 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use alloca0 v0 +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 +; GFX900-NEXT: v_writelane_b32 v22, s56, 25 +; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0 +; GFX900-NEXT: v_writelane_b32 v22, s57, 26 +; GFX900-NEXT: v_add_u32_e32 v0, 0x200, v0 +; GFX900-NEXT: s_and_b64 s[4:5], 0, exec +; GFX900-NEXT: v_writelane_b32 v22, s59, 27 +; GFX900-NEXT: v_readfirstlane_b32 s59, v0 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s59, v22, 27 +; GFX900-NEXT: v_readlane_b32 s57, v22, 26 +; GFX900-NEXT: v_readlane_b32 s56, v22, 25 +; GFX900-NEXT: 
v_readlane_b32 s55, v22, 24 +; GFX900-NEXT: v_readlane_b32 s54, v22, 23 +; GFX900-NEXT: v_readlane_b32 s53, v22, 22 +; GFX900-NEXT: v_readlane_b32 s52, v22, 21 +; GFX900-NEXT: v_readlane_b32 s51, v22, 20 +; GFX900-NEXT: v_readlane_b32 s50, v22, 19 +; GFX900-NEXT: v_readlane_b32 s49, v22, 18 +; GFX900-NEXT: v_readlane_b32 s48, v22, 17 +; GFX900-NEXT: v_readlane_b32 s47, v22, 16 +; GFX900-NEXT: v_readlane_b32 s46, v22, 15 +; GFX900-NEXT: v_readlane_b32 s45, v22, 14 +; GFX900-NEXT: v_readlane_b32 s44, v22, 13 +; GFX900-NEXT: v_readlane_b32 s43, v22, 12 +; GFX900-NEXT: v_readlane_b32 s42, v22, 11 +; GFX900-NEXT: v_readlane_b32 s41, v22, 10 +; GFX900-NEXT: v_readlane_b32 s40, v22, 9 +; GFX900-NEXT: v_readlane_b32 s39, v22, 8 +; GFX900-NEXT: v_readlane_b32 s38, v22, 7 +; GFX900-NEXT: v_readlane_b32 s37, v22, 6 +; GFX900-NEXT: v_readlane_b32 s36, v22, 5 +; GFX900-NEXT: v_readlane_b32 s35, v22, 4 +; GFX900-NEXT: v_readlane_b32 s34, v22, 3 +; GFX900-NEXT: v_readlane_b32 s33, v22, 2 +; GFX900-NEXT: v_readlane_b32 s31, v22, 1 +; GFX900-NEXT: v_readlane_b32 s30, v22, 0 +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX900-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX940-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX940-NEXT: s_add_i32 s2, s32, 0x8040 +; GFX940-NEXT: scratch_store_dword off, v23, s2 sc0 sc1 ; 4-byte Folded Spill +; GFX940-NEXT: s_mov_b64 exec, s[0:1] +; GFX940-NEXT: v_writelane_b32 v23, s30, 0 +; GFX940-NEXT: v_writelane_b32 v23, s31, 1 +; GFX940-NEXT: v_writelane_b32 v23, s33, 2 +; GFX940-NEXT: v_writelane_b32 v23, s34, 3 +; GFX940-NEXT: v_writelane_b32 v23, s35, 4 +; GFX940-NEXT: v_writelane_b32 v23, s36, 5 +; GFX940-NEXT: v_writelane_b32 v23, s37, 6 +; GFX940-NEXT: v_writelane_b32 v23, s38, 7 +; GFX940-NEXT: v_writelane_b32 v23, s39, 8 +; GFX940-NEXT: v_writelane_b32 v23, s40, 9 +; GFX940-NEXT: v_writelane_b32 v23, s41, 10 +; GFX940-NEXT: v_writelane_b32 v23, s42, 11 +; GFX940-NEXT: v_writelane_b32 v23, s43, 12 +; GFX940-NEXT: v_writelane_b32 v23, s44, 13 +; GFX940-NEXT: v_writelane_b32 v23, s45, 14 +; GFX940-NEXT: v_writelane_b32 v23, s46, 15 +; GFX940-NEXT: v_writelane_b32 v23, s47, 16 +; GFX940-NEXT: v_writelane_b32 v23, s48, 17 +; GFX940-NEXT: v_writelane_b32 v23, s49, 18 +; GFX940-NEXT: v_writelane_b32 v23, s50, 19 +; GFX940-NEXT: v_writelane_b32 v23, s51, 20 +; GFX940-NEXT: v_writelane_b32 v23, s52, 21 +; GFX940-NEXT: v_writelane_b32 v23, s53, 22 +; GFX940-NEXT: v_writelane_b32 v23, s54, 23 +; GFX940-NEXT: v_writelane_b32 v23, s55, 24 +; GFX940-NEXT: v_writelane_b32 v23, s56, 25 +; GFX940-NEXT: s_add_i32 s0, s32, 64 +; GFX940-NEXT: v_writelane_b32 v23, s57, 26 +; GFX940-NEXT: v_mov_b32_e32 v0, s0 +; GFX940-NEXT: v_writelane_b32 v23, s59, 27 +; GFX940-NEXT: ;;#ASMSTART +; GFX940-NEXT: ; use alloca0 v0 +; GFX940-NEXT: ;;#ASMEND +; GFX940-NEXT: ;;#ASMSTART +; GFX940-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc +; GFX940-NEXT: ;;#ASMEND +; GFX940-NEXT: s_add_i32 s59, s32, 0x4040 +; GFX940-NEXT: v_mov_b32_e32 v22, s59 +; GFX940-NEXT: v_writelane_b32 v23, s60, 28 +; GFX940-NEXT: v_add_u32_e32 v22, 0x200, v22 +; GFX940-NEXT: v_writelane_b32 v23, s61, 29 +; GFX940-NEXT: v_readfirstlane_b32 s59, v22 
+; GFX940-NEXT: s_and_b64 s[60:61], 0, exec +; GFX940-NEXT: ;;#ASMSTART +; GFX940-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc +; GFX940-NEXT: ;;#ASMEND +; GFX940-NEXT: v_readlane_b32 s61, v23, 29 +; GFX940-NEXT: v_readlane_b32 s60, v23, 28 +; GFX940-NEXT: v_readlane_b32 s59, v23, 27 +; GFX940-NEXT: v_readlane_b32 s57, v23, 26 +; GFX940-NEXT: v_readlane_b32 s56, v23, 25 +; GFX940-NEXT: v_readlane_b32 s55, v23, 24 +; GFX940-NEXT: v_readlane_b32 s54, v23, 23 +; GFX940-NEXT: v_readlane_b32 s53, v23, 22 +; GFX940-NEXT: v_readlane_b32 s52, v23, 21 +; GFX940-NEXT: v_readlane_b32 s51, v23, 20 +; GFX940-NEXT: v_readlane_b32 s50, v23, 19 +; GFX940-NEXT: v_readlane_b32 s49, v23, 18 +; GFX940-NEXT: v_readlane_b32 s48, v23, 17 +; GFX940-NEXT: v_readlane_b32 s47, v23, 16 +; GFX940-NEXT: v_readlane_b32 s46, v23, 15 +; GFX940-NEXT: v_readlane_b32 s45, v23, 14 +; GFX940-NEXT: v_readlane_b32 s44, v23, 13 +; GFX940-NEXT: v_readlane_b32 s43, v23, 12 +; GFX940-NEXT: v_readlane_b32 s42, v23, 11 +; GFX940-NEXT: v_readlane_b32 s41, v23, 10 +; GFX940-NEXT: v_readlane_b32 s40, v23, 9 +; GFX940-NEXT: v_readlane_b32 s39, v23, 8 +; GFX940-NEXT: v_readlane_b32 s38, v23, 7 +; GFX940-NEXT: v_readlane_b32 s37, v23, 6 +; GFX940-NEXT: v_readlane_b32 s36, v23, 5 +; GFX940-NEXT: v_readlane_b32 s35, v23, 4 +; GFX940-NEXT: v_readlane_b32 s34, v23, 3 +; GFX940-NEXT: v_readlane_b32 s33, v23, 2 +; GFX940-NEXT: v_readlane_b32 s31, v23, 1 +; GFX940-NEXT: v_readlane_b32 s30, v23, 0 +; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX940-NEXT: s_add_i32 s2, s32, 0x8040 +; GFX940-NEXT: scratch_load_dword v23, off, s2 ; 4-byte Folded Reload +; GFX940-NEXT: s_mov_b64 exec, s[0:1] +; GFX940-NEXT: s_waitcnt vmcnt(0) +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX10_1-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: +; GFX10_1: ; %bb.0: +; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 +; GFX10_1-NEXT: buffer_store_dword v23, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: v_writelane_b32 v23, s30, 0 +; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 +; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s32 +; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo +; GFX10_1-NEXT: v_writelane_b32 v23, s31, 1 +; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0 +; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 64, v1 +; GFX10_1-NEXT: ;;#ASMSTART +; GFX10_1-NEXT: ; use alloca0 v1 +; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_writelane_b32 v23, s33, 2 +; GFX10_1-NEXT: v_add_nc_u32_e32 v22, 0x200, v0 +; GFX10_1-NEXT: v_writelane_b32 v23, s34, 3 +; GFX10_1-NEXT: v_writelane_b32 v23, s35, 4 +; GFX10_1-NEXT: v_writelane_b32 v23, s36, 5 +; GFX10_1-NEXT: v_writelane_b32 v23, s37, 6 +; GFX10_1-NEXT: v_writelane_b32 v23, s38, 7 +; GFX10_1-NEXT: v_writelane_b32 v23, s39, 8 +; GFX10_1-NEXT: v_writelane_b32 v23, s40, 9 +; GFX10_1-NEXT: v_writelane_b32 v23, s41, 10 +; GFX10_1-NEXT: v_writelane_b32 v23, s42, 11 +; GFX10_1-NEXT: v_writelane_b32 v23, s43, 12 +; GFX10_1-NEXT: v_writelane_b32 v23, s44, 13 +; GFX10_1-NEXT: v_writelane_b32 v23, s45, 14 +; GFX10_1-NEXT: v_writelane_b32 v23, s46, 15 +; GFX10_1-NEXT: v_writelane_b32 v23, s47, 16 +; GFX10_1-NEXT: v_writelane_b32 v23, s48, 17 +; GFX10_1-NEXT: v_writelane_b32 v23, s49, 18 +; GFX10_1-NEXT: v_writelane_b32 v23, s50, 19 +; GFX10_1-NEXT: v_writelane_b32 v23, s51, 20 +; GFX10_1-NEXT: 
v_writelane_b32 v23, s52, 21 +; GFX10_1-NEXT: v_writelane_b32 v23, s53, 22 +; GFX10_1-NEXT: v_writelane_b32 v23, s54, 23 +; GFX10_1-NEXT: v_writelane_b32 v23, s55, 24 +; GFX10_1-NEXT: v_writelane_b32 v23, s56, 25 +; GFX10_1-NEXT: v_writelane_b32 v23, s57, 26 +; GFX10_1-NEXT: ;;#ASMSTART +; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc +; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_writelane_b32 v23, s59, 27 +; GFX10_1-NEXT: v_readfirstlane_b32 s59, v22 +; GFX10_1-NEXT: ;;#ASMSTART +; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc +; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_readlane_b32 s59, v23, 27 +; GFX10_1-NEXT: v_readlane_b32 s57, v23, 26 +; GFX10_1-NEXT: v_readlane_b32 s56, v23, 25 +; GFX10_1-NEXT: v_readlane_b32 s55, v23, 24 +; GFX10_1-NEXT: v_readlane_b32 s54, v23, 23 +; GFX10_1-NEXT: v_readlane_b32 s53, v23, 22 +; GFX10_1-NEXT: v_readlane_b32 s52, v23, 21 +; GFX10_1-NEXT: v_readlane_b32 s51, v23, 20 +; GFX10_1-NEXT: v_readlane_b32 s50, v23, 19 +; GFX10_1-NEXT: v_readlane_b32 s49, v23, 18 +; GFX10_1-NEXT: v_readlane_b32 s48, v23, 17 +; GFX10_1-NEXT: v_readlane_b32 s47, v23, 16 +; GFX10_1-NEXT: v_readlane_b32 s46, v23, 15 +; GFX10_1-NEXT: v_readlane_b32 s45, v23, 14 +; GFX10_1-NEXT: v_readlane_b32 s44, v23, 13 +; GFX10_1-NEXT: v_readlane_b32 s43, v23, 12 +; GFX10_1-NEXT: v_readlane_b32 s42, v23, 11 +; GFX10_1-NEXT: v_readlane_b32 s41, v23, 10 +; GFX10_1-NEXT: v_readlane_b32 s40, v23, 9 +; GFX10_1-NEXT: v_readlane_b32 s39, v23, 8 +; GFX10_1-NEXT: v_readlane_b32 s38, v23, 7 +; GFX10_1-NEXT: v_readlane_b32 s37, v23, 6 +; GFX10_1-NEXT: v_readlane_b32 s36, v23, 5 +; GFX10_1-NEXT: v_readlane_b32 s35, v23, 4 +; GFX10_1-NEXT: v_readlane_b32 s34, v23, 3 +; GFX10_1-NEXT: v_readlane_b32 s33, v23, 2 +; GFX10_1-NEXT: v_readlane_b32 s31, v23, 1 +; GFX10_1-NEXT: v_readlane_b32 s30, v23, 0 +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 +; GFX10_1-NEXT: buffer_load_dword v23, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: s_waitcnt vmcnt(0) +; GFX10_1-NEXT: s_setpc_b64 s[30:31] +; +; GFX10_3-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: +; GFX10_3: ; %bb.0: +; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 +; GFX10_3-NEXT: buffer_store_dword v23, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: v_writelane_b32 v23, s30, 0 +; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 +; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s32 +; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo +; GFX10_3-NEXT: v_writelane_b32 v23, s31, 1 +; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0 +; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 64, v1 +; GFX10_3-NEXT: ;;#ASMSTART +; GFX10_3-NEXT: ; use alloca0 v1 +; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_writelane_b32 v23, s33, 2 +; GFX10_3-NEXT: v_add_nc_u32_e32 v22, 0x200, v0 +; GFX10_3-NEXT: v_writelane_b32 v23, s34, 3 +; GFX10_3-NEXT: v_writelane_b32 v23, s35, 4 +; GFX10_3-NEXT: v_writelane_b32 v23, s36, 5 +; GFX10_3-NEXT: v_writelane_b32 v23, s37, 6 +; GFX10_3-NEXT: v_writelane_b32 v23, s38, 7 +; GFX10_3-NEXT: v_writelane_b32 v23, s39, 8 +; GFX10_3-NEXT: v_writelane_b32 v23, s40, 9 +; GFX10_3-NEXT: v_writelane_b32 v23, s41, 10 +; GFX10_3-NEXT: v_writelane_b32 v23, s42, 11 +; GFX10_3-NEXT: 
v_writelane_b32 v23, s43, 12 +; GFX10_3-NEXT: v_writelane_b32 v23, s44, 13 +; GFX10_3-NEXT: v_writelane_b32 v23, s45, 14 +; GFX10_3-NEXT: v_writelane_b32 v23, s46, 15 +; GFX10_3-NEXT: v_writelane_b32 v23, s47, 16 +; GFX10_3-NEXT: v_writelane_b32 v23, s48, 17 +; GFX10_3-NEXT: v_writelane_b32 v23, s49, 18 +; GFX10_3-NEXT: v_writelane_b32 v23, s50, 19 +; GFX10_3-NEXT: v_writelane_b32 v23, s51, 20 +; GFX10_3-NEXT: v_writelane_b32 v23, s52, 21 +; GFX10_3-NEXT: v_writelane_b32 v23, s53, 22 +; GFX10_3-NEXT: v_writelane_b32 v23, s54, 23 +; GFX10_3-NEXT: v_writelane_b32 v23, s55, 24 +; GFX10_3-NEXT: v_writelane_b32 v23, s56, 25 +; GFX10_3-NEXT: v_writelane_b32 v23, s57, 26 +; GFX10_3-NEXT: ;;#ASMSTART +; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc +; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_writelane_b32 v23, s59, 27 +; GFX10_3-NEXT: v_readfirstlane_b32 s59, v22 +; GFX10_3-NEXT: ;;#ASMSTART +; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc +; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_readlane_b32 s59, v23, 27 +; GFX10_3-NEXT: v_readlane_b32 s57, v23, 26 +; GFX10_3-NEXT: v_readlane_b32 s56, v23, 25 +; GFX10_3-NEXT: v_readlane_b32 s55, v23, 24 +; GFX10_3-NEXT: v_readlane_b32 s54, v23, 23 +; GFX10_3-NEXT: v_readlane_b32 s53, v23, 22 +; GFX10_3-NEXT: v_readlane_b32 s52, v23, 21 +; GFX10_3-NEXT: v_readlane_b32 s51, v23, 20 +; GFX10_3-NEXT: v_readlane_b32 s50, v23, 19 +; GFX10_3-NEXT: v_readlane_b32 s49, v23, 18 +; GFX10_3-NEXT: v_readlane_b32 s48, v23, 17 +; GFX10_3-NEXT: v_readlane_b32 s47, v23, 16 +; GFX10_3-NEXT: v_readlane_b32 s46, v23, 15 +; GFX10_3-NEXT: v_readlane_b32 s45, v23, 14 +; GFX10_3-NEXT: v_readlane_b32 s44, v23, 13 +; GFX10_3-NEXT: v_readlane_b32 s43, v23, 12 +; GFX10_3-NEXT: v_readlane_b32 s42, v23, 11 +; GFX10_3-NEXT: v_readlane_b32 s41, v23, 10 +; GFX10_3-NEXT: v_readlane_b32 s40, v23, 9 +; GFX10_3-NEXT: v_readlane_b32 s39, v23, 8 +; GFX10_3-NEXT: v_readlane_b32 s38, v23, 7 +; GFX10_3-NEXT: v_readlane_b32 s37, v23, 6 +; GFX10_3-NEXT: v_readlane_b32 s36, v23, 5 +; GFX10_3-NEXT: v_readlane_b32 s35, v23, 4 +; GFX10_3-NEXT: v_readlane_b32 s34, v23, 3 +; GFX10_3-NEXT: v_readlane_b32 s33, v23, 2 +; GFX10_3-NEXT: v_readlane_b32 s31, v23, 1 +; GFX10_3-NEXT: v_readlane_b32 s30, v23, 0 +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 +; GFX10_3-NEXT: buffer_load_dword v23, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: s_waitcnt vmcnt(0) +; GFX10_3-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 +; GFX11-NEXT: scratch_store_b32 off, v23, s1 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: v_writelane_b32 v23, s30, 0 +; GFX11-NEXT: s_add_i32 s0, s32, 0x4040 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_add_i32 s0, s32, 64 +; GFX11-NEXT: v_writelane_b32 v23, s31, 1 +; GFX11-NEXT: v_mov_b32_e32 v1, s0 +; GFX11-NEXT: s_and_b32 s0, 0, exec_lo +; GFX11-NEXT: v_add_nc_u32_e32 v22, 0x200, v0 +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use alloca0 v1 +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_writelane_b32 v23, s33, 2 +; GFX11-NEXT: v_writelane_b32 v23, s34, 3 +; GFX11-NEXT: v_writelane_b32 v23, s35, 4 +; 
GFX11-NEXT: v_writelane_b32 v23, s36, 5 +; GFX11-NEXT: v_writelane_b32 v23, s37, 6 +; GFX11-NEXT: v_writelane_b32 v23, s38, 7 +; GFX11-NEXT: v_writelane_b32 v23, s39, 8 +; GFX11-NEXT: v_writelane_b32 v23, s40, 9 +; GFX11-NEXT: v_writelane_b32 v23, s41, 10 +; GFX11-NEXT: v_writelane_b32 v23, s42, 11 +; GFX11-NEXT: v_writelane_b32 v23, s43, 12 +; GFX11-NEXT: v_writelane_b32 v23, s44, 13 +; GFX11-NEXT: v_writelane_b32 v23, s45, 14 +; GFX11-NEXT: v_writelane_b32 v23, s46, 15 +; GFX11-NEXT: v_writelane_b32 v23, s47, 16 +; GFX11-NEXT: v_writelane_b32 v23, s48, 17 +; GFX11-NEXT: v_writelane_b32 v23, s49, 18 +; GFX11-NEXT: v_writelane_b32 v23, s50, 19 +; GFX11-NEXT: v_writelane_b32 v23, s51, 20 +; GFX11-NEXT: v_writelane_b32 v23, s52, 21 +; GFX11-NEXT: v_writelane_b32 v23, s53, 22 +; GFX11-NEXT: v_writelane_b32 v23, s54, 23 +; GFX11-NEXT: v_writelane_b32 v23, s55, 24 +; GFX11-NEXT: v_writelane_b32 v23, s56, 25 +; GFX11-NEXT: v_writelane_b32 v23, s57, 26 +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_writelane_b32 v23, s59, 27 +; GFX11-NEXT: v_readfirstlane_b32 s59, v22 +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_readlane_b32 s59, v23, 27 +; GFX11-NEXT: v_readlane_b32 s57, v23, 26 +; GFX11-NEXT: v_readlane_b32 s56, v23, 25 +; GFX11-NEXT: v_readlane_b32 s55, v23, 24 +; GFX11-NEXT: v_readlane_b32 s54, v23, 23 +; GFX11-NEXT: v_readlane_b32 s53, v23, 22 +; GFX11-NEXT: v_readlane_b32 s52, v23, 21 +; GFX11-NEXT: v_readlane_b32 s51, v23, 20 +; GFX11-NEXT: v_readlane_b32 s50, v23, 19 +; GFX11-NEXT: v_readlane_b32 s49, v23, 18 +; GFX11-NEXT: v_readlane_b32 s48, v23, 17 +; GFX11-NEXT: v_readlane_b32 s47, v23, 16 +; GFX11-NEXT: v_readlane_b32 s46, v23, 15 +; GFX11-NEXT: v_readlane_b32 s45, v23, 14 +; GFX11-NEXT: v_readlane_b32 s44, v23, 13 +; GFX11-NEXT: v_readlane_b32 s43, v23, 12 +; GFX11-NEXT: v_readlane_b32 s42, v23, 11 +; GFX11-NEXT: v_readlane_b32 s41, v23, 10 +; GFX11-NEXT: v_readlane_b32 s40, v23, 9 +; GFX11-NEXT: v_readlane_b32 s39, v23, 8 +; GFX11-NEXT: v_readlane_b32 s38, v23, 7 +; GFX11-NEXT: v_readlane_b32 s37, v23, 6 +; GFX11-NEXT: v_readlane_b32 s36, v23, 5 +; GFX11-NEXT: v_readlane_b32 s35, v23, 4 +; GFX11-NEXT: v_readlane_b32 s34, v23, 3 +; GFX11-NEXT: v_readlane_b32 s33, v23, 2 +; GFX11-NEXT: v_readlane_b32 s31, v23, 1 +; GFX11-NEXT: v_readlane_b32 s30, v23, 0 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 +; GFX11-NEXT: scratch_load_b32 v23, off, s1 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_store_b32 off, v23, s32 offset:32768 ; 4-byte Folded Spill +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: v_writelane_b32 v23, s30, 0 +; GFX12-NEXT: s_add_co_i32 s0, s32, 0x4000 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_dual_mov_b32 v1, s32 :: v_dual_mov_b32 v0, s0 +; GFX12-NEXT: 
s_and_b32 s0, 0, exec_lo +; GFX12-NEXT: v_writelane_b32 v23, s31, 1 +; GFX12-NEXT: ;;#ASMSTART +; GFX12-NEXT: ; use alloca0 v1 +; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: v_add_nc_u32_e32 v22, 0x200, v0 +; GFX12-NEXT: v_writelane_b32 v23, s33, 2 +; GFX12-NEXT: v_writelane_b32 v23, s34, 3 +; GFX12-NEXT: v_writelane_b32 v23, s35, 4 +; GFX12-NEXT: v_writelane_b32 v23, s36, 5 +; GFX12-NEXT: v_writelane_b32 v23, s37, 6 +; GFX12-NEXT: v_writelane_b32 v23, s38, 7 +; GFX12-NEXT: v_writelane_b32 v23, s39, 8 +; GFX12-NEXT: v_writelane_b32 v23, s40, 9 +; GFX12-NEXT: v_writelane_b32 v23, s41, 10 +; GFX12-NEXT: v_writelane_b32 v23, s42, 11 +; GFX12-NEXT: v_writelane_b32 v23, s43, 12 +; GFX12-NEXT: v_writelane_b32 v23, s44, 13 +; GFX12-NEXT: v_writelane_b32 v23, s45, 14 +; GFX12-NEXT: v_writelane_b32 v23, s46, 15 +; GFX12-NEXT: v_writelane_b32 v23, s47, 16 +; GFX12-NEXT: v_writelane_b32 v23, s48, 17 +; GFX12-NEXT: v_writelane_b32 v23, s49, 18 +; GFX12-NEXT: v_writelane_b32 v23, s50, 19 +; GFX12-NEXT: v_writelane_b32 v23, s51, 20 +; GFX12-NEXT: v_writelane_b32 v23, s52, 21 +; GFX12-NEXT: v_writelane_b32 v23, s53, 22 +; GFX12-NEXT: v_writelane_b32 v23, s54, 23 +; GFX12-NEXT: v_writelane_b32 v23, s55, 24 +; GFX12-NEXT: v_writelane_b32 v23, s56, 25 +; GFX12-NEXT: v_writelane_b32 v23, s57, 26 +; GFX12-NEXT: ;;#ASMSTART +; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc +; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: v_writelane_b32 v23, s59, 27 +; GFX12-NEXT: v_readfirstlane_b32 s59, v22 +; GFX12-NEXT: ;;#ASMSTART +; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc +; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX12-NEXT: v_readlane_b32 s59, v23, 27 +; GFX12-NEXT: v_readlane_b32 s57, v23, 26 +; GFX12-NEXT: v_readlane_b32 s56, v23, 25 +; GFX12-NEXT: v_readlane_b32 s55, v23, 24 +; GFX12-NEXT: v_readlane_b32 s54, v23, 23 +; GFX12-NEXT: v_readlane_b32 s53, v23, 22 +; GFX12-NEXT: v_readlane_b32 s52, v23, 21 +; GFX12-NEXT: v_readlane_b32 s51, v23, 20 +; GFX12-NEXT: v_readlane_b32 s50, v23, 19 +; GFX12-NEXT: v_readlane_b32 s49, v23, 18 +; GFX12-NEXT: v_readlane_b32 s48, v23, 17 +; GFX12-NEXT: v_readlane_b32 s47, v23, 16 +; GFX12-NEXT: v_readlane_b32 s46, v23, 15 +; GFX12-NEXT: v_readlane_b32 s45, v23, 14 +; GFX12-NEXT: v_readlane_b32 s44, v23, 13 +; GFX12-NEXT: v_readlane_b32 s43, v23, 12 +; GFX12-NEXT: v_readlane_b32 s42, v23, 11 +; GFX12-NEXT: v_readlane_b32 s41, v23, 10 +; GFX12-NEXT: v_readlane_b32 s40, v23, 9 +; GFX12-NEXT: v_readlane_b32 s39, v23, 8 +; GFX12-NEXT: v_readlane_b32 s38, v23, 7 +; GFX12-NEXT: v_readlane_b32 s37, v23, 6 +; GFX12-NEXT: v_readlane_b32 s36, v23, 5 +; GFX12-NEXT: v_readlane_b32 s35, v23, 4 +; GFX12-NEXT: v_readlane_b32 s34, v23, 3 +; GFX12-NEXT: v_readlane_b32 s33, v23, 2 +; GFX12-NEXT: v_readlane_b32 s31, v23, 1 +; GFX12-NEXT: v_readlane_b32 s30, v23, 0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_load_b32 v23, off, s32 offset:32768 ; 4-byte Folded Reload +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %alloca0 = alloca [4096 x i32], align 64, addrspace(5) + %alloca1 = alloca [4096 x i32], align 4, addrspace(5) + call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) + + ; Force no SGPRs to be available for the carry-out of the vector add. 
+  %asm = call %asm.output3 asm sideeffect
+    "; def $0, $1, $2, $3, $4, $5, $6, $7",
+    "={s[0:15]},={s[16:31]},={s[32:47]},={s[48:55]},={s[56:57]},={v[0:15]},={v[16:21]},={vcc}"()
+
+  %s0 = extractvalue %asm.output3 %asm, 0
+  %s1 = extractvalue %asm.output3 %asm, 1
+  %s2 = extractvalue %asm.output3 %asm, 2
+  %s3 = extractvalue %asm.output3 %asm, 3
+  %s4 = extractvalue %asm.output3 %asm, 4
+
+  %v0 = extractvalue %asm.output3 %asm, 5
+  %v1 = extractvalue %asm.output3 %asm, 6
+
+  %vcc = extractvalue %asm.output3 %asm, 7
+
+  %alloca1.offset = getelementptr [4096 x i32], ptr addrspace(5) %alloca1, i32 0, i32 128
+
+  ; scc is unavailable since it is live in
+  call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9",
+    "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{v[0:15]},{v[16:21]},{vcc},{s59},{scc}"(
+      <16 x i32> %s0,
+      <16 x i32> %s1,
+      <16 x i32> %s2,
+      <8 x i32> %s3,
+      <2 x i32> %s4,
+      <16 x i32> %v0,
+      <6 x i32> %v1,
+      i64 %vcc,
+      ptr addrspace(5) %alloca1.offset,
+      i32 0) ; use of scc
+
+  ret void
+}
+
+; For gfx8/gfx9, this should enforce a budget of 24 VGPRs, and 60 SGPRs (4
+; are reserved at the end for xnack + vcc).
+attributes #0 = { nounwind alignstack=64 "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="10,10" "no-realign-stack" }
+attributes #1 = { nounwind alignstack=16 "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="10,10" "no-realign-stack" }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX9: {{.*}}

From 6740d701bde4ad9b95d7d811852fa0a2542e6b28 Mon Sep 17 00:00:00 2001
From: MaheshRavishankar <1663364+MaheshRavishankar@users.noreply.github.com>
Date: Wed, 31 Jul 2024 12:32:07 -0700
Subject: [PATCH 017/114] [mlir][Linalg] Deprecate `linalg::tileToForallOp` and
 `linalg::tileToForallOpUsingTileSizes` (#91878)

The implementations of these methods are legacy and are removed in favor of
using the `scf::tileUsingSCF` methods as replacements. To bring the latter
on par with the requirements of the deprecated methods, the tiling now
allows one to specify the maximum number of tiles to use instead of
specifying the tile sizes. When tiling to `scf.forall`, this specification
is used to generate the `num_threads` version of the operation. A slight
deviation from the previous implementation is that the deprecated methods
always generated the `num_threads` variant of the `scf.forall` operation;
now this is driven by the tiling options specified. This reduces the
indexing math generated when the tile sizes are specified.

**Moving from `linalg::tileToForallOp` to `scf::tileUsingSCF`**

```
OpBuilder b;
TilingInterface op;
ArrayRef numThreads;
ArrayAttr mapping;
FailureOr result = linalg::tileToForallOp(b, op, numThreads, mapping);
```

can be replaced by

```
scf::SCFTilingOptions options;
options.setNumThreads(numThreads);
options.setLoopType(scf::SCFTilingOptions::LoopType::ForallOp);
options.setMapping(mapping.getValue()); /*note the difference that setMapping takes an ArrayRef */
FailureOr result = scf::tileUsingSCF(b, op, options);
```

This generates the `numThreads` version of the `scf.forall` for the
inter-tile loops, i.e.

```
... = scf.forall (%arg0, %arg1) in (%nt0, %nt1) shared_outs(...)
```
```

**Moving from `linalg::tileToForallOpUsingTileSizes` to `scf::tileUsingSCF`**

```
OpBuilder b;
TilingInterface op;
ArrayRef tileSizes;
ArrayAttr mapping;

FailureOr result = linalg::tileToForallOpUsingTileSizes(b, op, tileSizes, mapping);
```

can be replaced by

```
scf::SCFTilingOptions options;
options.setTileSizes(tileSizes);
options.setLoopType(scf::SCFTilingOptions::LoopType::ForallOp);
options.setMapping(mapping.getValue()); /*note the difference that setMapping takes an ArrayRef */

FailureOr result = scf::tileUsingSCF(b, op, options);
```

Also note that `linalg::tileToForallOpUsingTileSizes` effectively called
`linalg::tileToForallOp` by computing `numThreads` from the `op` and
`tileSizes`, and generated the `numThreads` version of the `scf.forall`.
That is not the case anymore. Instead this will directly generate the
`tileSizes` version of the `scf.forall` op

```
... = scf.forall(%arg0, %arg1) = (%lb0, %lb1) to (%ub0, %ub1) step(%step0, %step1) shared_outs(...)
```

If you actually want the `numThreads` version, it is up to the caller to
compute the `numThreads` and use `options.setNumThreads` instead of
`options.setTileSizes`.

Note that there is a slight difference between the num threads version and
the tile size version. The former requires an additional `affine.max` on the
tile size to ensure non-negative tile sizes; the tile size version does not
need this `affine.max`, since there the tile sizes are non-negative by
construction. In previous implementations, the `numThreads` version generated
when using the `linalg::tileToForallOpUsingTileSizes` method would avoid
generating the `affine.max` operation. To get the same state, downstream
users will have to additionally normalize the `scf.forall` operation.

**Changes to `transform.structured.tile_using_forall`**

The transform dialect op that called into `linalg::tileToForallOp` and
`linalg::tileToForallOpUsingTileSizes` has been modified to call
`scf::tileUsingSCF`. The transform dialect op always generates the
`numThreads` version of the `scf.forall` op. So when `tile_sizes` are
specified for the transform dialect op, the `tile_sizes` version of the
`scf.forall` is generated first by `scf::tileUsingSCF` and is then normalized
to get back to the same state. So there is no functional change to
`transform.structured.tile_using_forall`. It always generates the
`numThreads` version of the `scf.forall` op (as it did before this change).
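For reference, a minimal downstream migration sketch (an illustration, not
part of this patch): it combines `scf::tileUsingSCF` with the new
`normalizeForallOp` utility to recover the old `numThreads`-shaped loop from
a tile-size tiling. The helper name `migrateTileToForall` and the surrounding
boilerplate are assumptions, and error handling is elided.

```cpp
// Sketch only: migrateTileToForall is an illustrative name, and the include
// paths assume the in-tree layout touched by this patch.
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/SCF/Transforms/TileUsingInterface.h"
#include "mlir/Dialect/SCF/Utils/Utils.h"

using namespace mlir;

static LogicalResult migrateTileToForall(RewriterBase &rewriter,
                                         TilingInterface op,
                                         ArrayRef<OpFoldResult> tileSizes,
                                         ArrayRef<Attribute> mapping) {
  scf::SCFTilingOptions options;
  options.setLoopType(scf::SCFTilingOptions::LoopType::ForallOp);
  options.setTileSizes(tileSizes); // or options.setNumThreads(...) directly
  options.setMapping(mapping);

  FailureOr<scf::SCFTilingResult> tiled =
      scf::tileUsingSCF(rewriter, op, options);
  if (failed(tiled))
    return failure();
  rewriter.replaceOp(op, tiled->replacements);

  // Only needed if callers relied on the old numThreads-shaped loop: rewrite
  // the generated scf.forall so it runs from 0 to the number of tiles in
  // steps of 1.
  auto forallOp = cast<scf::ForallOp>(tiled->loops.front());
  return failed(normalizeForallOp(rewriter, forallOp)) ? failure() : success();
}
```

The final normalization step mirrors what
`transform.structured.tile_using_forall` now does internally when `tile_sizes`
are specified.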
--------- Signed-off-by: MaheshRavishankar --- .../Linalg/TransformOps/LinalgTransformOps.h | 6 +- .../Dialect/Linalg/Transforms/Transforms.h | 33 +- .../SCF/Transforms/TileUsingInterface.h | 35 +- mlir/include/mlir/Dialect/SCF/Utils/Utils.h | 8 + .../TransformOps/LinalgTransformOps.cpp | 131 +++++- mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp | 182 -------- .../SCF/Transforms/TileUsingInterface.cpp | 388 ++++++++++++++---- mlir/lib/Dialect/SCF/Utils/Utils.cpp | 38 +- mlir/test/Dialect/Linalg/tile-tensors.mlir | 2 +- mlir/test/Dialect/Linalg/tile-to-forall.mlir | 60 ++- .../Dialect/Linalg/transform-op-tile.mlir | 31 +- .../tile-and-fuse-using-interface.mlir | 2 +- .../tile-pad-using-interface.mlir | 10 +- .../TilingInterface/tile-using-interface.mlir | 56 +-- .../TilingInterface/tile-using-scfforall.mlir | 20 +- .../TestTilingInterfaceTransformOps.cpp | 6 +- .../llvm-project-overlay/mlir/BUILD.bazel | 1 + 17 files changed, 600 insertions(+), 409 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h index 3af642752724c4..db25c9b241734f 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h @@ -30,6 +30,10 @@ class GenericOp; class LinalgOp; } // namespace linalg +namespace scf { +struct SCFTilingResult; +} // namespace scf + namespace tensor { class InsertSliceOp; class PackOp; @@ -60,7 +64,7 @@ tileToForallOpImpl(RewriterBase &rewriter, transform::TransformState &state, ArrayRef mixedNumThreads, ArrayRef mixedTileSizes, std::optional mapping, - linalg::ForallTilingResult &tilingResult); + scf::SCFTilingResult &tilingResult); } // namespace transform } // namespace mlir diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index 0c7a8edff222f5..477ef7bfafb181 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -866,29 +866,6 @@ FailureOr computeContinuousTileSizes(OpBuilder &builder, TilingInterface op, unsigned dimension, OpFoldResult targetSize, bool emitAssertions); -/// Rewrite a TilingInterface `op` to a tiled `scf.forall`, applying -/// tiling by `numThreads`. -/// If non-empty, the `mapping` is added as an attribute to the -/// resulting `scf.forall`. -/// Zero tile sizes indicate that the dimension is not tiled, and can be -/// thought of as tiling by the full size of data. It is the user's -/// responsibility to ensure that `numThreads` is a valid tiling specification -/// (i.e. that only tiles parallel dimensions, e.g. in the Linalg case). -struct ForallTilingResult { - Operation *tileOp; - Operation *tiledOp; -}; -FailureOr tileToForallOp(RewriterBase &builder, - TilingInterface op, - ArrayRef numThreads, - std::optional mapping); - -/// Same as `tileToForallOp`, but calculate the number of threads -/// required using the given tileSizes. -FailureOr -tileToForallOpUsingTileSizes(RewriterBase &builder, TilingInterface op, - ArrayRef tileSizes, - std::optional mapping); /// Transformation information returned after reduction tiling. 
struct ForallReductionTilingResult { @@ -1750,10 +1727,12 @@ void populateWinogradConv2DPatterns(RewritePatternSet &patterns, int64_t m, void populateDecomposeWinogradOpsPatterns(RewritePatternSet &patterns); /// Adds patterns that reduce the rank of named contraction ops that have -/// unit dimensions in the operand(s) by converting to a sequence of `collapse_shape`, -/// ``, `expand_shape` (if on tensors). For example a -/// `linalg.batch_matmul` with unit batch size will convert to `linalg.matmul` -/// and a `linalg.matvec` with with unit spatial dim in lhs will convert to a `linalg.dot`. +/// unit dimensions in the operand(s) by converting to a sequence of +/// `collapse_shape`, +/// ``, `expand_shape` (if on tensors). For +/// example a `linalg.batch_matmul` with unit batch size will convert to +/// `linalg.matmul` and a `linalg.matvec` with with unit spatial dim in lhs will +/// convert to a `linalg.dot`. void populateContractionOpRankReducingPatterns(RewritePatternSet &patterns); } // namespace linalg diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h b/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h index d68ca112073763..1f21af6d6a29ac 100644 --- a/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h +++ b/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h @@ -32,9 +32,11 @@ using SCFTileSizeComputationFunction = /// Options to use to control tiling. struct SCFTilingOptions { - /// Computation function that returns the tile sizes for each operation. - /// Delayed construction of constant tile sizes should occur to interoperate - /// with folding. + /// Computation function that returns the tile sizes to use for each loop. + /// Returning a tile size of zero implies no tiling for that loop. If the + /// size of the returned vector is smaller than the number of loops, the inner + /// loops are not tiled. If the size of the returned vector is larger, then + /// the vector is truncated to number of loops. SCFTileSizeComputationFunction tileSizeComputationFunction = nullptr; SCFTilingOptions & @@ -45,7 +47,27 @@ struct SCFTilingOptions { /// Convenience function to set the `tileSizeComputationFunction` to a /// function that computes tile sizes at the point they are needed. Allows /// proper interaction with folding. - SCFTilingOptions &setTileSizes(ArrayRef ts); + SCFTilingOptions &setTileSizes(ArrayRef tileSizes); + + /// Computation function that returns the number of threads to use for + /// each loop. Returning a num threads of zero implies no tiling for that + /// loop. If the size of the returned vector is smaller than the number of + /// loops, the inner loops are not tiled. If the size of the returned vector + /// is larger, then the vector is truncated to number of loops. Note: This + /// option is only supported with loopType set to `LoopType::ForallOp`. If the + /// tile size function is not specified while the num threads computation is, + /// then the tile size is determined automatically to map at most one tile per + /// thread. + SCFTileSizeComputationFunction numThreadsComputationFunction = nullptr; + + SCFTilingOptions & + setNumThreadsComputationFunction(SCFTileSizeComputationFunction fun) { + numThreadsComputationFunction = std::move(fun); + return *this; + } + /// Convenience function to set the `numThreadsComputationFunction` to a + /// function that computes num threads at the point they are needed. 
+ SCFTilingOptions &setNumThreads(ArrayRef numThreads); /// The interchange vector to reorder the tiled loops. SmallVector interchangeVector = {}; @@ -67,9 +89,8 @@ struct SCFTilingOptions { /// when using loop constructs that dont support such a mapping (like /// `scf.for`) SmallVector mappingVector = {}; - SCFTilingOptions &setMapping(ArrayRef mapping) { - mappingVector = llvm::map_to_vector( - mapping, [](auto attr) -> Attribute { return attr; }); + SCFTilingOptions &setMapping(ArrayRef mapping) { + mappingVector = llvm::to_vector(mapping); return *this; } }; diff --git a/mlir/include/mlir/Dialect/SCF/Utils/Utils.h b/mlir/include/mlir/Dialect/SCF/Utils/Utils.h index b7d6e99b5fdcc8..4001ba3fc84c9d 100644 --- a/mlir/include/mlir/Dialect/SCF/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/SCF/Utils/Utils.h @@ -195,6 +195,14 @@ scf::ForallOp fuseIndependentSiblingForallLoops(scf::ForallOp target, scf::ForOp fuseIndependentSiblingForLoops(scf::ForOp target, scf::ForOp source, RewriterBase &rewriter); +/// Normalize an `scf.forall` operation. Returns `failure()`if normalization +/// fails. +// On `success()` returns the +/// newly created operation with all uses of the original operation replaced +/// with results of the new operation. +FailureOr normalizeForallOp(RewriterBase &rewriter, + scf::ForallOp forallOp); + } // namespace mlir #endif // MLIR_DIALECT_SCF_UTILS_UTILS_H_ diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index b611347b8de2e4..9baf358a95503c 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -12,6 +12,7 @@ #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Arith/Utils/Utils.h" #include "mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" @@ -3151,12 +3152,100 @@ void transform::TileUsingForallOp::build(OpBuilder &builder, /*mapping=*/mapping); } +/// Given `lbs`, `ubs` and `steps` of loops, return (for each loop), the +/// normalized upper bound. +static SmallVector +normalizeUpperBounds(RewriterBase &rewriter, Location loc, + ArrayRef lbs, ArrayRef ubs, + ArrayRef steps) { + AffineExpr s0, s1, s2; + bindSymbols(rewriter.getContext(), s0, s1, s2); + AffineExpr normalizedUbExpr = (s1 - s0).ceilDiv(s2); + SmallVector normalizedUbs; + for (auto [lb, ub, step] : llvm::zip_equal(lbs, ubs, steps)) { + OpFoldResult normalizedUb = affine::makeComposedFoldedAffineApply( + rewriter, loc, normalizedUbExpr, {lb, ub, step}); + normalizedUbs.push_back(normalizedUb); + } + return normalizedUbs; +} + +/// When a loop is normalized, the uses of the induction variable within the +/// loop need to replaced with `original_lb + old_iv * original_step`. 
+static SmallVector denormalizeIndVar(RewriterBase &rewriter, + Location loc, ValueRange ivs, + ArrayRef lbs, + ArrayRef steps) { + AffineExpr s0, s1; + AffineExpr d0; + bindSymbols(rewriter.getContext(), s0, s1); + bindDims(rewriter.getContext(), d0); + AffineExpr denormExpr = s0 + d0 * s1; + SmallVector denormalizedIvs; + + for (auto [iv, lb, step] : llvm::zip_equal(ivs, lbs, steps)) { + OpFoldResult denormValue = affine::makeComposedFoldedAffineApply( + rewriter, loc, denormExpr, ArrayRef{iv, lb, step}); + denormalizedIvs.push_back( + getValueOrCreateConstantIndexOp(rewriter, loc, denormValue)); + } + return denormalizedIvs; +} + +/// Given a `scf.forall` loop return a loop op with the loop bounds +/// normalized. +/// TODO: Replace this with a general utility to normalize `scf.forall`. +/// At the time of writing, this wasnt done since adding this to `scf` +/// dialect would disallow using of `affine.apply` operations due +/// to cyclic dependencies. To avoid churn in lit tests +/// with the change this was added with, defer that to a follow up. +static scf::ForallOp normalizeForallLoopOp(RewriterBase &rewriter, + scf::ForallOp loop) { + SmallVector lbs = loop.getMixedLowerBound(); + SmallVector ubs = loop.getMixedUpperBound(); + SmallVector steps = loop.getMixedStep(); + + if (llvm::all_of( + lbs, [](OpFoldResult ofr) { return isConstantIntValue(ofr, 0); }) && + llvm::all_of( + steps, [](OpFoldResult ofr) { return isConstantIntValue(ofr, 1); })) { + return loop; + } + + Location loc = loop.getLoc(); + SmallVector normalizedUbs = + normalizeUpperBounds(rewriter, loc, lbs, ubs, steps); + SmallVector normalizedLbs(normalizedUbs.size(), + rewriter.getIndexAttr(0)); + SmallVector normalizedSteps(normalizedUbs.size(), + rewriter.getIndexAttr(1)); + + auto normalizedForallOp = rewriter.create( + loc, normalizedLbs, normalizedUbs, normalizedSteps, loop.getOutputs(), + loop.getMapping(), [](OpBuilder &, Location, ValueRange) {}); + + auto normalizedLoopIvs = normalizedForallOp.getInductionVars(); + OpBuilder::InsertionGuard g(rewriter); + Block *normalizedLoopBlock = normalizedForallOp.getBody(); + rewriter.setInsertionPointToStart(normalizedLoopBlock); + + SmallVector argValues = + denormalizeIndVar(rewriter, loc, normalizedLoopIvs, lbs, steps); + argValues.append(normalizedForallOp.getRegionIterArgs().begin(), + normalizedForallOp.getRegionIterArgs().end()); + Block *origLoopBlock = loop.getBody(); + rewriter.mergeBlocks(origLoopBlock, normalizedLoopBlock, argValues); + + rewriter.replaceOp(loop, normalizedForallOp); + return normalizedForallOp; +} + DiagnosedSilenceableFailure transform::tileToForallOpImpl( RewriterBase &rewriter, transform::TransformState &state, TransformOpInterface transformOp, Operation *target, ArrayRef mixedNumThreads, ArrayRef mixedTileSizes, std::optional mapping, - linalg::ForallTilingResult &tilingResult) { + scf::SCFTilingResult &tilingResult) { // Transform all targets one by one. 
auto tileableOp = dyn_cast(target); if (!tileableOp) { @@ -3167,20 +3256,35 @@ DiagnosedSilenceableFailure transform::tileToForallOpImpl( return diag; } rewriter.setInsertionPoint(tileableOp); - FailureOr maybeTilingResult = failure(); + scf::SCFTilingOptions options; + options.setLoopType(scf::SCFTilingOptions::LoopType::ForallOp); if (!mixedNumThreads.empty()) { - maybeTilingResult = - linalg::tileToForallOp(rewriter, tileableOp, mixedNumThreads, mapping); + options.setNumThreads(mixedNumThreads); } else { - maybeTilingResult = linalg::tileToForallOpUsingTileSizes( - rewriter, tileableOp, mixedTileSizes, mapping); + options.setTileSizes(mixedTileSizes); } + if (mapping) { + options.setMapping(mapping.value().getValue()); + } + FailureOr maybeTilingResult = + scf::tileUsingSCF(rewriter, tileableOp, options); if (failed(maybeTilingResult)) return transformOp.emitDefaultSilenceableFailure(tileableOp); - rewriter.replaceOp(tileableOp, maybeTilingResult->tileOp->getResults()); + + rewriter.replaceOp(tileableOp, maybeTilingResult->replacements); tilingResult = *maybeTilingResult; + + if (mixedNumThreads.empty()) { + auto generatedForallOp = cast(tilingResult.loops.front()); + OpBuilder::InsertionGuard g(rewriter); + rewriter.setInsertionPoint(generatedForallOp); + scf::ForallOp normalizedForallOp = + normalizeForallLoopOp(rewriter, generatedForallOp); + tilingResult.loops.front() = normalizedForallOp; + } + return DiagnosedSilenceableFailure::success(); } @@ -3214,14 +3318,14 @@ DiagnosedSilenceableFailure transform::TileUsingForallOp::apply( return status; for (Operation *target : state.getPayloadOps(getTarget())) { - linalg::ForallTilingResult tilingResult; + scf::SCFTilingResult tilingResult; DiagnosedSilenceableFailure diag = tileToForallOpImpl( rewriter, state, transformOp, target, mixedNumThreads, mixedTileSizes, getMapping(), tilingResult); if (!diag.succeeded()) return diag; - tileOps.push_back(tilingResult.tileOp); - tiledOps.push_back(tilingResult.tiledOp); + tileOps.push_back(tilingResult.loops.front()); + tiledOps.append(tilingResult.tiledOps); } transformResults.set(cast(getForallOp()), tileOps); @@ -3699,7 +3803,7 @@ DiagnosedSilenceableFailure transform::MapCopyToThreadsOp::applyToOne( // OpBuilder only used to compute attributes. OpBuilder b(getContext()); - linalg::ForallTilingResult tilingResult; + scf::SCFTilingResult tilingResult; DiagnosedSilenceableFailure diag = tileToForallOpImpl( /*rewriter=*/rewriter, /*state=*/state, @@ -3712,8 +3816,9 @@ DiagnosedSilenceableFailure transform::MapCopyToThreadsOp::applyToOne( if (!diag.succeeded()) return diag; - results.push_back(tilingResult.tileOp); - results.push_back(tilingResult.tiledOp); + results.push_back(tilingResult.loops.front()); + for (auto op : tilingResult.tiledOps) + results.push_back(op); return DiagnosedSilenceableFailure::success(); } diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp index 8ef8651646829f..fb6ab2055e7dda 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -435,188 +435,6 @@ static void calculateTileOffsetsAndSizes( } } -/// Returns a vector of bools representing if, for each axis, `op` can be tiled -/// without incurring in a race condition and thus it is thread-safe to do the -/// tiling. This is checked by iterating over numThreads and ensuring that the -/// corresponding iterator type is "parallel". If it is not, then we know that -/// such dimension is unsafe to tile. 
-SmallVector safeToTileToForall(mlir::MLIRContext *ctx, LinalgOp linalgOp, - ArrayRef numThreads) { - auto iterators = linalgOp.getIteratorTypesArray(); - SmallVector safeToTile(numThreads.size(), true); - - for (unsigned i = 0, e = numThreads.size(); i != e; i++) { - if (auto attr = llvm::dyn_cast_if_present(numThreads[i])) { - if (cast(attr).getValue().getSExtValue() > 1) { - safeToTile[i] = iterators[i] == utils::IteratorType::parallel; - } - } else { - safeToTile[i] = iterators[i] == utils::IteratorType::parallel; - } - } - return safeToTile; -} - -/// Rewrite a TilingInterface `op` to a tiled `scf.forall`. The -/// tiling is specified by the number of tiles/threads `numThreads` and the -/// optional nominal tile size `nominalTileSizes`. If `nominalTilSizes` is -/// not specified, then it is derived from `numThreads` as `ceilDiv(dimSize[i], -/// numThreads[i])`. If non-empty, the `mapping` is added as an -/// attribute to the resulting `scf.forall`. A zero tile sizes indicate -/// that the dimension is not tiled, and can be thought of as tiling by the full -/// size of data. -/// It is the user's responsibility to ensure that `numThreads` is a valid -/// tiling specification (i.e. that only tiles parallel dimensions, e.g. in the -/// Linalg case). If the dimension is not parallelizable, a warning is issued to -/// notify the user that the generated code is not safe to parallelize. If -/// `omitTileOffsetBoundsCheck` is true, then the function will assume that -/// `tileSize[i] * (numThread[i] -1) <= dimSize[i]` holds. -static FailureOr tileToForallOpImpl( - RewriterBase &b, TilingInterface op, ArrayRef numThreads, - std::optional> nominalTileSizes, - std::optional mapping, bool omitTileOffsetBoundsCheck) { - Location loc = op->getLoc(); - OpBuilder::InsertionGuard g(b); - - SmallVector loopRanges = op.getIterationDomain(b); - if (loopRanges.empty()) - return op->emitOpError("expected non-empty loop ranges"); - auto hasStrideOne = [](Range r) { return !isConstantIntValue(r.stride, 1); }; - if (llvm::any_of(loopRanges, hasStrideOne)) - return op->emitOpError("only stride-1 supported atm"); - - // Gather destination tensors. - SmallVector dest; - if (failed(tensor::getOrCreateDestinations(b, loc, op, dest))) - return op->emitOpError("failed to get destination tensors"); - - SmallVector nonZeroNumThreads = - llvm::to_vector(llvm::make_filter_range(numThreads, [](OpFoldResult ofr) { - return !isConstantIntValue(ofr, 0); - })); - SmallVector materializedNonZeroNumThreads = - llvm::to_vector(llvm::map_range(nonZeroNumThreads, [&](OpFoldResult ofr) { - return getValueOrCreateConstantIndexOp(b, loc, ofr); - })); - - LinalgOp linalgOp = dyn_cast(op.getOperation()); - if (linalgOp) { - // Check if tiling is thread safe and print a warning if not. - SmallVector tilingSafety = - safeToTileToForall(b.getContext(), linalgOp, numThreads); - for (size_t i = 0; i < tilingSafety.size(); i++) - if (!tilingSafety[i]) - op.emitWarning() << "tiling is not thread safe at axis #" << i; - } - - // 1. Create the ForallOp. We don't use the lambda body-builder - // version because we require the use of RewriterBase in the body, so we - // manually move the insertion point to the body below. - scf::ForallOp forallOp = b.create( - loc, getAsOpFoldResult((materializedNonZeroNumThreads)), dest, mapping); - - // 2. Fill out the ForallOp body. 
- SmallVector tiledOffsets, tiledSizes; - calculateTileOffsetsAndSizes(b, loc, forallOp, numThreads, loopRanges, - omitTileOffsetBoundsCheck, nominalTileSizes, - tiledOffsets, tiledSizes); - - // 3. Clone the tileable op and update its destination operands to use the - // output bbArgs of the ForallOp. - ArrayRef destBbArgs = forallOp.getRegionIterArgs(); - Operation *tiledOp = nullptr; - SmallVector tiledValues; - { - // 3.a. RAII guard, inserting within forallOp, before terminator. - OpBuilder::InsertionGuard g(b); - b.setInsertionPoint(forallOp.getTerminator()); - Operation *clonedOp = b.clone(*op.getOperation()); - auto destinationStyleOp = dyn_cast(clonedOp); - if (destinationStyleOp) { - for (OpOperand &outOperand : destinationStyleOp.getDpsInitsMutable()) { - // Swap tensor inits with the corresponding block argument of the - // scf.forall op. Memref inits remain as is. - if (isa(outOperand.get().getType())) { - auto *it = llvm::find(dest, outOperand.get()); - assert(it != dest.end() && "could not find destination tensor"); - unsigned destNum = std::distance(dest.begin(), it); - outOperand.set(destBbArgs[destNum]); - } - } - } - - // 4. Tile the cloned op and delete the clone. - FailureOr tilingResult = - cast(clonedOp).getTiledImplementation(b, tiledOffsets, - tiledSizes); - if (failed(tilingResult)) - return clonedOp->emitError("Failed to tile op: "); - if (tilingResult->tiledOps.size() != 1) { - return clonedOp->emitError("expected a single produced tiled op, got ") - << tilingResult->tiledOps.size(); - } - - b.eraseOp(clonedOp); - tiledOp = tilingResult->tiledOps.front(); - tiledValues = tilingResult->tiledValues; - } - - // 5. Parallel insert back into the result tensor. - for (auto it : llvm::zip(llvm::seq(unsigned(0), unsigned(dest.size())), - tiledValues, destBbArgs)) { - // 5.a. Partial subset information is inserted just before the terminator. - OpBuilder::InsertionGuard g(b); - b.setInsertionPoint(forallOp.getTerminator()); - - SmallVector resultOffsets, resultSizes; - if (failed(op.getResultTilePosition(b, std::get<0>(it), tiledOffsets, - tiledSizes, resultOffsets, - resultSizes))) - return op->emitOpError("output offsets couldn't be calculated"); - SmallVector strides(resultSizes.size(), b.getIndexAttr(1)); - - // 5.b. Parallel insertions are inserted at the end of the combining - // terminator. 
- b.setInsertionPointToEnd(forallOp.getTerminator().getBody()); - b.create(loc, std::get<1>(it), - std::get<2>(it), resultOffsets, - resultSizes, strides); - } - return ForallTilingResult{forallOp, tiledOp}; -} - -FailureOr -linalg::tileToForallOp(RewriterBase &b, TilingInterface op, - ArrayRef numThreads, - std::optional mapping) { - return tileToForallOpImpl(b, op, numThreads, - /*nominalTileSizes=*/std::nullopt, mapping, - /*omitTileOffsetBoundsCheck=*/false); -} - -FailureOr -linalg::tileToForallOpUsingTileSizes(RewriterBase &b, TilingInterface op, - ArrayRef tileSizes, - std::optional mapping) { - SmallVector loopRanges = op.getIterationDomain(b); - unsigned nLoops = loopRanges.size(); - SmallVector numThreads; - numThreads.reserve(nLoops); - AffineExpr s0, s1; - bindSymbols(b.getContext(), s0, s1); - AffineExpr divExpr = s0.ceilDiv(s1); - for (const auto &it : llvm::zip(tileSizes, loopRanges)) { - OpFoldResult numTiles = std::get<0>(it); - if (!isConstantIntValue(numTiles, 0)) - numTiles = makeComposedFoldedAffineApply( - b, op.getLoc(), divExpr, {std::get<1>(it).size, std::get<0>(it)}); - numThreads.push_back(numTiles); - } - return tileToForallOpImpl(b, op, numThreads, - /*nominalTileSizes=*/tileSizes, mapping, - /*omitTileOffsetBoundsCheck=*/true); -} - template static FailureOr tileLinalgOpImpl(RewriterBase &b, LinalgOp op, ArrayRef tileSizes, diff --git a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp index a1392813d6de33..e404c01010a325 100644 --- a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp @@ -42,6 +42,16 @@ scf::SCFTilingOptions::setTileSizes(ArrayRef ts) { return *this; } +scf::SCFTilingOptions & +scf::SCFTilingOptions::setNumThreads(ArrayRef nt) { + assert(!numThreadsComputationFunction && "num tiles already set"); + auto numThreads = llvm::to_vector(nt); + numThreadsComputationFunction = [numThreads](OpBuilder &b, Operation *op) { + return numThreads; + }; + return *this; +} + /// Helper method to adjust the interchange vector to match the iteration /// domain. static SmallVector @@ -61,7 +71,120 @@ fillInterchangeVector(ArrayRef interchangeVector, // tileUsingSCF implementation. //===----------------------------------------------------------------------===// -// Check if `stride` evenly divides the trip count `size - offset`. +/// Verify the tile size options are set in a consistent manner. +static LogicalResult +verifyTileSizeOptions(RewriterBase &rewriter, Location loc, + const scf::SCFTilingOptions &options) { + // Specifying number of threads is only supported on `scf.forall` op. + if (options.numThreadsComputationFunction && + options.loopType != scf::SCFTilingOptions::LoopType::ForallOp) { + return rewriter.notifyMatchFailure( + loc, "number of threads can only by specified when loop type is " + "set to use `scf.forall`"); + } + + // If specified, check that the interchange vector is a permutation. + if (!options.interchangeVector.empty()) { + if (!isPermutationVector(options.interchangeVector)) { + return rewriter.notifyMatchFailure( + loc, "invalid interchange vector, not a permutation of the entire " + "iteration space"); + } + } + return success(); +} + +/// Method to instantiate the tile sizes and/or number of threads specified +/// by the user. 
+static std::tuple, SmallVector> +getUserTileSizesAndNumThreads(RewriterBase &rewriter, TilingInterface op, + ArrayRef iterationDomain, + const scf::SCFTilingOptions &options) { + OpFoldResult zero = rewriter.getIndexAttr(0); + SmallVector tileSizes, numThreads; + size_t numLoops = iterationDomain.size(); + + // Check whether the number of tiles to use is specified. + if (options.numThreadsComputationFunction) { + numThreads = options.numThreadsComputationFunction(rewriter, op); + numThreads.resize(numLoops, zero); + + // If the number of tiles is also specified, use that. + if (options.tileSizeComputationFunction) { + tileSizes = options.tileSizeComputationFunction(rewriter, op); + tileSizes.resize(numLoops, zero); + return {tileSizes, numThreads}; + } + + // Compute the tile sizes from the iteration domain and number + // of tiles as follows + // - niters = ceilDiv(ub - lb, step) + // - tileSize = ceilDiv(niters, numThreads) + AffineExpr s0, s1, s2; + bindSymbols(rewriter.getContext(), s0, s1, s2); + // TODO: The step here is assumed to be 1. + AffineExpr numItersExpr = (s1 - s0); + AffineExpr tileSizeExpr = numItersExpr.ceilDiv(s2); + tileSizes.resize(numLoops, zero); + for (auto [index, range, nt] : + llvm::enumerate(iterationDomain, numThreads)) { + if (isConstantIntValue(nt, 0)) + continue; + + tileSizes[index] = affine::makeComposedFoldedAffineApply( + rewriter, op.getLoc(), tileSizeExpr, {range.offset, range.size, nt}); + } + tileSizes.resize(numLoops, zero); + return {tileSizes, numThreads}; + } + + // Enforce the convention that "tiling by zero" + // skips tiling a particular dimension. This convention is significantly + // simpler to handle instead of adjusting affine maps to account for missing + // dimensions. + assert(options.tileSizeComputationFunction && + "expected tile sizes to be specified"); + tileSizes = options.tileSizeComputationFunction(rewriter, op); + tileSizes.resize(numLoops, zero); + + return {tileSizes, numThreads}; +} + +/// Checks if any of the tiled loops are not parallel. +static void checkSafeToTileToForall(TilingInterface op, + ArrayRef tileSizes, + ArrayRef numThreads) { + auto iterators = op.getLoopIteratorTypes(); + assert(iterators.size() == tileSizes.size() && + "expected as many tile size values as number of loops"); + assert((numThreads.empty() || (numThreads.size() == iterators.size())) && + "when specified, expected number of threads to use for each loop"); + + for (auto [index, iterator, tileSize] : + llvm::enumerate(iterators, tileSizes)) { + // If num threads is specified, check that it is greater than one only for + // parallel dimensions. + if (!numThreads.empty()) { + if (std::optional constNumThreads = + getConstantIntValue(numThreads[index])) { + if (constNumThreads.value() > 1 && + iterator != utils::IteratorType::parallel) { + op.emitWarning() << "tiling is not thread safe at axis #" << index; + } + } + continue; + } + + if (std::optional constTileSize = getConstantIntValue(tileSize)) { + if (constTileSize.value() > 0 && + iterator != utils::IteratorType::parallel) { + op.emitWarning() << "tiling is not thread safe at axis #" << index; + } + } + } +} + +/// Check if `stride` evenly divides the trip count `size - offset`. 
static bool tileDividesIterationDomain(Range loopRange) { std::optional offsetAsInt = getConstantIntValue(loopRange.offset); if (!offsetAsInt) @@ -75,10 +198,10 @@ static bool tileDividesIterationDomain(Range loopRange) { return ((sizeAsInt.value() - offsetAsInt.value()) % strideAsInt.value() == 0); } -/// Returns the bounded tile size given the current `iv`, `loopRange` and -/// `tileSize`, i.e., `min(tileSize, range.end() - iv)`. +/// Returns the bounded tile size given the current `offset`, `loopRange` and +/// `tileSize`, i.e., `min(tileSize, range.end() - offset)`. static OpFoldResult getBoundedTileSize(OpBuilder &b, Location loc, - Range loopRange, Value iv, + Range loopRange, OpFoldResult offset, OpFoldResult tileSize) { std::optional ts = getConstantIntValue(tileSize); if (ts && ts.value() == 1) @@ -94,10 +217,132 @@ static OpFoldResult getBoundedTileSize(OpBuilder &b, Location loc, AffineExpr s0, s1, d0; bindDims(b.getContext(), d0); bindSymbols(b.getContext(), s0, s1); - AffineMap minMap = AffineMap::get(1, 2, {s0, s1 - d0}, b.getContext()); + AffineMap minMap = AffineMap::get(1, 2, {s0 - d0, s1}, b.getContext()); Value size = getValueOrCreateConstantIndexOp(b, loc, loopRange.size); return affine::makeComposedFoldedAffineMin( - b, loc, minMap, SmallVector{iv, tileSize, size}); + b, loc, minMap, SmallVector{offset, size, tileSize}); +} + +/// Returns true if the maximum tile offset `tileSize * numThreads-1` is less +/// than `iterationSize`. +static bool canOmitTileOffsetInBoundsCheck(OpFoldResult tileSize, + OpFoldResult numThreads, + OpFoldResult iterationSize) { + std::optional tileSizeConst = getConstantIntValue(tileSize); + std::optional numThreadsConst = getConstantIntValue(numThreads); + std::optional iterSizeConst = getConstantIntValue(iterationSize); + if (!tileSizeConst || !numThreadsConst || !iterSizeConst) + return false; + return *tileSizeConst * (*numThreadsConst - 1) < *iterSizeConst; +} + +/// Compute the `OpFoldResult`s that represents the multi-dimensional +/// `offset`s and `size`s of the tile of the iteration space that the +/// innermost loop body of the generated tiled loops corresponds to. +static std::tuple, SmallVector> +getTileOffsetAndSizes(RewriterBase &rewriter, Location loc, ValueRange ivs, + ArrayRef iterationDomain, + ArrayRef tileSizes, + ArrayRef numThreads) { + SmallVector offsets, sizes; + int materializedLoopNum = 0; + + if (!numThreads.empty()) { + AffineExpr d0, d1, s0, s1; + AffineExpr offsetExpr, residualTileSizeExpr; + bindDims(rewriter.getContext(), d0, d1); + bindSymbols(rewriter.getContext(), s0, s1); + offsetExpr = d0 + d1 * s0; + residualTileSizeExpr = s1 - (d0 + d1 * s0); + + for (auto [nt, tileSize, loopRange] : + llvm::zip_equal(numThreads, tileSizes, iterationDomain)) { + + // Non-tiled cases, set the offset and size to the + // `loopRange.offset/size`. 
+ if (isConstantIntValue(nt, 0)) { + offsets.push_back(loopRange.offset); + sizes.push_back(loopRange.size); + continue; + } + + Value iv = ivs[materializedLoopNum++]; + OpFoldResult offset = affine::makeComposedFoldedAffineApply( + rewriter, loc, offsetExpr, + ArrayRef{loopRange.offset, iv, tileSize}); + OpFoldResult residualTileSize = affine::makeComposedFoldedAffineApply( + rewriter, loc, residualTileSizeExpr, + {loopRange.offset, nt, tileSize, loopRange.size}); + + OpFoldResult size = tileSize; + if (!isConstantIntValue(residualTileSize, 0)) { + OpFoldResult sizeMinusOffsetPerThread = + affine::makeComposedFoldedAffineApply(rewriter, loc, s0 - d0, + {offset, loopRange.size}); + size = affine::makeComposedFoldedAffineMin( + rewriter, loc, + AffineMap::getMultiDimIdentityMap(2, rewriter.getContext()), + {sizeMinusOffsetPerThread, tileSize}); + } + + // Consider the case where the original loop was `[0, 100)`. + // If number of threads are `7`, the tile size would be computed as + // `ceilDiv(100, 7) = 15`. For the last thread (thread_id = 6) + // - `offset = 0 + 6 * 15 = 105` + // - `tileSize = min(15, 100 - 105) = -5` + // To avoid negative tile sizes, we need to do a further + // `nonNegativeTileSize = affine.max(0, tileSize)`. + // This `max` can be avoided if + // `offset + tileSize * (numThreads - 1) < (ub - lb)` + if (!canOmitTileOffsetInBoundsCheck(tileSize, nt, loopRange.size)) { + AffineMap maxMap = + AffineMap::getMultiDimIdentityMap(2, rewriter.getContext()); + size = affine::makeComposedFoldedAffineMax( + rewriter, loc, maxMap, {rewriter.getIndexAttr(0), size}); + } + + offsets.push_back(offset); + sizes.push_back(size); + } + return {offsets, sizes}; + } else { + for (auto [tileSize, loopRange] : + llvm::zip_equal(tileSizes, iterationDomain)) { + + // Non-tiled cases, set the offset and size to the + // `loopRange.offset/size`. + if (isConstantIntValue(tileSize, 0)) { + offsets.push_back(loopRange.offset); + sizes.push_back(loopRange.size); + continue; + } + + Value iv = ivs[materializedLoopNum++]; + OpFoldResult offset = getAsOpFoldResult(iv); + offsets.push_back(offset); + OpFoldResult size = + getBoundedTileSize(rewriter, loc, loopRange, offset, tileSize); + sizes.push_back(size); + } + return {offsets, sizes}; + } +} + +/// Function to return the bounds of the loops to be generated. +static std::tuple, SmallVector, + SmallVector> +getLoopBounds(RewriterBase &rewriter, Location loc, ArrayRef loopRanges, + ArrayRef tileSizes) { + SmallVector lbs, ubs, steps; + for (auto [loopRange, tileSize] : llvm::zip_equal(loopRanges, tileSizes)) { + // No loop if the tile size is 0. + if (isConstantIntValue(tileSize, 0)) + continue; + lbs.push_back(loopRange.offset); + ubs.push_back(loopRange.size); + steps.push_back(tileSize); + } + return {lbs, ubs, steps}; } /// A function that allows returning additional yielded values during @@ -152,17 +397,19 @@ static LogicalResult generateLoopNestUsingForOp( assert(loopRanges.size() == tileSizes.size() && "expected as many tile sizes as loop ranges"); OpBuilder::InsertionGuard guard(rewriter); - SmallVector ivs; - for (auto [loopRange, tileSize] : llvm::zip_equal(loopRanges, tileSizes)) { - // No loops if tile size is zero. Set offset and size to the loop - // offset and size. 
- if (isConstantIntValue(tileSize, 0)) - continue; + SmallVector lbs, ubs, steps; + std::tie(lbs, ubs, steps) = + getLoopBounds(rewriter, loc, loopRanges, tileSizes); + SmallVector lbVals = + getValueOrCreateConstantIndexOp(rewriter, loc, lbs); + SmallVector ubVals = + getValueOrCreateConstantIndexOp(rewriter, loc, ubs); + SmallVector stepVals = + getValueOrCreateConstantIndexOp(rewriter, loc, steps); - Value lb = getValueOrCreateConstantIndexOp(rewriter, loc, loopRange.offset); - Value ub = getValueOrCreateConstantIndexOp(rewriter, loc, loopRange.size); - Value step = getValueOrCreateConstantIndexOp(rewriter, loc, tileSize); + SmallVector ivs; + for (auto [lb, ub, step] : llvm::zip_equal(lbVals, ubVals, stepVals)) { auto loop = rewriter.create(loc, lb, ub, step, destinationTensors, [](OpBuilder &bodyBuilder, Location bodyLoc, @@ -224,10 +471,9 @@ static LogicalResult generateLoopNestUsingForOp( /// populated. static LogicalResult generateLoopNestUsingForallOp( RewriterBase &rewriter, Location loc, ArrayRef loopRanges, - ArrayRef tileSizes, ArrayRef mappingVector, - ValueRange destinationTensors, YieldTiledValuesFn tiledBodyFn, - SmallVector &loops) { - SmallVector lbs, ubs, steps; + ArrayRef tileSizes, ArrayRef numThreads, + ArrayRef mappingVector, ValueRange destinationTensors, + YieldTiledValuesFn tiledBodyFn, SmallVector &loops) { assert(!loopRanges.empty() && "unexpected empty loop ranges"); assert(loopRanges.size() == tileSizes.size() && "expected as many tile sizes as loop ranges"); @@ -235,21 +481,30 @@ static LogicalResult generateLoopNestUsingForallOp( SmallVector offsets(loopRanges.size()), sizes(loopRanges.size()); - for (auto [tileSize, loopRange] : llvm::zip_equal(tileSizes, loopRanges)) { - if (isConstantIntValue(tileSize, 0)) - continue; - lbs.push_back(loopRange.offset); - ubs.push_back(loopRange.size); - steps.push_back(tileSize); - } - assert(!lbs.empty() && "Expected at least one loop range"); - std::optional mappingAttr; if (!mappingVector.empty()) mappingAttr = rewriter.getArrayAttr(mappingVector); - auto forallOp = rewriter.create( - loc, lbs, ubs, steps, destinationTensors, mappingAttr); + scf::ForallOp forallOp; + bool useNumThreads = !numThreads.empty(); + + if (useNumThreads) { + // Prune the zero numthreads. + SmallVector nonZeroNumThreads; + for (auto nt : numThreads) { + if (isConstantIntValue(nt, 0)) + continue; + nonZeroNumThreads.push_back(nt); + } + forallOp = rewriter.create(loc, nonZeroNumThreads, + destinationTensors, mappingAttr); + } else { + SmallVector lbs, ubs, steps; + std::tie(lbs, ubs, steps) = + getLoopBounds(rewriter, loc, loopRanges, tileSizes); + forallOp = rewriter.create(loc, lbs, ubs, steps, + destinationTensors, mappingAttr); + } loops.push_back(forallOp); rewriter.setInsertionPoint(forallOp.getTerminator()); @@ -286,13 +541,11 @@ static LogicalResult generateLoopNestUsingForallOp( /// loop. /// - `loops` is an in-out parameter into which the generated loops are /// populated. 
-static LogicalResult generateLoopNest(RewriterBase &rewriter, Location loc, - const scf::SCFTilingOptions &options, - ArrayRef loopRanges, - ArrayRef tileSizes, - ValueRange destinationTensors, - YieldTiledValuesFn tiledBodyFn, - SmallVector &loops) { +static LogicalResult generateLoopNest( + RewriterBase &rewriter, Location loc, const scf::SCFTilingOptions &options, + ArrayRef loopRanges, ArrayRef tileSizes, + ArrayRef numThreads, ValueRange destinationTensors, + YieldTiledValuesFn tiledBodyFn, SmallVector &loops) { // If the tile sizes are all zero, no loops are generated. Just call the // callback function to handle untiled case. if (llvm::all_of(tileSizes, isZeroIndex)) { @@ -307,7 +560,7 @@ static LogicalResult generateLoopNest(RewriterBase &rewriter, Location loc, } if (options.loopType == scf::SCFTilingOptions::LoopType::ForallOp) { return generateLoopNestUsingForallOp( - rewriter, loc, loopRanges, tileSizes, options.mappingVector, + rewriter, loc, loopRanges, tileSizes, numThreads, options.mappingVector, destinationTensors, tiledBodyFn, loops); } return rewriter.notifyMatchFailure(loc, "unhandled loop type"); @@ -531,27 +784,25 @@ static LogicalResult addInitOperandsToLoopNest( FailureOr mlir::scf::tileUsingSCF(RewriterBase &rewriter, TilingInterface op, const scf::SCFTilingOptions &options) { + if (failed(verifyTileSizeOptions(rewriter, op.getLoc(), options))) { + return failure(); + } + OpBuilder::InsertionGuard guard(rewriter); rewriter.setInsertionPointAfter(op); - if (!options.tileSizeComputationFunction) { - return rewriter.notifyMatchFailure( - op, "missing tile size computation function"); - } - // 1. Get the range of the loops that are represented by the operation. SmallVector iterationDomain = op.getIterationDomain(rewriter); - size_t numLoops = iterationDomain.size(); - // 2. Materialize the tile sizes. Enforce the convention that "tiling by zero" - // skips tiling a particular dimension. This convention is significantly - // simpler to handle instead of adjusting affine maps to account for missing - // dimensions. - SmallVector tileSizes = - options.tileSizeComputationFunction(rewriter, op); - if (tileSizes.size() < iterationDomain.size()) { - auto zero = rewriter.getIndexAttr(0); - tileSizes.append(numLoops - tileSizes.size(), zero); + // 2. Materialize the tile sizes and/or number of threads; + SmallVector tileSizes, numThreads; + std::tie(tileSizes, numThreads) = + getUserTileSizesAndNumThreads(rewriter, op, iterationDomain, options); + + // Check if it is safe to tile. This is hold over from previous iterations + // of tile to for-all. Consider dropping it. + if (options.loopType == scf::SCFTilingOptions::LoopType::ForallOp) { + checkSafeToTileToForall(op, tileSizes, numThreads); } // 3. 
If there is an interchange specified, permute the iteration domain and @@ -560,16 +811,13 @@ mlir::scf::tileUsingSCF(RewriterBase &rewriter, TilingInterface op, if (!options.interchangeVector.empty()) { interchangeVector = fillInterchangeVector(options.interchangeVector, iterationDomain.size()); - } - if (!interchangeVector.empty()) { - if (!isPermutationVector(interchangeVector)) { - return rewriter.notifyMatchFailure( - op, "invalid intechange vector, not a permutation of the entire " - "iteration space"); - } + assert(isPermutationVector(interchangeVector) && + "expected interchange vector to be a permutation"); applyPermutationToVector(iterationDomain, interchangeVector); applyPermutationToVector(tileSizes, interchangeVector); + if (!numThreads.empty()) + applyPermutationToVector(numThreads, interchangeVector); } FailureOr tilingResult; @@ -583,21 +831,8 @@ mlir::scf::tileUsingSCF(RewriterBase &rewriter, TilingInterface op, -> LogicalResult { // 4a. Compute the `offsets` and `sizes` to use for tiling. SmallVector offsets, sizes; - { - int materializedLoopNum = 0; - for (auto [tileSize, loopRange] : - llvm::zip_equal(tileSizes, iterationDomain)) { - if (isConstantIntValue(tileSize, 0)) { - offsets.push_back(loopRange.offset); - sizes.push_back(loopRange.size); - continue; - } - Value iv = ivs[materializedLoopNum++]; - offsets.push_back(iv); - sizes.push_back( - getBoundedTileSize(rewriter, loc, loopRange, iv, tileSize)); - } - } + std::tie(offsets, sizes) = getTileOffsetAndSizes( + rewriter, loc, ivs, iterationDomain, tileSizes, numThreads); // 4b. If interchange was provided, apply inverse of the interchange // to get back the offsets/sizes in the order to be specified. @@ -665,7 +900,7 @@ mlir::scf::tileUsingSCF(RewriterBase &rewriter, TilingInterface op, // 7. Generate the tiled loops nest using the callback defined above. SmallVector loops; if (failed(generateLoopNest(rewriter, op.getLoc(), options, iterationDomain, - tileSizes, destinationTensors, + tileSizes, numThreads, destinationTensors, innerYieldTiledValuesFn, loops))) return op.emitOpError("failed to generate tiling loops"); assert(succeeded(tilingResult) && @@ -781,6 +1016,7 @@ mlir::scf::tileReductionUsingScf(RewriterBase &b, scf::SCFTilingOptions options; options.setLoopType(scf::SCFTilingOptions::LoopType::ForOp); if (failed(generateLoopNest(b, loc, options, iterationDomain, tileSizesVector, + /*numThreads=*/ArrayRef{}, initTensors, innerYieldTiledValuesFn, loops))) return b.notifyMatchFailure(op, "failed to tile for parallel reduction"); diff --git a/mlir/lib/Dialect/SCF/Utils/Utils.cpp b/mlir/lib/Dialect/SCF/Utils/Utils.cpp index c0ee9d2afe91c1..9df6e24de178f8 100644 --- a/mlir/lib/Dialect/SCF/Utils/Utils.cpp +++ b/mlir/lib/Dialect/SCF/Utils/Utils.cpp @@ -294,8 +294,8 @@ static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend, } /// Returns the trip count of `forOp` if its' low bound, high bound and step are -/// constants, or optional otherwise. Trip count is computed as ceilDiv(highBound -/// - lowBound, step). +/// constants, or optional otherwise. Trip count is computed as +/// ceilDiv(highBound - lowBound, step). 
static std::optional getConstantTripCount(scf::ForOp forOp) { std::optional lbCstOp = getConstantIntValue(forOp.getLowerBound()); std::optional ubCstOp = getConstantIntValue(forOp.getUpperBound()); @@ -1363,3 +1363,37 @@ scf::ForOp mlir::fuseIndependentSiblingForLoops(scf::ForOp target, return fusedLoop; } + +FailureOr mlir::normalizeForallOp(RewriterBase &rewriter, + scf::ForallOp forallOp) { + SmallVector lbs = forallOp.getMixedLowerBound(); + SmallVector ubs = forallOp.getMixedUpperBound(); + SmallVector steps = forallOp.getMixedStep(); + + if (llvm::all_of( + lbs, [](OpFoldResult ofr) { return isConstantIntValue(ofr, 0); }) && + llvm::all_of( + steps, [](OpFoldResult ofr) { return isConstantIntValue(ofr, 1); })) { + return forallOp; + } + + SmallVector newLbs, newUbs, newSteps; + for (auto [lb, ub, step] : llvm::zip_equal(lbs, ubs, steps)) { + Range normalizedLoopParams = + emitNormalizedLoopBounds(rewriter, forallOp.getLoc(), lb, ub, step); + newLbs.push_back(normalizedLoopParams.offset); + newUbs.push_back(normalizedLoopParams.size); + newSteps.push_back(normalizedLoopParams.stride); + } + + auto normalizedForallOp = rewriter.create( + forallOp.getLoc(), newLbs, newUbs, newSteps, forallOp.getOutputs(), + forallOp.getMapping(), [](OpBuilder &, Location, ValueRange) {}); + + rewriter.inlineRegionBefore(forallOp.getBodyRegion(), + normalizedForallOp.getBodyRegion(), + normalizedForallOp.getBodyRegion().begin()); + + rewriter.replaceAllOpUsesWith(forallOp, normalizedForallOp); + return success(); +} diff --git a/mlir/test/Dialect/Linalg/tile-tensors.mlir b/mlir/test/Dialect/Linalg/tile-tensors.mlir index 89183813c080ba..8f13c690704572 100644 --- a/mlir/test/Dialect/Linalg/tile-tensors.mlir +++ b/mlir/test/Dialect/Linalg/tile-tensors.mlir @@ -119,7 +119,7 @@ module attributes {transform.with_named_sequence} { // ----- -// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)> +// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> // CHECK: fold_extract_slice // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor diff --git a/mlir/test/Dialect/Linalg/tile-to-forall.mlir b/mlir/test/Dialect/Linalg/tile-to-forall.mlir index 8545dfd25eccf8..778d5bb8b9c845 100644 --- a/mlir/test/Dialect/Linalg/tile-to-forall.mlir +++ b/mlir/test/Dialect/Linalg/tile-to-forall.mlir @@ -177,7 +177,6 @@ module attributes {transform.with_named_sequence} { } } - // ----- // CHECK-DAG: #[[$map0:.+]] = affine_map<()[s0] -> (s0 ceildiv 10)> @@ -194,13 +193,13 @@ module attributes {transform.with_named_sequence} { func.func @matmul_tile_size_dynamic(%A: tensor, %B: tensor, %C: tensor) -> tensor { // CHECK: %[[M:.+]] = tensor.dim %[[A]], %c0 : // CHECK: %[[N:.+]] = tensor.dim %[[B]], %c1 : - // CHECK: %[[NT0:.+]] = affine.apply #map()[%[[M]]] - // CHECK: %[[NT1:.+]] = affine.apply #map1()[%[[N]]] + // CHECK: %[[NT0:.+]] = affine.apply #[[$map0]]()[%[[M]]] + // CHECK: %[[NT1:.+]] = affine.apply #[[$map1]]()[%[[N]]] // CHECK: scf.forall (%[[IV0:.+]], %[[IV1:.+]]) in (%[[NT0]], %[[NT1]]) shared_outs(%[[C_BLK:.*]] = %[[C]]) - // CHECK: %[[TS0:.+]] = affine.min #[[$map2]](%[[IV0]])[%[[M]]] - // CHECK: %[[TS1:.+]] = affine.min #[[$map4]](%[[IV1]])[%[[N]]] - // CHECK: %[[LB0:.+]] = affine.apply #[[$map5]](%[[IV0]]) - // CHECK: %[[LB1:.+]] = affine.apply #[[$map6]](%[[IV1]]) + // CHECK-DAG: %[[TS0:.+]] = affine.min #[[$map2]](%[[IV0]])[%[[M]]] + // CHECK-DAG: %[[TS1:.+]] = affine.min #[[$map4]](%[[IV1]])[%[[N]]] + // CHECK-DAG: %[[LB0:.+]] = affine.apply #[[$map5]](%[[IV0]]) + // CHECK-DAG: %[[LB1:.+]] = 
affine.apply #[[$map6]](%[[IV1]]) // CHECK: tensor.extract_slice %[[A]] // CHECK: tensor.extract_slice %[[B]] // CHECK: tensor.extract_slice %[[C_BLK]] @@ -220,7 +219,6 @@ module attributes {transform.with_named_sequence} { transform.yield } } - // ----- // Tests that dimension 0 can eliminate affine.min/max, dimension 1 cannot. @@ -235,11 +233,11 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: %[[C:[0-9a-z]+]]: tensor func.func @matmul_tile_size_static(%A: tensor<100x200xf32>, %B: tensor<200x300xf32>, %C: tensor<100x300xf32>) -> tensor<100x300xf32> { // CHECK: scf.forall (%[[IV0:.+]], %[[IV1:.+]]) in (10, 15) shared_outs(%[[C_BLK:.*]] = %[[C]]) - // CHECK: %[[TS:.+]] = affine.min #[[$map0]](%[[IV1]]) + // CHECK-DAG: %[[TS:.+]] = affine.min #[[$map0]](%[[IV1]]) + // CHECK-DAG: %[[LB0:.+]] = affine.apply #[[$map2]](%[[IV0]]) + // CHECK-DAG: %[[LB1:.+]] = affine.apply #[[$map3]](%[[IV1]]) // CHECK-NOT: affine.max // CHECK-NOT: affine.min - // CHECK: %[[LB0:.+]] = affine.apply #[[$map2]](%[[IV0]]) - // CHECK: %[[LB1:.+]] = affine.apply #[[$map3]](%[[IV1]]) // CHECK: %[[tA:.+]] = tensor.extract_slice %[[A]][%[[LB0]], 0] [10, 200] [1, 1] : // CHECK: %[[tB:.+]] = tensor.extract_slice %[[B]][0, %[[LB1]]] [200, %[[TS]]] [1, 1] : // CHECK: %[[tC:.+]] = tensor.extract_slice %[[C_BLK]][%[[LB0]], %[[LB1]]] [10, %[[TS]]] [1, 1] : @@ -342,7 +340,6 @@ module attributes {transform.with_named_sequence} { // ----- // CHECK-DAG: #[[$map0:.+]] = affine_map<(d0) -> (d0 * -15 + 100, 15)> -// CHECK-DAG: #[[$map1:.+]] = affine_map<(d0) -> (0, d0)> // CHECK-DAG: #[[$map2:.+]] = affine_map<(d0) -> (d0 * 15)> // CHECK-DAG: #[[$map3:.+]] = affine_map<(d0) -> (d0)> @@ -355,8 +352,7 @@ module attributes {transform.with_named_sequence} { %OUT1: tensor<100xf32>, %OUT2: tensor<100xf32>) -> (tensor<100xf32>, tensor<100xf32>) { // CHECK: scf.forall (%[[IV0:.+]]) in (7) shared_outs(%[[OUT1:[0-9a-z]+]] = %[[ORGOUT1]], %[[OUT2:[0-9a-z]+]] = %[[ORGOUT2]]) -// CHECK: %[[TSMIN:.+]] = affine.min #[[$map0]](%[[IV0]]) -// CHECK: %[[TS:.+]] = affine.max #[[$map1]](%[[TSMIN]]) +// CHECK: %[[TS:.+]] = affine.min #[[$map0]](%[[IV0]]) // CHECK-NOT: affine.min // CHECK-NOT: affine.max // CHECK: %[[LB:.+]] = affine.apply #[[$map2]](%[[IV0]]) @@ -467,16 +463,16 @@ module attributes {transform.with_named_sequence} { func.func @matmul_tile_size_dynamic(%A: tensor, %B: tensor, %C: tensor) -> tensor { // CHECK: %[[c1:.*]] = arith.constant 1 : index // CHECK: %[[c0:.*]] = arith.constant 0 : index - // CHECK: %[[M:.+]] = tensor.dim %[[A]], %[[c0]] : - // CHECK: %[[N:.+]] = tensor.dim %[[B]], %[[c1]] : - // CHECK: %[[NT0:.+]] = affine.apply #map()[%[[M]]] - // CHECK: %[[NT1:.+]] = affine.apply #map1()[%[[N]]] - // CHECK: %[[K:.+]] = tensor.dim %[[A]], %[[c1]] : + // CHECK-DAG: %[[M:.+]] = tensor.dim %[[A]], %[[c0]] : + // CHECK-DAG: %[[N:.+]] = tensor.dim %[[B]], %[[c1]] : + // CHECK-DAG: %[[NT0:.+]] = affine.apply #map()[%[[M]]] + // CHECK-DAG: %[[NT1:.+]] = affine.apply #map1()[%[[N]]] + // CHECK-DAG: %[[K:.+]] = tensor.dim %[[A]], %[[c1]] : // CHECK: scf.forall (%[[IV0:.+]], %[[IV1:.+]]) in (%[[NT0]], %[[NT1]]) shared_outs(%[[C_BLK:.*]] = %[[C]]) - // CHECK: %[[TS0:.+]] = affine.min #[[$map2]](%[[IV0]])[%[[M]]] - // CHECK: %[[TS1:.+]] = affine.min #[[$map3]](%[[IV1]])[%[[N]]] - // CHECK: %[[LB0:.+]] = affine.apply #[[$map4]](%[[IV0]]) - // CHECK: %[[LB1:.+]] = affine.apply #[[$map5]](%[[IV1]]) + // CHECK-DAG: %[[TS0:.+]] = affine.min #[[$map2]](%[[IV0]])[%[[M]]] + // CHECK-DAG: %[[TS1:.+]] = affine.min 
#[[$map3]](%[[IV1]])[%[[N]]] + // CHECK-DAG: %[[LB0:.+]] = affine.apply #[[$map4]](%[[IV0]]) + // CHECK-DAG: %[[LB1:.+]] = affine.apply #[[$map5]](%[[IV1]]) // CHECK: tensor.extract_slice %[[A]][%[[LB0]], 0] [%[[TS0]], %[[K]]] [1, 1] : // CHECK: tensor.extract_slice %[[B]][0, %[[LB1]]] [%[[K]], %[[TS1]]] [1, 1] : // CHECK: tensor.extract_slice %[[C_BLK]][%[[LB0]], %[[LB1]]] [%[[TS0]], %[[TS1]]] [1, 1] : @@ -535,16 +531,16 @@ module attributes {transform.with_named_sequence} { func.func @matmul_tile_size_dynamic(%A: tensor, %B: tensor, %C: tensor) -> tensor { // CHECK: %[[c1:.*]] = arith.constant 1 : index // CHECK: %[[c0:.*]] = arith.constant 0 : index - // CHECK: %[[M:.+]] = tensor.dim %[[A]], %[[c0]] : - // CHECK: %[[N:.+]] = tensor.dim %[[B]], %[[c1]] : - // CHECK: %[[NT0:.+]] = affine.apply #map()[%[[M]]] - // CHECK: %[[NT1:.+]] = affine.apply #map1()[%[[N]]] - // CHECK: %[[K:.+]] = tensor.dim %[[A]], %[[c1]] : + // CHECK-DAG: %[[M:.+]] = tensor.dim %[[A]], %[[c0]] : + // CHECK-DAG: %[[N:.+]] = tensor.dim %[[B]], %[[c1]] : + // CHECK-DAG: %[[NT0:.+]] = affine.apply #map()[%[[M]]] + // CHECK-DAG: %[[NT1:.+]] = affine.apply #map1()[%[[N]]] + // CHECK-DAG: %[[K:.+]] = tensor.dim %[[A]], %[[c1]] : // CHECK: scf.forall (%[[IV0:.+]], %[[IV1:.+]]) in (%[[NT0]], %[[NT1]]) shared_outs(%[[C_BLK:.*]] = %[[C]]) - // CHECK: %[[TS0:.+]] = affine.min #[[$map2]](%[[IV0]])[%[[M]]] - // CHECK: %[[TS1:.+]] = affine.min #[[$map3]](%[[IV1]])[%[[N]]] - // CHECK: %[[LB0:.+]] = affine.apply #[[$map4]](%[[IV0]]) - // CHECK: %[[LB1:.+]] = affine.apply #[[$map5]](%[[IV1]]) + // CHECK-DAG: %[[TS0:.+]] = affine.min #[[$map2]](%[[IV0]])[%[[M]]] + // CHECK-DAG: %[[TS1:.+]] = affine.min #[[$map3]](%[[IV1]])[%[[N]]] + // CHECK-DAG: %[[LB0:.+]] = affine.apply #[[$map4]](%[[IV0]]) + // CHECK-DAG: %[[LB1:.+]] = affine.apply #[[$map5]](%[[IV1]]) // CHECK: tensor.extract_slice %[[A]][%[[LB0]], 0] [%[[TS0]], %[[K]]] [1, 1] : // CHECK: tensor.extract_slice %[[B]][0, %[[LB1]]] [%[[K]], %[[TS1]]] [1, 1] : // CHECK: tensor.extract_slice %[[C_BLK]][%[[LB0]], %[[LB1]]] [%[[TS0]], %[[TS1]]] [1, 1] : diff --git a/mlir/test/Dialect/Linalg/transform-op-tile.mlir b/mlir/test/Dialect/Linalg/transform-op-tile.mlir index 955ea6b0ebbbd7..7bac850d0b7fe9 100644 --- a/mlir/test/Dialect/Linalg/transform-op-tile.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-tile.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --transform-interpreter --mlir-print-local-scope --split-input-file --verify-diagnostics %s | FileCheck %s +// RUN: mlir-opt --transform-interpreter --mlir-print-local-scope --split-input-file --verify-diagnostics --cse %s | FileCheck %s module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { @@ -178,14 +178,13 @@ module { // CHECK-LABEL: func.func @scalable_tile( // CHECK-SAME: %[[ARG_0:.*]]: tensor, %[[ARG_1:.*]]: tensor, %[[ARG_2:.*]]: tensor, -// CHECK: %[[C4:.*]] = arith.constant 0 : index -// CHECK: %[[DIM:.*]] = tensor.dim %[[ARG_0]], %[[C4]] : tensor +// CHECK: %[[C0:.*]] = arith.constant 0 : index +// CHECK: %[[DIM:.*]] = tensor.dim %[[ARG_0]], %[[C0]] : tensor // CHECK: %[[VEC_SIZE:.*]] = arith.constant 4 : index // CHECK: %[[VS:.*]] = vector.vscale // CHECK: %[[STEP:.*]] = arith.muli %[[VEC_SIZE]], %[[VS]] : index -// CHECK: %[[C0:.*]] = arith.constant 0 : index // CHECK: scf.for %[[IV:.*]] = %[[C0]] to %[[DIM]] step %[[STEP]] iter_args(%[[VAL:.*]] = %[[ARG_2]]) -> (tensor) { -// CHECK: %[[SIZE:.*]] = affine.min affine_map<(d0)[s0, s1] -> (s0, -d0 
+ s1)>(%[[IV]])[%[[STEP]], %[[DIM]]] +// CHECK: %[[SIZE:.*]] = affine.min affine_map<(d0)[s0, s1] -> (-d0 + s0, s1)>(%[[IV]])[%[[DIM]], %[[STEP]]] // CHECK: %[[SLICE_ARG0:.*]] = tensor.extract_slice %[[ARG_0]][%[[IV]]] [%[[SIZE]]] [1] : tensor to tensor // CHECK: %[[SLICE_ARG1:.*]] = tensor.extract_slice %[[ARG_1]][%[[IV]]] [%[[SIZE]]] [1] : tensor to tensor // CHECK: %[[SLICE_ARG2:.*]] = tensor.extract_slice %[[VAL]][%[[IV]]] [%[[SIZE]]] [1] : tensor to tensor @@ -202,20 +201,14 @@ module { // ----- // CHECK-LABEL: func.func @scalable_and_fixed_length_tile -// CHECK: %[[C4:.*]] = arith.constant 4 : index -// CHECK: %[[VS:.*]] = vector.vscale -// CHECK: %[[STEP_2:.*]] = arith.muli %[[C4]], %[[VS]] : index -// CHECK: %[[C0:.*]] = arith.constant 0 : index -// CHECK: %[[C128:.*]] = arith.constant 128 : index -// CHECK: %[[STEP_0:.*]] = arith.constant 4 : index -// CHECK: scf.for %[[VAL_11:.*]] = %[[C0]] to %[[C128]] step %[[STEP_0]] -// CHECK: %[[C0_1:.*]] = arith.constant 0 : index -// CHECK: %[[C128_1:.*]] = arith.constant 128 : index -// CHECK: %[[STEP_1:.*]] = arith.constant 4 : index -// CHECK: scf.for %[[VAL_16:.*]] = %[[C0_1]] to %[[C128_1]] step %[[STEP_1]] -// CHECK: %[[C0_2:.*]] = arith.constant 0 : index -// CHECK: %[[C128_2:.*]] = arith.constant 128 : index -// CHECK: scf.for %{{.*}} = %[[C0_2]] to %[[C128_2]] step %[[STEP_2]] +// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[VS:.*]] = vector.vscale +// CHECK-DAG: %[[STEP_2:.*]] = arith.muli %[[C4]], %[[VS]] : index +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index +// CHECK: scf.for %[[VAL_11:.*]] = %[[C0]] to %[[C128]] step %[[C4]] +// CHECK: scf.for %[[VAL_16:.*]] = %[[C0]] to %[[C128]] step %[[C4]] +// CHECK: scf.for %{{.*}} = %[[C0]] to %[[C128]] step %[[STEP_2]] func.func @scalable_and_fixed_length_tile( %arg0: tensor<128x128xf32>, %arg1: tensor<128x128xf32>, %arg2: tensor<128x128xf32>) diff --git a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir index 11ab30a7d237c4..d1aed593f45451 100644 --- a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir +++ b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir @@ -428,7 +428,7 @@ module attributes {transform.with_named_sequence} { transform.yield } } -// CHECK: #[[MAP:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)> +// CHECK: #[[MAP:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 10)> // CHECK: func @matmul_sequence_fusion( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor diff --git a/mlir/test/Interfaces/TilingInterface/tile-pad-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-pad-using-interface.mlir index 7d247aefcf6b1a..ccf8e37c094f4f 100644 --- a/mlir/test/Interfaces/TilingInterface/tile-pad-using-interface.mlir +++ b/mlir/test/Interfaces/TilingInterface/tile-pad-using-interface.mlir @@ -31,8 +31,8 @@ module attributes {transform.with_named_sequence} { // CHECK-DAG: %[[DIM_IN1:.+]] = tensor.dim %[[IN]], %[[C1]] // CHECK-DAG: %[[DIM1:.+]] = affine.apply #[[MAP1]]()[%[[DIM_IN1]]] // CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index +// CHECK-DAG: %[[C3:.+]] = arith.constant 3 : index // CHECK: %[[RESULT:[a-zA-Z0-9]+]] = scf.for %[[IV0:[a-zA-Z0-9]+]] = %[[C0]] to %[[DIM0]] step %[[C2]] -// CHECK: %[[C3:.+]] = arith.constant 3 : index // CHECK: scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] 
iter_args(%[[INNER_OUT:.*]] = // CHECK: %[[SWAP_RESULT:.*]] = scf.if // CHECK: tensor.generate @@ -62,8 +62,8 @@ module attributes {transform.with_named_sequence} { transform.yield } } -// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 8)> -// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 7)> +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 + 8)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 + 7)> // CHECK: func @dynamic_2d_pad_tensor_inner_tiling( // CHECK-SAME: %[[IN:.*]]: tensor // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index @@ -107,9 +107,9 @@ module attributes {transform.with_named_sequence} { // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C15:.*]] = arith.constant 15 : index // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index +// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index // CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C15]] step %[[C2]] -// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index -// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index // CHECK: scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] = // CHECK: %[[SWAP_RESULT:.*]] = scf.if // CHECK: tensor.generate diff --git a/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir index 488a52e8e3e91d..8eb1311170c668 100644 --- a/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir +++ b/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir @@ -16,21 +16,21 @@ module attributes {transform.with_named_sequence} { transform.yield } } -// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)> -// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0)[s0] -> (20, -d0 + s0)> +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 10)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 20)> // CHECK-LABEL: func.func @simple_matmul( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index -// CHECK-DAG: %[[C10:.+]] = arith.constant 10 : index // CHECK-DAG: %[[M:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK-DAG: %[[K:.+]] = tensor.dim %[[ARG0]], %[[C1]] // CHECK-DAG: %[[N:.+]] = tensor.dim %[[ARG1]], %[[C1]] +// CHECK-DAG: %[[C10:.+]] = arith.constant 10 : index +// CHECK-DAG: %[[C20:.+]] = arith.constant 20 : index // CHECK: %[[OUTER:[a-zA-Z0-9]+]] = scf.for %[[IV0:[a-zA-Z0-9]+]] = %[[C0]] to %[[M]] step %[[C10]] // CHECK-SAME: iter_args(%[[INIT0:.+]] = %[[ARG2]]) -// CHECK-DAG: %[[C20:.+]] = arith.constant 20 : index // CHECK: %[[INNER:[a-zA-Z0-9]+]] = scf.for %[[IV1:[a-zA-Z0-9]+]] = %[[C0]] to %[[N]] step %[[C20]] // CHECK-SAME: iter_args(%[[INIT1:.+]] = %[[INIT0]]) // CHECK-DAG: %[[TS_Y:.+]] = affine.min #[[$MAP0]](%[[IV0]])[%[[M]]] @@ -68,23 +68,23 @@ module attributes {transform.with_named_sequence} { transform.yield } } -// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)> -// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0)[s0] -> (20, -d0 + s0)> -// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0)[s0] -> (30, -d0 + s0)> +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 10)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 20)> +// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 30)> // CHECK-LABEL: func.func @simple_matmul_memref( // 
CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: memref // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: memref // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: memref // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index -// CHECK-DAG: %[[C10:.+]] = arith.constant 10 : index // CHECK-DAG: %[[M:.+]] = memref.dim %[[ARG0]], %[[C0]] // CHECK-DAG: %[[K:.+]] = memref.dim %[[ARG0]], %[[C1]] // CHECK-DAG: %[[N:.+]] = memref.dim %[[ARG1]], %[[C1]] +// CHECK-DAG: %[[C10:.+]] = arith.constant 10 : index +// CHECK-DAG: %[[C20:.+]] = arith.constant 20 : index +// CHECK-DAG: %[[C30:.+]] = arith.constant 30 : index // CHECK: scf.for %[[IV0:[a-zA-Z0-9]+]] = %[[C0]] to %[[M]] step %[[C10]] -// CHECK-DAG: %[[C20:.+]] = arith.constant 20 : index // CHECK: scf.for %[[IV1:[a-zA-Z0-9]+]] = %[[C0]] to %[[N]] step %[[C20]] -// CHECK-DAG: %[[C30:.+]] = arith.constant 30 : index // CHECK: scf.for %[[IV2:[a-zA-Z0-9]+]] = %[[C0]] to %[[K]] step %[[C30]] // CHECK-DAG: %[[TS_M:.+]] = affine.min #[[$MAP0]](%[[IV0]])[%[[M]]] // CHECK-DAG: %[[TS_N:.+]] = affine.min #[[$MAP1]](%[[IV1]])[%[[N]]] @@ -127,18 +127,18 @@ module attributes {transform.with_named_sequence} { transform.yield } } -// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0) -> (10, -d0 + 128)> +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0) -> (-d0 + 128, 10)> // CHECK-LABEL: func.func @multi_result( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<128x200x300xf32>) -// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[C10:.+]] = arith.constant 10 : index -// CHECK-DAG: %[[C128:.+]] = arith.constant 128 : index // CHECK-DAG: %[[INIT0:.+]] = tensor.empty() // CHECK-DAG: %[[INIT1:.+]] = tensor.empty() +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[C128:.+]] = arith.constant 128 : index +// CHECK-DAG: %[[C300:.+]] = arith.constant 300 : index +// CHECK-DAG: %[[C10:.+]] = arith.constant 10 : index +// CHECK-DAG: %[[C20:.+]] = arith.constant 20 : index // CHECK: %[[OUTER:[a-zA-Z0-9]+]]:2 = scf.for %[[IV0:[a-zA-Z0-9]+]] = %[[C0]] to %[[C128]] step %[[C10]] // CHECK-SAME: iter_args(%[[ARG1:[a-zA-Z0-9]+]] = %[[INIT0]], %[[ARG2:[a-zA-Z0-9]+]] = %[[INIT1]]) -// CHECK-DAG: %[[C300:.+]] = arith.constant 300 : index -// CHECK-DAG: %[[C20:.+]] = arith.constant 20 : index // CHECK: %[[INNER:[a-zA-Z0-9]+]]:2 = scf.for %[[IV1:[a-zA-Z0-9]+]] = %[[C0]] to %[[C300]] step %[[C20]] // CHECK-SAME: iter_args(%[[ARG3:[a-zA-Z0-9]+]] = %[[ARG1]], %[[ARG4:[a-zA-Z0-9]+]] = %[[ARG2]]) // CHECK-DAG: %[[TS_Y:.+]] = affine.min #[[$MAP0]](%[[IV0]]) @@ -180,9 +180,9 @@ module attributes {transform.with_named_sequence} { transform.yield } } -// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)> -// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0)[s0] -> (20, -d0 + s0)> -// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0)[s0] -> (30, -d0 + s0)> +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 10)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 20)> +// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 30)> // CHECK-DAG: #[[$MAP3:.+]] = affine_map<(d0)[s0] -> (d0 + s0 * 2 - 2)> // CHECK-DAG: #[[$MAP4:.+]] = affine_map<(d0)[s0] -> (d0 + s0 * 3 - 3)> // CHECK-LABEL: func.func @conv2D( @@ -193,7 +193,6 @@ module attributes {transform.with_named_sequence} { // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index // CHECK-DAG: %[[C3:.+]] = arith.constant 3 : index -// CHECK-DAG: %[[C10:.+]] = arith.constant 10 : index // CHECK-DAG: %[[N:.+]] = tensor.dim 
%[[INPUT]], %[[C0]] // CHECK-DAG: %[[C:.+]] = tensor.dim %[[INPUT]], %[[C3]] // CHECK-DAG: %[[P:.+]] = tensor.dim %[[FILTER]], %[[C0]] @@ -201,12 +200,13 @@ module attributes {transform.with_named_sequence} { // CHECK-DAG: %[[F:.+]] = tensor.dim %[[FILTER]], %[[C3]] // CHECK-DAG: %[[R:.+]] = tensor.dim %[[INIT]], %[[C1]] // CHECK-DAG: %[[S:.+]] = tensor.dim %[[INIT]], %[[C2]] +// CHECK-DAG: %[[C10:.+]] = arith.constant 10 : index +// CHECK-DAG: %[[C20:.+]] = arith.constant 20 : index +// CHECK-DAG: %[[C30:.+]] = arith.constant 30 : index // CHECK: scf.for %[[IV0:[a-zA-Z0-9]+]] = %[[C0]] to %[[P]] step %[[C10]] // CHECK-SAME: iter_args(%[[INIT0:.+]] = %[[INIT]]) -// CHECK-DAG: %[[C20:.+]] = arith.constant 20 : index // CHECK: scf.for %[[IV1:[a-zA-Z0-9]+]] = %[[C0]] to %[[Q]] step %[[C20]] // CHECK-SAME: iter_args(%[[INIT1:.+]] = %[[INIT0]]) -// CHECK-DAG: %[[C30:.+]] = arith.constant 30 : index // CHECK: scf.for %[[IV2:[a-zA-Z0-9]+]] = %[[C0]] to %[[C]] step %[[C30]] // CHECK-SAME: iter_args(%[[INIT2:.+]] = %[[INIT1]]) // CHECK-DAG: %[[TS_P:.+]] = affine.min #[[$MAP0]](%[[IV0]])[%[[P]]] @@ -287,25 +287,25 @@ module attributes {transform.with_named_sequence} { transform.yield } } -// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (20, -d0 + s0)> -// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0)[s0] -> (30, -d0 + s0)> -// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)> +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 20)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 30)> +// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 10)> // CHECK-LABEL: func.func @interchange_matmul( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index -// CHECK-DAG: %[[C20:.+]] = arith.constant 20 : index // CHECK-DAG: %[[M:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK-DAG: %[[K:.+]] = tensor.dim %[[ARG0]], %[[C1]] // CHECK-DAG: %[[N:.+]] = tensor.dim %[[ARG1]], %[[C1]] +// CHECK-DAG: %[[C10:.+]] = arith.constant 10 : index +// CHECK-DAG: %[[C20:.+]] = arith.constant 20 : index +// CHECK-DAG: %[[C30:.+]] = arith.constant 30 : index // CHECK: %[[OUTER:[a-zA-Z0-9]+]] = scf.for %[[IV0:[a-zA-Z0-9]+]] = %[[C0]] to %[[N]] step %[[C20]] // CHECK-SAME: iter_args(%[[INIT0:.+]] = %[[ARG2]]) -// CHECK-DAG: %[[C30:.+]] = arith.constant 30 : index // CHECK: %[[INNER1:[a-zA-Z0-9]+]] = scf.for %[[IV1:[a-zA-Z0-9]+]] = %[[C0]] to %[[K]] step %[[C30]] // CHECK-SAME: iter_args(%[[INIT1:.+]] = %[[INIT0]]) -// CHECK-DAG: %[[C10:.+]] = arith.constant 10 : index // CHECK: %[[INNER2:[a-zA-Z0-9]+]] = scf.for %[[IV2:[a-zA-Z0-9]+]] = %[[C0]] to %[[M]] step %[[C10]] // CHECK-SAME: iter_args(%[[INIT2:.+]] = %[[INIT1]]) // CHECK-DAG: %[[TS_N:.+]] = affine.min #[[$MAP0]](%[[IV0]])[%[[N]]] diff --git a/mlir/test/Interfaces/TilingInterface/tile-using-scfforall.mlir b/mlir/test/Interfaces/TilingInterface/tile-using-scfforall.mlir index c5aff744b57ee6..53dd0c6a2425ce 100644 --- a/mlir/test/Interfaces/TilingInterface/tile-using-scfforall.mlir +++ b/mlir/test/Interfaces/TilingInterface/tile-using-scfforall.mlir @@ -17,8 +17,8 @@ module attributes {transform.with_named_sequence} { transform.yield } } -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (20, -d0 + s0)> +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 10)> 
+// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 20)> // CHECK: func.func @simple_matmul( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor @@ -65,8 +65,8 @@ module attributes {transform.with_named_sequence} { transform.yield } } -// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)> -// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0)[s0] -> (20, -d0 + s0)> +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 10)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 20)> // CHECK-LABEL: func.func @simple_matmul_memref( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: memref // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: memref @@ -117,7 +117,7 @@ module attributes {transform.with_named_sequence} { transform.yield } } -// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0) -> (10, -d0 + 128)> +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0) -> (-d0 + 128, 10)> // CHECK-LABEL: func.func @multi_result( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<128x200x300xf32>) // CHECK-DAG: %[[INIT0:.+]] = tensor.empty() @@ -161,9 +161,9 @@ module attributes {transform.with_named_sequence} { transform.yield } } -// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)> -// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0)[s0] -> (20, -d0 + s0)> -// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0)[s0] -> (30, -d0 + s0)> +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 10)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 20)> +// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 30)> // CHECK-DAG: #[[$MAP3:.+]] = affine_map<(d0)[s0] -> (d0 + s0 * 2 - 2)> // CHECK-DAG: #[[$MAP4:.+]] = affine_map<(d0)[s0] -> (d0 + s0 * 3 - 3)> // CHECK-LABEL: func.func @conv2D( @@ -264,8 +264,8 @@ module attributes {transform.with_named_sequence} { transform.yield } } -// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (20, -d0 + s0)> -// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)> +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 20)> +// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 10)> // CHECK-LABEL: func.func @interchange_matmul( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor diff --git a/mlir/test/lib/Interfaces/TilingInterface/TestTilingInterfaceTransformOps.cpp b/mlir/test/lib/Interfaces/TilingInterface/TestTilingInterfaceTransformOps.cpp index 8f206d90772723..a99441cd7147b5 100644 --- a/mlir/test/lib/Interfaces/TilingInterface/TestTilingInterfaceTransformOps.cpp +++ b/mlir/test/lib/Interfaces/TilingInterface/TestTilingInterfaceTransformOps.cpp @@ -234,11 +234,7 @@ applyTileToAll(RewriterBase &rewriter, Operation *transformOp, scf::SCFTilingOptions tilingOptions; tilingOptions.setTileSizes(tileSizes).setInterchange(interchange); if (mapping) { - auto mappingAttrs = - llvm::map_to_vector(mapping.value(), [](Attribute attr) { - return cast(attr); - }); - tilingOptions.setMapping(mappingAttrs); + tilingOptions.setMapping(mapping.value().getValue()); } tilingOptions.setLoopType(scf::SCFTilingOptions::LoopType::ForallOp); diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 407523c690cb35..84938231140127 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -11213,6 +11213,7 @@ cc_library( ":AffineDialect", ":Analysis", ":ArithDialect", + ":ArithUtils", ":AsmParser", 
":BufferizationDialect", ":BufferizationTransforms", From 2bf58f5d27a233e63e58d644ff7aff126ee99aa7 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 31 Jul 2024 14:41:55 -0500 Subject: [PATCH 018/114] [Clang] Suppress missing architecture error when doing LTO (#100652) Summary: The `nvlink-wrapper` can do LTO now, which means we can still create some LLVM-IR without needing an architecture. In the case that we try to invoke `nvlink` internally, that will still fail. This patch simply defers the error until later so we can use `--lto-emit-llvm` to get the IR without specifying an architecture. --- clang/lib/Driver/ToolChains/Cuda.cpp | 8 +++++--- clang/test/Driver/cuda-cross-compiling.c | 7 +++++++ clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp | 7 +++++++ libc/cmake/modules/LLVMLibCLibraryRules.cmake | 2 +- libc/startup/gpu/CMakeLists.txt | 2 +- 5 files changed, 21 insertions(+), 5 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index e98e574d6cc2b9..6e10e3d006767c 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -596,14 +596,16 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-v"); StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ); - if (GPUArch.empty()) { + if (GPUArch.empty() && !C.getDriver().isUsingLTO()) { C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch) << getToolChain().getArchName() << getShortName(); return; } - CmdArgs.push_back("-arch"); - CmdArgs.push_back(Args.MakeArgString(GPUArch)); + if (!GPUArch.empty()) { + CmdArgs.push_back("-arch"); + CmdArgs.push_back(Args.MakeArgString(GPUArch)); + } if (Args.hasArg(options::OPT_ptxas_path_EQ)) CmdArgs.push_back(Args.MakeArgString( diff --git a/clang/test/Driver/cuda-cross-compiling.c b/clang/test/Driver/cuda-cross-compiling.c index c2e538c25329e1..5f24e7a5accb08 100644 --- a/clang/test/Driver/cuda-cross-compiling.c +++ b/clang/test/Driver/cuda-cross-compiling.c @@ -84,6 +84,13 @@ // MISSING: error: must pass in an explicit nvptx64 gpu architecture to 'ptxas' // MISSING: error: must pass in an explicit nvptx64 gpu architecture to 'nvlink' +// Do not error when performing LTO. 
+// +// RUN: %clang -target nvptx64-nvidia-cuda -flto %s -### 2>&1 \ +// RUN: | FileCheck -check-prefix=MISSING-LTO %s + +// MISSING-LTO-NOT: error: must pass in an explicit nvptx64 gpu architecture to 'nvlink' + // RUN: %clang -target nvptx64-nvidia-cuda -flto -c %s -### 2>&1 \ // RUN: | FileCheck -check-prefix=GENERIC %s // RUN: %clang -target nvptx64-nvidia-cuda -march=sm_52 -march=generic -flto -c %s -### 2>&1 \ diff --git a/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp b/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp index 30c45eda66288f..7851414ba7f4dd 100644 --- a/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp +++ b/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp @@ -302,6 +302,9 @@ Expected runPTXAs(StringRef File, const ArgList &Args) { findProgram(Args, "ptxas", {CudaPath + "/bin", GivenPath}); if (!PTXAsPath) return PTXAsPath.takeError(); + if (!Args.hasArg(OPT_arch)) + return createStringError( + "must pass in an explicit nvptx64 gpu architecture to 'ptxas'"); auto TempFileOrErr = createTempFile( Args, sys::path::stem(Args.getLastArgValue(OPT_o, "a.out")), "cubin"); @@ -694,6 +697,10 @@ Error runNVLink(ArrayRef Files, const ArgList &Args) { if (!NVLinkPath) return NVLinkPath.takeError(); + if (!Args.hasArg(OPT_arch)) + return createStringError( + "must pass in an explicit nvptx64 gpu architecture to 'nvlink'"); + ArgStringList NewLinkerArgs; for (const opt::Arg *Arg : Args) { // Do not forward arguments only intended for the linker wrapper. diff --git a/libc/cmake/modules/LLVMLibCLibraryRules.cmake b/libc/cmake/modules/LLVMLibCLibraryRules.cmake index ad931d445720dd..0b1878d7b5222f 100644 --- a/libc/cmake/modules/LLVMLibCLibraryRules.cmake +++ b/libc/cmake/modules/LLVMLibCLibraryRules.cmake @@ -113,7 +113,7 @@ function(add_bitcode_entrypoint_library target_name base_target_name) add_executable(${target_name} ${objects}) target_link_options(${target_name} PRIVATE - "-r" "-nostdlib" "-flto" "-Wl,--lto-emit-llvm" "-march= ") + "-r" "-nostdlib" "-flto" "-Wl,--lto-emit-llvm") endfunction(add_bitcode_entrypoint_library) # A rule to build a library from a collection of entrypoint objects. diff --git a/libc/startup/gpu/CMakeLists.txt b/libc/startup/gpu/CMakeLists.txt index 3830bf39916af1..5e5745063fc8c3 100644 --- a/libc/startup/gpu/CMakeLists.txt +++ b/libc/startup/gpu/CMakeLists.txt @@ -34,7 +34,7 @@ function(add_startup_object name) RUNTIME_OUTPUT_DIRECTORY ${LIBC_LIBRARY_DIR} RUNTIME_OUTPUT_NAME ${name}.o) target_link_options(${fq_target_name}.exe PRIVATE - "-nostdlib" "-flto" "-Wl,--lto-emit-llvm" "-march= ") + "-nostdlib" "-flto" "-Wl,--lto-emit-llvm") endif() endfunction() From 5d972c582a076768885e6d33df8f5d3860594e43 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 31 Jul 2024 12:57:23 -0700 Subject: [PATCH 019/114] [ELF] Add -z nosectionheader GNU ld since 2.41 supports this option, which is mildly useful. It omits the section header table and non-ALLOC sections (including .symtab/.strtab (--strip-all)). This option is simple to implement and might be used by LLDB to test program headers parsing without the section header table (#100900). -z sectionheader, which is the default, is also added. 
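As a quick illustration (mirroring the zsectionheader.s test added below; the object and output file names here are placeholders):

  ld.lld -shared -z nosectionheader a.o -o a.so
  llvm-readelf -h a.so
  # Number of section headers: 0
  # Section header string table index: 0

Mixing it with -r is rejected with "error: -r and -z nosectionheader may not be used together".
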
Pull Request: https://github.com/llvm/llvm-project/pull/101286 --- lld/ELF/Config.h | 1 + lld/ELF/Driver.cpp | 8 +++++++- lld/ELF/SyntheticSections.cpp | 6 ++++-- lld/ELF/Writer.cpp | 11 +++++++++-- lld/docs/ReleaseNotes.rst | 4 ++++ lld/docs/ld.lld.1 | 3 +++ lld/test/CMakeLists.txt | 1 + lld/test/ELF/zsectionheader.s | 36 +++++++++++++++++++++++++++++++++++ 8 files changed, 65 insertions(+), 5 deletions(-) create mode 100644 lld/test/ELF/zsectionheader.s diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index 6abd929d2343d4..183dc88a93e2f5 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -333,6 +333,7 @@ struct Config { bool zPacPlt; bool zRelro; bool zRodynamic; + bool zSectionHeader; bool zShstk; bool zStartStopGC; uint8_t zStartStopVisibility; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 7e0a5a1937c7f4..a8c52e8c2b8e14 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -447,6 +447,8 @@ static void checkOptions() { error("-r and --export-dynamic may not be used together"); if (config->debugNames) error("-r and --debug-names may not be used together"); + if (!config->zSectionHeader) + error("-r and -z nosectionheader may not be used together"); } if (config->executeOnly) { @@ -836,6 +838,8 @@ static ICFLevel getICF(opt::InputArgList &args) { static StripPolicy getStrip(opt::InputArgList &args) { if (args.hasArg(OPT_relocatable)) return StripPolicy::None; + if (!config->zSectionHeader) + return StripPolicy::All; auto *arg = args.getLastArg(OPT_strip_all, OPT_strip_debug); if (!arg) @@ -1411,7 +1415,9 @@ static void readConfigs(opt::InputArgList &args) { config->soName = args.getLastArgValue(OPT_soname); config->sortSection = getSortSection(args); config->splitStackAdjustSize = args::getInteger(args, OPT_split_stack_adjust_size, 16384); - config->strip = getStrip(args); + config->zSectionHeader = + getZFlag(args, "sectionheader", "nosectionheader", true); + config->strip = getStrip(args); // needs zSectionHeader config->sysroot = args.getLastArgValue(OPT_sysroot); config->target1Rel = args.hasFlag(OPT_target1_rel, OPT_target1_abs, false); config->target2 = getTarget2(args); diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index b767392c4456cf..d0b1933121fa26 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -4665,7 +4665,8 @@ template void elf::createSyntheticSections() { auto add = [](SyntheticSection &sec) { ctx.inputSections.push_back(&sec); }; - in.shStrTab = std::make_unique(".shstrtab", false); + if (config->zSectionHeader) + in.shStrTab = std::make_unique(".shstrtab", false); Out::programHeaders = make("", 0, SHF_ALLOC); Out::programHeaders->addralign = config->wordsize; @@ -4917,7 +4918,8 @@ template void elf::createSyntheticSections() { add(*in.symTab); if (in.symTabShndx) add(*in.symTabShndx); - add(*in.shStrTab); + if (in.shStrTab) + add(*in.shStrTab); if (in.strTab) add(*in.strTab); } diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 5cffdb771a7384..515ebb7453ad56 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1875,13 +1875,16 @@ template void Writer::finalizeSections() { sortSections(); // Create a list of OutputSections, assign sectionIndex, and populate - // in.shStrTab. + // in.shStrTab. If -z nosectionheader is specified, drop non-ALLOC sections. 
for (SectionCommand *cmd : script->sectionCommands) if (auto *osd = dyn_cast(cmd)) { OutputSection *osec = &osd->osec; + if (!in.shStrTab && !(osec->flags & SHF_ALLOC)) + continue; outputSections.push_back(osec); osec->sectionIndex = outputSections.size(); - osec->shName = in.shStrTab->addString(osec->name); + if (in.shStrTab) + osec->shName = in.shStrTab->addString(osec->name); } // Prefer command line supplied address over other constraints. @@ -2703,6 +2706,10 @@ template void Writer::writeHeader() { auto *eHdr = reinterpret_cast(Out::bufferStart); eHdr->e_type = getELFType(); eHdr->e_entry = getEntryAddr(); + + // If -z nosectionheader is specified, omit the section header table. + if (!in.shStrTab) + return; eHdr->e_shoff = sectionHeaderOff; // Write the section header table. diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 6f60efd87c975b..e9d3c12b765450 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -26,6 +26,10 @@ Non-comprehensive list of changes in this release ELF Improvements ---------------- +* ``-z nosectionheader`` has been implemented to omit the section header table. + The operation is similar to ``llvm-objcopy --strip-sections``. + (`#101286 `_) + Breaking changes ---------------- diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1 index f9a00b78750388..b22cb362837715 100644 --- a/lld/docs/ld.lld.1 +++ b/lld/docs/ld.lld.1 @@ -857,6 +857,9 @@ The object will omit the .Dv PT_GNU_RELRO segment. .Pp +.It Cm nosectionheader +Don't generate the section header table. +.Pp .It Cm notext Allow relocations against read-only segments. Sets the diff --git a/lld/test/CMakeLists.txt b/lld/test/CMakeLists.txt index 25d8f0a424926d..5d4a2757c529b8 100644 --- a/lld/test/CMakeLists.txt +++ b/lld/test/CMakeLists.txt @@ -64,6 +64,7 @@ if (NOT LLD_BUILT_STANDALONE) llvm-profdata llvm-readelf llvm-readobj + llvm-strings llvm-strip llvm-symbolizer not diff --git a/lld/test/ELF/zsectionheader.s b/lld/test/ELF/zsectionheader.s new file mode 100644 index 00000000000000..c1e654ac1082dd --- /dev/null +++ b/lld/test/ELF/zsectionheader.s @@ -0,0 +1,36 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: ld.lld -shared -z nosectionheader -z sectionheader %t.o -o %t.so 2>&1 | count 0 +# RUN: llvm-readelf -hS %t.so | FileCheck %s --check-prefixes=CHECK,SHDR + +# RUN: ld.lld -shared -z nosectionheader %t.o -o %t0.so +# RUN: llvm-readelf -h --dyn-syms %t0.so | FileCheck %s --check-prefixes=CHECK,NOSHDR +# RUN: llvm-strings %t0.so | FileCheck %s --check-prefixes=NOSHDR-STR + +# CHECK: Size of this header: 64 (bytes) +# CHECK-NEXT: Size of program headers: 56 (bytes) +# CHECK-NEXT: Number of program headers: 6 +# CHECK-NEXT: Size of section headers: 64 (bytes) +# SHDR-NEXT: Number of section headers: 13 +# SHDR-NEXT: Section header string table index: 11 +# NOSHDR-NEXT: Number of section headers: 0 +# NOSHDR-NEXT: Section header string table index: 0 + +# SHDR: Section Headers: +# NOSHDR: Symbol table for image contains 2 entries: +# NOSHDR: _start + +## _start occurs as a dynamic string table entry. There is no static string table +## entry. `nonalloc` is not in the output. 
+# NOSHDR-STR: _start +# NOSHDR-STR-NOT: _start + +# RUN: not ld.lld -r -z nosectionheader %t.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR + +# ERR: error: -r and -z nosectionheader may not be used together + +.globl _start +_start: + +.section nonalloc,"" +.asciz "_start" From bf5e56deba1e6d69f5ff0714fcade08c1329f882 Mon Sep 17 00:00:00 2001 From: Chris Apple Date: Wed, 31 Jul 2024 12:57:59 -0700 Subject: [PATCH 020/114] [NFC][LLVM] Add RealtimeSanitizer LLVM code owners (#101231) Split from #100596 --- llvm/CODE_OWNERS.TXT | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/CODE_OWNERS.TXT b/llvm/CODE_OWNERS.TXT index d1620d1cbf870e..5b4df555fd6668 100644 --- a/llvm/CODE_OWNERS.TXT +++ b/llvm/CODE_OWNERS.TXT @@ -263,3 +263,7 @@ D: C-SKY backend (lib/Target/CSKY/*) N: Ilia Diachkov E: ilia.diachkov@gmail.com D: SPIR-V backend (lib/Target/SPIRV/*) + +N: Christopher Apple, David Trevelyan +E: cja-private@pm.me, david.trevelyan@gmail.com +D: RealtimeSanitizer (LLVM part) From 36264435071ecb5790d7944f0653c8195033135f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 31 Jul 2024 13:01:31 -0700 Subject: [PATCH 021/114] [RISCV] Use X0 for VLMax for slide1up/slide1down in lowerVectorIntrinsicScalars. (#101384) Previously, we created a vsetvlimax intrinsic. Using X0 simplifies the code and enables some optimizations to kick when the exact value of vlmax is known. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 9 +--- .../RISCV/rvv/vslide1down-constant-vl-rv32.ll | 44 ++++++++++++------- .../RISCV/rvv/vslide1up-constant-vl-rv32.ll | 44 ++++++++++++------- 3 files changed, 59 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 96edd331eb678b..68b614d1d3fdc5 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -8857,14 +8857,7 @@ static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT); } else if (AVLInt >= 2 * MaxVLMAX) { // Just set vl to VLMAX in this situation - RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT); - SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT); - unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits()); - SDValue SEW = DAG.getConstant(Sew, DL, XLenVT); - SDValue SETVLMAX = DAG.getTargetConstant( - Intrinsic::riscv_vsetvlimax, DL, MVT::i32); - I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW, - LMUL); + I32VL = DAG.getRegister(RISCV::X0, XLenVT); } else { // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl // is related to the hardware implementation. 
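The effect shows up in the updated tests that follow. When the vector length is unknown, vl is still set to VLMAX through the x0 AVL form:

  vsetvli a2, zero, e32, m1, ta, ma

but when the exact vector length is known (for example 512-bit vectors, so VLMAX is 16 for e32/m1), the value can now be folded into an immediate AVL:

  vsetivli zero, 16, e32, m1, ta, ma

Both sequences are taken from the vslide1down/vslide1up test diffs below.
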
diff --git a/llvm/test/CodeGen/RISCV/rvv/vslide1down-constant-vl-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vslide1down-constant-vl-rv32.ll index 4115e6a91f965f..fd90e67b1fb2c9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vslide1down-constant-vl-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vslide1down-constant-vl-rv32.ll @@ -51,7 +51,7 @@ define @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2( @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl3( @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl8( @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl9( @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl15( @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl16( @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2047(< ; ; CHECK-512-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2047: ; CHECK-512: # %bb.0: # %entry -; CHECK-512-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; CHECK-512-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; CHECK-512-NEXT: vslide1down.vx v8, v8, a0 ; CHECK-512-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-512-NEXT: ret ; ; CHECK-64-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2047: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; CHECK-64-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; CHECK-64-NEXT: vslide1down.vx v8, v8, a0 ; CHECK-64-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-64-NEXT: ret @@ -269,12 +269,26 @@ entry: } define @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2048( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2048: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; CHECK-NEXT: vslide1down.vx v8, v8, a0 -; CHECK-NEXT: vslide1down.vx v8, v8, a1 -; CHECK-NEXT: ret +; CHECK-128-65536-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2048: +; CHECK-128-65536: # %bb.0: # %entry +; CHECK-128-65536-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; CHECK-128-65536-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-128-65536-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-128-65536-NEXT: ret +; +; CHECK-512-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2048: +; CHECK-512: # %bb.0: # %entry +; CHECK-512-NEXT: vsetivli zero, 16, e32, m1, ta, ma +; CHECK-512-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-512-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-512-NEXT: ret +; +; CHECK-64-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2048: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; CHECK-64-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-64-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-64-NEXT: ret entry: %a = call @llvm.riscv.vslide1down.nxv1i64.i64( undef, diff --git a/llvm/test/CodeGen/RISCV/rvv/vslide1up-constant-vl-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vslide1up-constant-vl-rv32.ll index f0d621bef2b91f..b26f1cab97c771 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vslide1up-constant-vl-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vslide1up-constant-vl-rv32.ll @@ -51,7 +51,7 @@ define @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2( @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl3( @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl8( @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl9( @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl15( @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl16( @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2047( @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2048( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2048: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; CHECK-NEXT: vslide1up.vx v9, v8, a1 -; 
CHECK-NEXT: vslide1up.vx v8, v9, a0 -; CHECK-NEXT: ret +; CHECK-128-65536-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2048: +; CHECK-128-65536: # %bb.0: # %entry +; CHECK-128-65536-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; CHECK-128-65536-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-128-65536-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-128-65536-NEXT: ret +; +; CHECK-512-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2048: +; CHECK-512: # %bb.0: # %entry +; CHECK-512-NEXT: vsetivli zero, 16, e32, m1, ta, ma +; CHECK-512-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-512-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-512-NEXT: ret +; +; CHECK-64-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2048: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; CHECK-64-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-64-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-64-NEXT: ret entry: %a = call @llvm.riscv.vslide1up.nxv1i64.i64( undef, From 30b5d4a76357feebf4797d1d80bc9d5608c74a88 Mon Sep 17 00:00:00 2001 From: aaryanshukla <53713108+aaryanshukla@users.noreply.github.com> Date: Wed, 31 Jul 2024 13:07:03 -0700 Subject: [PATCH 022/114] [libc][math][c23] Add dfma{l,f128} and dsub{l,f128} C23 math functions (#101089) Co-authored-by: OverMighty --- libc/config/darwin/arm/entrypoints.txt | 2 + libc/config/darwin/x86_64/entrypoints.txt | 2 + libc/config/linux/aarch64/entrypoints.txt | 4 ++ libc/config/linux/arm/entrypoints.txt | 2 + libc/config/linux/riscv/entrypoints.txt | 2 + libc/config/linux/x86_64/entrypoints.txt | 4 ++ libc/config/windows/entrypoints.txt | 2 + libc/docs/math/index.rst | 4 +- libc/spec/llvm_libc_ext.td | 3 ++ libc/spec/stdc.td | 3 ++ libc/src/math/CMakeLists.txt | 6 +++ libc/src/math/dfmaf128.h | 21 ++++++++ libc/src/math/dfmal.h | 20 ++++++++ libc/src/math/dsubf128.h | 21 ++++++++ libc/src/math/dsubl.h | 20 ++++++++ libc/src/math/generic/CMakeLists.txt | 51 ++++++++++++++++++ libc/src/math/generic/dfmaf128.cpp | 25 +++++++++ libc/src/math/generic/dfmal.cpp | 21 ++++++++ libc/src/math/generic/dsubf128.cpp | 20 ++++++++ libc/src/math/generic/dsubl.cpp | 20 ++++++++ libc/test/src/math/CMakeLists.txt | 29 +++++++++++ libc/test/src/math/dfmal_test.cpp | 13 +++++ libc/test/src/math/dsubl_test.cpp | 13 +++++ libc/test/src/math/smoke/CMakeLists.txt | 60 ++++++++++++++++++++-- libc/test/src/math/smoke/SubTest.h | 2 +- libc/test/src/math/smoke/dfmaf128_test.cpp | 13 +++++ libc/test/src/math/smoke/dfmal_test.cpp | 13 +++++ libc/test/src/math/smoke/dsubf128_test.cpp | 13 +++++ libc/test/src/math/smoke/dsubl_test.cpp | 13 +++++ libc/utils/MPFRWrapper/MPFRUtils.cpp | 6 +++ 30 files changed, 421 insertions(+), 7 deletions(-) create mode 100644 libc/src/math/dfmaf128.h create mode 100644 libc/src/math/dfmal.h create mode 100644 libc/src/math/dsubf128.h create mode 100644 libc/src/math/dsubl.h create mode 100644 libc/src/math/generic/dfmaf128.cpp create mode 100644 libc/src/math/generic/dfmal.cpp create mode 100644 libc/src/math/generic/dsubf128.cpp create mode 100644 libc/src/math/generic/dsubl.cpp create mode 100644 libc/test/src/math/dfmal_test.cpp create mode 100644 libc/test/src/math/dsubl_test.cpp create mode 100644 libc/test/src/math/smoke/dfmaf128_test.cpp create mode 100644 libc/test/src/math/smoke/dfmal_test.cpp create mode 100644 libc/test/src/math/smoke/dsubf128_test.cpp create mode 100644 libc/test/src/math/smoke/dsubl_test.cpp diff --git a/libc/config/darwin/arm/entrypoints.txt b/libc/config/darwin/arm/entrypoints.txt index 38eace26f10ab2..13280d2dd56d4c 100644 --- 
a/libc/config/darwin/arm/entrypoints.txt +++ b/libc/config/darwin/arm/entrypoints.txt @@ -136,7 +136,9 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.cos libc.src.math.cosf libc.src.math.cospif + libc.src.math.dfmal libc.src.math.dsqrtl + libc.src.math.dsubl libc.src.math.erff libc.src.math.exp libc.src.math.expf diff --git a/libc/config/darwin/x86_64/entrypoints.txt b/libc/config/darwin/x86_64/entrypoints.txt index df9f9bfd54e813..1cff157c629df3 100644 --- a/libc/config/darwin/x86_64/entrypoints.txt +++ b/libc/config/darwin/x86_64/entrypoints.txt @@ -119,7 +119,9 @@ set(TARGET_LIBM_ENTRYPOINTS #libc.src.math.ceill #libc.src.math.coshf #libc.src.math.cosf + #libc.src.math.dfmal #libc.src.math.dsqrtl + #libc.src.math.dsubl #libc.src.math.expf #libc.src.math.exp2f #libc.src.math.expm1f diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 15c210eece95b1..2003f6929d5cb2 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -361,8 +361,10 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.cosf libc.src.math.coshf libc.src.math.cospif + libc.src.math.dfmal libc.src.math.dmull libc.src.math.dsqrtl + libc.src.math.dsubl libc.src.math.erff libc.src.math.exp libc.src.math.exp10 @@ -612,7 +614,9 @@ if(LIBC_TYPES_HAS_FLOAT128) # math.h C23 _Float128 entrypoints libc.src.math.ceilf128 libc.src.math.copysignf128 + libc.src.math.dfmaf128 libc.src.math.dsqrtf128 + libc.src.math.dsubf128 libc.src.math.fabsf128 libc.src.math.fdimf128 libc.src.math.floorf128 diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt index 451213e7ce904a..d691f63a78e4b3 100644 --- a/libc/config/linux/arm/entrypoints.txt +++ b/libc/config/linux/arm/entrypoints.txt @@ -228,7 +228,9 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.cos libc.src.math.cosf libc.src.math.coshf + libc.src.math.dfmal libc.src.math.dsqrtl + libc.src.math.dsubl libc.src.math.erff libc.src.math.exp libc.src.math.exp10 diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index 084f899c2b957f..c229a11b5bb52f 100644 --- a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -383,8 +383,10 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.cosf libc.src.math.coshf libc.src.math.cospif + libc.src.math.dfmal libc.src.math.dmull libc.src.math.dsqrtl + libc.src.math.dsubl libc.src.math.erff libc.src.math.exp libc.src.math.exp10 diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index dbd9cf07d6b7eb..533d0cd368863c 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -383,8 +383,10 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.cosf libc.src.math.coshf libc.src.math.cospif + libc.src.math.dfmal libc.src.math.dmull libc.src.math.dsqrtl + libc.src.math.dsubl libc.src.math.erff libc.src.math.exp libc.src.math.exp10 @@ -653,8 +655,10 @@ if(LIBC_TYPES_HAS_FLOAT128) libc.src.math.canonicalizef128 libc.src.math.ceilf128 libc.src.math.copysignf128 + libc.src.math.dfmaf128 libc.src.math.dmulf128 libc.src.math.dsqrtf128 + libc.src.math.dsubf128 libc.src.math.fabsf128 libc.src.math.fdimf128 libc.src.math.floorf128 diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt index 06c3682255c452..e45219a9070e36 100644 --- a/libc/config/windows/entrypoints.txt +++ b/libc/config/windows/entrypoints.txt @@ -133,6 +133,8 @@ set(TARGET_LIBM_ENTRYPOINTS 
libc.src.math.cos libc.src.math.cosf libc.src.math.coshf + libc.src.math.dfmal + libc.src.math.dsubl libc.src.math.erff libc.src.math.exp libc.src.math.expf diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index 3845e413a47e46..7ba2c4cb812dcf 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -118,11 +118,11 @@ Basic Operations +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | ddiv | N/A | N/A | | N/A | | 7.12.14.4 | F.10.11 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| dfma | N/A | N/A | | N/A | | 7.12.14.5 | F.10.11 | +| dfma | N/A | N/A | |check| | N/A | |check|\* | 7.12.14.5 | F.10.11 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | dmul | N/A | N/A | |check| | N/A | |check|\* | 7.12.14.3 | F.10.11 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| dsub | N/A | N/A | | N/A | | 7.12.14.2 | F.10.11 | +| dsub | N/A | N/A | |check| | N/A | |check|\* | 7.12.14.2 | F.10.11 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | f16add | |check|\* | |check|\* | |check|\* | N/A | |check| | 7.12.14.1 | F.10.11 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ diff --git a/libc/spec/llvm_libc_ext.td b/libc/spec/llvm_libc_ext.td index f3a8862574ac53..1bd001f414187e 100644 --- a/libc/spec/llvm_libc_ext.td +++ b/libc/spec/llvm_libc_ext.td @@ -57,7 +57,10 @@ def LLVMLibcExt : StandardSpec<"llvm_libc_ext"> { [], // Types [], // Enumerations [ + GuardedFunctionSpec<"dfmaf128", RetValSpec, [ArgSpec, ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, GuardedFunctionSpec<"dsqrtf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, + GuardedFunctionSpec<"dsubf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, + GuardedFunctionSpec<"f16add", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, GuardedFunctionSpec<"f16addf", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index 6aaf05ffd9f65e..f3b8db598c4a1a 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -397,6 +397,9 @@ def StdC : StandardSpec<"stdc"> { GuardedFunctionSpec<"ceilf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, GuardedFunctionSpec<"ceilf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, + FunctionSpec<"dfmal", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, + FunctionSpec<"dsubl", RetValSpec, [ArgSpec, ArgSpec]>, + FunctionSpec<"fabs", RetValSpec, [ArgSpec], [ConstAttr]>, FunctionSpec<"fabsf", RetValSpec, [ArgSpec]>, FunctionSpec<"fabsl", RetValSpec, [ArgSpec]>, diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index 7fd1e550fd5239..42126e01898fee 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -89,9 +89,15 @@ 
add_math_entrypoint_object(cospif) add_math_entrypoint_object(dmull) add_math_entrypoint_object(dmulf128) +add_math_entrypoint_object(dfmal) +add_math_entrypoint_object(dfmaf128) + add_math_entrypoint_object(dsqrtl) add_math_entrypoint_object(dsqrtf128) +add_math_entrypoint_object(dsubl) +add_math_entrypoint_object(dsubf128) + add_math_entrypoint_object(erf) add_math_entrypoint_object(erff) diff --git a/libc/src/math/dfmaf128.h b/libc/src/math/dfmaf128.h new file mode 100644 index 00000000000000..1b2e72851c806d --- /dev/null +++ b/libc/src/math/dfmaf128.h @@ -0,0 +1,21 @@ +//===-- Implementation header for dfmaf128 ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_DFMAF128_H +#define LLVM_LIBC_SRC_MATH_DFMAF128_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +double dfmaf128(float128 x, float128 y, float128 z); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_DFMAF128_H diff --git a/libc/src/math/dfmal.h b/libc/src/math/dfmal.h new file mode 100644 index 00000000000000..e0867305b12861 --- /dev/null +++ b/libc/src/math/dfmal.h @@ -0,0 +1,20 @@ +//===-- Implementation header for dfmal -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_DFMAL_H +#define LLVM_LIBC_SRC_MATH_DFMAL_H + +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +double dfmal(long double x, long double y, long double z); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_DFMAL_H diff --git a/libc/src/math/dsubf128.h b/libc/src/math/dsubf128.h new file mode 100644 index 00000000000000..8ac58a916f0deb --- /dev/null +++ b/libc/src/math/dsubf128.h @@ -0,0 +1,21 @@ +//===-- Implementation header for dsubf128 ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_DSUBF128_H +#define LLVM_LIBC_SRC_MATH_DSUBF128_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +double dsubf128(float128 x, float128 y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_DSUBF128_H diff --git a/libc/src/math/dsubl.h b/libc/src/math/dsubl.h new file mode 100644 index 00000000000000..a512bbe06d4e83 --- /dev/null +++ b/libc/src/math/dsubl.h @@ -0,0 +1,20 @@ +//===-- Implementation header for dsubl -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_DSUBL_H +#define LLVM_LIBC_SRC_MATH_DSUBL_H + +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +double dsubl(long double x, long double y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_DSUBL_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index e707615e69b971..96c8e3520d5e2f 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -129,6 +129,31 @@ add_entrypoint_object( libc.src.__support.FPUtil.nearest_integer_operations ) +add_entrypoint_object( + dfmaf128 + SRCS + dfmaf128.cpp + HDRS + ../dfmaf128.h + DEPENDS + libc.src.__support.FPUtil.fma + libc.src.__support.macros.properties.types + COMPILE_OPTIONS + -O3 +) + +add_entrypoint_object( + dfmal + SRCS + dfmal.cpp + HDRS + ../dfmal.h + DEPENDS + libc.src.__support.FPUtil.fma + COMPILE_OPTIONS + -O3 +) + add_entrypoint_object( dsqrtl SRCS @@ -154,6 +179,32 @@ add_entrypoint_object( -O3 ) + +add_entrypoint_object( + dsubf128 + SRCS + dsubf128.cpp + HDRS + ../dsubf128.h + DEPENDS + libc.src.__support.FPUtil.generic.add_sub + libc.src.__support.macros.properties.types + COMPILE_OPTIONS + -O3 +) + +add_entrypoint_object( + dsubl + SRCS + dsubl.cpp + HDRS + ../dsubl.h + DEPENDS + libc.src.__support.FPUtil.generic.add_sub + COMPILE_OPTIONS + -O3 +) + add_header_library( range_reduction HDRS diff --git a/libc/src/math/generic/dfmaf128.cpp b/libc/src/math/generic/dfmaf128.cpp new file mode 100644 index 00000000000000..b6e1bdb085cf79 --- /dev/null +++ b/libc/src/math/generic/dfmaf128.cpp @@ -0,0 +1,25 @@ +//===-- Implementation of dfmaf128 function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_DFMAf128_H +#define LLVM_LIBC_SRC_MATH_DFMAf128_H + +#include "src/math/dfmaf128.h" +#include "src/__support/FPUtil/FMA.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(double, dfmaf128, (float128 x, float128 y, float128 z)) { + return fputil::fma(x, y, z); +} + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_DFMAf128_H diff --git a/libc/src/math/generic/dfmal.cpp b/libc/src/math/generic/dfmal.cpp new file mode 100644 index 00000000000000..02e0ce84ace839 --- /dev/null +++ b/libc/src/math/generic/dfmal.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of dfmal function ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/dfmal.h" +#include "src/__support/FPUtil/FMA.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(double, dfmal, + (long double x, long double y, long double z)) { + return fputil::fma(x, y, z); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/dsubf128.cpp b/libc/src/math/generic/dsubf128.cpp new file mode 100644 index 00000000000000..1b2f1214b3a6fb --- /dev/null +++ b/libc/src/math/generic/dsubf128.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of dsubf128 function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/dsubf128.h" +#include "src/__support/FPUtil/generic/add_sub.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(double, dsubf128, (float128 x, float128 y)) { + return fputil::generic::sub(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/dsubl.cpp b/libc/src/math/generic/dsubl.cpp new file mode 100644 index 00000000000000..8b567d0869d2a3 --- /dev/null +++ b/libc/src/math/generic/dsubl.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of dsubl function ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/dsubl.h" +#include "src/__support/FPUtil/generic/add_sub.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(double, dsubl, (long double x, long double y)) { + return fputil::generic::sub(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt index 56b27b0952b585..cc5955903b20d6 100644 --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -2391,6 +2391,35 @@ add_fp_unittest( libc.src.stdlib.srand ) +add_fp_unittest( + dfmal_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + dfmal_test.cpp + HDRS + FmaTest.h + DEPENDS + libc.src.math.dfmal + libc.src.stdlib.rand + libc.src.stdlib.srand +) + +add_fp_unittest( + dsubl_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + dsubl_test.cpp + HDRS + SubTest.h + DEPENDS + libc.src.math.dsubl +) + + add_subdirectory(generic) add_subdirectory(smoke) diff --git a/libc/test/src/math/dfmal_test.cpp b/libc/test/src/math/dfmal_test.cpp new file mode 100644 index 00000000000000..3c38f5eb7db9d5 --- /dev/null +++ b/libc/test/src/math/dfmal_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for dfmal -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "FmaTest.h" + +#include "src/math/dfmal.h" + +LIST_NARROWING_FMA_TESTS(double, long double, LIBC_NAMESPACE::dfmal) diff --git a/libc/test/src/math/dsubl_test.cpp b/libc/test/src/math/dsubl_test.cpp new file mode 100644 index 00000000000000..98846e0b6e3b35 --- /dev/null +++ b/libc/test/src/math/dsubl_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for dsubl -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SubTest.h" + +#include "src/math/dsubl.h" + +LIST_SUB_TESTS(double, long double, LIBC_NAMESPACE::dsubl) diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index 36d66bf146a37b..c8266042258488 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -358,6 +358,58 @@ add_fp_unittest( libc.src.__support.FPUtil.fp_bits ) +add_fp_unittest( + dfmal_test + SUITE + libc-math-smoke-tests + SRCS + dfmal_test.cpp + HDRS + FmaTest.h + DEPENDS + libc.src.math.dfmal +) + +add_fp_unittest( + dfmaf128_test + SUITE + libc-math-smoke-tests + SRCS + dfmaf128_test.cpp + HDRS + FmaTest.h + DEPENDS + libc.src.math.dfmaf128 +) + +add_fp_unittest( + dsubl_test + SUITE + libc-math-smoke-tests + SRCS + dsubl_test.cpp + HDRS + SubTest.h + DEPENDS + libc.src.errno.errno + libc.hdr.fenv_macros + libc.src.math.dsubl +) + +add_fp_unittest( + dsubf128_test + SUITE + libc-math-smoke-tests + SRCS + dsubf128_test.cpp + HDRS + SubTest.h + DEPENDS + libc.hdr.errno_macros + libc.hdr.fenv_macros + libc.src.math.dsubf128 +) + add_fp_unittest( floor_test SUITE @@ -3903,8 +3955,8 @@ add_fp_unittest( HDRS SubTest.h DEPENDS + libc.hdr.errno_macros libc.hdr.fenv_macros - libc.src.__support.FPUtil.basic_operations libc.src.math.f16sub ) @@ -3917,8 +3969,8 @@ add_fp_unittest( HDRS SubTest.h DEPENDS + libc.hdr.errno_macros libc.hdr.fenv_macros - libc.src.__support.FPUtil.basic_operations libc.src.math.f16subf ) @@ -3931,8 +3983,8 @@ add_fp_unittest( HDRS SubTest.h DEPENDS + libc.hdr.errno_macros libc.hdr.fenv_macros - libc.src.__support.FPUtil.basic_operations libc.src.math.f16subl ) @@ -3945,8 +3997,8 @@ add_fp_unittest( HDRS SubTest.h DEPENDS + libc.hdr.errno_macros libc.hdr.fenv_macros - libc.src.__support.FPUtil.basic_operations libc.src.math.f16subf128 ) diff --git a/libc/test/src/math/smoke/SubTest.h b/libc/test/src/math/smoke/SubTest.h index e5e04996affa8c..9ee4220b382085 100644 --- a/libc/test/src/math/smoke/SubTest.h +++ b/libc/test/src/math/smoke/SubTest.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_TEST_SRC_MATH_SMOKE_SUBTEST_H #define LLVM_LIBC_TEST_SRC_MATH_SMOKE_SUBTEST_H +#include "hdr/errno_macros.h" #include "hdr/fenv_macros.h" -#include "src/__support/FPUtil/BasicOperations.h" #include "test/UnitTest/FEnvSafeTest.h" #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" diff --git a/libc/test/src/math/smoke/dfmaf128_test.cpp b/libc/test/src/math/smoke/dfmaf128_test.cpp new file mode 100644 index 00000000000000..56c11747047b1e --- /dev/null +++ b/libc/test/src/math/smoke/dfmaf128_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for dfmaf128 
--------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "FmaTest.h" + +#include "src/math/dfmaf128.h" + +LIST_NARROWING_FMA_TESTS(double, float128, LIBC_NAMESPACE::dfmaf128) diff --git a/libc/test/src/math/smoke/dfmal_test.cpp b/libc/test/src/math/smoke/dfmal_test.cpp new file mode 100644 index 00000000000000..3c38f5eb7db9d5 --- /dev/null +++ b/libc/test/src/math/smoke/dfmal_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for dfmal -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "FmaTest.h" + +#include "src/math/dfmal.h" + +LIST_NARROWING_FMA_TESTS(double, long double, LIBC_NAMESPACE::dfmal) diff --git a/libc/test/src/math/smoke/dsubf128_test.cpp b/libc/test/src/math/smoke/dsubf128_test.cpp new file mode 100644 index 00000000000000..e496cdd2450702 --- /dev/null +++ b/libc/test/src/math/smoke/dsubf128_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for dsubf128 --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SubTest.h" + +#include "src/math/dsubf128.h" + +LIST_SUB_TESTS(double, float128, LIBC_NAMESPACE::dsubf128) diff --git a/libc/test/src/math/smoke/dsubl_test.cpp b/libc/test/src/math/smoke/dsubl_test.cpp new file mode 100644 index 00000000000000..98846e0b6e3b35 --- /dev/null +++ b/libc/test/src/math/smoke/dsubl_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for dsubl -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SubTest.h" + +#include "src/math/dsubl.h" + +LIST_SUB_TESTS(double, long double, LIBC_NAMESPACE::dsubl) diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp index f88ee2af35c52a..4263c9dccb6a56 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.cpp +++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp @@ -1086,6 +1086,9 @@ template void explain_ternary_operation_one_output_error(Operation, const TernaryInput &, long double, double, RoundingMode); + +template void explain_ternary_operation_one_output_error( + Operation, const TernaryInput &, double, double, RoundingMode); #ifdef LIBC_TYPES_HAS_FLOAT16 template void explain_ternary_operation_one_output_error( Operation, const TernaryInput &, float16, double, RoundingMode); @@ -1271,6 +1274,9 @@ template bool compare_ternary_operation_one_output(Operation, const TernaryInput &, long double, double, RoundingMode); + +template bool compare_ternary_operation_one_output( + Operation, const TernaryInput &, double, double, RoundingMode); #ifdef LIBC_TYPES_HAS_FLOAT16 template bool compare_ternary_operation_one_output(Operation, const TernaryInput &, From 910012e7c5854a7e04e15a5a8750aad41b8bb137 Mon Sep 17 00:00:00 2001 From: Sayhaan Siddiqui <49014204+sayhaan@users.noreply.github.com> Date: Wed, 31 Jul 2024 13:41:38 -0700 Subject: [PATCH 023/114] [BOLT][DWARF][NFC] Split DIEBuilder::finish (#101244) Split DIEBuilder::finish so that code updating .debug_names is in a separate function. --- bolt/include/bolt/Core/DIEBuilder.h | 17 ++++++-- bolt/lib/Core/DIEBuilder.cpp | 62 +++++++++++++++++++++-------- bolt/lib/Rewrite/DWARFRewriter.cpp | 16 ++++++-- 3 files changed, 71 insertions(+), 24 deletions(-) diff --git a/bolt/include/bolt/Core/DIEBuilder.h b/bolt/include/bolt/Core/DIEBuilder.h index 0b840c142ed812..e5b057ea1e42b7 100644 --- a/bolt/include/bolt/Core/DIEBuilder.h +++ b/bolt/include/bolt/Core/DIEBuilder.h @@ -127,6 +127,9 @@ class DIEBuilder { DWARFContext *DwarfContext{nullptr}; DWARFUnit *SkeletonCU{nullptr}; uint64_t UnitSize{0}; + /// Adds separate UnitSize counter for updating DebugNames + /// so there is no dependency between the functions. + uint64_t DebugNamesUnitSize{0}; llvm::DenseSet AllProcessed; DWARF5AcceleratorTable &DebugNamesTable; // Unordered map to handle name collision if output DWO directory is @@ -203,13 +206,16 @@ class DIEBuilder { /// Update references once the layout is finalized. void updateReferences(); - /// Update the Offset and Size of DIE, populate DebugNames table. + /// Update the Offset and Size of DIE. /// Along with current CU, and DIE being processed and the new DIE offset to /// be updated, it takes in Parents vector that can be empty if this DIE has /// no parents. - uint32_t finalizeDIEs(DWARFUnit &CU, DIE &Die, - std::optional Parent, - uint32_t NumberParentsInChain, uint32_t &CurOffset); + uint32_t finalizeDIEs(DWARFUnit &CU, DIE &Die, uint32_t &CurOffset); + + /// Populates DebugNames table. + void populateDebugNamesTable(DWARFUnit &CU, const DIE &Die, + std::optional Parent, + uint32_t NumberParentsInChain); void registerUnit(DWARFUnit &DU, bool NeedSort); @@ -338,6 +344,9 @@ class DIEBuilder { /// Finish current DIE construction. void finish(); + /// Update debug names table. 
+ void updateDebugNamesTable(); + // Interface to edit DIE template T *allocateDIEValue() { return new (getState().DIEAlloc) T; diff --git a/bolt/lib/Core/DIEBuilder.cpp b/bolt/lib/Core/DIEBuilder.cpp index 8f6195f6b6ea19..69cfd58a1df04b 100644 --- a/bolt/lib/Core/DIEBuilder.cpp +++ b/bolt/lib/Core/DIEBuilder.cpp @@ -461,17 +461,11 @@ getUnitForOffset(DIEBuilder &Builder, DWARFContext &DWCtx, return nullptr; } -uint32_t -DIEBuilder::finalizeDIEs(DWARFUnit &CU, DIE &Die, - std::optional Parent, - uint32_t NumberParentsInChain, uint32_t &CurOffset) { +uint32_t DIEBuilder::finalizeDIEs(DWARFUnit &CU, DIE &Die, + uint32_t &CurOffset) { getState().DWARFDieAddressesParsed.erase(Die.getOffset()); uint32_t CurSize = 0; Die.setOffset(CurOffset); - std::optional NameEntry = - DebugNamesTable.addAccelTableEntry( - CU, Die, SkeletonCU ? SkeletonCU->getDWOId() : std::nullopt, - NumberParentsInChain, Parent); // It is possible that an indexed debugging information entry has a parent // that is not indexed (for example, if its parent does not have a name // attribute). In such a case, a parent attribute may point to a nameless @@ -485,18 +479,13 @@ DIEBuilder::finalizeDIEs(DWARFUnit &CU, DIE &Die, // If Parent is nullopt and NumberParentsInChain is not zero, then forward // declaration was encountered in this DF traversal. Propagating nullopt for // Parent to children. - if (!Parent && NumberParentsInChain) - NameEntry = std::nullopt; - if (NameEntry) - ++NumberParentsInChain; for (DIEValue &Val : Die.values()) CurSize += Val.sizeOf(CU.getFormParams()); CurSize += getULEB128Size(Die.getAbbrevNumber()); CurOffset += CurSize; for (DIE &Child : Die.children()) { - uint32_t ChildSize = - finalizeDIEs(CU, Child, NameEntry, NumberParentsInChain, CurOffset); + uint32_t ChildSize = finalizeDIEs(CU, Child, CurOffset); CurSize += ChildSize; } // for children end mark. @@ -514,10 +503,9 @@ void DIEBuilder::finish() { DIE *UnitDIE = getUnitDIEbyUnit(CU); uint32_t HeaderSize = CU.getHeaderSize(); uint32_t CurOffset = HeaderSize; - DebugNamesTable.setCurrentUnit(CU, UnitStartOffset); std::vector> Parents; Parents.push_back(std::nullopt); - finalizeDIEs(CU, *UnitDIE, std::nullopt, 0, CurOffset); + finalizeDIEs(CU, *UnitDIE, CurOffset); DWARFUnitInfo &CurUnitInfo = getUnitInfoByDwarfUnit(CU); CurUnitInfo.UnitOffset = UnitStartOffset; @@ -548,6 +536,48 @@ void DIEBuilder::finish() { dbgs() << Twine::utohexstr(Address) << "\n"; } } +} + +void DIEBuilder::populateDebugNamesTable( + DWARFUnit &CU, const DIE &Die, + std::optional Parent, + uint32_t NumberParentsInChain) { + std::optional NameEntry = + DebugNamesTable.addAccelTableEntry( + CU, Die, SkeletonCU ? 
SkeletonCU->getDWOId() : std::nullopt, + NumberParentsInChain, Parent); + if (!Parent && NumberParentsInChain) + NameEntry = std::nullopt; + if (NameEntry) + ++NumberParentsInChain; + + for (const DIE &Child : Die.children()) + populateDebugNamesTable(CU, Child, NameEntry, NumberParentsInChain); +} + +void DIEBuilder::updateDebugNamesTable() { + auto finalizeDebugNamesTableForCU = [&](DWARFUnit &CU, + uint64_t &UnitStartOffset) -> void { + DIE *UnitDIE = getUnitDIEbyUnit(CU); + DebugNamesTable.setCurrentUnit(CU, UnitStartOffset); + populateDebugNamesTable(CU, *UnitDIE, std::nullopt, 0); + + DWARFUnitInfo &CurUnitInfo = getUnitInfoByDwarfUnit(CU); + UnitStartOffset += CurUnitInfo.UnitLength; + }; + + uint64_t TypeUnitStartOffset = 0; + for (DWARFUnit *CU : getState().DUList) { + if (!(CU->getVersion() < 5 && CU->isTypeUnit())) + break; + finalizeDebugNamesTableForCU(*CU, TypeUnitStartOffset); + } + + for (DWARFUnit *CU : getState().DUList) { + if (CU->getVersion() < 5 && CU->isTypeUnit()) + continue; + finalizeDebugNamesTableForCU(*CU, DebugNamesUnitSize); + } updateReferences(); } diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp index 3dfb65a744c8a4..98f81f44d64901 100644 --- a/bolt/lib/Rewrite/DWARFRewriter.cpp +++ b/bolt/lib/Rewrite/DWARFRewriter.cpp @@ -650,9 +650,8 @@ void DWARFRewriter::updateDebugInfo() { DebugRangesSectionWriter &TempRangesSectionWriter, DebugAddrWriter &AddressWriter, const std::string &DWOName, - const std::optional &DwarfOutputPath) { - DIEBuilder DWODIEBuilder(BC, &(SplitCU).getContext(), DebugNamesTable, - &Unit); + const std::optional &DwarfOutputPath, + DIEBuilder &DWODIEBuilder) { DWODIEBuilder.buildDWOUnit(SplitCU); DebugStrOffsetsWriter DWOStrOffstsWriter(BC); DebugStrWriter DWOStrWriter((SplitCU).getContext(), true); @@ -719,6 +718,7 @@ void DWARFRewriter::updateDebugInfo() { CUPartitionVector PartVec = partitionCUs(*BC.DwCtx); for (std::vector &Vec : PartVec) { DIEBlder.buildCompileUnits(Vec); + llvm::SmallVector, 72> DWODIEBuildersByCU; for (DWARFUnit *CU : DIEBlder.getProcessedCUs()) { createRangeLocListAddressWriters(*CU); std::optional SplitCU; @@ -738,11 +738,17 @@ void DWARFRewriter::updateDebugInfo() { : std::optional(opts::DwarfOutputPath.c_str()); std::string DWOName = DIEBlder.updateDWONameCompDir( *StrOffstsWriter, *StrWriter, *CU, DwarfOutputPath, std::nullopt); + auto DWODIEBuilderPtr = std::make_unique( + BC, &(**SplitCU).getContext(), DebugNamesTable, CU); + DIEBuilder &DWODIEBuilder = + *DWODIEBuildersByCU.emplace_back(std::move(DWODIEBuilderPtr)).get(); if (CU->getVersion() >= 5) StrOffstsWriter->finalizeSection(*CU, DIEBlder); processSplitCU(*CU, **SplitCU, DIEBlder, *TempRangesSectionWriter, - AddressWriter, DWOName, DwarfOutputPath); + AddressWriter, DWOName, DwarfOutputPath, DWODIEBuilder); } + for (std::unique_ptr &DWODIEBuilderPtr : DWODIEBuildersByCU) + DWODIEBuilderPtr->updateDebugNamesTable(); for (DWARFUnit *CU : DIEBlder.getProcessedCUs()) processMainBinaryCU(*CU, DIEBlder); finalizeCompileUnits(DIEBlder, *Streamer, OffsetMap, @@ -1442,6 +1448,7 @@ CUOffsetMap DWARFRewriter::finalizeTypeSections(DIEBuilder &DIEBlder, // generate and populate abbrevs here DIEBlder.generateAbbrevs(); DIEBlder.finish(); + DIEBlder.updateDebugNamesTable(); SmallVector OutBuffer; std::shared_ptr ObjOS = std::make_shared(OutBuffer); @@ -1646,6 +1653,7 @@ void DWARFRewriter::finalizeCompileUnits(DIEBuilder &DIEBlder, } DIEBlder.generateAbbrevs(); DIEBlder.finish(); + DIEBlder.updateDebugNamesTable(); // generate 
debug_info and CUMap for (DWARFUnit *CU : CUs) { emitUnit(DIEBlder, Streamer, *CU); From c6a3f4e2f2586cb1ce51306c305752a78bdba263 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 31 Jul 2024 13:55:35 -0700 Subject: [PATCH 024/114] [sanitizer] Make file headers more conventional Add "-*- C++ -*-" --- llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h | 2 +- .../include/llvm/Transforms/Instrumentation/DataFlowSanitizer.h | 2 +- llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h | 2 +- llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h index 6dfdfb729cf502..1b85766f6cb7cc 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h @@ -1,4 +1,4 @@ -//===--------- Definition of the AddressSanitizer class ---------*- C++ -*-===// +//===- AddressSanitizer.h - AddressSanitizer instrumentation ----*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/include/llvm/Transforms/Instrumentation/DataFlowSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/DataFlowSanitizer.h index 41ba05cd67f0c8..3256dddd12b388 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/DataFlowSanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/DataFlowSanitizer.h @@ -1,4 +1,4 @@ -//===- DataFlowSanitizer.h - dynamic data flow analysis -------------------===// +//===- DataFlowSanitizer.h - dynamic data flow analysis ---------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h index 0984e8ec32656a..f88d8323511185 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h @@ -1,4 +1,4 @@ -//===- Transforms/Instrumentation/MemorySanitizer.h - MSan Pass -----------===// +//===- MemorySanitizer.h - MemorySanitizer instrumentation ------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h index fd37130d545968..346951febf7359 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h @@ -1,4 +1,4 @@ -//===- Transforms/Instrumentation/ThreadSanitizer.h - TSan Pass -----------===// +//===- ThreadSanitizer.h - ThreadSanitizer instrumentation ------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
From 9effefbae8d96006a4dd29bb9ab8532fd408559d Mon Sep 17 00:00:00 2001 From: Haowei Wu Date: Wed, 31 Jul 2024 13:58:30 -0700 Subject: [PATCH 025/114] Revert "[lldb] Reland 2402b3213c2f with `/H` to debug the windows build issue" This reverts commit e72cdae47b4e263ea97b2bdd75cf44c1510cf3be, which broke LLVM's lldb builder for Windows msvc. --- .../Python/Interfaces/CMakeLists.txt | 9 +---- .../ScriptedProcessPythonInterface.cpp | 39 ++++--------------- .../ScriptedProcessPythonInterface.h | 18 ++------- .../CMakeLists.txt | 16 -------- .../Python/ScriptInterpreterPython.cpp | 2 +- 5 files changed, 12 insertions(+), 72 deletions(-) rename lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/{ScriptedProcessPythonInterface => }/ScriptedProcessPythonInterface.cpp (85%) rename lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/{ScriptedProcessPythonInterface => }/ScriptedProcessPythonInterface.h (88%) delete mode 100644 lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface/CMakeLists.txt diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/CMakeLists.txt b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/CMakeLists.txt index 9efb97286ce976..8c7e92bead32c8 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/CMakeLists.txt +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/CMakeLists.txt @@ -21,6 +21,7 @@ endif() add_lldb_library(lldbPluginScriptInterpreterPythonInterfaces ScriptedPythonInterface.cpp + ScriptedProcessPythonInterface.cpp ScriptedThreadPythonInterface.cpp LINK_LIBS @@ -37,13 +38,5 @@ add_lldb_library(lldbPluginScriptInterpreterPythonInterfaces add_subdirectory(OperatingSystemPythonInterface) add_subdirectory(ScriptedPlatformPythonInterface) -if (WIN32) - set(ORIGINAL_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - set(CMAKE_CXX_FLAGS "${ORIGINAL_CMAKE_CXX_FLAGS} /H") -endif() -add_subdirectory(ScriptedProcessPythonInterface) -if (WIN32) - set(CMAKE_CXX_FLAGS "${ORIGINAL_CMAKE_CXX_FLAGS}") -endif() add_subdirectory(ScriptedThreadPlanPythonInterface) diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface/ScriptedProcessPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.cpp similarity index 85% rename from lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface/ScriptedProcessPythonInterface.cpp rename to lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.cpp index f4fba0848fe272..313c597ce48f3c 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface/ScriptedProcessPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.cpp @@ -6,8 +6,11 @@ // //===----------------------------------------------------------------------===// -#include "lldb/Core/PluginManager.h" #include "lldb/Host/Config.h" +#if LLDB_ENABLE_PYTHON +// LLDB Python header must be included first +#include "../lldb-python.h" +#endif #include "lldb/Target/Process.h" #include "lldb/Utility/Log.h" #include "lldb/Utility/Status.h" @@ -15,16 +18,10 @@ #if LLDB_ENABLE_PYTHON -// clang-format off -// LLDB Python header must be included first -#include "../../lldb-python.h" -//clang-format on - -#include "../../SWIGPythonBridge.h" -#include "../../ScriptInterpreterPythonImpl.h" -#include "../ScriptedThreadPythonInterface.h" +#include "../SWIGPythonBridge.h" +#include 
"../ScriptInterpreterPythonImpl.h" #include "ScriptedProcessPythonInterface.h" - +#include "ScriptedThreadPythonInterface.h" #include using namespace lldb; @@ -32,8 +29,6 @@ using namespace lldb_private; using namespace lldb_private::python; using Locker = ScriptInterpreterPythonImpl::Locker; -LLDB_PLUGIN_DEFINE_ADV(ScriptedProcessPythonInterface, ScriptInterpreterPythonScriptedProcessPythonInterface) - ScriptedProcessPythonInterface::ScriptedProcessPythonInterface( ScriptInterpreterPythonImpl &interpreter) : ScriptedProcessInterface(), ScriptedPythonInterface(interpreter) {} @@ -213,24 +208,4 @@ StructuredData::DictionarySP ScriptedProcessPythonInterface::GetMetadata() { return dict; } -void ScriptedProcessPythonInterface::Initialize() { - const std::vector ci_usages = { - "process attach -C [-k key -v value ...]", - "process launch -C [-k key -v value ...]"}; - const std::vector api_usages = { - "SBAttachInfo.SetScriptedProcessClassName", - "SBAttachInfo.SetScriptedProcessDictionary", - "SBTarget.Attach", - "SBLaunchInfo.SetScriptedProcessClassName", - "SBLaunchInfo.SetScriptedProcessDictionary", - "SBTarget.Launch"}; - PluginManager::RegisterPlugin( - GetPluginNameStatic(), llvm::StringRef("Mock process state"), - CreateInstance, eScriptLanguagePython, {ci_usages, api_usages}); -} - -void ScriptedProcessPythonInterface::Terminate() { - PluginManager::UnregisterPlugin(CreateInstance); -} - #endif diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface/ScriptedProcessPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.h similarity index 88% rename from lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface/ScriptedProcessPythonInterface.h rename to lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.h index bb27734739f43a..c75caa9340f250 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface/ScriptedProcessPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.h @@ -10,18 +10,16 @@ #define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_INTERFACES_SCRIPTEDPROCESSPYTHONINTERFACE_H #include "lldb/Host/Config.h" -#include "lldb/Interpreter/Interfaces/ScriptedProcessInterface.h" #if LLDB_ENABLE_PYTHON -#include "../ScriptedPythonInterface.h" - +#include "ScriptedPythonInterface.h" +#include "lldb/Interpreter/Interfaces/ScriptedProcessInterface.h" #include namespace lldb_private { class ScriptedProcessPythonInterface : public ScriptedProcessInterface, - public ScriptedPythonInterface, - public PluginInterface { + public ScriptedPythonInterface { public: ScriptedProcessPythonInterface(ScriptInterpreterPythonImpl &interpreter); @@ -69,16 +67,6 @@ class ScriptedProcessPythonInterface : public ScriptedProcessInterface, StructuredData::DictionarySP GetMetadata() override; - static void Initialize(); - - static void Terminate(); - - static llvm::StringRef GetPluginNameStatic() { - return "ScriptedProcessPythonInterface"; - } - - llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } - private: lldb::ScriptedThreadInterfaceSP CreateScriptedThreadInterface() override; }; diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface/CMakeLists.txt b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface/CMakeLists.txt deleted file mode 100644 index 
66ed041853f675..00000000000000 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -add_lldb_library(lldbPluginScriptInterpreterPythonScriptedProcessPythonInterface PLUGIN - - ScriptedProcessPythonInterface.cpp - - LINK_LIBS - lldbCore - lldbHost - lldbInterpreter - lldbTarget - lldbPluginScriptInterpreterPython - ${Python3_LIBRARIES} - ${LLDB_LIBEDIT_LIBS} - - LINK_COMPONENTS - Support - ) diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index a78c76b5f94ff7..d34fdf14122f26 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -16,7 +16,7 @@ #include "Interfaces/OperatingSystemPythonInterface/OperatingSystemPythonInterface.h" #include "Interfaces/ScriptedPlatformPythonInterface/ScriptedPlatformPythonInterface.h" -#include "Interfaces/ScriptedProcessPythonInterface/ScriptedProcessPythonInterface.h" +#include "Interfaces/ScriptedProcessPythonInterface.h" #include "Interfaces/ScriptedThreadPlanPythonInterface/ScriptedThreadPlanPythonInterface.h" #include "PythonDataObjects.h" #include "PythonReadline.h" From 7583c484c81218ab4c8c2fba2774cb518d8bbd43 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 31 Jul 2024 14:18:20 -0700 Subject: [PATCH 026/114] [SCEV] Use power of two facts involving vscale when inferring wrap flags (#101380) SCEV has logic for inferring wrap flags on AddRecs which are known to control an exit based on whether the step is a power of two. This logic only considered constants, and thus did not trigger for steps such as (4 x vscale) which are common in scalably vectorized loops. The net effect is that we were very sensative to the preservation of nsw/nuw flags on such IVs, and could not infer trip counts if they got lost for any reason. --------- Co-authored-by: Nikita Popov --- llvm/include/llvm/Analysis/ScalarEvolution.h | 3 + llvm/lib/Analysis/ScalarEvolution.cpp | 130 ++++++++++-------- .../trip-count-scalable-stride.ll | 31 +++-- 3 files changed, 90 insertions(+), 74 deletions(-) diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index d9bfca763819f1..fbefa2bd074dd4 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1028,6 +1028,9 @@ class ScalarEvolution { /// Test if the given expression is known to be non-zero. bool isKnownNonZero(const SCEV *S); + /// Test if the given expression is known to be a power of 2. + bool isKnownToBeAPowerOfTwo(const SCEV *S, bool OrZero = false); + /// Splits SCEV expression \p S into two SCEVs. One of them is obtained from /// \p S by substitution of all AddRec sub-expression related to loop \p L /// with initial value of that SCEV. The second is obtained from \p S by diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index fb56d5d4366537..264ac392b16d13 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -9149,23 +9149,21 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromICmp( // behaviour), and we can prove the test sequence produced must repeat // the same values on self-wrap of the IV, then we can infer that IV // doesn't self wrap because if it did, we'd have an infinite (undefined) - // loop. + // loop. 
Note that a stride of 0 is trivially no-self-wrap by definition. if (ControllingFiniteLoop && isLoopInvariant(RHS, L)) { // TODO: We can peel off any functions which are invertible *in L*. Loop // invariant terms are effectively constants for our purposes here. auto *InnerLHS = LHS; if (auto *ZExt = dyn_cast(LHS)) InnerLHS = ZExt->getOperand(); - if (const SCEVAddRecExpr *AR = dyn_cast(InnerLHS)) { - auto *StrideC = dyn_cast(AR->getStepRecurrence(*this)); - if (!AR->hasNoSelfWrap() && AR->getLoop() == L && AR->isAffine() && - StrideC && StrideC->getAPInt().isPowerOf2()) { - auto Flags = AR->getNoWrapFlags(); - Flags = setFlags(Flags, SCEV::FlagNW); - SmallVector Operands{AR->operands()}; - Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags); - setNoWrapFlags(const_cast(AR), Flags); - } + if (const SCEVAddRecExpr *AR = dyn_cast(InnerLHS); + AR && !AR->hasNoSelfWrap() && AR->getLoop() == L && AR->isAffine() && + isKnownToBeAPowerOfTwo(AR->getStepRecurrence(*this), /*OrZero=*/true)) { + auto Flags = AR->getNoWrapFlags(); + Flags = setFlags(Flags, SCEV::FlagNW); + SmallVector Operands{AR->operands()}; + Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags); + setNoWrapFlags(const_cast(AR), Flags); } } @@ -10845,6 +10843,23 @@ bool ScalarEvolution::isKnownNonZero(const SCEV *S) { return getUnsignedRangeMin(S) != 0; } +bool ScalarEvolution::isKnownToBeAPowerOfTwo(const SCEV *S, bool OrZero) { + auto NonRecursive = [this](const SCEV *S) { + if (auto *C = dyn_cast(S)) + return C->getAPInt().isPowerOf2(); + // The vscale_range indicates vscale is a power-of-two. + return isa(S) && F.hasFnAttribute(Attribute::VScaleRange); + }; + + if (NonRecursive(S)) + return true; + + auto *Mul = dyn_cast(S); + if (!Mul) + return false; + return all_of(Mul->operands(), NonRecursive) && (OrZero || isKnownNonZero(S)); +} + std::pair ScalarEvolution::SplitIntoInitAndPostInc(const Loop *L, const SCEV *S) { // Compute SCEV on entry of loop L. @@ -12775,8 +12790,7 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, if (!isLoopInvariant(RHS, L)) return false; - auto *StrideC = dyn_cast(AR->getStepRecurrence(*this)); - if (!StrideC || !StrideC->getAPInt().isPowerOf2()) + if (!isKnownToBeAPowerOfTwo(AR->getStepRecurrence(*this), /*OrZero=*/true)) return false; if (!ControlsOnlyExit || !loopHasNoAbnormalExits(L)) @@ -13132,52 +13146,50 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, // "(Start - End) + (Stride - 1)" has unsigned overflow. const SCEV *One = getOne(Stride->getType()); bool MayAddOverflow = [&] { - if (auto *StrideC = dyn_cast(Stride)) { - if (StrideC->getAPInt().isPowerOf2()) { - // Suppose Stride is a power of two, and Start/End are unsigned - // integers. Let UMAX be the largest representable unsigned - // integer. - // - // By the preconditions of this function, we know - // "(Start + Stride * N) >= End", and this doesn't overflow. - // As a formula: - // - // End <= (Start + Stride * N) <= UMAX - // - // Subtracting Start from all the terms: - // - // End - Start <= Stride * N <= UMAX - Start - // - // Since Start is unsigned, UMAX - Start <= UMAX. Therefore: - // - // End - Start <= Stride * N <= UMAX - // - // Stride * N is a multiple of Stride. Therefore, - // - // End - Start <= Stride * N <= UMAX - (UMAX mod Stride) - // - // Since Stride is a power of two, UMAX + 1 is divisible by - // Stride. Therefore, UMAX mod Stride == Stride - 1. 
So we can - // write: - // - // End - Start <= Stride * N <= UMAX - Stride - 1 - // - // Dropping the middle term: - // - // End - Start <= UMAX - Stride - 1 - // - // Adding Stride - 1 to both sides: - // - // (End - Start) + (Stride - 1) <= UMAX - // - // In other words, the addition doesn't have unsigned overflow. - // - // A similar proof works if we treat Start/End as signed values. - // Just rewrite steps before "End - Start <= Stride * N <= UMAX" - // to use signed max instead of unsigned max. Note that we're - // trying to prove a lack of unsigned overflow in either case. - return false; - } + if (isKnownToBeAPowerOfTwo(Stride)) { + // Suppose Stride is a power of two, and Start/End are unsigned + // integers. Let UMAX be the largest representable unsigned + // integer. + // + // By the preconditions of this function, we know + // "(Start + Stride * N) >= End", and this doesn't overflow. + // As a formula: + // + // End <= (Start + Stride * N) <= UMAX + // + // Subtracting Start from all the terms: + // + // End - Start <= Stride * N <= UMAX - Start + // + // Since Start is unsigned, UMAX - Start <= UMAX. Therefore: + // + // End - Start <= Stride * N <= UMAX + // + // Stride * N is a multiple of Stride. Therefore, + // + // End - Start <= Stride * N <= UMAX - (UMAX mod Stride) + // + // Since Stride is a power of two, UMAX + 1 is divisible by + // Stride. Therefore, UMAX mod Stride == Stride - 1. So we can + // write: + // + // End - Start <= Stride * N <= UMAX - Stride - 1 + // + // Dropping the middle term: + // + // End - Start <= UMAX - Stride - 1 + // + // Adding Stride - 1 to both sides: + // + // (End - Start) + (Stride - 1) <= UMAX + // + // In other words, the addition doesn't have unsigned overflow. + // + // A similar proof works if we treat Start/End as signed values. + // Just rewrite steps before "End - Start <= Stride * N <= UMAX" + // to use signed max instead of unsigned max. Note that we're + // trying to prove a lack of unsigned overflow in either case. + return false; } if (Start == Stride || Start == getMinusSCEV(Stride, One)) { // If Start is equal to Stride, (End - Start) + (Stride - 1) == End diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll index 943389d07eb8b8..7c9498304e9394 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll @@ -364,9 +364,8 @@ for.end: ; preds = %for.body, %entry } ; The next two cases check to see if we can infer the flags on the IV -; of a countup loop using vscale strides. -; TODO: We should be able to because vscale is a power of two and these -; are finite loops by assumption. +; of a countup loop using vscale strides. vscale is a power of two +; and these are finite loops by assumption. 
define void @vscale_slt_noflags(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) { ; CHECK-LABEL: 'vscale_slt_noflags' @@ -374,15 +373,16 @@ define void @vscale_slt_noflags(ptr nocapture %A, i32 %n) mustprogress vscale_ra ; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32() ; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025) ; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] -; CHECK-NEXT: --> {0,+,vscale}<%for.body> U: full-set S: full-set Exits: <> LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {0,+,vscale}<%for.body> U: full-set S: full-set Exits: (vscale * ((-1 + %n) /u vscale)) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 -; CHECK-NEXT: --> {%A,+,(4 * vscale)}<%for.body> U: full-set S: full-set Exits: <> LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {%A,+,(4 * vscale)}<%for.body> U: full-set S: full-set Exits: ((4 * vscale * ((-1 + %n) /u vscale)) + %A) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %add = add i32 %i.05, %vscale -; CHECK-NEXT: --> {vscale,+,vscale}<%for.body> U: full-set S: full-set Exits: <> LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {vscale,+,vscale}<%for.body> U: full-set S: full-set Exits: (vscale * (1 + ((-1 + %n) /u vscale))) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: Determining loop execution counts for: @vscale_slt_noflags -; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. -; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count. -; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count. +; CHECK-NEXT: Loop %for.body: backedge-taken count is ((-1 + %n) /u vscale) +; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 1073741822 +; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u vscale) +; CHECK-NEXT: Loop %for.body: Trip multiple is 1 ; entry: %vscale = call i32 @llvm.vscale.i32() @@ -411,15 +411,16 @@ define void @vscalex4_ult_noflags(ptr nocapture %A, i32 %n) mustprogress vscale_ ; CHECK-NEXT: %VF = mul i32 %vscale, 4 ; CHECK-NEXT: --> (4 * vscale) U: [8,4097) S: [8,4097) ; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] -; CHECK-NEXT: --> {0,+,(4 * vscale)}<%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: <> LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {0,+,(4 * vscale)}<%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: (4 * vscale * ((-1 + %n) /u (4 * vscale))) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 -; CHECK-NEXT: --> {%A,+,(16 * vscale)}<%for.body> U: full-set S: full-set Exits: <> LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {%A,+,(16 * vscale)}<%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale))) + %A) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %add = add i32 %i.05, %VF -; CHECK-NEXT: --> {(4 * vscale),+,(4 * vscale)}<%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: <> LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {(4 * vscale),+,(4 * vscale)}<%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: (vscale * (4 + (4 * ((-1 + %n) /u (4 * vscale))))) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: Determining loop execution counts for: @vscalex4_ult_noflags -; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. 
-; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count. -; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count. +; CHECK-NEXT: Loop %for.body: backedge-taken count is ((-1 + %n) /u (4 * vscale)) +; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 536870910 +; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u (4 * vscale)) +; CHECK-NEXT: Loop %for.body: Trip multiple is 1 ; entry: %vscale = call i32 @llvm.vscale.i32() From 2aa96fcf751ee948702e8447de62d6bea8235e3a Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Wed, 31 Jul 2024 23:36:50 +0200 Subject: [PATCH 027/114] [mlir][Transforms] Dialect conversion: Skip materializations when running without converter (#101318) TODO: test case --- .../Transforms/Utils/DialectConversion.cpp | 38 +++++++++++-------- .../test-legalize-type-conversion.mlir | 3 +- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index f26aa0a1516a69..fdd0175ffae53f 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -1316,37 +1316,43 @@ Block *ConversionPatternRewriterImpl::applySignatureConversion( continue; } - // This is a 1->1+ mapping. 1->N mappings are not fully supported in the - // dialect conversion. Therefore, we need an argument materialization to - // turn the replacement block arguments into a single SSA value that can be - // used as a replacement. + // This is a 1->1+ mapping. auto replArgs = newBlock->getArguments().slice(inputMap->inputNo, inputMap->size); + + // When there is no type converter, assume that the new block argument + // types are legal. This is reasonable to assume because they were + // specified by the user. + // FIXME: This won't work for 1->N conversions because multiple output + // types are not supported in parts of the dialect conversion. In such a + // case, we currently use the original block argument type (produced by + // the argument materialization). + if (!converter && replArgs.size() == 1) { + mapping.map(origArg, replArgs[0]); + appendRewrite(block, origArg); + continue; + } + + // 1->N mappings are not fully supported in the dialect conversion. + // Therefore, we need an argument materialization to turn the replacement + // block arguments into a single SSA value (of the original type) that can + // be used as a replacement. Value argMat = buildUnresolvedMaterialization( MaterializationKind::Argument, newBlock, newBlock->begin(), origArg.getLoc(), /*inputs=*/replArgs, origArgType, converter); mapping.map(origArg, argMat); appendRewrite(block, origArg); + // Now legalize the type by building a target materialization. Type legalOutputType; - if (converter) { + if (converter) legalOutputType = converter->convertType(origArgType); - } else if (replArgs.size() == 1) { - // When there is no type converter, assume that the new block argument - // types are legal. This is reasonable to assume because they were - // specified by the user. - // FIXME: This won't work for 1->N conversions because multiple output - // types are not supported in parts of the dialect conversion. In such a - // case, we currently use the original block argument type (produced by - // the argument materialization). 
- legalOutputType = replArgs[0].getType(); - } if (legalOutputType && legalOutputType != origArgType) { Value targetMat = buildUnresolvedTargetMaterialization( origArg.getLoc(), argMat, legalOutputType, converter); mapping.map(argMat, targetMat); + appendRewrite(block, origArg); } - appendRewrite(block, origArg); } appendRewrite(newBlock, block, converter); diff --git a/mlir/test/Transforms/test-legalize-type-conversion.mlir b/mlir/test/Transforms/test-legalize-type-conversion.mlir index d0563fed8e5d94..07dfb49473f5eb 100644 --- a/mlir/test/Transforms/test-legalize-type-conversion.mlir +++ b/mlir/test/Transforms/test-legalize-type-conversion.mlir @@ -103,8 +103,9 @@ func.func @test_block_argument_not_converted() { // Make sure argument type changes aren't implicitly forwarded. func.func @test_signature_conversion_no_converter() { "test.signature_conversion_no_converter"() ({ - // expected-error@below {{failed to legalize unresolved materialization from ('f64') to 'f32' that remained live after conversion}} + // expected-error@below {{failed to materialize conversion for block argument #0 that remained live after conversion, type was 'f32'}} ^bb0(%arg0: f32): + // expected-note@below{{see existing live user here}} "test.type_consumer"(%arg0) : (f32) -> () "test.return"(%arg0) : (f32) -> () }) : () -> () From 951a36309787c39d102798c7b86b06caa1a35257 Mon Sep 17 00:00:00 2001 From: Peiming Liu Date: Wed, 31 Jul 2024 14:47:29 -0700 Subject: [PATCH 028/114] [mlir][sparse] implement `sparse_tensor.extract_value` operation. (#101220) --- .../Transforms/SparseIterationToScf.cpp | 24 +++++++++++++++---- .../Transforms/Sparsification.cpp | 10 ++++++++ .../Transforms/Utils/LoopEmitter.h | 5 ++++ .../sparse_kernels_to_iterator.mlir | 8 ++++--- 4 files changed, 40 insertions(+), 7 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseIterationToScf.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseIterationToScf.cpp index 1d614b7b293617..b1451dee738ac3 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseIterationToScf.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseIterationToScf.cpp @@ -2,6 +2,7 @@ #include "Utils/CodegenUtils.h" #include "Utils/SparseTensorIterator.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" #include "mlir/Dialect/SparseTensor/Transforms/Passes.h" @@ -10,8 +11,8 @@ using namespace mlir; using namespace mlir::sparse_tensor; -void convertLevelType(SparseTensorEncodingAttr enc, Level lvl, - SmallVectorImpl &fields) { +static void convertLevelType(SparseTensorEncodingAttr enc, Level lvl, + SmallVectorImpl &fields) { // Position and coordinate buffer in the sparse structure. if (enc.getLvlType(lvl).isWithPosLT()) fields.push_back(enc.getPosMemRefType()); @@ -71,6 +72,21 @@ class ExtractIterSpaceConverter } }; +/// Sparse codegen rule for number of entries operator. 
+class ExtractValOpConverter : public OneToNOpConversionPattern { +public: + using OneToNOpConversionPattern::OneToNOpConversionPattern; + LogicalResult + matchAndRewrite(ExtractValOp op, OpAdaptor adaptor, + OneToNPatternRewriter &rewriter) const override { + Location loc = op.getLoc(); + Value pos = adaptor.getIterator().back(); + Value valBuf = rewriter.create(loc, op.getTensor()); + rewriter.replaceOpWithNewOp(op, valBuf, pos); + return success(); + } +}; + class SparseIterateOpConverter : public OneToNOpConversionPattern { public: using OneToNOpConversionPattern::OneToNOpConversionPattern; @@ -193,6 +209,6 @@ void mlir::populateLowerSparseIterationToSCFPatterns( TypeConverter &converter, RewritePatternSet &patterns) { IterateOp::getCanonicalizationPatterns(patterns, patterns.getContext()); - patterns.add( - converter, patterns.getContext()); + patterns.add(converter, patterns.getContext()); } diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp index c612a52aa8d507..08fc104fcbeead 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -357,6 +357,9 @@ static Value genSubscript(CodegenEnv &env, OpBuilder &builder, OpOperand *t, const auto pos = env.emitter().getValPosits(tid); assert(!pos.empty()); args.append(pos); + // Simply returns the tensor to extract value using iterators. + if (env.options().sparseEmitStrategy == SparseEmitStrategy::kSparseIterator) + return t->get(); } else { // For dense tensors we push all level's coordinates onto `args`. const Level lvlRank = stt.getLvlRank(); @@ -512,9 +515,16 @@ static Value genTensorLoad(CodegenEnv &env, OpBuilder &builder, ExprId exp) { return genInsertionLoadReduce(env, builder, t); return genInsertionLoad(env, builder, t); } + // Actual load. SmallVector args; Value ptr = genSubscript(env, builder, t, args); + if (llvm::isa(ptr.getType())) { + assert(env.options().sparseEmitStrategy == + SparseEmitStrategy::kSparseIterator && + args.size() == 1); + return builder.create(loc, ptr, args.front()); + } return builder.create(loc, ptr, args); } diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/LoopEmitter.h b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/LoopEmitter.h index 2a884b10e36b06..f3e73e4692c1fd 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/LoopEmitter.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/LoopEmitter.h @@ -221,6 +221,11 @@ class LoopEmitter { /// Getters. /// SmallVector getValPosits(TensorId tid) const { + // Returns the iterator if we are generating sparse (co)iterate-based loops. + if (emitStrategy == SparseEmitStrategy::kSparseIterator) + return {spIterVals[tid].back()}; + + // Returns {[batch coords], last-level position}. 
SmallVector batchCrds = iters[tid].back().back()->getBatchCrds(); Value lastLvlPos = iters[tid].back().back()->getCurPosition().front(); batchCrds.push_back(lastLvlPos); diff --git a/mlir/test/Dialect/SparseTensor/sparse_kernels_to_iterator.mlir b/mlir/test/Dialect/SparseTensor/sparse_kernels_to_iterator.mlir index f5bbea0d340fb4..268b3940418b71 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_kernels_to_iterator.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_kernels_to_iterator.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s --sparse-reinterpret-map -sparsification="sparse-emit-strategy=sparse-iterator" --sparse-space-collapse --lower-sparse-iteration-to-scf | FileCheck %s +// RUN: mlir-opt %s --sparse-reinterpret-map -sparsification="sparse-emit-strategy=sparse-iterator" --cse --sparse-space-collapse --lower-sparse-iteration-to-scf --loop-invariant-code-motion | FileCheck %s #COO = #sparse_tensor.encoding<{ @@ -7,8 +7,7 @@ d1 : singleton(nonunique, soa), d2 : singleton(nonunique, soa), d3 : singleton(soa) - ), - explicitVal = 1 : i32 + ) }> // CHECK-LABEL: func.func @sqsum( @@ -17,7 +16,10 @@ // CHECK-DAG: %[[POS_BUF:.*]] = sparse_tensor.positions %{{.*}} {level = 0 : index} : tensor to memref // CHECK: %[[POS_LO:.*]] = memref.load %[[POS_BUF]]{{\[}}%[[C0]]] : memref // CHECK: %[[POS_HI:.*]] = memref.load %[[POS_BUF]]{{\[}}%[[C1]]] : memref +// CHECK: %[[VAL_BUF:.*]] = sparse_tensor.values %{{.*}} : tensor to memref // CHECK: %[[SQ_SUM:.*]] = scf.for %[[POS:.*]] = %[[POS_LO]] to %[[POS_HI]] step %[[C1]] {{.*}} { +// CHECK: %[[VAL:.*]] = memref.load %[[VAL_BUF]]{{\[}}%[[POS]]] : memref +// CHECK: %[[MUL:.*]] = arith.muli %[[VAL]], %[[VAL]] : i32 // CHECK: %[[SUM:.*]] = arith.addi // CHECK: scf.yield %[[SUM]] : i32 // CHECK: } From c2dc46cd1532c57e3d16c69f54edeed67e2d46c7 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 31 Jul 2024 14:41:30 -0700 Subject: [PATCH 029/114] [TableGen] Pass ValueTypeByHwMode by const reference in a couple places. NFC ValueTypeByHwMode contains a std::map. We shouldn't copy it if we don't need to . Fixes #101406. --- llvm/utils/TableGen/Common/CodeGenDAGPatterns.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.h b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.h index bac213a356d845..2f1b9aadd9ea96 100644 --- a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.h +++ b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.h @@ -264,7 +264,8 @@ struct TypeInfer { bool MergeInTypeInfo(TypeSetByHwMode &Out, MVT::SimpleValueType InVT) const { return MergeInTypeInfo(Out, TypeSetByHwMode(InVT)); } - bool MergeInTypeInfo(TypeSetByHwMode &Out, ValueTypeByHwMode InVT) const { + bool MergeInTypeInfo(TypeSetByHwMode &Out, + const ValueTypeByHwMode &InVT) const { return MergeInTypeInfo(Out, TypeSetByHwMode(InVT)); } @@ -841,7 +842,8 @@ class TreePatternNode : public RefCountedBase { TreePattern &TP); bool UpdateNodeType(unsigned ResNo, MVT::SimpleValueType InTy, TreePattern &TP); - bool UpdateNodeType(unsigned ResNo, ValueTypeByHwMode InTy, TreePattern &TP); + bool UpdateNodeType(unsigned ResNo, const ValueTypeByHwMode &InTy, + TreePattern &TP); // Update node type with types inferred from an instruction operand or result // def from the ins/outs lists. 
@@ -996,7 +998,7 @@ inline bool TreePatternNode::UpdateNodeType(unsigned ResNo, } inline bool TreePatternNode::UpdateNodeType(unsigned ResNo, - ValueTypeByHwMode InTy, + const ValueTypeByHwMode &InTy, TreePattern &TP) { TypeSetByHwMode VTS(InTy); TP.getInfer().expandOverloads(VTS); From 24f8d1009e5359590a619cbbf596229ae5bfbbca Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 31 Jul 2024 14:50:25 -0700 Subject: [PATCH 030/114] [TableGen] Add an explicit cast to allow one TypeSetByHwMode constructor to be removed. NFC This constructor was taking a ValueTypeByMode by value to create an ArrayRef. By adding an explicit cast from ValueTypeByHwMode to TypeSetByHwMode we allow the ArrayRef to be implicitly converted from a single element. --- llvm/utils/TableGen/Common/CodeGenDAGPatterns.h | 2 -- llvm/utils/TableGen/GlobalISelEmitter.cpp | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.h b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.h index 2f1b9aadd9ea96..b4de20bb13184c 100644 --- a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.h +++ b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.h @@ -193,8 +193,6 @@ struct TypeSetByHwMode : public InfoByHwMode { TypeSetByHwMode &operator=(const TypeSetByHwMode &) = default; TypeSetByHwMode(MVT::SimpleValueType VT) : TypeSetByHwMode(ValueTypeByHwMode(VT)) {} - TypeSetByHwMode(ValueTypeByHwMode VT) - : TypeSetByHwMode(ArrayRef(&VT, 1)) {} TypeSetByHwMode(ArrayRef VTList); SetType &getOrCreate(unsigned Mode) { return Map[Mode]; } diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp index c29cb4edec181a..2ebe8f75cd6fe3 100644 --- a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -1162,7 +1162,7 @@ Error GlobalISelEmitter::importChildMatcher( OperandMatcher &OM = InsnOperand.getInsnMatcher().addOperand(0, "", TempOpIdx); if (auto Error = - OM.addTypeCheckPredicate(VTy, false /* OperandIsAPointer */)) + OM.addTypeCheckPredicate(TypeSetByHwMode(VTy), false /* OperandIsAPointer */)) return failedImport(toString(std::move(Error)) + " for result of Src pattern operator"); From bf1666fb0bc19ffa18072e2727e4611c293a9aee Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 31 Jul 2024 16:53:09 -0500 Subject: [PATCH 031/114] [libc++] Drop support for the C++20 Synchronization Library before C++20 (#82008) When we initially implemented the C++20 synchronization library, we reluctantly accepted for the implementation to be backported to C++03 upon request from the person who provided the patch. This was when we were only starting to have experience with the issues this can create, so we flinched. Nowadays, we have a much stricter stance about not backporting features to previous standards. We have recently started fixing several bugs (and near bugs) in our implementation of the synchronization library. A recurring theme during these reviews has been how difficult to understand the current code is, and upon inspection it becomes clear that being able to use a few recent C++ features (in particular lambdas) would help a great deal. The code would still be pretty intricate, but it would be a lot easier to reason about the flow of callbacks through things like __thread_poll_with_backoff. As a result, this patch drops support for the synchronization library before C++20. 
This makes us more strictly conforming and opens the door to major simplifications, in particular around atomic_wait which was supported all the way to C++03. This change will probably have some impact on downstream users, however since the C++20 synchronization library was added only in LLVM 10 (~3 years ago) and it's quite a niche feature, the set of people trying to use this part of the library before C++20 should be reasonably small. --- libcxx/.clang-format | 1 - libcxx/docs/ReleaseNotes/20.rst | 4 +- libcxx/include/__atomic/atomic.h | 16 ++-- libcxx/include/__atomic/atomic_base.h | 2 + libcxx/include/__atomic/atomic_flag.h | 38 ++++---- libcxx/include/__config | 8 -- libcxx/include/atomic | 92 +++++++++---------- libcxx/include/barrier | 10 +- libcxx/include/latch | 10 +- libcxx/include/semaphore | 16 ++-- .../general.compile.pass.cpp | 2 + .../pointer.compile.pass.cpp | 2 + .../atomic_notify_all.pass.cpp | 5 +- .../atomic_notify_one.pass.cpp | 5 +- .../atomic_wait.pass.cpp | 5 +- .../atomic_wait_explicit.pass.cpp | 5 +- .../std/thread/thread.barrier/arrive.pass.cpp | 5 +- .../thread.barrier/arrive_and_drop.pass.cpp | 5 +- .../thread.barrier/arrive_and_wait.pass.cpp | 5 +- .../thread/thread.barrier/completion.pass.cpp | 5 +- .../thread.barrier/ctor.compile.pass.cpp | 5 +- .../std/thread/thread.barrier/max.pass.cpp | 5 +- .../thread.latch/arrive_and_wait.pass.cpp | 5 +- .../thread/thread.latch/count_down.pass.cpp | 5 +- .../std/thread/thread.latch/ctor.pass.cpp | 5 +- .../test/std/thread/thread.latch/max.pass.cpp | 5 +- .../std/thread/thread.latch/try_wait.pass.cpp | 5 +- .../thread/thread.semaphore/acquire.pass.cpp | 5 +- .../thread/thread.semaphore/binary.pass.cpp | 5 +- .../thread.semaphore/ctor.compile.pass.cpp | 5 +- .../std/thread/thread.semaphore/max.pass.cpp | 5 +- .../thread/thread.semaphore/release.pass.cpp | 5 +- .../thread/thread.semaphore/timed.pass.cpp | 5 +- .../thread.semaphore/try_acquire.pass.cpp | 5 +- 34 files changed, 120 insertions(+), 191 deletions(-) diff --git a/libcxx/.clang-format b/libcxx/.clang-format index 871920f15b5bc9..b2ca452931fec5 100644 --- a/libcxx/.clang-format +++ b/libcxx/.clang-format @@ -24,7 +24,6 @@ AttributeMacros: [ '_LIBCPP_CONSTEXPR_SINCE_CXX23', '_LIBCPP_CONSTEXPR', '_LIBCPP_CONSTINIT', - '_LIBCPP_DEPRECATED_ATOMIC_SYNC', '_LIBCPP_DEPRECATED_IN_CXX11', '_LIBCPP_DEPRECATED_IN_CXX14', '_LIBCPP_DEPRECATED_IN_CXX17', diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst index f959c8829277e6..960fdd7ce05626 100644 --- a/libcxx/docs/ReleaseNotes/20.rst +++ b/libcxx/docs/ReleaseNotes/20.rst @@ -53,7 +53,9 @@ Deprecations and Removals - TODO: The ``LIBCXX_ENABLE_ASSERTIONS`` CMake variable and the ``_LIBCPP_ENABLE_ASSERTIONS`` macro that were used to enable the safe mode will be removed in LLVM 20. -- TODO: The C++20 synchronization library will be removed entirely in language modes prior to C++20 in LLVM 20. +- Support for the C++20 synchronization library (````, ````, ``atomic::wait``, etc.) has been + removed in language modes prior to C++20. If you are using these features prior to C++20, you will need to + update to ``-std=c++20``. 
- TODO: The relational operators for ``std::chrono::weekday`` will be removed entirely, and the ``_LIBCPP_ENABLE_REMOVED_WEEKDAY_RELATIONAL_OPERATORS`` macro that was used to re-enable this extension will be diff --git a/libcxx/include/__atomic/atomic.h b/libcxx/include/__atomic/atomic.h index bd3f659c22df01..bcea21f5ce2e17 100644 --- a/libcxx/include/__atomic/atomic.h +++ b/libcxx/include/__atomic/atomic.h @@ -429,6 +429,8 @@ _LIBCPP_HIDE_FROM_ABI bool atomic_compare_exchange_strong_explicit( return __o->compare_exchange_strong(*__e, __d, __s, __f); } +#if _LIBCPP_STD_VER >= 20 + // atomic_wait template @@ -462,29 +464,27 @@ atomic_wait_explicit(const atomic<_Tp>* __o, typename atomic<_Tp>::value_type __ // atomic_notify_one template -_LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void -atomic_notify_one(volatile atomic<_Tp>* __o) _NOEXCEPT { +_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void atomic_notify_one(volatile atomic<_Tp>* __o) _NOEXCEPT { __o->notify_one(); } template -_LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void -atomic_notify_one(atomic<_Tp>* __o) _NOEXCEPT { +_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void atomic_notify_one(atomic<_Tp>* __o) _NOEXCEPT { __o->notify_one(); } // atomic_notify_all template -_LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void -atomic_notify_all(volatile atomic<_Tp>* __o) _NOEXCEPT { +_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void atomic_notify_all(volatile atomic<_Tp>* __o) _NOEXCEPT { __o->notify_all(); } template -_LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void -atomic_notify_all(atomic<_Tp>* __o) _NOEXCEPT { +_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void atomic_notify_all(atomic<_Tp>* __o) _NOEXCEPT { __o->notify_all(); } +#endif // _LIBCPP_STD_VER >= 20 + // atomic_fetch_add template diff --git a/libcxx/include/__atomic/atomic_base.h b/libcxx/include/__atomic/atomic_base.h index 7e26434c9c3a0a..93f5c4cff0d1bc 100644 --- a/libcxx/include/__atomic/atomic_base.h +++ b/libcxx/include/__atomic/atomic_base.h @@ -101,6 +101,7 @@ struct __atomic_base // false return std::__cxx_atomic_compare_exchange_strong(std::addressof(__a_), std::addressof(__e), __d, __m, __m); } +#if _LIBCPP_STD_VER >= 20 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT { std::__atomic_wait(*this, __v, __m); @@ -117,6 +118,7 @@ struct __atomic_base // false std::__atomic_notify_all(*this); } _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_all() _NOEXCEPT { std::__atomic_notify_all(*this); } +#endif // _LIBCPP_STD_VER >= 20 #if _LIBCPP_STD_VER >= 20 _LIBCPP_HIDE_FROM_ABI constexpr __atomic_base() noexcept(is_nothrow_default_constructible_v<_Tp>) : __a_(_Tp()) {} diff --git a/libcxx/include/__atomic/atomic_flag.h b/libcxx/include/__atomic/atomic_flag.h index 00b157cdff78b7..abebfc112cb8ef 100644 --- a/libcxx/include/__atomic/atomic_flag.h +++ b/libcxx/include/__atomic/atomic_flag.h @@ -48,26 +48,24 @@ struct atomic_flag { __cxx_atomic_store(&__a_, _LIBCPP_ATOMIC_FLAG_TYPE(false), __m); } - _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void - wait(bool __v, memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT { +#if _LIBCPP_STD_VER >= 20 + _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(bool __v, memory_order __m = memory_order_seq_cst) const + volatile _NOEXCEPT { 
std::__atomic_wait(*this, _LIBCPP_ATOMIC_FLAG_TYPE(__v), __m); } - _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void + _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(bool __v, memory_order __m = memory_order_seq_cst) const _NOEXCEPT { std::__atomic_wait(*this, _LIBCPP_ATOMIC_FLAG_TYPE(__v), __m); } - _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() volatile _NOEXCEPT { - std::__atomic_notify_one(*this); - } - _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() _NOEXCEPT { + _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() volatile _NOEXCEPT { std::__atomic_notify_one(*this); } + _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() _NOEXCEPT { std::__atomic_notify_one(*this); } _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_all() volatile _NOEXCEPT { std::__atomic_notify_all(*this); } - _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_all() _NOEXCEPT { - std::__atomic_notify_all(*this); - } + _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_all() _NOEXCEPT { std::__atomic_notify_all(*this); } +#endif #if _LIBCPP_STD_VER >= 20 _LIBCPP_HIDE_FROM_ABI constexpr atomic_flag() _NOEXCEPT : __a_(false) {} @@ -144,45 +142,45 @@ inline _LIBCPP_HIDE_FROM_ABI void atomic_flag_clear_explicit(atomic_flag* __o, m __o->clear(__m); } -inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void +#if _LIBCPP_STD_VER >= 20 +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_wait(const volatile atomic_flag* __o, bool __v) _NOEXCEPT { __o->wait(__v); } -inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_wait(const atomic_flag* __o, bool __v) _NOEXCEPT { __o->wait(__v); } -inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_wait_explicit(const volatile atomic_flag* __o, bool __v, memory_order __m) _NOEXCEPT { __o->wait(__v, __m); } -inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_wait_explicit(const atomic_flag* __o, bool __v, memory_order __m) _NOEXCEPT { __o->wait(__v, __m); } -inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_notify_one(volatile atomic_flag* __o) _NOEXCEPT { __o->notify_one(); } -inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void -atomic_flag_notify_one(atomic_flag* __o) _NOEXCEPT { +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_notify_one(atomic_flag* __o) _NOEXCEPT { __o->notify_one(); } -inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_notify_all(volatile atomic_flag* __o) _NOEXCEPT { __o->notify_all(); } -inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void -atomic_flag_notify_all(atomic_flag* __o) _NOEXCEPT { +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_notify_all(atomic_flag* __o) _NOEXCEPT { __o->notify_all(); } 
+#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__config b/libcxx/include/__config index 0be25a5fd226f8..392053a64a8dc1 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -699,14 +699,6 @@ typedef __char32_t char32_t; # define _LIBCPP_DEPRECATED_(m) # endif -# if _LIBCPP_STD_VER < 20 -# define _LIBCPP_DEPRECATED_ATOMIC_SYNC \ - _LIBCPP_DEPRECATED_("The C++20 synchronization library has been deprecated prior to C++20. Please update to " \ - "using -std=c++20 if you need to use these facilities.") -# else -# define _LIBCPP_DEPRECATED_ATOMIC_SYNC /* nothing */ -# endif - # if !defined(_LIBCPP_CXX03_LANG) # define _LIBCPP_DEPRECATED_IN_CXX11 _LIBCPP_DEPRECATED # else diff --git a/libcxx/include/atomic b/libcxx/include/atomic index 0d13619d6ce458..772ac998615a93 100644 --- a/libcxx/include/atomic +++ b/libcxx/include/atomic @@ -101,12 +101,12 @@ struct atomic bool compare_exchange_strong(T& expc, T desr, memory_order m = memory_order_seq_cst) noexcept; - void wait(T, memory_order = memory_order::seq_cst) const volatile noexcept; - void wait(T, memory_order = memory_order::seq_cst) const noexcept; - void notify_one() volatile noexcept; - void notify_one() noexcept; - void notify_all() volatile noexcept; - void notify_all() noexcept; + void wait(T, memory_order = memory_order::seq_cst) const volatile noexcept; // since C++20 + void wait(T, memory_order = memory_order::seq_cst) const noexcept; // since C++20 + void notify_one() volatile noexcept; // since C++20 + void notify_one() noexcept; // since C++20 + void notify_all() volatile noexcept; // since C++20 + void notify_all() noexcept; // since C++20 }; template <> @@ -184,12 +184,12 @@ struct atomic integral operator^=(integral op) volatile noexcept; integral operator^=(integral op) noexcept; - void wait(integral, memory_order = memory_order::seq_cst) const volatile noexcept; - void wait(integral, memory_order = memory_order::seq_cst) const noexcept; - void notify_one() volatile noexcept; - void notify_one() noexcept; - void notify_all() volatile noexcept; - void notify_all() noexcept; + void wait(integral, memory_order = memory_order::seq_cst) const volatile noexcept; // since C++20 + void wait(integral, memory_order = memory_order::seq_cst) const noexcept; // since C++20 + void notify_one() volatile noexcept; // since C++20 + void notify_one() noexcept; // since C++20 + void notify_all() volatile noexcept; // since C++20 + void notify_all() noexcept; // since C++20 }; template @@ -254,12 +254,12 @@ struct atomic T* operator-=(ptrdiff_t op) volatile noexcept; T* operator-=(ptrdiff_t op) noexcept; - void wait(T*, memory_order = memory_order::seq_cst) const volatile noexcept; - void wait(T*, memory_order = memory_order::seq_cst) const noexcept; - void notify_one() volatile noexcept; - void notify_one() noexcept; - void notify_all() volatile noexcept; - void notify_all() noexcept; + void wait(T*, memory_order = memory_order::seq_cst) const volatile noexcept; // since C++20 + void wait(T*, memory_order = memory_order::seq_cst) const noexcept; // since C++20 + void notify_one() volatile noexcept; // since C++20 + void notify_one() noexcept; // since C++20 + void notify_all() volatile noexcept; // since C++20 + void notify_all() noexcept; // since C++20 }; template<> @@ -321,12 +321,12 @@ struct atomic { // since C++20 floating-point-type operator-=(floating-point-type) volatile noexcept; floating-point-type operator-=(floating-point-type) noexcept; - void wait(floating-point-type, 
memory_order = memory_order::seq_cst) const volatile noexcept; - void wait(floating-point-type, memory_order = memory_order::seq_cst) const noexcept; - void notify_one() volatile noexcept; - void notify_one() noexcept; - void notify_all() volatile noexcept; - void notify_all() noexcept; + void wait(floating-point-type, memory_order = memory_order::seq_cst) const volatile noexcept; // since C++20 + void wait(floating-point-type, memory_order = memory_order::seq_cst) const noexcept; // since C++20 + void notify_one() volatile noexcept; // since C++20 + void notify_one() noexcept; // since C++20 + void notify_all() volatile noexcept; // since C++20 + void notify_all() noexcept; // since C++20 }; // [atomics.nonmembers], non-member functions @@ -443,23 +443,23 @@ template memory_order) noexcept; template - void atomic_wait(const volatile atomic*, atomic::value_type) noexcept; + void atomic_wait(const volatile atomic*, atomic::value_type) noexcept; // since C++20 template - void atomic_wait(const atomic*, atomic::value_type) noexcept; + void atomic_wait(const atomic*, atomic::value_type) noexcept; // since C++20 template - void atomic_wait_explicit(const volatile atomic*, atomic::value_type, + void atomic_wait_explicit(const volatile atomic*, atomic::value_type, // since C++20 memory_order) noexcept; template - void atomic_wait_explicit(const atomic*, atomic::value_type, + void atomic_wait_explicit(const atomic*, atomic::value_type, // since C++20 memory_order) noexcept; template - void atomic_notify_one(volatile atomic*) noexcept; + void atomic_notify_one(volatile atomic*) noexcept; // since C++20 template - void atomic_notify_one(atomic*) noexcept; + void atomic_notify_one(atomic*) noexcept; // since C++20 template - void atomic_notify_all(volatile atomic*) noexcept; + void atomic_notify_all(volatile atomic*) noexcept; // since C++20 template - void atomic_notify_all(atomic*) noexcept; + void atomic_notify_all(atomic*) noexcept; // since C++20 // Atomics for standard typedef types @@ -534,12 +534,12 @@ typedef struct atomic_flag void clear(memory_order m = memory_order_seq_cst) volatile noexcept; void clear(memory_order m = memory_order_seq_cst) noexcept; - void wait(bool, memory_order = memory_order::seq_cst) const volatile noexcept; - void wait(bool, memory_order = memory_order::seq_cst) const noexcept; - void notify_one() volatile noexcept; - void notify_one() noexcept; - void notify_all() volatile noexcept; - void notify_all() noexcept; + void wait(bool, memory_order = memory_order::seq_cst) const volatile noexcept; // since C++20 + void wait(bool, memory_order = memory_order::seq_cst) const noexcept; // since C++20 + void notify_one() volatile noexcept; // since C++20 + void notify_one() noexcept; // since C++20 + void notify_all() volatile noexcept; // since C++20 + void notify_all() noexcept; // since C++20 } atomic_flag; bool atomic_flag_test(volatile atomic_flag* obj) noexcept; @@ -557,14 +557,14 @@ void atomic_flag_clear(atomic_flag* obj) noexcept; void atomic_flag_clear_explicit(volatile atomic_flag* obj, memory_order m) noexcept; void atomic_flag_clear_explicit(atomic_flag* obj, memory_order m) noexcept; -void atomic_wait(const volatile atomic_flag* obj, T old) noexcept; -void atomic_wait(const atomic_flag* obj, T old) noexcept; -void atomic_wait_explicit(const volatile atomic_flag* obj, T old, memory_order m) noexcept; -void atomic_wait_explicit(const atomic_flag* obj, T old, memory_order m) noexcept; -void atomic_one(volatile atomic_flag* obj) noexcept; -void 
atomic_one(atomic_flag* obj) noexcept; -void atomic_all(volatile atomic_flag* obj) noexcept; -void atomic_all(atomic_flag* obj) noexcept; +void atomic_wait(const volatile atomic_flag* obj, T old) noexcept; // since C++20 +void atomic_wait(const atomic_flag* obj, T old) noexcept; // since C++20 +void atomic_wait_explicit(const volatile atomic_flag* obj, T old, memory_order m) noexcept; // since C++20 +void atomic_wait_explicit(const atomic_flag* obj, T old, memory_order m) noexcept; // since C++20 +void atomic_one(volatile atomic_flag* obj) noexcept; // since C++20 +void atomic_one(atomic_flag* obj) noexcept; // since C++20 +void atomic_all(volatile atomic_flag* obj) noexcept; // since C++20 +void atomic_all(atomic_flag* obj) noexcept; // since C++20 // fences diff --git a/libcxx/include/barrier b/libcxx/include/barrier index edee181273e248..ba29ebc3212ee8 100644 --- a/libcxx/include/barrier +++ b/libcxx/include/barrier @@ -17,7 +17,7 @@ namespace std { template - class barrier + class barrier // since C++20 { public: using arrival_token = see below; @@ -68,7 +68,7 @@ namespace std _LIBCPP_PUSH_MACROS # include <__undef_macros> -# if _LIBCPP_STD_VER >= 14 +# if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -254,7 +254,7 @@ public: # endif // !_LIBCPP_HAS_NO_TREE_BARRIER template -class _LIBCPP_DEPRECATED_ATOMIC_SYNC barrier { +class barrier { __barrier_base<_CompletionF> __b_; public: @@ -290,7 +290,7 @@ public: _LIBCPP_END_NAMESPACE_STD -# endif // _LIBCPP_STD_VER >= 14 +# endif // _LIBCPP_STD_VER >= 20 _LIBCPP_POP_MACROS @@ -305,4 +305,4 @@ _LIBCPP_POP_MACROS # include #endif -#endif //_LIBCPP_BARRIER +#endif // _LIBCPP_BARRIER diff --git a/libcxx/include/latch b/libcxx/include/latch index 81d6028a9c2ce1..b56e49bc768bf9 100644 --- a/libcxx/include/latch +++ b/libcxx/include/latch @@ -16,7 +16,7 @@ namespace std { - class latch + class latch // since C++20 { public: static constexpr ptrdiff_t max() noexcept; @@ -59,11 +59,11 @@ namespace std _LIBCPP_PUSH_MACROS # include <__undef_macros> -# if _LIBCPP_STD_VER >= 14 +# if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD -class _LIBCPP_DEPRECATED_ATOMIC_SYNC latch { +class latch { __atomic_base __a_; public: @@ -116,7 +116,7 @@ private: _LIBCPP_END_NAMESPACE_STD -# endif // _LIBCPP_STD_VER >= 14 +# endif // _LIBCPP_STD_VER >= 20 _LIBCPP_POP_MACROS @@ -126,4 +126,4 @@ _LIBCPP_POP_MACROS # include #endif -#endif //_LIBCPP_LATCH +#endif // _LIBCPP_LATCH diff --git a/libcxx/include/semaphore b/libcxx/include/semaphore index 95a4375f21c175..bf6317c587e2f9 100644 --- a/libcxx/include/semaphore +++ b/libcxx/include/semaphore @@ -16,7 +16,7 @@ namespace std { template -class counting_semaphore +class counting_semaphore // since C++20 { public: static constexpr ptrdiff_t max() noexcept; @@ -39,7 +39,7 @@ private: ptrdiff_t counter; // exposition only }; -using binary_semaphore = counting_semaphore<1>; +using binary_semaphore = counting_semaphore<1>; // since C++20 } @@ -68,7 +68,7 @@ using binary_semaphore = counting_semaphore<1>; _LIBCPP_PUSH_MACROS # include <__undef_macros> -# if _LIBCPP_STD_VER >= 14 +# if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -124,7 +124,7 @@ private: }; template -class _LIBCPP_DEPRECATED_ATOMIC_SYNC counting_semaphore { +class counting_semaphore { __atomic_semaphore_base __semaphore_; public: @@ -169,13 +169,11 @@ public: } }; -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -using binary_semaphore _LIBCPP_DEPRECATED_ATOMIC_SYNC = counting_semaphore<1>; -_LIBCPP_SUPPRESS_DEPRECATED_POP +using binary_semaphore = 
counting_semaphore<1>; _LIBCPP_END_NAMESPACE_STD -# endif // _LIBCPP_STD_VER >= 14 +# endif // _LIBCPP_STD_VER >= 20 _LIBCPP_POP_MACROS @@ -185,4 +183,4 @@ _LIBCPP_POP_MACROS # include #endif -#endif //_LIBCPP_SEMAPHORE +#endif // _LIBCPP_SEMAPHORE diff --git a/libcxx/test/std/atomics/atomics.types.generic/general.compile.pass.cpp b/libcxx/test/std/atomics/atomics.types.generic/general.compile.pass.cpp index fead6e2e5f6c2c..817a70d2ce3642 100644 --- a/libcxx/test/std/atomics/atomics.types.generic/general.compile.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.generic/general.compile.pass.cpp @@ -94,9 +94,11 @@ void test() { TEST_IGNORE_NODISCARD a.compare_exchange_weak(v, v); TEST_IGNORE_NODISCARD a.compare_exchange_strong(v, v, m); +#if TEST_STD_VER >= 20 a.wait(v); a.notify_one(); a.notify_all(); +#endif } void test() { diff --git a/libcxx/test/std/atomics/atomics.types.generic/pointer.compile.pass.cpp b/libcxx/test/std/atomics/atomics.types.generic/pointer.compile.pass.cpp index 961aed3b4fb1a9..c62127f3883b01 100644 --- a/libcxx/test/std/atomics/atomics.types.generic/pointer.compile.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.generic/pointer.compile.pass.cpp @@ -128,9 +128,11 @@ void test() { a += 0; a -= 0; +#if TEST_STD_VER >= 20 a.wait(v); a.notify_one(); a.notify_all(); +#endif } void test() { diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_all.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_all.pass.cpp index 0ec530c922e707..fc159b15e78e12 100644 --- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_all.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_all.pass.cpp @@ -7,12 +7,9 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// UNSUPPORTED: c++03 +// UNSUPPORTED: c++03, c++11, c++14, c++17 // XFAIL: !has-1024-bit-atomics -// Until we drop support for the synchronization library in C++11/14/17 -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS - // XFAIL: availability-synchronization_library-missing // diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_one.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_one.pass.cpp index c21b67d479ae24..330d8a44bfc2fa 100644 --- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_one.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_one.pass.cpp @@ -7,12 +7,9 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// UNSUPPORTED: c++03 +// UNSUPPORTED: c++03, c++11, c++14, c++17 // XFAIL: !has-1024-bit-atomics -// Until we drop support for the synchronization library in C++11/14/17 -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS - // XFAIL: availability-synchronization_library-missing // diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait.pass.cpp index af99113f13499d..7c5169b64cbe33 100644 --- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait.pass.cpp +++ 
b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait.pass.cpp @@ -7,12 +7,9 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// UNSUPPORTED: c++03 +// UNSUPPORTED: c++03, c++11, c++14, c++17 // XFAIL: !has-1024-bit-atomics -// Until we drop support for the synchronization library in C++11/14/17 -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS - // XFAIL: availability-synchronization_library-missing // diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait_explicit.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait_explicit.pass.cpp index bb8c64593b54b5..c84eecff3eac45 100644 --- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait_explicit.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait_explicit.pass.cpp @@ -7,12 +7,9 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// UNSUPPORTED: c++03 +// UNSUPPORTED: c++03, c++11, c++14, c++17 // XFAIL: !has-1024-bit-atomics -// Until we drop support for the synchronization library in C++11/14/17 -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS - // XFAIL: availability-synchronization_library-missing // diff --git a/libcxx/test/std/thread/thread.barrier/arrive.pass.cpp b/libcxx/test/std/thread/thread.barrier/arrive.pass.cpp index d9d9c1dba6bbb8..b1ad6447a9e214 100644 --- a/libcxx/test/std/thread/thread.barrier/arrive.pass.cpp +++ b/libcxx/test/std/thread/thread.barrier/arrive.pass.cpp @@ -7,10 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// UNSUPPORTED: c++03, c++11 - -// Until we drop support for the synchronization library in C++11/14/17 -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// UNSUPPORTED: c++03, c++11, c++14, c++17 // XFAIL: availability-synchronization_library-missing diff --git a/libcxx/test/std/thread/thread.barrier/arrive_and_drop.pass.cpp b/libcxx/test/std/thread/thread.barrier/arrive_and_drop.pass.cpp index aff7b26e16f70a..b0d94a8a3f4fef 100644 --- a/libcxx/test/std/thread/thread.barrier/arrive_and_drop.pass.cpp +++ b/libcxx/test/std/thread/thread.barrier/arrive_and_drop.pass.cpp @@ -7,10 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// UNSUPPORTED: c++03, c++11 - -// Until we drop support for the synchronization library in C++11/14/17 -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// UNSUPPORTED: c++03, c++11, c++14, c++17 // XFAIL: availability-synchronization_library-missing diff --git a/libcxx/test/std/thread/thread.barrier/arrive_and_wait.pass.cpp b/libcxx/test/std/thread/thread.barrier/arrive_and_wait.pass.cpp index 8c45ba9278f289..2d747e3c9b9dac 100644 --- a/libcxx/test/std/thread/thread.barrier/arrive_and_wait.pass.cpp +++ b/libcxx/test/std/thread/thread.barrier/arrive_and_wait.pass.cpp @@ -7,10 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// UNSUPPORTED: c++03, c++11 - -// Until we drop support for the synchronization library in C++11/14/17 -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// UNSUPPORTED: c++03, c++11, c++14, c++17 // 
XFAIL: availability-synchronization_library-missing diff --git a/libcxx/test/std/thread/thread.barrier/completion.pass.cpp b/libcxx/test/std/thread/thread.barrier/completion.pass.cpp index 633a0c8bf23664..892e29b9dfa9e1 100644 --- a/libcxx/test/std/thread/thread.barrier/completion.pass.cpp +++ b/libcxx/test/std/thread/thread.barrier/completion.pass.cpp @@ -7,10 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// UNSUPPORTED: c++03, c++11 - -// Until we drop support for the synchronization library in C++11/14/17 -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// UNSUPPORTED: c++03, c++11, c++14, c++17 // XFAIL: availability-synchronization_library-missing diff --git a/libcxx/test/std/thread/thread.barrier/ctor.compile.pass.cpp b/libcxx/test/std/thread/thread.barrier/ctor.compile.pass.cpp index fe7068d2a574ca..d67cf36c860ead 100644 --- a/libcxx/test/std/thread/thread.barrier/ctor.compile.pass.cpp +++ b/libcxx/test/std/thread/thread.barrier/ctor.compile.pass.cpp @@ -7,10 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// UNSUPPORTED: c++03, c++11 - -// Until we drop support for the synchronization library in C++11/14/17 -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// UNSUPPORTED: c++03, c++11, c++14, c++17 // diff --git a/libcxx/test/std/thread/thread.barrier/max.pass.cpp b/libcxx/test/std/thread/thread.barrier/max.pass.cpp index b09a02e1bdef4c..a3ec904897a185 100644 --- a/libcxx/test/std/thread/thread.barrier/max.pass.cpp +++ b/libcxx/test/std/thread/thread.barrier/max.pass.cpp @@ -7,10 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// UNSUPPORTED: c++03, c++11 - -// Until we drop support for the synchronization library in C++11/14/17 -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// UNSUPPORTED: c++03, c++11, c++14, c++17 // diff --git a/libcxx/test/std/thread/thread.latch/arrive_and_wait.pass.cpp b/libcxx/test/std/thread/thread.latch/arrive_and_wait.pass.cpp index 8ca4f37b73b950..23cb2706beb5b5 100644 --- a/libcxx/test/std/thread/thread.latch/arrive_and_wait.pass.cpp +++ b/libcxx/test/std/thread/thread.latch/arrive_and_wait.pass.cpp @@ -7,10 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// UNSUPPORTED: c++03, c++11 - -// Until we drop support for the synchronization library in C++11/14/17 -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// UNSUPPORTED: c++03, c++11, c++14, c++17 // XFAIL: availability-synchronization_library-missing diff --git a/libcxx/test/std/thread/thread.latch/count_down.pass.cpp b/libcxx/test/std/thread/thread.latch/count_down.pass.cpp index eb524abd24b98a..f33f7b21908d41 100644 --- a/libcxx/test/std/thread/thread.latch/count_down.pass.cpp +++ b/libcxx/test/std/thread/thread.latch/count_down.pass.cpp @@ -7,10 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// UNSUPPORTED: c++03, c++11 - -// Until we drop support for the synchronization library in C++11/14/17 -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// UNSUPPORTED: c++03, c++11, c++14, c++17 // XFAIL: availability-synchronization_library-missing diff --git a/libcxx/test/std/thread/thread.latch/ctor.pass.cpp 
b/libcxx/test/std/thread/thread.latch/ctor.pass.cpp index bca4561bd2f742..df258b01be33c9 100644 --- a/libcxx/test/std/thread/thread.latch/ctor.pass.cpp +++ b/libcxx/test/std/thread/thread.latch/ctor.pass.cpp @@ -7,10 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// UNSUPPORTED: c++03, c++11 - -// Until we drop support for the synchronization library in C++11/14/17 -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// UNSUPPORTED: c++03, c++11, c++14, c++17 // diff --git a/libcxx/test/std/thread/thread.latch/max.pass.cpp b/libcxx/test/std/thread/thread.latch/max.pass.cpp index bcf353ed9712ee..4490f94a2dac72 100644 --- a/libcxx/test/std/thread/thread.latch/max.pass.cpp +++ b/libcxx/test/std/thread/thread.latch/max.pass.cpp @@ -7,10 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// UNSUPPORTED: c++03, c++11 - -// Until we drop support for the synchronization library in C++11/14/17 -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// UNSUPPORTED: c++03, c++11, c++14, c++17 // diff --git a/libcxx/test/std/thread/thread.latch/try_wait.pass.cpp b/libcxx/test/std/thread/thread.latch/try_wait.pass.cpp index 8f354463a8697d..fa09e5632fbfa5 100644 --- a/libcxx/test/std/thread/thread.latch/try_wait.pass.cpp +++ b/libcxx/test/std/thread/thread.latch/try_wait.pass.cpp @@ -7,10 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// UNSUPPORTED: c++03, c++11 - -// Until we drop support for the synchronization library in C++11/14/17 -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// UNSUPPORTED: c++03, c++11, c++14, c++17 // XFAIL: availability-synchronization_library-missing diff --git a/libcxx/test/std/thread/thread.semaphore/acquire.pass.cpp b/libcxx/test/std/thread/thread.semaphore/acquire.pass.cpp index 22eed736c6b753..5a4a0a94b01919 100644 --- a/libcxx/test/std/thread/thread.semaphore/acquire.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/acquire.pass.cpp @@ -7,10 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// UNSUPPORTED: c++03, c++11 - -// Until we drop support for the synchronization library in C++11/14/17 -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// UNSUPPORTED: c++03, c++11, c++14, c++17 // XFAIL: availability-synchronization_library-missing diff --git a/libcxx/test/std/thread/thread.semaphore/binary.pass.cpp b/libcxx/test/std/thread/thread.semaphore/binary.pass.cpp index c01c78506587cd..b244a9d9eda2a2 100644 --- a/libcxx/test/std/thread/thread.semaphore/binary.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/binary.pass.cpp @@ -7,10 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// UNSUPPORTED: c++03, c++11 - -// Until we drop support for the synchronization library in C++11/14/17 -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// UNSUPPORTED: c++03, c++11, c++14, c++17 // XFAIL: availability-synchronization_library-missing diff --git a/libcxx/test/std/thread/thread.semaphore/ctor.compile.pass.cpp b/libcxx/test/std/thread/thread.semaphore/ctor.compile.pass.cpp index dcc298ce11ce88..b7c8d5340b9826 100644 --- a/libcxx/test/std/thread/thread.semaphore/ctor.compile.pass.cpp +++ 
b/libcxx/test/std/thread/thread.semaphore/ctor.compile.pass.cpp @@ -7,10 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// UNSUPPORTED: c++03, c++11 - -// Until we drop support for the synchronization library in C++11/14/17 -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// UNSUPPORTED: c++03, c++11, c++14, c++17 // diff --git a/libcxx/test/std/thread/thread.semaphore/max.pass.cpp b/libcxx/test/std/thread/thread.semaphore/max.pass.cpp index 6f3ed5e345e0b5..bf6b0f05e64f0a 100644 --- a/libcxx/test/std/thread/thread.semaphore/max.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/max.pass.cpp @@ -7,10 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// UNSUPPORTED: c++03, c++11 - -// Until we drop support for the synchronization library in C++11/14/17 -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// UNSUPPORTED: c++03, c++11, c++14, c++17 // diff --git a/libcxx/test/std/thread/thread.semaphore/release.pass.cpp b/libcxx/test/std/thread/thread.semaphore/release.pass.cpp index 3c4d179e504332..d068872ea72300 100644 --- a/libcxx/test/std/thread/thread.semaphore/release.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/release.pass.cpp @@ -7,10 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// UNSUPPORTED: c++03, c++11 - -// Until we drop support for the synchronization library in C++11/14/17 -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// UNSUPPORTED: c++03, c++11, c++14, c++17 // XFAIL: availability-synchronization_library-missing diff --git a/libcxx/test/std/thread/thread.semaphore/timed.pass.cpp b/libcxx/test/std/thread/thread.semaphore/timed.pass.cpp index 77f15ece221d43..ad3c0fb103790b 100644 --- a/libcxx/test/std/thread/thread.semaphore/timed.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/timed.pass.cpp @@ -7,10 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// UNSUPPORTED: c++03, c++11 - -// Until we drop support for the synchronization library in C++11/14/17 -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// UNSUPPORTED: c++03, c++11, c++14, c++17 // XFAIL: availability-synchronization_library-missing diff --git a/libcxx/test/std/thread/thread.semaphore/try_acquire.pass.cpp b/libcxx/test/std/thread/thread.semaphore/try_acquire.pass.cpp index ec159daf87a3fb..fb6fff3baf4c4f 100644 --- a/libcxx/test/std/thread/thread.semaphore/try_acquire.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/try_acquire.pass.cpp @@ -7,10 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// UNSUPPORTED: c++03, c++11 - -// Until we drop support for the synchronization library in C++11/14/17 -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// UNSUPPORTED: c++03, c++11, c++14, c++17 // XFAIL: availability-synchronization_library-missing From 38ef6929a3322fdddd74b3d6abdf6936cc4d8e62 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 31 Jul 2024 16:53:25 -0500 Subject: [PATCH 032/114] [libc] Add vsscanf function (#101402) Summary: Adds support for the `vsscanf` function similar to `sscanf`. Based off of https://github.com/llvm/llvm-project/pull/97529. 
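For readers unfamiliar with the varargs plumbing, the sketch below shows the usual pattern for driving `vsscanf`: a variadic wrapper captures its arguments with `va_start` and forwards the resulting `va_list`. This is illustrative only and is written against the standard `<cstdio>` declaration rather than this patch's `LIBC_NAMESPACE::vsscanf`; the wrapper name `scan_pair` is made up for the example.

```cpp
// Illustrative sketch: forward variadic arguments to vsscanf, mirroring the
// call_vsscanf helper used by the unit test added in this patch.
#include <cstdarg>
#include <cstdio>

static int scan_pair(const char *buffer, const char *format, ...) {
  va_list args;
  va_start(args, format);
  int matched = std::vsscanf(buffer, format, args); // consume the va_list
  va_end(args);
  return matched;
}

int main() {
  int year = 0, month = 0;
  // Expect two conversions: "2024" and "07".
  int matched = scan_pair("2024-07", "%d-%d", &year, &month);
  std::printf("matched=%d year=%d month=%d\n", matched, year, month);
  return matched == 2 ? 0 : 1;
}
```

The new test file below exercises the same pattern directly against `LIBC_NAMESPACE::vsscanf` through an equivalent `call_vsscanf` helper.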
--- libc/config/gpu/entrypoints.txt | 1 + libc/config/linux/riscv/entrypoints.txt | 1 + libc/config/linux/x86_64/entrypoints.txt | 1 + libc/newhdrgen/yaml/stdio.yaml | 8 ++ libc/spec/stdc.td | 7 + libc/src/stdio/CMakeLists.txt | 12 ++ libc/src/stdio/vsscanf.cpp | 33 +++++ libc/src/stdio/vsscanf.h | 20 +++ libc/test/src/stdio/CMakeLists.txt | 14 ++ libc/test/src/stdio/vsscanf_test.cpp | 159 +++++++++++++++++++++++ 10 files changed, 256 insertions(+) create mode 100644 libc/src/stdio/vsscanf.cpp create mode 100644 libc/src/stdio/vsscanf.h create mode 100644 libc/test/src/stdio/vsscanf_test.cpp diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt index 04a42c3019495a..6035af5c0ebb08 100644 --- a/libc/config/gpu/entrypoints.txt +++ b/libc/config/gpu/entrypoints.txt @@ -188,6 +188,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdio.vsnprintf libc.src.stdio.vsprintf libc.src.stdio.sscanf + libc.src.stdio.vsscanf libc.src.stdio.feof libc.src.stdio.ferror libc.src.stdio.fflush diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index c229a11b5bb52f..afa7f8bdd48b06 100644 --- a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -217,6 +217,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdio.snprintf libc.src.stdio.sprintf libc.src.stdio.sscanf + libc.src.stdio.vsscanf libc.src.stdio.vfprintf libc.src.stdio.vprintf libc.src.stdio.vsnprintf diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 533d0cd368863c..84c52ef12f3dfc 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -217,6 +217,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdio.snprintf libc.src.stdio.sprintf libc.src.stdio.sscanf + libc.src.stdio.vsscanf libc.src.stdio.vfprintf libc.src.stdio.vprintf libc.src.stdio.vsnprintf diff --git a/libc/newhdrgen/yaml/stdio.yaml b/libc/newhdrgen/yaml/stdio.yaml index 687a6d696cad60..660087e20b0ccf 100644 --- a/libc/newhdrgen/yaml/stdio.yaml +++ b/libc/newhdrgen/yaml/stdio.yaml @@ -105,6 +105,14 @@ functions: - type: const char *__restrict - type: const char *__restrict - type: ... 
+ - name: vsscanf + standards: + - stdc + return_type: int + arguments: + - type: const char *__restrict + - type: const char *__restrict + - type: va_list - name: scanf standards: - stdc diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index f3b8db598c4a1a..6bb249f6bf7d73 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -920,6 +920,13 @@ def StdC : StandardSpec<"stdc"> { ArgSpec, ArgSpec] >, + FunctionSpec< + "vsscanf", + RetValSpec, + [ArgSpec, + ArgSpec, + ArgSpec] + >, FunctionSpec< "scanf", RetValSpec, diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt index 2d528a903cc2f4..94f92351e92fa2 100644 --- a/libc/src/stdio/CMakeLists.txt +++ b/libc/src/stdio/CMakeLists.txt @@ -121,6 +121,18 @@ add_entrypoint_object( libc.src.stdio.scanf_core.scanf_main ) +add_entrypoint_object( + vsscanf + SRCS + vsscanf.cpp + HDRS + vsscanf.h + DEPENDS + libc.src.__support.arg_list + libc.src.stdio.scanf_core.reader + libc.src.stdio.scanf_core.scanf_main +) + add_entrypoint_object( fscanf SRCS diff --git a/libc/src/stdio/vsscanf.cpp b/libc/src/stdio/vsscanf.cpp new file mode 100644 index 00000000000000..fcf0b88885f17b --- /dev/null +++ b/libc/src/stdio/vsscanf.cpp @@ -0,0 +1,33 @@ +//===-- Implementation of vsscanf -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/vsscanf.h" + +#include "src/__support/CPP/limits.h" +#include "src/__support/arg_list.h" +#include "src/stdio/scanf_core/reader.h" +#include "src/stdio/scanf_core/scanf_main.h" + +#include +#include + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(int, vsscanf, + (const char *buffer, const char *format, va_list vlist)) { + internal::ArgList args(vlist); + scanf_core::ReadBuffer rb{const_cast(buffer), + cpp::numeric_limits::max()}; + scanf_core::Reader reader(&rb); + int ret_val = scanf_core::scanf_main(&reader, format, args); + // This is done to avoid including stdio.h in the internals. On most systems + // EOF is -1, so this will be transformed into just "return ret_val". + return (ret_val == -1) ? EOF : ret_val; +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/vsscanf.h b/libc/src/stdio/vsscanf.h new file mode 100644 index 00000000000000..992c44d3d95b9a --- /dev/null +++ b/libc/src/stdio/vsscanf.h @@ -0,0 +1,20 @@ +//===-- Implementation header of vsscanf ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_VSSCANF_H +#define LLVM_LIBC_SRC_STDIO_VSSCANF_H + +#include + +namespace LIBC_NAMESPACE { + +int vsscanf(const char *s, const char *format, va_list vlist); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDIO_VSSCANF_H diff --git a/libc/test/src/stdio/CMakeLists.txt b/libc/test/src/stdio/CMakeLists.txt index 10ec890b043a71..4ac83ec2dd600f 100644 --- a/libc/test/src/stdio/CMakeLists.txt +++ b/libc/test/src/stdio/CMakeLists.txt @@ -282,6 +282,20 @@ add_libc_test( ${sscanf_test_copts} ) +add_libc_test( + vsscanf_test + SUITE + libc_stdio_unittests + SRCS + vsscanf_test.cpp + DEPENDS + libc.src.stdio.vsscanf + LINK_LIBRARIES + LibcFPTestHelpers + COMPILE_OPTIONS + ${sscanf_test_copts} +) + add_libc_test( puts_test HERMETIC_TEST_ONLY # writes to libc's stdout diff --git a/libc/test/src/stdio/vsscanf_test.cpp b/libc/test/src/stdio/vsscanf_test.cpp new file mode 100644 index 00000000000000..4194e10f0602c4 --- /dev/null +++ b/libc/test/src/stdio/vsscanf_test.cpp @@ -0,0 +1,159 @@ +//===-- Unittests for sscanf ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/vsscanf.h" + +#include "test/UnitTest/Test.h" + +int call_vsscanf(const char *__restrict buffer, const char *__restrict format, + ...) { + va_list vlist; + va_start(vlist, format); + int ret = LIBC_NAMESPACE::vsscanf(buffer, format, vlist); + va_end(vlist); + return ret; +} + +TEST(LlvmLibcVSScanfTest, SimpleStringConv) { + int ret_val; + char buffer[10]; + char buffer2[10]; + ret_val = call_vsscanf("abc123", "abc %s", buffer); + ASSERT_EQ(ret_val, 1); + ASSERT_STREQ(buffer, "123"); + + ret_val = call_vsscanf("abc123", "%3s %3s", buffer, buffer2); + ASSERT_EQ(ret_val, 2); + ASSERT_STREQ(buffer, "abc"); + ASSERT_STREQ(buffer2, "123"); + + ret_val = call_vsscanf("abc 123", "%3s%3s", buffer, buffer2); + ASSERT_EQ(ret_val, 2); + ASSERT_STREQ(buffer, "abc"); + ASSERT_STREQ(buffer2, "123"); +} + +TEST(LlvmLibcVSScanfTest, IntConvSimple) { + int ret_val; + int result = 0; + ret_val = call_vsscanf("123", "%d", &result); + EXPECT_EQ(ret_val, 1); + EXPECT_EQ(result, 123); + + ret_val = call_vsscanf("456", "%i", &result); + EXPECT_EQ(ret_val, 1); + EXPECT_EQ(result, 456); + + ret_val = call_vsscanf("789", "%x", &result); + EXPECT_EQ(ret_val, 1); + EXPECT_EQ(result, 0x789); + + ret_val = call_vsscanf("012", "%o", &result); + EXPECT_EQ(ret_val, 1); + EXPECT_EQ(result, 012); + + ret_val = call_vsscanf("345", "%u", &result); + EXPECT_EQ(ret_val, 1); + EXPECT_EQ(result, 345); + + // 288 characters + ret_val = call_vsscanf("10000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000", + "%d", &result); + EXPECT_EQ(ret_val, 1); + EXPECT_EQ(result, int(LIBC_NAMESPACE::cpp::numeric_limits::max())); + + ret_val = call_vsscanf("Not an integer", "%d", &result); + EXPECT_EQ(ret_val, 0); +} + 
+TEST(LlvmLibcVSScanfTest, IntConvLengthModifier) { + int ret_val; + uintmax_t max_result = 0; + int int_result = 0; + char char_result = 0; + + ret_val = call_vsscanf("123", "%ju", &max_result); + EXPECT_EQ(ret_val, 1); + EXPECT_EQ(max_result, uintmax_t(123)); + + // Check overflow handling + ret_val = + call_vsscanf("999999999999999999999999999999999999", "%ju", &max_result); + EXPECT_EQ(ret_val, 1); + EXPECT_EQ(max_result, LIBC_NAMESPACE::cpp::numeric_limits::max()); + + // Because this is unsigned, any out of range value should return the maximum, + // even with a negative sign. + ret_val = + call_vsscanf("-999999999999999999999999999999999999", "%ju", &max_result); + EXPECT_EQ(ret_val, 1); + EXPECT_EQ(max_result, LIBC_NAMESPACE::cpp::numeric_limits::max()); + + ret_val = call_vsscanf("-18446744073709551616", "%ju", &max_result); + EXPECT_EQ(ret_val, 1); + EXPECT_EQ(max_result, LIBC_NAMESPACE::cpp::numeric_limits::max()); + + // But any number below the maximum should have the - sign applied. + ret_val = call_vsscanf("-1", "%ju", &max_result); + EXPECT_EQ(ret_val, 1); + EXPECT_EQ(max_result, uintmax_t(-1)); + + ret_val = call_vsscanf("-1", "%u", &int_result); + EXPECT_EQ(ret_val, 1); + EXPECT_EQ(int_result, -1); + + max_result = 0xff00ff00ff00ff00; + char_result = 0x6f; + + // Overflows for sizes larger than the maximum are handled by casting. + ret_val = call_vsscanf("8589967360", "%d", &int_result); + EXPECT_EQ(ret_val, 1); + EXPECT_EQ(int_result, int(8589967360)); // 2^33 + 2^15 + + // Check that the adjacent values weren't touched by the overflow. + ASSERT_EQ(max_result, uintmax_t(0xff00ff00ff00ff00)); + ASSERT_EQ(char_result, char(0x6f)); + + ret_val = call_vsscanf("-8589967360", "%d", &int_result); + EXPECT_EQ(ret_val, 1); + EXPECT_EQ(int_result, int(-8589967360)); + ASSERT_EQ(max_result, uintmax_t(0xff00ff00ff00ff00)); + ASSERT_EQ(char_result, char(0x6f)); + + ret_val = call_vsscanf("25", "%hhd", &char_result); + EXPECT_EQ(ret_val, 1); + EXPECT_EQ(char_result, char(25)); +} + +TEST(LlvmLibcVSScanfTest, IntConvBaseSelection) { + int ret_val; + int result = 0; + ret_val = call_vsscanf("0xabc123", "%i", &result); + EXPECT_EQ(ret_val, 1); + EXPECT_EQ(result, 0xabc123); + + ret_val = call_vsscanf("0456", "%i", &result); + EXPECT_EQ(ret_val, 1); + EXPECT_EQ(result, 0456); + + ret_val = call_vsscanf("0999", "%i", &result); + EXPECT_EQ(ret_val, 1); + EXPECT_EQ(result, 0); + + ret_val = call_vsscanf("123abc456", "%i", &result); + EXPECT_EQ(ret_val, 1); + EXPECT_EQ(result, 123); +} From 785a24f1561c610ecbce7cdfbff053e0a3a7caec Mon Sep 17 00:00:00 2001 From: Peiming Liu Date: Wed, 31 Jul 2024 15:14:27 -0700 Subject: [PATCH 033/114] [mlir][sparse] introduce `sparse_tensor.coiterate` operation. (#101100) This PR introduces `sparse_tensor.coiterate` operation, which represents a loop that traverses multiple sparse iteration space. 
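To make the semantics concrete before the TableGen description below, here is a rough C++ analogue of what a two-space `coiterate` expresses conceptually: a two-pointer merge over sorted coordinate lists, with one branch per case (left operand only, right operand only, both). This is an illustrative sketch of the loop structure the new op models, not code from this patch; the parallel coord/value vector layout is an assumption made only for the example.

```cpp
// Conceptual analogue of sparse_tensor.coiterate over two 1-D sparse vectors:
// merge their coordinate lists and dispatch to one of three cases depending on
// which operand defines the current coordinate. Illustrative only; this is not
// how the sparse compiler stores tensors.
#include <cstdint>
#include <vector>

struct SparseVec {
  std::vector<int64_t> coords; // sorted, unique coordinates
  std::vector<double> values;  // values[i] pairs with coords[i]
};

SparseVec add(const SparseVec &a, const SparseVec &b) {
  SparseVec out;
  size_t i = 0, j = 0;
  while (i < a.coords.size() || j < b.coords.size()) {
    bool takeA = j == b.coords.size() ||
                 (i < a.coords.size() && a.coords[i] < b.coords[j]);
    bool takeB = i == a.coords.size() ||
                 (j < b.coords.size() && b.coords[j] < a.coords[i]);
    if (takeA) { // case %it1, _
      out.coords.push_back(a.coords[i]);
      out.values.push_back(a.values[i++]);
    } else if (takeB) { // case _, %it2
      out.coords.push_back(b.coords[j]);
      out.values.push_back(b.values[j++]);
    } else { // case %it1, %it2
      out.coords.push_back(a.coords[i]);
      out.values.push_back(a.values[i++] + b.values[j++]);
    }
  }
  return out;
}
```

The three branches correspond one-to-one to the three `case` regions in the elementwise-addition example given in the op description below.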
--- .../Dialect/SparseTensor/IR/SparseTensor.h | 55 ++- .../SparseTensor/IR/SparseTensorAttrDefs.td | 11 +- .../SparseTensor/IR/SparseTensorOps.td | 127 +++++- .../SparseTensor/IR/SparseTensorDialect.cpp | 361 ++++++++++++++---- .../Transforms/SparseSpaceCollapse.cpp | 4 +- mlir/test/Dialect/SparseTensor/invalid.mlir | 75 ++++ mlir/test/Dialect/SparseTensor/roundtrip.mlir | 35 +- 7 files changed, 569 insertions(+), 99 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h index 68ca0361215205..388efd1c454b1e 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h @@ -61,37 +61,62 @@ struct COOSegment { /// A simple wrapper to encode a bitset of (at most 64) levels, currently used /// by `sparse_tensor.iterate` operation for the set of levels on which the /// coordinates should be loaded. -class LevelSet { - uint64_t bits = 0; +class I64BitSet { + uint64_t storage = 0; public: - LevelSet() = default; - explicit LevelSet(uint64_t bits) : bits(bits) {} - operator uint64_t() const { return bits; } + using const_set_bits_iterator = llvm::const_set_bits_iterator_impl; + const_set_bits_iterator begin() const { + return const_set_bits_iterator(*this); + } + const_set_bits_iterator end() const { + return const_set_bits_iterator(*this, -1); + } + iterator_range bits() const { + return make_range(begin(), end()); + } + + I64BitSet() = default; + explicit I64BitSet(uint64_t bits) : storage(bits) {} + operator uint64_t() const { return storage; } - LevelSet &set(unsigned i) { + I64BitSet &set(unsigned i) { assert(i < 64); - bits |= static_cast(0x01u) << i; + storage |= static_cast(0x01u) << i; return *this; } - LevelSet &operator|=(LevelSet lhs) { - bits |= static_cast(lhs); + I64BitSet &operator|=(I64BitSet lhs) { + storage |= static_cast(lhs); return *this; } - LevelSet &lshift(unsigned offset) { - bits = bits << offset; + I64BitSet &lshift(unsigned offset) { + storage = storage << offset; return *this; } + // Needed by `llvm::const_set_bits_iterator_impl`. 
+ int find_first() const { return min(); } + int find_next(unsigned prev) const { + if (prev >= max()) + return -1; + + uint64_t b = storage >> (prev + 1); + if (b == 0) + return -1; + + return llvm::countr_zero(b) + prev + 1; + } + bool operator[](unsigned i) const { assert(i < 64); - return (bits & (1 << i)) != 0; + return (storage & (1 << i)) != 0; } - unsigned max() const { return 64 - llvm::countl_zero(bits); } - unsigned count() const { return llvm::popcount(bits); } - bool empty() const { return bits == 0; } + unsigned min() const { return llvm::countr_zero(storage); } + unsigned max() const { return 64 - llvm::countl_zero(storage); } + unsigned count() const { return llvm::popcount(storage); } + bool empty() const { return storage == 0; } }; } // namespace sparse_tensor diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td index 69b212cce4ceba..cb6c1b63e4e4b0 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td @@ -24,16 +24,17 @@ class SparseTensor_Attr($_self)">, CPred<"::llvm::cast<::mlir::IntegerAttr>($_self).getType().isInteger(64)">]>, "LevelSet attribute"> { - let returnType = [{::mlir::sparse_tensor::LevelSet}]; - let convertFromStorage = [{::mlir::sparse_tensor::LevelSet($_self.getValue().getZExtValue())}]; + let returnType = [{::mlir::sparse_tensor::I64BitSet}]; + let convertFromStorage = [{::mlir::sparse_tensor::I64BitSet($_self.getValue().getZExtValue())}]; } +def I64BitSetArrayAttr : + TypedArrayAttrBase; + //===----------------------------------------------------------------------===// // These attributes are just like `IndexAttr` except that they clarify whether // the index refers to a dimension (an axis of the semantic tensor) or a level diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td index ff9858d5832ba8..6e17f804993e2a 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td @@ -1306,7 +1306,7 @@ def SparseTensor_SelectOp : SparseTensor_Op<"select", [Pure, SameOperandsAndResu def SparseTensor_YieldOp : SparseTensor_Op<"yield", [Pure, Terminator, ParentOneOf<["BinaryOp", "UnaryOp", "ReduceOp", "SelectOp", - "ForeachOp", "IterateOp"]>]> { + "ForeachOp", "IterateOp", "CoIterateOp"]>]> { let summary = "Yield from sparse_tensor set-like operations"; let description = [{ Yields a value from within a `binary`, `unary`, `reduce`, @@ -1629,14 +1629,14 @@ def IterateOp : SparseTensor_Op<"iterate", let arguments = (ins AnySparseIterSpace:$iterSpace, Variadic:$initArgs, - LevelSetAttr:$crdUsedLvls); + I64BitSetAttr:$crdUsedLvls); let results = (outs Variadic:$results); let regions = (region SizedRegion<1>:$region); let skipDefaultBuilders = 1; let builders = [ OpBuilder<(ins "Value":$iterSpace, "ValueRange":$initArgs)>, - OpBuilder<(ins "Value":$iterSpace, "ValueRange":$initArgs, "LevelSet" :$crdUsedLvls)> + OpBuilder<(ins "Value":$iterSpace, "ValueRange":$initArgs, "I64BitSet" :$crdUsedLvls)> ]; let extraClassDeclaration = [{ @@ -1669,6 +1669,127 @@ def IterateOp : SparseTensor_Op<"iterate", let hasCustomAssemblyFormat = 1; } +def SparseTensor_CoIterateOp : SparseTensor_Op<"coiterate", + [AttrSizedOperandSegments, + SingleBlockImplicitTerminator<"sparse_tensor::YieldOp">, + RecursiveMemoryEffects]> { + let summary = 
"Co-iterates over a set of sparse iteration spaces"; + let description = [{ + The `sparse_tensor.coiterate` operation represents a loop (nest) over + a set of iteration spaces. The operation can have multiple regions, + with each of them defining a case to compute a result at the current iterations. + The case condition is defined solely based on the pattern of specified iterators. + For example: + ```mlir + %ret = sparse_tensor.coiterate (%sp1, %sp2) at(%coord) iter_args(%arg = %init) + : (!sparse_tensor.iter_space<#CSR, lvls = 0>, + !sparse_tensor.iter_space<#COO, lvls = 0>) + -> index + case %it1, _ { + // %coord is specifed in space %sp1 but *NOT* specified in space %sp2. + } + case %it1, %it2 { + // %coord is specifed in *BOTH* spaces %sp1 and %sp2. + } + ``` + + `sparse_tensor.coiterate` can also operate on loop-carried variables. + It returns the final value for each loop-carried variable after loop termination. + The initial values of the variables are passed as additional SSA operands + to the iterator SSA value and used coordinate SSA values. + Each operation region has variadic arguments for specified (used), one argument + for each loop-carried variable, representing the value of the variable + at the current iteration, followed by a list of arguments for iterators. + The body region must contain exactly one block that terminates with + `sparse_tensor.yield`. + + The results of an `sparse_tensor.coiterate` hold the final values after + the last iteration. If the `sparse_tensor.coiterate` defines any values, + a yield must be explicitly present in every region defined in the operation. + The number and types of the `sparse_tensor.coiterate` results must match + the initial values in the iter_args binding and the yield operands. + + + A `sparse_tensor.coiterate` example that does elementwise addition between two + sparse vectors. 
+ + + ```mlir + %ret = sparse_tensor.coiterate (%sp1, %sp2) at(%coord) iter_args(%arg = %init) + : (!sparse_tensor.iter_space<#CSR, lvls = 0>, + !sparse_tensor.iter_space<#CSR, lvls = 0>) + -> tensor + case %it1, _ { + // v = v1 + 0 = v1 + %v1 = sparse_tensor.extract_value %t1 at %it1 : index + %yield = sparse_tensor.insert %v1 into %arg[%coord] + sparse_tensor.yield %yield + } + case _, %it2 { + // v = v2 + 0 = v2 + %v2 = sparse_tensor.extract_value %t2 at %it2 : index + %yield = sparse_tensor.insert %v1 into %arg[%coord] + sparse_tensor.yield %yield + } + case %it1, %it2 { + // v = v1 + v2 + %v1 = sparse_tensor.extract_value %t1 at %it1 : index + %v2 = sparse_tensor.extract_value %t2 at %it2 : index + %v = arith.addi %v1, %v2 : index + %yield = sparse_tensor.insert %v into %arg[%coord] + sparse_tensor.yield %yield + } + ``` + }]; + + let arguments = (ins Variadic:$iterSpaces, + Variadic:$initArgs, + I64BitSetAttr:$crdUsedLvls, + I64BitSetArrayAttr:$cases); + let results = (outs Variadic:$results); + let regions = (region VariadicRegion>:$caseRegions); + + let extraClassDeclaration = [{ + unsigned getSpaceDim() { + return llvm::cast<::mlir::sparse_tensor::IterSpaceType>( + getIterSpaces().front().getType()) + .getSpaceDim(); + } + I64BitSet getRegionDefinedSpace(unsigned regionIdx) { + return I64BitSet(llvm::cast(getCases()[regionIdx]) + .getValue().getZExtValue()); + } + auto getRegionDefinedSpaces() { + return llvm::map_range(getCases().getValue(), [](Attribute attr) { + return I64BitSet(llvm::cast(attr).getValue().getZExtValue()); + }); + } + + // The block arguments starts with referenced coordinates, follows by + // user-provided iteration arguments and ends with iterators. + Block::BlockArgListType getCrds(unsigned regionIdx) { + return getRegion(regionIdx).getArguments() + .take_front(getCrdUsedLvls().count()); + } + unsigned getNumRegionIterArgs(unsigned regionIdx) { + return getInitArgs().size(); + } + Block::BlockArgListType getRegionIterArgs(unsigned regionIdx) { + return getRegion(regionIdx).getArguments() + .slice(getCrdUsedLvls().count(), getNumRegionIterArgs(regionIdx)); + } + Block::BlockArgListType getRegionIterators(unsigned regionIdx) { + return getRegion(regionIdx).getArguments() + .take_back(getRegionDefinedSpace(regionIdx).count()); + } + ValueRange getYieldedValues(unsigned regionIdx); + }]; + + let hasVerifier = 1; + let hasRegionVerifier = 1; + let hasCustomAssemblyFormat = 1; +} + //===----------------------------------------------------------------------===// // Sparse Tensor Debugging and Test-Only Operations. //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 0a276d87f3bcad..1135ea32fe1abb 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -2131,10 +2131,82 @@ static void printLevelRange(OpAsmPrinter &p, Operation *, IntegerAttr lvlLo, printLevelRange(p, lo, hi); } +/// Parses a list of `optional` defined list in the form of +/// "(%val0, _, %val1, ...)", where `_` is used to annotate that the +/// corresponding value is not defined (e.g., to represent an undefined +/// coordinate in the sparse iteration space). 
+static ParseResult parseOptionalDefinedList( + OpAsmParser &parser, OperationState &state, I64BitSet &definedSet, + SmallVectorImpl &definedArgs, + unsigned maxCnt = std::numeric_limits::max(), + OpAsmParser::Delimiter delimiter = OpAsmParser::Delimiter::Paren) { + unsigned cnt = 0; + ParseResult crdList = + parser.parseCommaSeparatedList(delimiter, [&]() -> ParseResult { + if (parser.parseOptionalKeyword("_")) { + if (parser.parseArgument(definedArgs.emplace_back())) + return failure(); + definedSet.set(cnt); + } + cnt += 1; + return success(); + }); + + if (cnt > maxCnt) + return parser.emitError(parser.getNameLoc(), + "parsed more value than expected."); + + if (failed(crdList)) { + return parser.emitError( + parser.getNameLoc(), + "expecting SSA value or \"_\" for level coordinates"); + } + assert(definedArgs.size() == definedSet.count()); + return success(); +} + +static void printOptionalDefinedList(OpAsmPrinter &p, unsigned size, + Block::BlockArgListType blocksArgs, + I64BitSet definedSet) { + if (definedSet.empty()) + return; + + for (unsigned i = 0; i < size; i++) { + if (definedSet[i]) { + p << blocksArgs.front(); + blocksArgs = blocksArgs.drop_front(); + } else { + p << "_"; + } + if (i != size - 1) + p << ", "; + } + assert(blocksArgs.empty()); +} + static ParseResult -parseSparseSpaceLoop(OpAsmParser &parser, OperationState &state, - SmallVectorImpl &iterators, - SmallVectorImpl &iterArgs) { +parseUsedCoordList(OpAsmParser &parser, OperationState &state, + SmallVectorImpl &coords) { + // Parse "at(%crd0, _, ...)" + I64BitSet crdUsedLvlSet; + if (succeeded(parser.parseOptionalKeyword("at")) && + failed(parseOptionalDefinedList(parser, state, crdUsedLvlSet, coords))) + return failure(); + + // Always use IndexType for the coordinate. + for (auto &coord : coords) + coord.type = parser.getBuilder().getIndexType(); + + // Set the CrdUsedLvl bitset. + state.addAttribute("crdUsedLvls", + parser.getBuilder().getI64IntegerAttr(crdUsedLvlSet)); + return success(); +} + +static ParseResult +parseSparseIterateLoop(OpAsmParser &parser, OperationState &state, + SmallVectorImpl &iterators, + SmallVectorImpl &blockArgs) { SmallVector spaces; SmallVector initArgs; @@ -2148,37 +2220,14 @@ parseSparseSpaceLoop(OpAsmParser &parser, OperationState &state, parser.getNameLoc(), "mismatch in number of sparse iterators and sparse spaces"); - // Parse "at(%crd0, _, ...)" - LevelSet crdUsedLvlSet; - bool hasUsedCrds = succeeded(parser.parseOptionalKeyword("at")); - unsigned lvlCrdCnt = 0; - if (hasUsedCrds) { - ParseResult crdList = parser.parseCommaSeparatedList( - OpAsmParser::Delimiter::Paren, [&]() -> ParseResult { - if (parser.parseOptionalKeyword("_")) { - if (parser.parseArgument(iterArgs.emplace_back())) - return failure(); - // Always use IndexType for the coordinate. - crdUsedLvlSet.set(lvlCrdCnt); - iterArgs.back().type = parser.getBuilder().getIndexType(); - } - lvlCrdCnt += 1; - return success(); - }); - if (failed(crdList)) { - return parser.emitError( - parser.getNameLoc(), - "expecting SSA value or \"_\" for level coordinates"); - } - } - // Set the CrdUsedLvl bitset. 
- state.addAttribute("crdUsedLvls", - parser.getBuilder().getI64IntegerAttr(crdUsedLvlSet)); + if (failed(parseUsedCoordList(parser, state, blockArgs))) + return failure(); + size_t numCrds = blockArgs.size(); // Parse "iter_args(%arg = %init, ...)" bool hasIterArgs = succeeded(parser.parseOptionalKeyword("iter_args")); if (hasIterArgs) - if (parser.parseAssignmentList(iterArgs, initArgs)) + if (parser.parseAssignmentList(blockArgs, initArgs)) return failure(); SmallVector iterSpaceTps; @@ -2196,10 +2245,6 @@ parseSparseSpaceLoop(OpAsmParser &parser, OperationState &state, return parser.emitError(parser.getNameLoc(), "expected sparse_tensor.iter_space type for " "iteration space operands"); - if (hasUsedCrds && spaceTp.getSpaceDim() != lvlCrdCnt) - return parser.emitError(parser.getNameLoc(), - "mismatch in number of iteration space dimension " - "and specified coordinates"); it.type = spaceTp.getIteratorType(); } @@ -2213,9 +2258,68 @@ parseSparseSpaceLoop(OpAsmParser &parser, OperationState &state, return failure(); if (hasIterArgs) { - unsigned numCrds = crdUsedLvlSet.count(); // Strip off leading args that used for coordinates. - MutableArrayRef args = MutableArrayRef(iterArgs).drop_front(numCrds); + MutableArrayRef args = MutableArrayRef(blockArgs).drop_front(numCrds); + if (args.size() != initArgs.size() || args.size() != state.types.size()) { + return parser.emitError( + parser.getNameLoc(), + "mismatch in number of iteration arguments and return values"); + } + + for (auto [it, init, tp] : llvm::zip_equal(args, initArgs, state.types)) { + it.type = tp; + if (parser.resolveOperand(init, tp, state.operands)) + return failure(); + } + } + return success(); +} + +static ParseResult +parseSparseCoIterateLoop(OpAsmParser &parser, OperationState &state, + SmallVectorImpl &spacesVals, + SmallVectorImpl &blockArgs) { + + // Parse "(%spaces, ...)" + SmallVector spaces; + if (parser.parseOperandList(spaces, OpAsmParser::Delimiter::Paren)) + return failure(); + + if (failed(parseUsedCoordList(parser, state, blockArgs))) + return failure(); + size_t numCrds = blockArgs.size(); + + // Parse "iter_args(%arg = %init, ...)" + SmallVector initArgs; + bool hasIterArgs = succeeded(parser.parseOptionalKeyword("iter_args")); + if (hasIterArgs) + if (parser.parseAssignmentList(blockArgs, initArgs)) + return failure(); + + SmallVector iterSpaceTps; + // parse ": (sparse_tensor.iter_space, ...) -> ret" + if (parser.parseColon() || parser.parseLParen() || + parser.parseTypeList(iterSpaceTps) || parser.parseRParen()) + return failure(); + + if (iterSpaceTps.size() != spaces.size()) + return parser.emitError(parser.getNameLoc(), + "mismatch in number of iteration space operands " + "and iteration space types"); + + if (hasIterArgs) + if (parser.parseArrowTypeList(state.types)) + return failure(); + + // Resolves input sparse iteration spaces. + if (parser.resolveOperands(spaces, iterSpaceTps, parser.getNameLoc(), + spacesVals)) + return failure(); + state.operands.append(spacesVals); + + if (hasIterArgs) { + // Strip off leading args that used for coordinates. 
+ MutableArrayRef args = MutableArrayRef(blockArgs).drop_front(numCrds); if (args.size() != initArgs.size() || args.size() != state.types.size()) { return parser.emitError( parser.getNameLoc(), @@ -2285,7 +2389,7 @@ struct RemoveUnusedLvlCrds : public OpRewritePattern { LogicalResult matchAndRewrite(IterateOp iterateOp, PatternRewriter &rewriter) const override { - LevelSet newUsedLvls(0); + I64BitSet newUsedLvls(0); llvm::BitVector toRemove(iterateOp.getBody()->getNumArguments()); for (unsigned i = 0, e = iterateOp.getSpaceDim(); i < e; i++) { if (auto crd = iterateOp.getLvlCrd(i)) { @@ -2317,13 +2421,13 @@ void IterateOp::build(OpBuilder &builder, OperationState &odsState, Value iterSpace, ValueRange initArgs) { unsigned rank = llvm::cast(iterSpace.getType()).getSpaceDim(); // All ones. - LevelSet set((1 << rank) - 1); + I64BitSet set((1 << rank) - 1); return build(builder, odsState, iterSpace, initArgs, set); } void IterateOp::build(OpBuilder &builder, OperationState &odsState, Value iterSpace, ValueRange initArgs, - LevelSet crdUsedLvls) { + I64BitSet crdUsedLvls) { OpBuilder::InsertionGuard guard(builder); odsState.addOperands(iterSpace); @@ -2353,7 +2457,7 @@ ParseResult IterateOp::parse(OpAsmParser &parser, OperationState &result) { OpAsmParser::UnresolvedOperand iterSpace; SmallVector iters, iterArgs; - if (parseSparseSpaceLoop(parser, result, iters, iterArgs)) + if (parseSparseIterateLoop(parser, result, iters, iterArgs)) return failure(); if (iters.size() != 1) return parser.emitError(parser.getNameLoc(), @@ -2393,51 +2497,39 @@ static void printInitializationList(OpAsmPrinter &p, p << ")"; } -static void printUsedCrdsList(OpAsmPrinter &p, unsigned spaceDim, - Block::BlockArgListType blocksArgs, - LevelSet crdUsedLvls) { - if (crdUsedLvls.empty()) - return; - - p << " at("; - for (unsigned i = 0; i < spaceDim; i++) { - if (crdUsedLvls[i]) { - p << blocksArgs.front(); - blocksArgs = blocksArgs.drop_front(); - } else { - p << "_"; - } - if (i != spaceDim - 1) - p << ", "; +template +static LogicalResult verifySparseLoopOp(SparseLoopOp op) { + if (op.getInitArgs().size() != op.getNumResults()) { + return op.emitOpError( + "mismatch in number of loop-carried values and defined values"); } - assert(blocksArgs.empty()); - p << ")"; + if (op.getCrdUsedLvls().max() > op.getSpaceDim()) + return op.emitOpError("required out-of-bound coordinates"); + + return success(); } +LogicalResult IterateOp::verify() { return verifySparseLoopOp(*this); } +LogicalResult CoIterateOp::verify() { return verifySparseLoopOp(*this); } + void IterateOp::print(OpAsmPrinter &p) { p << " " << getIterator() << " in " << getIterSpace(); - printUsedCrdsList(p, getSpaceDim(), getCrds(), getCrdUsedLvls()); + if (!getCrdUsedLvls().empty()) { + p << " at("; + printOptionalDefinedList(p, getSpaceDim(), getCrds(), getCrdUsedLvls()); + p << ")"; + } printInitializationList(p, getRegionIterArgs(), getInitArgs(), " iter_args"); p << " : " << getIterSpace().getType() << " "; if (!getInitArgs().empty()) - p << "-> (" << getInitArgs().getTypes() << ") "; + p.printArrowTypeList(getInitArgs().getTypes()); + p << " "; p.printRegion(getRegion(), /*printEntryBlockArgs=*/false, /*printBlockTerminators=*/!getInitArgs().empty()); } -LogicalResult IterateOp::verify() { - if (getInitArgs().size() != getNumResults()) { - return emitOpError( - "mismatch in number of loop-carried values and defined values"); - } - if (getCrdUsedLvls().max() > getSpaceDim()) - return emitOpError("required out-of-bound coordinates"); - - return success(); -} 
- LogicalResult IterateOp::verifyRegions() { if (getIterator().getType() != getIterSpace().getType().getIteratorType()) return emitOpError("mismatch in iterator and iteration space type"); @@ -2495,13 +2587,136 @@ OperandRange IterateOp::getEntrySuccessorOperands(RegionBranchPoint point) { void IterateOp::getSuccessorRegions(RegionBranchPoint point, SmallVectorImpl ®ions) { - // Both the operation itself and the region may be branching into the body or - // back into the operation itself. + // Both the operation itself and the region may be branching into the body + // or back into the operation itself. regions.push_back(RegionSuccessor(&getRegion(), getRegionIterArgs())); // It is possible for loop not to enter the body. regions.push_back(RegionSuccessor(getResults())); } +ParseResult CoIterateOp::parse(OpAsmParser &parser, OperationState &result) { + + SmallVector spaces; + // The block argument list of each regions, it is arranged in the order of + // ([used coordinate list], [loop iterations args], [sparse iterator list]). + SmallVector blockArgs; + if (parseSparseCoIterateLoop(parser, result, spaces, blockArgs)) + return failure(); + + result.addAttribute("operandSegmentSizes", + parser.getBuilder().getDenseI32ArrayAttr( + {static_cast(spaces.size()), + static_cast(result.types.size())})); + + SmallVector cases; + while (succeeded(parser.parseOptionalKeyword("case"))) { + // Parse one region per case. + I64BitSet definedItSet; + SmallVector definedIts; + if (parseOptionalDefinedList(parser, result, definedItSet, definedIts, + spaces.size(), OpAsmParser::Delimiter::None)) + return failure(); + + cases.push_back(parser.getBuilder().getI64IntegerAttr(definedItSet)); + + for (auto [i, definedIdx] : llvm::enumerate(definedItSet.bits())) { + // Resolve the iterator type based on the iteration space type. + auto spaceTp = llvm::cast(spaces[definedIdx].getType()); + definedIts[i].type = spaceTp.getIteratorType(); + } + definedIts.insert(definedIts.begin(), blockArgs.begin(), blockArgs.end()); + Region *body = result.addRegion(); + if (parser.parseRegion(*body, definedIts)) + return failure(); + + CoIterateOp::ensureTerminator(*body, parser.getBuilder(), result.location); + } + + result.addAttribute("cases", ArrayAttr::get(parser.getContext(), cases)); + + // Parse the optional attribute list. 
+ if (parser.parseOptionalAttrDict(result.attributes)) + return failure(); + + return success(); +} + +void CoIterateOp::print(OpAsmPrinter &p) { + p << " ("; + llvm::interleaveComma(getIterSpaces(), p, [&](auto s) { p << s; }); + p << ")"; + + if (!getCrdUsedLvls().empty()) { + p << " at("; + printOptionalDefinedList(p, getSpaceDim(), getCrds(0), getCrdUsedLvls()); + p << ")"; + } + + printInitializationList(p, getRegionIterArgs(0), getInitArgs(), " iter_args"); + + p << " : (" << getIterSpaces().getTypes() << ")"; + if (!getInitArgs().empty()) + p.printArrowTypeList(getInitArgs().getTypes()); + + for (unsigned idx = 0, e = getRegions().size(); idx < e; idx++) { + p.printNewline(); + p << "case "; + printOptionalDefinedList(p, getIterSpaces().size(), getRegionIterators(idx), + getRegionDefinedSpace(idx)); + p << " "; + p.printRegion(getRegion(idx), /*printEntryBlockArgs=*/false, + /*printBlockTerminators=*/!getInitArgs().empty()); + } +} + +ValueRange CoIterateOp::getYieldedValues(unsigned regionIdx) { + return cast( + getRegion(regionIdx).getBlocks().front().getTerminator()) + .getResults(); +} + +LogicalResult CoIterateOp::verifyRegions() { + for (unsigned r = 0, e = getNumRegions(); r < e; r++) { + if (getNumRegionIterArgs(r) != getNumResults()) + return emitOpError( + "mismatch in number of basic block args and defined values"); + + auto initArgs = getInitArgs(); + auto iterArgs = getRegionIterArgs(r); + auto yieldVals = getYieldedValues(r); + auto opResults = getResults(); + if (!llvm::all_equal({initArgs.size(), iterArgs.size(), yieldVals.size(), + opResults.size()})) { + return emitOpError() + << "number mismatch between iter args and results on " << r + << "th region"; + } + + for (auto [i, init, iter, yield, ret] : + llvm::enumerate(initArgs, iterArgs, yieldVals, opResults)) { + if (init.getType() != ret.getType()) + return emitOpError() + << "types mismatch between " << i + << "th iter operand and defined value on " << r << "th region"; + if (iter.getType() != ret.getType()) + return emitOpError() << "types mismatch between " << i + << "th iter region arg and defined value on " << r + << "th region"; + if (yield.getType() != ret.getType()) + return emitOpError() + << "types mismatch between " << i + << "th yield value and defined value on " << r << "th region"; + } + } + + auto cases = getRegionDefinedSpaces(); + llvm::SmallSetVector set(cases.begin(), cases.end()); + if (set.size() != getNumRegions()) + return emitOpError("contains duplicated cases."); + + return success(); +} + //===----------------------------------------------------------------------===// // Sparse Tensor Dialect Setups. 
//===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseSpaceCollapse.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseSpaceCollapse.cpp index 924046fcd9961f..f85c4761a8d52b 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseSpaceCollapse.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseSpaceCollapse.cpp @@ -141,10 +141,10 @@ void collapseSparseSpace(MutableArrayRef toCollapse) { auto cloned = llvm::cast(builder.clone(*innermost, mapper)); builder.setInsertionPointToStart(cloned.getBody()); - LevelSet crdUsedLvls; + I64BitSet crdUsedLvls; unsigned shift = 0, argIdx = 1; for (auto info : toCollapse.drop_back()) { - LevelSet set = info.loop.getCrdUsedLvls(); + I64BitSet set = info.loop.getCrdUsedLvls(); crdUsedLvls |= set.lshift(shift); shift += info.loop.getSpaceDim(); for (BlockArgument crd : info.loop.getCrds()) { diff --git a/mlir/test/Dialect/SparseTensor/invalid.mlir b/mlir/test/Dialect/SparseTensor/invalid.mlir index 61cc9be88685cc..737b736ba795fe 100644 --- a/mlir/test/Dialect/SparseTensor/invalid.mlir +++ b/mlir/test/Dialect/SparseTensor/invalid.mlir @@ -1191,3 +1191,78 @@ func.func @sparse_iterate(%sp : tensor<4x8xf32, #COO>, %i : index, %j : index) - } return %r1 : index } + +// ----- + +#COO = #sparse_tensor.encoding<{ + map = (i, j) -> ( + i : compressed(nonunique), + j : singleton(soa) + ) +}> + + +func.func @sparse_coiteration(%sp1 : !sparse_tensor.iter_space<#COO, lvls = 0>, + %sp2 : !sparse_tensor.iter_space<#COO, lvls = 1>) -> index { + %init = arith.constant 0 : index + // expected-error @+1 {{'sparse_tensor.coiterate' op contains duplicated cases.}} + %ret = sparse_tensor.coiterate (%sp1, %sp2) at (%coord) iter_args(%arg = %init) + : (!sparse_tensor.iter_space<#COO, lvls = 0>, !sparse_tensor.iter_space<#COO, lvls = 1>) + -> index + case %it1, _ { + sparse_tensor.yield %arg : index + } + case %it1, _ { + sparse_tensor.yield %arg : index + } + return %ret : index +} + + +// ----- + +#COO = #sparse_tensor.encoding<{ + map = (i, j) -> ( + i : compressed(nonunique), + j : singleton(soa) + ) +}> + + +func.func @sparse_coiteration(%sp1 : !sparse_tensor.iter_space<#COO, lvls = 0>, + %sp2 : !sparse_tensor.iter_space<#COO, lvls = 1>) -> index { + %init = arith.constant 0 : index + // expected-error @+1 {{'sparse_tensor.coiterate' op types mismatch between 0th yield value and defined value on 0th region}} + %ret = sparse_tensor.coiterate (%sp1, %sp2) at (%coord) iter_args(%arg = %init) + : (!sparse_tensor.iter_space<#COO, lvls = 0>, !sparse_tensor.iter_space<#COO, lvls = 1>) + -> index + case %it1, _ { + %i = arith.constant 1 : i32 + sparse_tensor.yield %i : i32 + } + return %ret : index +} + +// ----- + +#COO = #sparse_tensor.encoding<{ + map = (i, j) -> ( + i : compressed(nonunique), + j : singleton(soa) + ) +}> + + +func.func @sparse_coiteration(%sp1 : !sparse_tensor.iter_space<#COO, lvls = 0>, + %sp2 : !sparse_tensor.iter_space<#COO, lvls = 1>) -> index { + %init = arith.constant 0 : index + // expected-error @+1 {{'sparse_tensor.coiterate' op required out-of-bound coordinates}} + %ret = sparse_tensor.coiterate (%sp1, %sp2) at (%coord1, %coord2) iter_args(%arg = %init) + : (!sparse_tensor.iter_space<#COO, lvls = 0>, !sparse_tensor.iter_space<#COO, lvls = 1>) + -> index + case %it1, _ { + %i = arith.constant 1 : i32 + sparse_tensor.yield %i : i32 + } + return %ret : index +} diff --git a/mlir/test/Dialect/SparseTensor/roundtrip.mlir 
b/mlir/test/Dialect/SparseTensor/roundtrip.mlir
index 055709ee69eb71..ab861a2019dfae 100644
--- a/mlir/test/Dialect/SparseTensor/roundtrip.mlir
+++ b/mlir/test/Dialect/SparseTensor/roundtrip.mlir
@@ -801,7 +801,7 @@ func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse
// CHECK-SAME: %[[VAL_1:.*]]: index,
// CHECK-SAME: %[[VAL_2:.*]]: index) -> index {
// CHECK: %[[VAL_3:.*]] = sparse_tensor.extract_iteration_space %[[VAL_0]] lvls = 0 : tensor<4x8xf32, #sparse{{[0-9]*}}>
-// CHECK: %[[VAL_4:.*]] = sparse_tensor.iterate %[[VAL_5:.*]] in %[[VAL_3]] at(%[[VAL_6:.*]]) iter_args(%[[VAL_7:.*]] = %[[VAL_1]]) : !sparse_tensor.iter_space<#sparse{{[0-9]*}}, lvls = 0> -> (index) {
+// CHECK: %[[VAL_4:.*]] = sparse_tensor.iterate %[[VAL_5:.*]] in %[[VAL_3]] at(%[[VAL_6:.*]]) iter_args(%[[VAL_7:.*]] = %[[VAL_1]]) : !sparse_tensor.iter_space<#sparse{{[0-9]*}}, lvls = 0> -> index {
// CHECK: sparse_tensor.yield %[[VAL_7]] : index
// CHECK: }
// CHECK: return %[[VAL_4]] : index
@@ -813,3 +813,36 @@ func.func @sparse_iterate(%sp : tensor<4x8xf32, #COO>, %i : index, %j : index) -
 }
 return %r1 : index
 }
+
+
+// -----
+
+#COO = #sparse_tensor.encoding<{
+ map = (i, j) -> (
+ i : compressed(nonunique),
+ j : singleton(soa)
+ )
+}>
+
+
+// CHECK-LABEL: func.func @sparse_coiteration(
+// CHECK-SAME: %[[SP1:.*]]: !sparse_tensor.iter_space<#sparse, lvls = 0>,
+// CHECK-SAME: %[[SP2:.*]]: !sparse_tensor.iter_space<#sparse, lvls = 1>) -> index {
+// CHECK: %[[INIT:.*]] = arith.constant 0 : index
+// CHECK: %[[RET:.*]] = sparse_tensor.coiterate (%[[SP1]], %[[SP2]]) at(%[[COORD:.*]]) iter_args(%[[ARG:.*]] = %[[INIT]])
+// CHECK: case %[[VAL_6:.*]], _ {
+// CHECK: sparse_tensor.yield %[[ARG]] : index
+// CHECK: }
+// CHECK: return %[[RET]] : index
+// CHECK: }
+func.func @sparse_coiteration(%sp1 : !sparse_tensor.iter_space<#COO, lvls = 0>,
+ %sp2 : !sparse_tensor.iter_space<#COO, lvls = 1>) -> index {
+ %init = arith.constant 0 : index
+ %ret = sparse_tensor.coiterate (%sp1, %sp2) at (%coord) iter_args(%arg = %init)
+ : (!sparse_tensor.iter_space<#COO, lvls = 0>, !sparse_tensor.iter_space<#COO, lvls = 1>)
+ -> index
+ case %it1, _ {
+ sparse_tensor.yield %arg : index
+ }
+ return %ret : index
+}

From 26766a00ff946c281b7dd517b2ba8d594012c21e Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 31 Jul 2024 15:18:21 -0700
Subject: [PATCH 034/114] [RISCV] Remove unnecessary FP extensions from some
 integer-only vector tests.

I'm going to do a review to make sure we are testing Zvfhmin instead of Zvfh
where clang expects it to work for half types, like loads/stores. Removing
unnecessary FP leaves fewer things to review.
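As an illustration (a hypothetical RUN line, not taken from this patch), a test
that only exercises half-precision loads and stores would be expected to pass
with Zvfhmin rather than full Zvfh, along the lines of:

  ; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin -verify-machineinstrs < %s | FileCheck %s

while the integer-only tests touched here need nothing beyond +v (plus +m for
the tests that already used it).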
--- llvm/test/CodeGen/RISCV/rvv/abs-vp.ll | 4 ++-- llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll | 4 ++-- llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll | 4 ++-- llvm/test/CodeGen/RISCV/rvv/commutable.ll | 4 ++-- llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll | 4 ++-- llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll | 4 ++-- llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll | 4 ++-- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll | 4 ++-- .../test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll | 4 ++-- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll | 4 ++-- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll | 4 ++-- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll | 4 ++-- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll | 4 ++-- .../CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll | 4 ++-- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll | 8 ++++---- .../RISCV/rvv/fixed-vectors-shuffle-extract-subvector.ll | 4 ++-- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmacc-vp.ll | 4 ++-- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnmsac-vp.ll | 4 ++-- llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll | 4 ++-- llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll | 4 ++-- llvm/test/CodeGen/RISCV/rvv/regalloc-fast-crash.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll | 4 ++-- llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll | 4 ++-- llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll | 4 ++-- 29 files changed, 54 insertions(+), 54 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll index 05d6716e471926..cd2208e31eb6d3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK declare @llvm.vp.abs.nxv1i8(, i1 immarg, , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll index 5217148ba4f4ee..5709de567c18dc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll 
index aadd9852af11ea..6917d7e44a8e6a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zvkb,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB diff --git a/llvm/test/CodeGen/RISCV/rvv/commutable.ll b/llvm/test/CodeGen/RISCV/rvv/commutable.ll index 5bca2eeb3fddff..e26c467f025bdc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/commutable.ll +++ b/llvm/test/CodeGen/RISCV/rvv/commutable.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+f,+d,+zvfh,+v \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ ; RUN: -verify-machineinstrs | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+f,+d,+zvfh,+v \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ ; RUN: -verify-machineinstrs | FileCheck %s ; vadd.vv diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll index b5cafe410ae8d5..01aac122d5957d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll index df9949e617b807..4d6bc349ffacbc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32NOM -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32M diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll index a96cf5807e6c16..aba0ad022005ba 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64NOM -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64M define signext i8 @extractelt_nxv1i8_0( %v) { diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll index f3e823562888fa..84da351de76ba9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK declare <2 x i8> @llvm.vp.abs.v2i8(<2 x i8>, i1 immarg, <2 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll index 068c25b8210020..90bedf87e04d30 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 declare <2 x i8> @llvm.vp.bitreverse.v2i8(<2 x i8>, <2 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll index 1490738687322a..6f2e86097d6ff9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 declare <2 x i16> @llvm.vp.bswap.v2i16(<2 x i16>, <2 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll index 3286c336a0fd11..809884cb18129b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s define <4 x i8> @ret_v4i8(ptr %p) { ; CHECK-LABEL: ret_v4i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll index b42fb8c6861644..f5e6b929051939 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 declare <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8>, i1 immarg, <2 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll index 5fceab869ab85d..e90e52fba642b1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 declare <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8>, <2 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll index e7736e7f360f31..dfad7881066a27 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 declare <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8>, i1 immarg, <2 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll index 369f90521cf00a..bb2b57fbcc3b7a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 
-mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s define <2 x i16> @sextload_v2i1_v2i16(ptr %x) { ; CHECK-LABEL: sextload_v2i1_v2i16: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll index 386c71cf665ced..493481ad129d29 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d,+zbs -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32ZBS -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d,+zbs -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64ZBS +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zbs -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32ZBS +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zbs -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64ZBS define i1 @extractelt_v1i1(ptr %x, i64 %idx) nounwind { ; CHECK-LABEL: extractelt_v1i1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-extract-subvector.ll index d528970def626f..a6bbbaa71d2388 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-extract-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-extract-subvector.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK -; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK define <8 x i8> @v8i8_from_v16xi8_low(<16 x i8> %a) nounwind { ; CHECK-LABEL: v8i8_from_v16xi8_low: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmacc-vp.ll index 2181fd8498f5af..4805d6782a3b93 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmacc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmacc-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 
-mattr=+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 declare <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnmsac-vp.ll index 695fba6d54e033..805e2e2e6bd35d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnmsac-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnmsac-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 declare <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll index 410ef8a02383f9..c3cc90c6a8de31 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define @insertelt_nxv1i8_0( %v, i8 signext %elt) { diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll index 5bba1c549972bb..1523126af21243 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define @insertelt_nxv1i8_0( %v, i8 signext %elt) { diff --git a/llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll index 4b4cffc461d462..77438ee53b634c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s ; Check that we are able to legalize scalable-vector stores that require widening. 
diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll index 1c3b429202adf6..e686ac881fabe0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64 %struct = type { i64, i64, ptr, i32, i32, i32, [4 x i32] } diff --git a/llvm/test/CodeGen/RISCV/rvv/regalloc-fast-crash.ll b/llvm/test/CodeGen/RISCV/rvv/regalloc-fast-crash.ll index 64bc1ef156bea6..b6f9d319fe57d0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/regalloc-fast-crash.ll +++ b/llvm/test/CodeGen/RISCV/rvv/regalloc-fast-crash.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+m \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+m \ ; RUN: -regalloc=fast -verify-machineinstrs < %s | FileCheck %s ; This test previously crashed with an error "ran out of registers during register allocation" diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll index 64e33916ad6461..9588c85fbdbddb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+zve64d,+f,+d,+zfh,+zvfh \ +; RUN: llc -mtriple=riscv32 -mattr=+zve64x \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare {,, i32} @llvm.riscv.vlseg2ff.nxv16i16(,, ptr , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll index 2cc924e67e9386..02c2994d966222 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+zve64d,+f,+d,+zfh,+zvfh \ +; RUN: llc -mtriple=riscv64 -mattr=+zve64x \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare {,, i64} @llvm.riscv.vlseg2ff.nxv16i16(,, ptr , i64) diff --git a/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll index 5aba7ef4cc5b68..e232ac255c56f0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.vp.mul.nxv1i8(, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll index 22ed56afbd94e5..5401bf7db49e38 
100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.vp.mul.nxv1i8(, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll index f958fe815caaab..30edcaf9b15b53 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.vp.mul.nxv1i8(, , , i32) From 74f95794433f315a14bd6878d97877566863bc34 Mon Sep 17 00:00:00 2001 From: aaryanshukla <53713108+aaryanshukla@users.noreply.github.com> Date: Wed, 31 Jul 2024 15:24:08 -0700 Subject: [PATCH 035/114] [libc][math][c23] removed dsubl for 32 arm (#101423) --- libc/config/linux/arm/entrypoints.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt index d691f63a78e4b3..b102c3c219831d 100644 --- a/libc/config/linux/arm/entrypoints.txt +++ b/libc/config/linux/arm/entrypoints.txt @@ -230,7 +230,6 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.coshf libc.src.math.dfmal libc.src.math.dsqrtl - libc.src.math.dsubl libc.src.math.erff libc.src.math.exp libc.src.math.exp10 From cf79aba99db4909437b8977a59c51bc8899ddb9c Mon Sep 17 00:00:00 2001 From: smanna12 Date: Wed, 31 Jul 2024 15:36:21 -0700 Subject: [PATCH 036/114] [Clang] [NFC] Fix potential dereferencing of nullptr (#101405) This patch replaces getAs with castAs and dyn_cast with cast to ensure type safety and prevents potential null pointer dereferences. These changes enforce compile-time checks for correct type casting in ASTContext and CodeGenModule. 
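For reference, a minimal sketch of the behavioral difference (illustrative
only, not code from this patch; the helper function below is hypothetical):
getAs returns a null pointer when the dynamic type does not match, so chaining
a member access onto it can dereference nullptr, whereas castAs asserts that
the cast is valid. dyn_cast versus cast behaves analogously.

  #include "clang/AST/Type.h"
  using namespace clang;

  // Hypothetical helper; assumes QT is already known to be a RecordType.
  const RecordDecl *recordDeclOf(QualType QT) {
    // With getAs<RecordType>(), a type mismatch would yield nullptr and the
    // ->getDecl() call below would dereference a null pointer.
    // castAs<RecordType>() instead asserts that the cast succeeds.
    return QT->castAs<RecordType>()->getDecl();
  }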
--- clang/lib/AST/ASTContext.cpp | 4 ++-- clang/lib/CodeGen/CodeGenModule.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index a465cdfcf3c89e..b0cc7cb826f97e 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -3283,7 +3283,7 @@ static void encodeTypeForFunctionPointerAuth(const ASTContext &Ctx, return; case Type::Builtin: { - const auto *BTy = T->getAs(); + const auto *BTy = T->castAs(); switch (BTy->getKind()) { #define SIGNED_TYPE(Id, SingletonId) \ case BuiltinType::Id: \ @@ -3366,7 +3366,7 @@ static void encodeTypeForFunctionPointerAuth(const ASTContext &Ctx, llvm_unreachable("should never get here"); } case Type::Record: { - const RecordDecl *RD = T->getAs()->getDecl(); + const RecordDecl *RD = T->castAs()->getDecl(); const IdentifierInfo *II = RD->getIdentifier(); // In C++, an immediate typedef of an anonymous struct or union diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 344a0e538f22a2..760185d4d51ff0 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -5659,7 +5659,7 @@ void CodeGenModule::EmitExternalFunctionDeclaration(const FunctionDecl *FD) { if (getCodeGenOpts().hasReducedDebugInfo()) { auto *Ty = getTypes().ConvertType(FD->getType()); StringRef MangledName = getMangledName(FD); - auto *Fn = dyn_cast( + auto *Fn = cast( GetOrCreateLLVMFunction(MangledName, Ty, FD, /* ForVTable */ false)); if (!Fn->getSubprogram()) DI->EmitFunctionDecl(FD, FD->getLocation(), FD->getType(), Fn); From 6f318d47bfba56df65394db6c20befe3ed5bf243 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Wed, 31 Jul 2024 20:36:58 +0000 Subject: [PATCH 037/114] [NVPTX] Make minimum/maximum work on older GPUs We want to use newer instructions if we are targeting sufficiently new SM and PTX versions. If we cannot use those newer instructions, let LLVM synthesize the sequence from more fundamental instructions. --- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 34 +- llvm/test/CodeGen/NVPTX/math-intrins.ll | 1460 +++++++++++++++++++++-- 2 files changed, 1366 insertions(+), 128 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 91b239a52d17fe..a5bdc6fac3ca6c 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -268,16 +268,12 @@ multiclass ADD_SUB_INT_CARRY { } } -// Template for instructions which take three fp64 or fp32 args. The -// instructions are named ".f" (e.g. "min.f64"). +// Template for minimum/maximum instructions. // // Also defines ftz (flush subnormal inputs and results to sign-preserving // zero) variants for fp32 functions. -// -// This multiclass should be used for nodes that cannot be folded into FMAs. -// For nodes that can be folded into FMAs (i.e. adds and muls), use -// F3_fma_component. 
-multiclass F3 { +multiclass FMINIMUMMAXIMUM { + if !not(NaN) then { def f64rr : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a, Float64Regs:$b), @@ -288,6 +284,7 @@ multiclass F3 { (ins Float64Regs:$a, f64imm:$b), !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), [(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>; + } def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b), @@ -322,45 +319,45 @@ multiclass F3 { (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".f16 \t$dst, $a, $b;"), [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, - Requires<[useFP16Math]>; + Requires<[useFP16Math, hasSM<80>, hasPTX<70>]>; def f16x2rr_ftz : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a, $b;"), [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, - Requires<[useFP16Math, doF32FTZ]>; + Requires<[useFP16Math, hasSM<80>, hasPTX<70>, doF32FTZ]>; def f16x2rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".f16x2 \t$dst, $a, $b;"), [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, - Requires<[useFP16Math]>; + Requires<[useFP16Math, hasSM<80>, hasPTX<70>]>; def bf16rr_ftz : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".ftz.bf16 \t$dst, $a, $b;"), [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)))]>, - Requires<[hasBF16Math, doF32FTZ]>; + Requires<[hasBF16Math, doF32FTZ, hasSM<80>, hasPTX<70>]>; def bf16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".bf16 \t$dst, $a, $b;"), [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)))]>, - Requires<[hasBF16Math]>; + Requires<[hasBF16Math, hasSM<80>, hasPTX<70>]>; def bf16x2rr_ftz : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".ftz.bf16x2 \t$dst, $a, $b;"), [(set Int32Regs:$dst, (OpNode (v2bf16 Int32Regs:$a), (v2bf16 Int32Regs:$b)))]>, - Requires<[hasBF16Math, doF32FTZ]>; + Requires<[hasBF16Math, hasSM<80>, hasPTX<70>, doF32FTZ]>; def bf16x2rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".bf16x2 \t$dst, $a, $b;"), [(set Int32Regs:$dst, (OpNode (v2bf16 Int32Regs:$a), (v2bf16 Int32Regs:$b)))]>, - Requires<[hasBF16Math]>; + Requires<[hasBF16Math, hasSM<80>, hasPTX<70>]>; } // Template for instructions which take three FP args. The @@ -1178,11 +1175,10 @@ defm FADD : F3_fma_component<"add", fadd>; defm FSUB : F3_fma_component<"sub", fsub>; defm FMUL : F3_fma_component<"mul", fmul>; -defm FMIN : F3<"min", fminnum>; -defm FMAX : F3<"max", fmaxnum>; -// Note: min.NaN.f64 and max.NaN.f64 do not actually exist. 
-defm FMINNAN : F3<"min.NaN", fminimum>; -defm FMAXNAN : F3<"max.NaN", fmaximum>; +defm FMIN : FMINIMUMMAXIMUM<"min", /* NaN */ false, fminnum>; +defm FMAX : FMINIMUMMAXIMUM<"max", /* NaN */ false, fmaxnum>; +defm FMINNAN : FMINIMUMMAXIMUM<"min.NaN", /* NaN */ true, fminimum>; +defm FMAXNAN : FMINIMUMMAXIMUM<"max.NaN", /* NaN */ true, fmaximum>; defm FABS : F2<"abs", fabs>; defm FNEG : F2<"neg", fneg>; diff --git a/llvm/test/CodeGen/NVPTX/math-intrins.ll b/llvm/test/CodeGen/NVPTX/math-intrins.ll index d31844549a3220..fcc4ec6e4017f7 100644 --- a/llvm/test/CodeGen/NVPTX/math-intrins.ll +++ b/llvm/test/CodeGen/NVPTX/math-intrins.ll @@ -1,6 +1,7 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOF16 -; RUN: llc < %s -mcpu=sm_80 | FileCheck %s --check-prefixes=CHECK,CHECK-F16 -; RUN: llc < %s -mcpu=sm_80 --nvptx-no-f16-math | FileCheck %s --check-prefixes=CHECK,CHECK-NOF16 +; RUN: llc < %s -mcpu=sm_80 -mattr +ptx70 | FileCheck %s --check-prefixes=CHECK,CHECK-F16 +; RUN: llc < %s -mcpu=sm_80 -mattr +ptx70 --nvptx-no-f16-math | FileCheck %s --check-prefixes=CHECK,CHECK-SM80-NOF16 ; RUN: %if ptxas %{ llc < %s | %ptxas-verify %} ; RUN: %if ptxas-11.0 %{ llc < %s -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %} ; RUN: %if ptxas-11.0 %{ llc < %s -mcpu=sm_80 --nvptx-no-f16-math | %ptxas-verify -arch=sm_80 %} @@ -29,330 +30,1571 @@ declare half @llvm.minnum.f16(half, half) #0 declare float @llvm.minnum.f32(float, float) #0 declare double @llvm.minnum.f64(double, double) #0 declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #0 +declare half @llvm.minimum.f16(half, half) #0 +declare float @llvm.minimum.f32(float, float) #0 +declare double @llvm.minimum.f64(double, double) #0 +declare <2 x half> @llvm.minimum.v2f16(<2 x half>, <2 x half>) #0 declare half @llvm.maxnum.f16(half, half) #0 declare float @llvm.maxnum.f32(float, float) #0 declare double @llvm.maxnum.f64(double, double) #0 declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #0 +declare half @llvm.maximum.f16(half, half) #0 +declare float @llvm.maximum.f32(float, float) #0 +declare double @llvm.maximum.f64(double, double) #0 +declare <2 x half> @llvm.maximum.v2f16(<2 x half>, <2 x half>) #0 declare float @llvm.fma.f32(float, float, float) #0 declare double @llvm.fma.f64(double, double, double) #0 ; ---- ceil ---- -; CHECK-LABEL: ceil_float define float @ceil_float(float %a) { - ; CHECK: cvt.rpi.f32.f32 +; CHECK-LABEL: ceil_float( +; CHECK: { +; CHECK-NEXT: .reg .f32 %f<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [ceil_float_param_0]; +; CHECK-NEXT: cvt.rpi.f32.f32 %f2, %f1; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-NEXT: ret; %b = call float @llvm.ceil.f32(float %a) ret float %b } -; CHECK-LABEL: ceil_float_ftz define float @ceil_float_ftz(float %a) #1 { - ; CHECK: cvt.rpi.ftz.f32.f32 +; CHECK-LABEL: ceil_float_ftz( +; CHECK: { +; CHECK-NEXT: .reg .f32 %f<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [ceil_float_ftz_param_0]; +; CHECK-NEXT: cvt.rpi.ftz.f32.f32 %f2, %f1; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-NEXT: ret; %b = call float @llvm.ceil.f32(float %a) ret float %b } -; CHECK-LABEL: ceil_double define double @ceil_double(double %a) { - ; CHECK: cvt.rpi.f64.f64 +; CHECK-LABEL: ceil_double( +; CHECK: { +; CHECK-NEXT: .reg .f64 %fd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f64 %fd1, 
[ceil_double_param_0]; +; CHECK-NEXT: cvt.rpi.f64.f64 %fd2, %fd1; +; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd2; +; CHECK-NEXT: ret; %b = call double @llvm.ceil.f64(double %a) ret double %b } ; ---- floor ---- -; CHECK-LABEL: floor_float define float @floor_float(float %a) { - ; CHECK: cvt.rmi.f32.f32 +; CHECK-LABEL: floor_float( +; CHECK: { +; CHECK-NEXT: .reg .f32 %f<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [floor_float_param_0]; +; CHECK-NEXT: cvt.rmi.f32.f32 %f2, %f1; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-NEXT: ret; %b = call float @llvm.floor.f32(float %a) ret float %b } -; CHECK-LABEL: floor_float_ftz define float @floor_float_ftz(float %a) #1 { - ; CHECK: cvt.rmi.ftz.f32.f32 +; CHECK-LABEL: floor_float_ftz( +; CHECK: { +; CHECK-NEXT: .reg .f32 %f<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [floor_float_ftz_param_0]; +; CHECK-NEXT: cvt.rmi.ftz.f32.f32 %f2, %f1; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-NEXT: ret; %b = call float @llvm.floor.f32(float %a) ret float %b } -; CHECK-LABEL: floor_double define double @floor_double(double %a) { - ; CHECK: cvt.rmi.f64.f64 +; CHECK-LABEL: floor_double( +; CHECK: { +; CHECK-NEXT: .reg .f64 %fd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f64 %fd1, [floor_double_param_0]; +; CHECK-NEXT: cvt.rmi.f64.f64 %fd2, %fd1; +; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd2; +; CHECK-NEXT: ret; %b = call double @llvm.floor.f64(double %a) ret double %b } ; ---- round ---- -; CHECK-LABEL: round_float define float @round_float(float %a) { ; check the use of sign mask and 0.5 to implement round -; CHECK: and.b32 [[R1:%r[0-9]+]], {{.*}}, -2147483648; -; CHECK: or.b32 {{.*}}, [[R1]], 1056964608; +; CHECK-LABEL: round_float( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<3>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-NEXT: .reg .f32 %f<9>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [round_float_param_0]; +; CHECK-NEXT: mov.b32 %r1, %f1; +; CHECK-NEXT: and.b32 %r2, %r1, -2147483648; +; CHECK-NEXT: or.b32 %r3, %r2, 1056964608; +; CHECK-NEXT: mov.b32 %f2, %r3; +; CHECK-NEXT: add.rn.f32 %f3, %f1, %f2; +; CHECK-NEXT: cvt.rzi.f32.f32 %f4, %f3; +; CHECK-NEXT: abs.f32 %f5, %f1; +; CHECK-NEXT: setp.gt.f32 %p1, %f5, 0f4B000000; +; CHECK-NEXT: selp.f32 %f6, %f1, %f4, %p1; +; CHECK-NEXT: cvt.rzi.f32.f32 %f7, %f1; +; CHECK-NEXT: setp.lt.f32 %p2, %f5, 0f3F000000; +; CHECK-NEXT: selp.f32 %f8, %f7, %f6, %p2; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f8; +; CHECK-NEXT: ret; %b = call float @llvm.round.f32(float %a) ret float %b } -; CHECK-LABEL: round_float_ftz define float @round_float_ftz(float %a) #1 { ; check the use of sign mask and 0.5 to implement round -; CHECK: and.b32 [[R1:%r[0-9]+]], {{.*}}, -2147483648; -; CHECK: or.b32 {{.*}}, [[R1]], 1056964608; +; CHECK-LABEL: round_float_ftz( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<3>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-NEXT: .reg .f32 %f<9>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [round_float_ftz_param_0]; +; CHECK-NEXT: mov.b32 %r1, %f1; +; CHECK-NEXT: and.b32 %r2, %r1, -2147483648; +; CHECK-NEXT: or.b32 %r3, %r2, 1056964608; +; CHECK-NEXT: mov.b32 %f2, %r3; +; CHECK-NEXT: add.rn.ftz.f32 %f3, %f1, %f2; +; CHECK-NEXT: cvt.rzi.ftz.f32.f32 %f4, %f3; +; CHECK-NEXT: abs.ftz.f32 %f5, %f1; +; CHECK-NEXT: setp.gt.ftz.f32 %p1, %f5, 0f4B000000; +; CHECK-NEXT: selp.f32 %f6, %f1, %f4, %p1; +; CHECK-NEXT: cvt.rzi.ftz.f32.f32 %f7, 
%f1; +; CHECK-NEXT: setp.lt.ftz.f32 %p2, %f5, 0f3F000000; +; CHECK-NEXT: selp.f32 %f8, %f7, %f6, %p2; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f8; +; CHECK-NEXT: ret; %b = call float @llvm.round.f32(float %a) ret float %b } -; CHECK-LABEL: round_double define double @round_double(double %a) { ; check the use of 0.5 to implement round -; CHECK: setp.lt.f64 {{.*}}, [[R:%fd[0-9]+]], 0d3FE0000000000000; -; CHECK: add.rn.f64 {{.*}}, [[R]], 0d3FE0000000000000; +; CHECK-LABEL: round_double( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<4>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-NEXT: .reg .f64 %fd<10>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f64 %fd1, [round_double_param_0]; +; CHECK-NEXT: abs.f64 %fd2, %fd1; +; CHECK-NEXT: setp.lt.f64 %p1, %fd2, 0d3FE0000000000000; +; CHECK-NEXT: add.rn.f64 %fd3, %fd2, 0d3FE0000000000000; +; CHECK-NEXT: cvt.rzi.f64.f64 %fd4, %fd3; +; CHECK-NEXT: selp.f64 %fd5, 0d0000000000000000, %fd4, %p1; +; CHECK-NEXT: abs.f64 %fd6, %fd5; +; CHECK-NEXT: neg.f64 %fd7, %fd6; +; CHECK-NEXT: mov.b64 %rd1, %fd1; +; CHECK-NEXT: shr.u64 %rd2, %rd1, 63; +; CHECK-NEXT: and.b64 %rd3, %rd2, 1; +; CHECK-NEXT: setp.eq.b64 %p2, %rd3, 1; +; CHECK-NEXT: selp.f64 %fd8, %fd7, %fd6, %p2; +; CHECK-NEXT: setp.gt.f64 %p3, %fd2, 0d4330000000000000; +; CHECK-NEXT: selp.f64 %fd9, %fd1, %fd8, %p3; +; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd9; +; CHECK-NEXT: ret; %b = call double @llvm.round.f64(double %a) ret double %b } ; ---- nearbyint ---- -; CHECK-LABEL: nearbyint_float define float @nearbyint_float(float %a) { - ; CHECK: cvt.rni.f32.f32 +; CHECK-LABEL: nearbyint_float( +; CHECK: { +; CHECK-NEXT: .reg .f32 %f<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [nearbyint_float_param_0]; +; CHECK-NEXT: cvt.rni.f32.f32 %f2, %f1; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-NEXT: ret; %b = call float @llvm.nearbyint.f32(float %a) ret float %b } -; CHECK-LABEL: nearbyint_float_ftz define float @nearbyint_float_ftz(float %a) #1 { - ; CHECK: cvt.rni.ftz.f32.f32 +; CHECK-LABEL: nearbyint_float_ftz( +; CHECK: { +; CHECK-NEXT: .reg .f32 %f<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [nearbyint_float_ftz_param_0]; +; CHECK-NEXT: cvt.rni.ftz.f32.f32 %f2, %f1; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-NEXT: ret; %b = call float @llvm.nearbyint.f32(float %a) ret float %b } -; CHECK-LABEL: nearbyint_double define double @nearbyint_double(double %a) { - ; CHECK: cvt.rni.f64.f64 +; CHECK-LABEL: nearbyint_double( +; CHECK: { +; CHECK-NEXT: .reg .f64 %fd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f64 %fd1, [nearbyint_double_param_0]; +; CHECK-NEXT: cvt.rni.f64.f64 %fd2, %fd1; +; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd2; +; CHECK-NEXT: ret; %b = call double @llvm.nearbyint.f64(double %a) ret double %b } ; ---- rint ---- -; CHECK-LABEL: rint_float define float @rint_float(float %a) { - ; CHECK: cvt.rni.f32.f32 +; CHECK-LABEL: rint_float( +; CHECK: { +; CHECK-NEXT: .reg .f32 %f<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [rint_float_param_0]; +; CHECK-NEXT: cvt.rni.f32.f32 %f2, %f1; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-NEXT: ret; %b = call float @llvm.rint.f32(float %a) ret float %b } -; CHECK-LABEL: rint_float_ftz define float @rint_float_ftz(float %a) #1 { - ; CHECK: cvt.rni.ftz.f32.f32 +; CHECK-LABEL: rint_float_ftz( +; CHECK: { +; CHECK-NEXT: .reg .f32 %f<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: 
+; CHECK-NEXT: ld.param.f32 %f1, [rint_float_ftz_param_0]; +; CHECK-NEXT: cvt.rni.ftz.f32.f32 %f2, %f1; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-NEXT: ret; %b = call float @llvm.rint.f32(float %a) ret float %b } -; CHECK-LABEL: rint_double define double @rint_double(double %a) { - ; CHECK: cvt.rni.f64.f64 +; CHECK-LABEL: rint_double( +; CHECK: { +; CHECK-NEXT: .reg .f64 %fd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f64 %fd1, [rint_double_param_0]; +; CHECK-NEXT: cvt.rni.f64.f64 %fd2, %fd1; +; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd2; +; CHECK-NEXT: ret; %b = call double @llvm.rint.f64(double %a) ret double %b } ; ---- roundeven ---- -; CHECK-LABEL: roundeven_float define float @roundeven_float(float %a) { - ; CHECK: cvt.rni.f32.f32 +; CHECK-LABEL: roundeven_float( +; CHECK: { +; CHECK-NEXT: .reg .f32 %f<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [roundeven_float_param_0]; +; CHECK-NEXT: cvt.rni.f32.f32 %f2, %f1; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-NEXT: ret; %b = call float @llvm.roundeven.f32(float %a) ret float %b } -; CHECK-LABEL: roundeven_float_ftz define float @roundeven_float_ftz(float %a) #1 { - ; CHECK: cvt.rni.ftz.f32.f32 +; CHECK-LABEL: roundeven_float_ftz( +; CHECK: { +; CHECK-NEXT: .reg .f32 %f<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [roundeven_float_ftz_param_0]; +; CHECK-NEXT: cvt.rni.ftz.f32.f32 %f2, %f1; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-NEXT: ret; %b = call float @llvm.roundeven.f32(float %a) ret float %b } -; CHECK-LABEL: roundeven_double define double @roundeven_double(double %a) { - ; CHECK: cvt.rni.f64.f64 +; CHECK-LABEL: roundeven_double( +; CHECK: { +; CHECK-NEXT: .reg .f64 %fd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f64 %fd1, [roundeven_double_param_0]; +; CHECK-NEXT: cvt.rni.f64.f64 %fd2, %fd1; +; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd2; +; CHECK-NEXT: ret; %b = call double @llvm.roundeven.f64(double %a) ret double %b } ; ---- trunc ---- -; CHECK-LABEL: trunc_float define float @trunc_float(float %a) { - ; CHECK: cvt.rzi.f32.f32 +; CHECK-LABEL: trunc_float( +; CHECK: { +; CHECK-NEXT: .reg .f32 %f<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [trunc_float_param_0]; +; CHECK-NEXT: cvt.rzi.f32.f32 %f2, %f1; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-NEXT: ret; %b = call float @llvm.trunc.f32(float %a) ret float %b } -; CHECK-LABEL: trunc_float_ftz define float @trunc_float_ftz(float %a) #1 { - ; CHECK: cvt.rzi.ftz.f32.f32 +; CHECK-LABEL: trunc_float_ftz( +; CHECK: { +; CHECK-NEXT: .reg .f32 %f<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [trunc_float_ftz_param_0]; +; CHECK-NEXT: cvt.rzi.ftz.f32.f32 %f2, %f1; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-NEXT: ret; %b = call float @llvm.trunc.f32(float %a) ret float %b } -; CHECK-LABEL: trunc_double define double @trunc_double(double %a) { - ; CHECK: cvt.rzi.f64.f64 +; CHECK-LABEL: trunc_double( +; CHECK: { +; CHECK-NEXT: .reg .f64 %fd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f64 %fd1, [trunc_double_param_0]; +; CHECK-NEXT: cvt.rzi.f64.f64 %fd2, %fd1; +; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd2; +; CHECK-NEXT: ret; %b = call double @llvm.trunc.f64(double %a) ret double %b } ; ---- abs ---- -; CHECK-LABEL: abs_float define float @abs_float(float %a) { - ; CHECK: abs.f32 +; 
CHECK-LABEL: abs_float( +; CHECK: { +; CHECK-NEXT: .reg .f32 %f<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [abs_float_param_0]; +; CHECK-NEXT: abs.f32 %f2, %f1; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-NEXT: ret; %b = call float @llvm.fabs.f32(float %a) ret float %b } -; CHECK-LABEL: abs_float_ftz define float @abs_float_ftz(float %a) #1 { - ; CHECK: abs.ftz.f32 +; CHECK-LABEL: abs_float_ftz( +; CHECK: { +; CHECK-NEXT: .reg .f32 %f<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [abs_float_ftz_param_0]; +; CHECK-NEXT: abs.ftz.f32 %f2, %f1; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-NEXT: ret; %b = call float @llvm.fabs.f32(float %a) ret float %b } -; CHECK-LABEL: abs_double define double @abs_double(double %a) { - ; CHECK: abs.f64 +; CHECK-LABEL: abs_double( +; CHECK: { +; CHECK-NEXT: .reg .f64 %fd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f64 %fd1, [abs_double_param_0]; +; CHECK-NEXT: abs.f64 %fd2, %fd1; +; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd2; +; CHECK-NEXT: ret; %b = call double @llvm.fabs.f64(double %a) ret double %b } -; ---- min ---- +; ---- minnum ---- -; CHECK-LABEL: min_half -define half @min_half(half %a, half %b) { - ; CHECK-NOF16: min.f32 - ; CHECK-F16: min.f16 +define half @minnum_half(half %a, half %b) { +; CHECK-NOF16-LABEL: minnum_half( +; CHECK-NOF16: { +; CHECK-NOF16-NEXT: .reg .b16 %rs<4>; +; CHECK-NOF16-NEXT: .reg .f32 %f<4>; +; CHECK-NOF16-EMPTY: +; CHECK-NOF16-NEXT: // %bb.0: +; CHECK-NOF16-NEXT: ld.param.b16 %rs1, [minnum_half_param_0]; +; CHECK-NOF16-NEXT: ld.param.b16 %rs2, [minnum_half_param_1]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs1; +; CHECK-NOF16-NEXT: min.f32 %f3, %f2, %f1; +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs3, %f3; +; CHECK-NOF16-NEXT: st.param.b16 [func_retval0+0], %rs3; +; CHECK-NOF16-NEXT: ret; +; +; CHECK-F16-LABEL: minnum_half( +; CHECK-F16: { +; CHECK-F16-NEXT: .reg .b16 %rs<4>; +; CHECK-F16-EMPTY: +; CHECK-F16-NEXT: // %bb.0: +; CHECK-F16-NEXT: ld.param.b16 %rs1, [minnum_half_param_0]; +; CHECK-F16-NEXT: ld.param.b16 %rs2, [minnum_half_param_1]; +; CHECK-F16-NEXT: min.f16 %rs3, %rs1, %rs2; +; CHECK-F16-NEXT: st.param.b16 [func_retval0+0], %rs3; +; CHECK-F16-NEXT: ret; +; +; CHECK-SM80-NOF16-LABEL: minnum_half( +; CHECK-SM80-NOF16: { +; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<4>; +; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<4>; +; CHECK-SM80-NOF16-EMPTY: +; CHECK-SM80-NOF16-NEXT: // %bb.0: +; CHECK-SM80-NOF16-NEXT: ld.param.b16 %rs1, [minnum_half_param_0]; +; CHECK-SM80-NOF16-NEXT: ld.param.b16 %rs2, [minnum_half_param_1]; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f2, %rs1; +; CHECK-SM80-NOF16-NEXT: min.f32 %f3, %f2, %f1; +; CHECK-SM80-NOF16-NEXT: cvt.rn.f16.f32 %rs3, %f3; +; CHECK-SM80-NOF16-NEXT: st.param.b16 [func_retval0+0], %rs3; +; CHECK-SM80-NOF16-NEXT: ret; %x = call half @llvm.minnum.f16(half %a, half %b) ret half %x } -; CHECK-LABEL: min_float -define float @min_float(float %a, float %b) { - ; CHECK: min.f32 +define float @minnum_float(float %a, float %b) { +; CHECK-LABEL: minnum_float( +; CHECK: { +; CHECK-NEXT: .reg .f32 %f<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [minnum_float_param_0]; +; CHECK-NEXT: ld.param.f32 %f2, [minnum_float_param_1]; +; CHECK-NEXT: min.f32 %f3, %f1, %f2; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-NEXT: ret; %x = call float 
@llvm.minnum.f32(float %a, float %b) ret float %x } -; CHECK-LABEL: min_imm1 -define float @min_imm1(float %a) { - ; CHECK: min.f32 +define float @minnum_imm1(float %a) { +; CHECK-LABEL: minnum_imm1( +; CHECK: { +; CHECK-NEXT: .reg .f32 %f<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [minnum_imm1_param_0]; +; CHECK-NEXT: min.f32 %f2, %f1, 0f00000000; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-NEXT: ret; %x = call float @llvm.minnum.f32(float %a, float 0.0) ret float %x } -; CHECK-LABEL: min_imm2 -define float @min_imm2(float %a) { - ; CHECK: min.f32 +define float @minnum_imm2(float %a) { +; CHECK-LABEL: minnum_imm2( +; CHECK: { +; CHECK-NEXT: .reg .f32 %f<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [minnum_imm2_param_0]; +; CHECK-NEXT: min.f32 %f2, %f1, 0f00000000; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-NEXT: ret; %x = call float @llvm.minnum.f32(float 0.0, float %a) ret float %x } -; CHECK-LABEL: min_float_ftz -define float @min_float_ftz(float %a, float %b) #1 { - ; CHECK: min.ftz.f32 +define float @minnum_float_ftz(float %a, float %b) #1 { +; CHECK-LABEL: minnum_float_ftz( +; CHECK: { +; CHECK-NEXT: .reg .f32 %f<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [minnum_float_ftz_param_0]; +; CHECK-NEXT: ld.param.f32 %f2, [minnum_float_ftz_param_1]; +; CHECK-NEXT: min.ftz.f32 %f3, %f1, %f2; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-NEXT: ret; %x = call float @llvm.minnum.f32(float %a, float %b) ret float %x } -; CHECK-LABEL: min_double -define double @min_double(double %a, double %b) { - ; CHECK: min.f64 +define double @minnum_double(double %a, double %b) { +; CHECK-LABEL: minnum_double( +; CHECK: { +; CHECK-NEXT: .reg .f64 %fd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f64 %fd1, [minnum_double_param_0]; +; CHECK-NEXT: ld.param.f64 %fd2, [minnum_double_param_1]; +; CHECK-NEXT: min.f64 %fd3, %fd1, %fd2; +; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd3; +; CHECK-NEXT: ret; %x = call double @llvm.minnum.f64(double %a, double %b) ret double %x } -; CHECK-LABEL: min_v2half -define <2 x half> @min_v2half(<2 x half> %a, <2 x half> %b) { - ; CHECK-NOF16: min.f32 - ; CHECK-NOF16: min.f32 - ; CHECK-F16: min.f16x2 +define <2 x half> @minnum_v2half(<2 x half> %a, <2 x half> %b) { +; CHECK-NOF16-LABEL: minnum_v2half( +; CHECK-NOF16: { +; CHECK-NOF16-NEXT: .reg .b16 %rs<7>; +; CHECK-NOF16-NEXT: .reg .b32 %r<4>; +; CHECK-NOF16-NEXT: .reg .f32 %f<7>; +; CHECK-NOF16-EMPTY: +; CHECK-NOF16-NEXT: // %bb.0: +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [minnum_v2half_param_0]; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [minnum_v2half_param_1]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; +; CHECK-NOF16-NEXT: min.f32 %f3, %f2, %f1; +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %f3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f5, %rs3; +; CHECK-NOF16-NEXT: min.f32 %f6, %f5, %f4; +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %f6; +; CHECK-NOF16-NEXT: mov.b32 %r3, {%rs6, %rs5}; +; CHECK-NOF16-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-NOF16-NEXT: ret; +; +; CHECK-F16-LABEL: minnum_v2half( +; CHECK-F16: { +; CHECK-F16-NEXT: .reg .b32 %r<4>; +; CHECK-F16-EMPTY: +; CHECK-F16-NEXT: // %bb.0: +; CHECK-F16-NEXT: ld.param.b32 %r1, [minnum_v2half_param_1]; +; CHECK-F16-NEXT: 
ld.param.b32 %r2, [minnum_v2half_param_0]; +; CHECK-F16-NEXT: min.f16x2 %r3, %r2, %r1; +; CHECK-F16-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-F16-NEXT: ret; +; +; CHECK-SM80-NOF16-LABEL: minnum_v2half( +; CHECK-SM80-NOF16: { +; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<7>; +; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<4>; +; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<7>; +; CHECK-SM80-NOF16-EMPTY: +; CHECK-SM80-NOF16-NEXT: // %bb.0: +; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r1, [minnum_v2half_param_0]; +; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r2, [minnum_v2half_param_1]; +; CHECK-SM80-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; +; CHECK-SM80-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; +; CHECK-SM80-NOF16-NEXT: min.f32 %f3, %f2, %f1; +; CHECK-SM80-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %f3; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f4, %rs1; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f5, %rs3; +; CHECK-SM80-NOF16-NEXT: min.f32 %f6, %f5, %f4; +; CHECK-SM80-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %f6; +; CHECK-SM80-NOF16-NEXT: mov.b32 %r3, {%rs6, %rs5}; +; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-SM80-NOF16-NEXT: ret; %x = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b) ret <2 x half> %x } -; ---- max ---- +; ---- minimum ---- -; CHECK-LABEL: max_half -define half @max_half(half %a, half %b) { - ; CHECK-NOF16: max.f32 - ; CHECK-F16: max.f16 +define half @minimum_half(half %a, half %b) { +; CHECK-NOF16-LABEL: minimum_half( +; CHECK-NOF16: { +; CHECK-NOF16-NEXT: .reg .pred %p<6>; +; CHECK-NOF16-NEXT: .reg .b16 %rs<10>; +; CHECK-NOF16-NEXT: .reg .f32 %f<4>; +; CHECK-NOF16-EMPTY: +; CHECK-NOF16-NEXT: // %bb.0: +; CHECK-NOF16-NEXT: ld.param.b16 %rs1, [minimum_half_param_0]; +; CHECK-NOF16-NEXT: ld.param.b16 %rs3, [minimum_half_param_1]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs1; +; CHECK-NOF16-NEXT: setp.lt.f32 %p1, %f2, %f1; +; CHECK-NOF16-NEXT: selp.b16 %rs4, %rs1, %rs3, %p1; +; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %f2, %f1; +; CHECK-NOF16-NEXT: selp.b16 %rs5, 0x7E00, %rs4, %p2; +; CHECK-NOF16-NEXT: setp.eq.s16 %p3, %rs1, -32768; +; CHECK-NOF16-NEXT: selp.b16 %rs6, %rs1, %rs5, %p3; +; CHECK-NOF16-NEXT: setp.eq.s16 %p4, %rs3, -32768; +; CHECK-NOF16-NEXT: selp.b16 %rs8, %rs3, %rs6, %p4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs5; +; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %f3, 0f00000000; +; CHECK-NOF16-NEXT: selp.b16 %rs9, %rs8, %rs5, %p5; +; CHECK-NOF16-NEXT: st.param.b16 [func_retval0+0], %rs9; +; CHECK-NOF16-NEXT: ret; +; +; CHECK-F16-LABEL: minimum_half( +; CHECK-F16: { +; CHECK-F16-NEXT: .reg .b16 %rs<4>; +; CHECK-F16-EMPTY: +; CHECK-F16-NEXT: // %bb.0: +; CHECK-F16-NEXT: ld.param.b16 %rs1, [minimum_half_param_0]; +; CHECK-F16-NEXT: ld.param.b16 %rs2, [minimum_half_param_1]; +; CHECK-F16-NEXT: min.NaN.f16 %rs3, %rs1, %rs2; +; CHECK-F16-NEXT: st.param.b16 [func_retval0+0], %rs3; +; CHECK-F16-NEXT: ret; +; +; CHECK-SM80-NOF16-LABEL: minimum_half( +; CHECK-SM80-NOF16: { +; CHECK-SM80-NOF16-NEXT: .reg .pred %p<6>; +; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<10>; +; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<4>; +; CHECK-SM80-NOF16-EMPTY: +; CHECK-SM80-NOF16-NEXT: // %bb.0: +; CHECK-SM80-NOF16-NEXT: ld.param.b16 %rs1, [minimum_half_param_0]; +; CHECK-SM80-NOF16-NEXT: ld.param.b16 %rs3, [minimum_half_param_1]; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f1, %rs3; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f2, %rs1; +; CHECK-SM80-NOF16-NEXT: setp.lt.f32 %p1, %f2, %f1; +; 
CHECK-SM80-NOF16-NEXT: selp.b16 %rs4, %rs1, %rs3, %p1; +; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %f2, %f1; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, 0x7E00, %rs4, %p2; +; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p3, %rs1, -32768; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, %rs1, %rs5, %p3; +; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p4, %rs3, -32768; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs8, %rs3, %rs6, %p4; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f3, %rs5; +; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %f3, 0f00000000; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs9, %rs8, %rs5, %p5; +; CHECK-SM80-NOF16-NEXT: st.param.b16 [func_retval0+0], %rs9; +; CHECK-SM80-NOF16-NEXT: ret; + %x = call half @llvm.minimum.f16(half %a, half %b) + ret half %x +} + +define float @minimum_float(float %a, float %b) { +; CHECK-NOF16-LABEL: minimum_float( +; CHECK-NOF16: { +; CHECK-NOF16-NEXT: .reg .pred %p<5>; +; CHECK-NOF16-NEXT: .reg .b32 %r<3>; +; CHECK-NOF16-NEXT: .reg .f32 %f<8>; +; CHECK-NOF16-EMPTY: +; CHECK-NOF16-NEXT: // %bb.0: +; CHECK-NOF16-NEXT: ld.param.f32 %f1, [minimum_float_param_0]; +; CHECK-NOF16-NEXT: mov.b32 %r1, %f1; +; CHECK-NOF16-NEXT: ld.param.f32 %f2, [minimum_float_param_1]; +; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %f1, %f2; +; CHECK-NOF16-NEXT: min.f32 %f3, %f1, %f2; +; CHECK-NOF16-NEXT: selp.f32 %f4, 0f7FC00000, %f3, %p1; +; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, -2147483648; +; CHECK-NOF16-NEXT: selp.f32 %f5, %f1, %f4, %p2; +; CHECK-NOF16-NEXT: mov.b32 %r2, %f2; +; CHECK-NOF16-NEXT: setp.eq.s32 %p3, %r2, -2147483648; +; CHECK-NOF16-NEXT: selp.f32 %f6, %f2, %f5, %p3; +; CHECK-NOF16-NEXT: setp.eq.f32 %p4, %f4, 0f00000000; +; CHECK-NOF16-NEXT: selp.f32 %f7, %f6, %f4, %p4; +; CHECK-NOF16-NEXT: st.param.f32 [func_retval0+0], %f7; +; CHECK-NOF16-NEXT: ret; +; +; CHECK-F16-LABEL: minimum_float( +; CHECK-F16: { +; CHECK-F16-NEXT: .reg .f32 %f<4>; +; CHECK-F16-EMPTY: +; CHECK-F16-NEXT: // %bb.0: +; CHECK-F16-NEXT: ld.param.f32 %f1, [minimum_float_param_0]; +; CHECK-F16-NEXT: ld.param.f32 %f2, [minimum_float_param_1]; +; CHECK-F16-NEXT: min.NaN.f32 %f3, %f1, %f2; +; CHECK-F16-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-F16-NEXT: ret; +; +; CHECK-SM80-NOF16-LABEL: minimum_float( +; CHECK-SM80-NOF16: { +; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<4>; +; CHECK-SM80-NOF16-EMPTY: +; CHECK-SM80-NOF16-NEXT: // %bb.0: +; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f1, [minimum_float_param_0]; +; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f2, [minimum_float_param_1]; +; CHECK-SM80-NOF16-NEXT: min.NaN.f32 %f3, %f1, %f2; +; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-SM80-NOF16-NEXT: ret; + %x = call float @llvm.minimum.f32(float %a, float %b) + ret float %x +} + +define float @minimum_imm1(float %a) { +; CHECK-NOF16-LABEL: minimum_imm1( +; CHECK-NOF16: { +; CHECK-NOF16-NEXT: .reg .pred %p<4>; +; CHECK-NOF16-NEXT: .reg .b32 %r<2>; +; CHECK-NOF16-NEXT: .reg .f32 %f<6>; +; CHECK-NOF16-EMPTY: +; CHECK-NOF16-NEXT: // %bb.0: +; CHECK-NOF16-NEXT: ld.param.f32 %f1, [minimum_imm1_param_0]; +; CHECK-NOF16-NEXT: mov.b32 %r1, %f1; +; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %f1, %f1; +; CHECK-NOF16-NEXT: min.f32 %f2, %f1, 0f00000000; +; CHECK-NOF16-NEXT: selp.f32 %f3, 0f7FC00000, %f2, %p1; +; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, -2147483648; +; CHECK-NOF16-NEXT: selp.f32 %f4, %f1, %f3, %p2; +; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %f3, 0f00000000; +; CHECK-NOF16-NEXT: selp.f32 %f5, %f4, %f3, %p3; +; CHECK-NOF16-NEXT: st.param.f32 [func_retval0+0], %f5; +; CHECK-NOF16-NEXT: ret; +; +; CHECK-F16-LABEL: minimum_imm1( +; CHECK-F16: { +; 
CHECK-F16-NEXT: .reg .f32 %f<3>; +; CHECK-F16-EMPTY: +; CHECK-F16-NEXT: // %bb.0: +; CHECK-F16-NEXT: ld.param.f32 %f1, [minimum_imm1_param_0]; +; CHECK-F16-NEXT: min.NaN.f32 %f2, %f1, 0f00000000; +; CHECK-F16-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-F16-NEXT: ret; +; +; CHECK-SM80-NOF16-LABEL: minimum_imm1( +; CHECK-SM80-NOF16: { +; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<3>; +; CHECK-SM80-NOF16-EMPTY: +; CHECK-SM80-NOF16-NEXT: // %bb.0: +; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f1, [minimum_imm1_param_0]; +; CHECK-SM80-NOF16-NEXT: min.NaN.f32 %f2, %f1, 0f00000000; +; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-SM80-NOF16-NEXT: ret; + %x = call float @llvm.minimum.f32(float %a, float 0.0) + ret float %x +} + +define float @minimum_imm2(float %a) { +; CHECK-NOF16-LABEL: minimum_imm2( +; CHECK-NOF16: { +; CHECK-NOF16-NEXT: .reg .pred %p<4>; +; CHECK-NOF16-NEXT: .reg .b32 %r<2>; +; CHECK-NOF16-NEXT: .reg .f32 %f<6>; +; CHECK-NOF16-EMPTY: +; CHECK-NOF16-NEXT: // %bb.0: +; CHECK-NOF16-NEXT: ld.param.f32 %f1, [minimum_imm2_param_0]; +; CHECK-NOF16-NEXT: mov.b32 %r1, %f1; +; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %f1, %f1; +; CHECK-NOF16-NEXT: min.f32 %f2, %f1, 0f00000000; +; CHECK-NOF16-NEXT: selp.f32 %f3, 0f7FC00000, %f2, %p1; +; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, -2147483648; +; CHECK-NOF16-NEXT: selp.f32 %f4, %f1, %f3, %p2; +; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %f3, 0f00000000; +; CHECK-NOF16-NEXT: selp.f32 %f5, %f4, %f3, %p3; +; CHECK-NOF16-NEXT: st.param.f32 [func_retval0+0], %f5; +; CHECK-NOF16-NEXT: ret; +; +; CHECK-F16-LABEL: minimum_imm2( +; CHECK-F16: { +; CHECK-F16-NEXT: .reg .f32 %f<3>; +; CHECK-F16-EMPTY: +; CHECK-F16-NEXT: // %bb.0: +; CHECK-F16-NEXT: ld.param.f32 %f1, [minimum_imm2_param_0]; +; CHECK-F16-NEXT: min.NaN.f32 %f2, %f1, 0f00000000; +; CHECK-F16-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-F16-NEXT: ret; +; +; CHECK-SM80-NOF16-LABEL: minimum_imm2( +; CHECK-SM80-NOF16: { +; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<3>; +; CHECK-SM80-NOF16-EMPTY: +; CHECK-SM80-NOF16-NEXT: // %bb.0: +; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f1, [minimum_imm2_param_0]; +; CHECK-SM80-NOF16-NEXT: min.NaN.f32 %f2, %f1, 0f00000000; +; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-SM80-NOF16-NEXT: ret; + %x = call float @llvm.minimum.f32(float 0.0, float %a) + ret float %x +} + +define float @minimum_float_ftz(float %a, float %b) #1 { +; CHECK-NOF16-LABEL: minimum_float_ftz( +; CHECK-NOF16: { +; CHECK-NOF16-NEXT: .reg .pred %p<5>; +; CHECK-NOF16-NEXT: .reg .b32 %r<3>; +; CHECK-NOF16-NEXT: .reg .f32 %f<8>; +; CHECK-NOF16-EMPTY: +; CHECK-NOF16-NEXT: // %bb.0: +; CHECK-NOF16-NEXT: ld.param.f32 %f1, [minimum_float_ftz_param_0]; +; CHECK-NOF16-NEXT: mov.b32 %r1, %f1; +; CHECK-NOF16-NEXT: ld.param.f32 %f2, [minimum_float_ftz_param_1]; +; CHECK-NOF16-NEXT: setp.nan.ftz.f32 %p1, %f1, %f2; +; CHECK-NOF16-NEXT: min.ftz.f32 %f3, %f1, %f2; +; CHECK-NOF16-NEXT: selp.f32 %f4, 0f7FC00000, %f3, %p1; +; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, -2147483648; +; CHECK-NOF16-NEXT: selp.f32 %f5, %f1, %f4, %p2; +; CHECK-NOF16-NEXT: mov.b32 %r2, %f2; +; CHECK-NOF16-NEXT: setp.eq.s32 %p3, %r2, -2147483648; +; CHECK-NOF16-NEXT: selp.f32 %f6, %f2, %f5, %p3; +; CHECK-NOF16-NEXT: setp.eq.ftz.f32 %p4, %f4, 0f00000000; +; CHECK-NOF16-NEXT: selp.f32 %f7, %f6, %f4, %p4; +; CHECK-NOF16-NEXT: st.param.f32 [func_retval0+0], %f7; +; CHECK-NOF16-NEXT: ret; +; +; CHECK-F16-LABEL: minimum_float_ftz( +; CHECK-F16: { +; CHECK-F16-NEXT: .reg .f32 %f<4>; +; CHECK-F16-EMPTY: +; 
CHECK-F16-NEXT: // %bb.0: +; CHECK-F16-NEXT: ld.param.f32 %f1, [minimum_float_ftz_param_0]; +; CHECK-F16-NEXT: ld.param.f32 %f2, [minimum_float_ftz_param_1]; +; CHECK-F16-NEXT: min.NaN.ftz.f32 %f3, %f1, %f2; +; CHECK-F16-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-F16-NEXT: ret; +; +; CHECK-SM80-NOF16-LABEL: minimum_float_ftz( +; CHECK-SM80-NOF16: { +; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<4>; +; CHECK-SM80-NOF16-EMPTY: +; CHECK-SM80-NOF16-NEXT: // %bb.0: +; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f1, [minimum_float_ftz_param_0]; +; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f2, [minimum_float_ftz_param_1]; +; CHECK-SM80-NOF16-NEXT: min.NaN.ftz.f32 %f3, %f1, %f2; +; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-SM80-NOF16-NEXT: ret; + %x = call float @llvm.minimum.f32(float %a, float %b) + ret float %x +} + +define double @minimum_double(double %a, double %b) { +; CHECK-LABEL: minimum_double( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<5>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-NEXT: .reg .f64 %fd<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f64 %fd1, [minimum_double_param_0]; +; CHECK-NEXT: mov.b64 %rd1, %fd1; +; CHECK-NEXT: ld.param.f64 %fd2, [minimum_double_param_1]; +; CHECK-NEXT: setp.nan.f64 %p1, %fd1, %fd2; +; CHECK-NEXT: min.f64 %fd3, %fd1, %fd2; +; CHECK-NEXT: selp.f64 %fd4, 0d7FF8000000000000, %fd3, %p1; +; CHECK-NEXT: setp.eq.s64 %p2, %rd1, -9223372036854775808; +; CHECK-NEXT: selp.f64 %fd5, %fd1, %fd4, %p2; +; CHECK-NEXT: mov.b64 %rd2, %fd2; +; CHECK-NEXT: setp.eq.s64 %p3, %rd2, -9223372036854775808; +; CHECK-NEXT: selp.f64 %fd6, %fd2, %fd5, %p3; +; CHECK-NEXT: setp.eq.f64 %p4, %fd4, 0d0000000000000000; +; CHECK-NEXT: selp.f64 %fd7, %fd6, %fd4, %p4; +; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd7; +; CHECK-NEXT: ret; + %x = call double @llvm.minimum.f64(double %a, double %b) + ret double %x +} + +define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) { +; CHECK-NOF16-LABEL: minimum_v2half( +; CHECK-NOF16: { +; CHECK-NOF16-NEXT: .reg .pred %p<11>; +; CHECK-NOF16-NEXT: .reg .b16 %rs<19>; +; CHECK-NOF16-NEXT: .reg .b32 %r<4>; +; CHECK-NOF16-NEXT: .reg .f32 %f<7>; +; CHECK-NOF16-EMPTY: +; CHECK-NOF16-NEXT: // %bb.0: +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [minimum_v2half_param_0]; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [minimum_v2half_param_1]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; +; CHECK-NOF16-NEXT: setp.lt.f32 %p1, %f2, %f1; +; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1; +; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %f2, %f1; +; CHECK-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2; +; CHECK-NOF16-NEXT: setp.eq.s16 %p3, %rs4, -32768; +; CHECK-NOF16-NEXT: selp.b16 %rs8, %rs4, %rs6, %p3; +; CHECK-NOF16-NEXT: setp.eq.s16 %p4, %rs2, -32768; +; CHECK-NOF16-NEXT: selp.b16 %rs10, %rs2, %rs8, %p4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs6; +; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %f3, 0f00000000; +; CHECK-NOF16-NEXT: selp.b16 %rs11, %rs10, %rs6, %p5; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f5, %rs3; +; CHECK-NOF16-NEXT: setp.lt.f32 %p6, %f5, %f4; +; CHECK-NOF16-NEXT: selp.b16 %rs12, %rs3, %rs1, %p6; +; CHECK-NOF16-NEXT: setp.nan.f32 %p7, %f5, %f4; +; CHECK-NOF16-NEXT: selp.b16 %rs13, 0x7E00, %rs12, %p7; +; CHECK-NOF16-NEXT: setp.eq.s16 %p8, %rs3, -32768; +; CHECK-NOF16-NEXT: selp.b16 %rs15, %rs3, %rs13, %p8; +; CHECK-NOF16-NEXT: setp.eq.s16 %p9, %rs1, -32768; 
+; CHECK-NOF16-NEXT: selp.b16 %rs17, %rs1, %rs15, %p9; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f6, %rs13; +; CHECK-NOF16-NEXT: setp.eq.f32 %p10, %f6, 0f00000000; +; CHECK-NOF16-NEXT: selp.b16 %rs18, %rs17, %rs13, %p10; +; CHECK-NOF16-NEXT: mov.b32 %r3, {%rs18, %rs11}; +; CHECK-NOF16-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-NOF16-NEXT: ret; +; +; CHECK-F16-LABEL: minimum_v2half( +; CHECK-F16: { +; CHECK-F16-NEXT: .reg .b32 %r<4>; +; CHECK-F16-EMPTY: +; CHECK-F16-NEXT: // %bb.0: +; CHECK-F16-NEXT: ld.param.b32 %r1, [minimum_v2half_param_1]; +; CHECK-F16-NEXT: ld.param.b32 %r2, [minimum_v2half_param_0]; +; CHECK-F16-NEXT: min.NaN.f16x2 %r3, %r2, %r1; +; CHECK-F16-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-F16-NEXT: ret; +; +; CHECK-SM80-NOF16-LABEL: minimum_v2half( +; CHECK-SM80-NOF16: { +; CHECK-SM80-NOF16-NEXT: .reg .pred %p<11>; +; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<19>; +; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<4>; +; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<7>; +; CHECK-SM80-NOF16-EMPTY: +; CHECK-SM80-NOF16-NEXT: // %bb.0: +; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r1, [minimum_v2half_param_0]; +; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r2, [minimum_v2half_param_1]; +; CHECK-SM80-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; +; CHECK-SM80-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; +; CHECK-SM80-NOF16-NEXT: setp.lt.f32 %p1, %f2, %f1; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1; +; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %f2, %f1; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2; +; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p3, %rs4, -32768; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs8, %rs4, %rs6, %p3; +; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p4, %rs2, -32768; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs10, %rs2, %rs8, %p4; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f3, %rs6; +; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %f3, 0f00000000; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs11, %rs10, %rs6, %p5; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f4, %rs1; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f5, %rs3; +; CHECK-SM80-NOF16-NEXT: setp.lt.f32 %p6, %f5, %f4; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs12, %rs3, %rs1, %p6; +; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p7, %f5, %f4; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs13, 0x7E00, %rs12, %p7; +; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p8, %rs3, -32768; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs15, %rs3, %rs13, %p8; +; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p9, %rs1, -32768; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs17, %rs1, %rs15, %p9; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f6, %rs13; +; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p10, %f6, 0f00000000; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs18, %rs17, %rs13, %p10; +; CHECK-SM80-NOF16-NEXT: mov.b32 %r3, {%rs18, %rs11}; +; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-SM80-NOF16-NEXT: ret; + %x = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b) + ret <2 x half> %x +} + +; ---- maxnum ---- + +define half @maxnum_half(half %a, half %b) { +; CHECK-NOF16-LABEL: maxnum_half( +; CHECK-NOF16: { +; CHECK-NOF16-NEXT: .reg .b16 %rs<4>; +; CHECK-NOF16-NEXT: .reg .f32 %f<4>; +; CHECK-NOF16-EMPTY: +; CHECK-NOF16-NEXT: // %bb.0: +; CHECK-NOF16-NEXT: ld.param.b16 %rs1, [maxnum_half_param_0]; +; CHECK-NOF16-NEXT: ld.param.b16 %rs2, [maxnum_half_param_1]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs1; +; CHECK-NOF16-NEXT: max.f32 %f3, %f2, %f1; +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs3, %f3; +; 
CHECK-NOF16-NEXT: st.param.b16 [func_retval0+0], %rs3; +; CHECK-NOF16-NEXT: ret; +; +; CHECK-F16-LABEL: maxnum_half( +; CHECK-F16: { +; CHECK-F16-NEXT: .reg .b16 %rs<4>; +; CHECK-F16-EMPTY: +; CHECK-F16-NEXT: // %bb.0: +; CHECK-F16-NEXT: ld.param.b16 %rs1, [maxnum_half_param_0]; +; CHECK-F16-NEXT: ld.param.b16 %rs2, [maxnum_half_param_1]; +; CHECK-F16-NEXT: max.f16 %rs3, %rs1, %rs2; +; CHECK-F16-NEXT: st.param.b16 [func_retval0+0], %rs3; +; CHECK-F16-NEXT: ret; +; +; CHECK-SM80-NOF16-LABEL: maxnum_half( +; CHECK-SM80-NOF16: { +; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<4>; +; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<4>; +; CHECK-SM80-NOF16-EMPTY: +; CHECK-SM80-NOF16-NEXT: // %bb.0: +; CHECK-SM80-NOF16-NEXT: ld.param.b16 %rs1, [maxnum_half_param_0]; +; CHECK-SM80-NOF16-NEXT: ld.param.b16 %rs2, [maxnum_half_param_1]; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f2, %rs1; +; CHECK-SM80-NOF16-NEXT: max.f32 %f3, %f2, %f1; +; CHECK-SM80-NOF16-NEXT: cvt.rn.f16.f32 %rs3, %f3; +; CHECK-SM80-NOF16-NEXT: st.param.b16 [func_retval0+0], %rs3; +; CHECK-SM80-NOF16-NEXT: ret; %x = call half @llvm.maxnum.f16(half %a, half %b) ret half %x } -; CHECK-LABEL: max_imm1 -define float @max_imm1(float %a) { - ; CHECK: max.f32 +define float @maxnum_imm1(float %a) { +; CHECK-LABEL: maxnum_imm1( +; CHECK: { +; CHECK-NEXT: .reg .f32 %f<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [maxnum_imm1_param_0]; +; CHECK-NEXT: max.f32 %f2, %f1, 0f00000000; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-NEXT: ret; %x = call float @llvm.maxnum.f32(float %a, float 0.0) ret float %x } -; CHECK-LABEL: max_imm2 -define float @max_imm2(float %a) { - ; CHECK: max.f32 +define float @maxnum_imm2(float %a) { +; CHECK-LABEL: maxnum_imm2( +; CHECK: { +; CHECK-NEXT: .reg .f32 %f<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [maxnum_imm2_param_0]; +; CHECK-NEXT: max.f32 %f2, %f1, 0f00000000; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-NEXT: ret; %x = call float @llvm.maxnum.f32(float 0.0, float %a) ret float %x } -; CHECK-LABEL: max_float -define float @max_float(float %a, float %b) { - ; CHECK: max.f32 +define float @maxnum_float(float %a, float %b) { +; CHECK-LABEL: maxnum_float( +; CHECK: { +; CHECK-NEXT: .reg .f32 %f<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [maxnum_float_param_0]; +; CHECK-NEXT: ld.param.f32 %f2, [maxnum_float_param_1]; +; CHECK-NEXT: max.f32 %f3, %f1, %f2; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-NEXT: ret; %x = call float @llvm.maxnum.f32(float %a, float %b) ret float %x } -; CHECK-LABEL: max_float_ftz -define float @max_float_ftz(float %a, float %b) #1 { - ; CHECK: max.ftz.f32 +define float @maxnum_float_ftz(float %a, float %b) #1 { +; CHECK-LABEL: maxnum_float_ftz( +; CHECK: { +; CHECK-NEXT: .reg .f32 %f<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [maxnum_float_ftz_param_0]; +; CHECK-NEXT: ld.param.f32 %f2, [maxnum_float_ftz_param_1]; +; CHECK-NEXT: max.ftz.f32 %f3, %f1, %f2; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-NEXT: ret; %x = call float @llvm.maxnum.f32(float %a, float %b) ret float %x } -; CHECK-LABEL: max_double -define double @max_double(double %a, double %b) { - ; CHECK: max.f64 +define double @maxnum_double(double %a, double %b) { +; CHECK-LABEL: maxnum_double( +; CHECK: { +; CHECK-NEXT: .reg .f64 %fd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f64 
%fd1, [maxnum_double_param_0]; +; CHECK-NEXT: ld.param.f64 %fd2, [maxnum_double_param_1]; +; CHECK-NEXT: max.f64 %fd3, %fd1, %fd2; +; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd3; +; CHECK-NEXT: ret; %x = call double @llvm.maxnum.f64(double %a, double %b) ret double %x } -; CHECK-LABEL: max_v2half -define <2 x half> @max_v2half(<2 x half> %a, <2 x half> %b) { - ; CHECK-NOF16: max.f32 - ; CHECK-NOF16: max.f32 - ; CHECK-F16: max.f16x2 +define <2 x half> @maxnum_v2half(<2 x half> %a, <2 x half> %b) { +; CHECK-NOF16-LABEL: maxnum_v2half( +; CHECK-NOF16: { +; CHECK-NOF16-NEXT: .reg .b16 %rs<7>; +; CHECK-NOF16-NEXT: .reg .b32 %r<4>; +; CHECK-NOF16-NEXT: .reg .f32 %f<7>; +; CHECK-NOF16-EMPTY: +; CHECK-NOF16-NEXT: // %bb.0: +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [maxnum_v2half_param_0]; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [maxnum_v2half_param_1]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; +; CHECK-NOF16-NEXT: max.f32 %f3, %f2, %f1; +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %f3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f5, %rs3; +; CHECK-NOF16-NEXT: max.f32 %f6, %f5, %f4; +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %f6; +; CHECK-NOF16-NEXT: mov.b32 %r3, {%rs6, %rs5}; +; CHECK-NOF16-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-NOF16-NEXT: ret; +; +; CHECK-F16-LABEL: maxnum_v2half( +; CHECK-F16: { +; CHECK-F16-NEXT: .reg .b32 %r<4>; +; CHECK-F16-EMPTY: +; CHECK-F16-NEXT: // %bb.0: +; CHECK-F16-NEXT: ld.param.b32 %r1, [maxnum_v2half_param_1]; +; CHECK-F16-NEXT: ld.param.b32 %r2, [maxnum_v2half_param_0]; +; CHECK-F16-NEXT: max.f16x2 %r3, %r2, %r1; +; CHECK-F16-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-F16-NEXT: ret; +; +; CHECK-SM80-NOF16-LABEL: maxnum_v2half( +; CHECK-SM80-NOF16: { +; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<7>; +; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<4>; +; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<7>; +; CHECK-SM80-NOF16-EMPTY: +; CHECK-SM80-NOF16-NEXT: // %bb.0: +; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r1, [maxnum_v2half_param_0]; +; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r2, [maxnum_v2half_param_1]; +; CHECK-SM80-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; +; CHECK-SM80-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; +; CHECK-SM80-NOF16-NEXT: max.f32 %f3, %f2, %f1; +; CHECK-SM80-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %f3; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f4, %rs1; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f5, %rs3; +; CHECK-SM80-NOF16-NEXT: max.f32 %f6, %f5, %f4; +; CHECK-SM80-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %f6; +; CHECK-SM80-NOF16-NEXT: mov.b32 %r3, {%rs6, %rs5}; +; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-SM80-NOF16-NEXT: ret; %x = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b) ret <2 x half> %x } +; ---- maximum ---- + +define half @maximum_half(half %a, half %b) { +; CHECK-NOF16-LABEL: maximum_half( +; CHECK-NOF16: { +; CHECK-NOF16-NEXT: .reg .pred %p<6>; +; CHECK-NOF16-NEXT: .reg .b16 %rs<10>; +; CHECK-NOF16-NEXT: .reg .f32 %f<4>; +; CHECK-NOF16-EMPTY: +; CHECK-NOF16-NEXT: // %bb.0: +; CHECK-NOF16-NEXT: ld.param.b16 %rs1, [maximum_half_param_0]; +; CHECK-NOF16-NEXT: ld.param.b16 %rs3, [maximum_half_param_1]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs1; +; CHECK-NOF16-NEXT: setp.gt.f32 %p1, %f2, %f1; +; CHECK-NOF16-NEXT: selp.b16 
%rs4, %rs1, %rs3, %p1; +; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %f2, %f1; +; CHECK-NOF16-NEXT: selp.b16 %rs5, 0x7E00, %rs4, %p2; +; CHECK-NOF16-NEXT: setp.eq.s16 %p3, %rs1, 0; +; CHECK-NOF16-NEXT: selp.b16 %rs6, %rs1, %rs5, %p3; +; CHECK-NOF16-NEXT: setp.eq.s16 %p4, %rs3, 0; +; CHECK-NOF16-NEXT: selp.b16 %rs8, %rs3, %rs6, %p4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs5; +; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %f3, 0f00000000; +; CHECK-NOF16-NEXT: selp.b16 %rs9, %rs8, %rs5, %p5; +; CHECK-NOF16-NEXT: st.param.b16 [func_retval0+0], %rs9; +; CHECK-NOF16-NEXT: ret; +; +; CHECK-F16-LABEL: maximum_half( +; CHECK-F16: { +; CHECK-F16-NEXT: .reg .b16 %rs<4>; +; CHECK-F16-EMPTY: +; CHECK-F16-NEXT: // %bb.0: +; CHECK-F16-NEXT: ld.param.b16 %rs1, [maximum_half_param_0]; +; CHECK-F16-NEXT: ld.param.b16 %rs2, [maximum_half_param_1]; +; CHECK-F16-NEXT: max.NaN.f16 %rs3, %rs1, %rs2; +; CHECK-F16-NEXT: st.param.b16 [func_retval0+0], %rs3; +; CHECK-F16-NEXT: ret; +; +; CHECK-SM80-NOF16-LABEL: maximum_half( +; CHECK-SM80-NOF16: { +; CHECK-SM80-NOF16-NEXT: .reg .pred %p<6>; +; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<10>; +; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<4>; +; CHECK-SM80-NOF16-EMPTY: +; CHECK-SM80-NOF16-NEXT: // %bb.0: +; CHECK-SM80-NOF16-NEXT: ld.param.b16 %rs1, [maximum_half_param_0]; +; CHECK-SM80-NOF16-NEXT: ld.param.b16 %rs3, [maximum_half_param_1]; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f1, %rs3; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f2, %rs1; +; CHECK-SM80-NOF16-NEXT: setp.gt.f32 %p1, %f2, %f1; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs4, %rs1, %rs3, %p1; +; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %f2, %f1; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, 0x7E00, %rs4, %p2; +; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p3, %rs1, 0; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, %rs1, %rs5, %p3; +; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p4, %rs3, 0; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs8, %rs3, %rs6, %p4; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f3, %rs5; +; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %f3, 0f00000000; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs9, %rs8, %rs5, %p5; +; CHECK-SM80-NOF16-NEXT: st.param.b16 [func_retval0+0], %rs9; +; CHECK-SM80-NOF16-NEXT: ret; + %x = call half @llvm.maximum.f16(half %a, half %b) + ret half %x +} + +define float @maximum_imm1(float %a) { +; CHECK-NOF16-LABEL: maximum_imm1( +; CHECK-NOF16: { +; CHECK-NOF16-NEXT: .reg .pred %p<3>; +; CHECK-NOF16-NEXT: .reg .f32 %f<5>; +; CHECK-NOF16-EMPTY: +; CHECK-NOF16-NEXT: // %bb.0: +; CHECK-NOF16-NEXT: ld.param.f32 %f1, [maximum_imm1_param_0]; +; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %f1, %f1; +; CHECK-NOF16-NEXT: max.f32 %f2, %f1, 0f00000000; +; CHECK-NOF16-NEXT: selp.f32 %f3, 0f7FC00000, %f2, %p1; +; CHECK-NOF16-NEXT: setp.eq.f32 %p2, %f3, 0f00000000; +; CHECK-NOF16-NEXT: selp.f32 %f4, 0f00000000, %f3, %p2; +; CHECK-NOF16-NEXT: st.param.f32 [func_retval0+0], %f4; +; CHECK-NOF16-NEXT: ret; +; +; CHECK-F16-LABEL: maximum_imm1( +; CHECK-F16: { +; CHECK-F16-NEXT: .reg .f32 %f<3>; +; CHECK-F16-EMPTY: +; CHECK-F16-NEXT: // %bb.0: +; CHECK-F16-NEXT: ld.param.f32 %f1, [maximum_imm1_param_0]; +; CHECK-F16-NEXT: max.NaN.f32 %f2, %f1, 0f00000000; +; CHECK-F16-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-F16-NEXT: ret; +; +; CHECK-SM80-NOF16-LABEL: maximum_imm1( +; CHECK-SM80-NOF16: { +; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<3>; +; CHECK-SM80-NOF16-EMPTY: +; CHECK-SM80-NOF16-NEXT: // %bb.0: +; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f1, [maximum_imm1_param_0]; +; CHECK-SM80-NOF16-NEXT: max.NaN.f32 %f2, %f1, 0f00000000; +; CHECK-SM80-NOF16-NEXT: st.param.f32 
[func_retval0+0], %f2; +; CHECK-SM80-NOF16-NEXT: ret; + %x = call float @llvm.maximum.f32(float %a, float 0.0) + ret float %x +} + +define float @maximum_imm2(float %a) { +; CHECK-NOF16-LABEL: maximum_imm2( +; CHECK-NOF16: { +; CHECK-NOF16-NEXT: .reg .pred %p<3>; +; CHECK-NOF16-NEXT: .reg .f32 %f<5>; +; CHECK-NOF16-EMPTY: +; CHECK-NOF16-NEXT: // %bb.0: +; CHECK-NOF16-NEXT: ld.param.f32 %f1, [maximum_imm2_param_0]; +; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %f1, %f1; +; CHECK-NOF16-NEXT: max.f32 %f2, %f1, 0f00000000; +; CHECK-NOF16-NEXT: selp.f32 %f3, 0f7FC00000, %f2, %p1; +; CHECK-NOF16-NEXT: setp.eq.f32 %p2, %f3, 0f00000000; +; CHECK-NOF16-NEXT: selp.f32 %f4, 0f00000000, %f3, %p2; +; CHECK-NOF16-NEXT: st.param.f32 [func_retval0+0], %f4; +; CHECK-NOF16-NEXT: ret; +; +; CHECK-F16-LABEL: maximum_imm2( +; CHECK-F16: { +; CHECK-F16-NEXT: .reg .f32 %f<3>; +; CHECK-F16-EMPTY: +; CHECK-F16-NEXT: // %bb.0: +; CHECK-F16-NEXT: ld.param.f32 %f1, [maximum_imm2_param_0]; +; CHECK-F16-NEXT: max.NaN.f32 %f2, %f1, 0f00000000; +; CHECK-F16-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-F16-NEXT: ret; +; +; CHECK-SM80-NOF16-LABEL: maximum_imm2( +; CHECK-SM80-NOF16: { +; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<3>; +; CHECK-SM80-NOF16-EMPTY: +; CHECK-SM80-NOF16-NEXT: // %bb.0: +; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f1, [maximum_imm2_param_0]; +; CHECK-SM80-NOF16-NEXT: max.NaN.f32 %f2, %f1, 0f00000000; +; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-SM80-NOF16-NEXT: ret; + %x = call float @llvm.maximum.f32(float 0.0, float %a) + ret float %x +} + +define float @maximum_float(float %a, float %b) { +; CHECK-NOF16-LABEL: maximum_float( +; CHECK-NOF16: { +; CHECK-NOF16-NEXT: .reg .pred %p<5>; +; CHECK-NOF16-NEXT: .reg .b32 %r<3>; +; CHECK-NOF16-NEXT: .reg .f32 %f<8>; +; CHECK-NOF16-EMPTY: +; CHECK-NOF16-NEXT: // %bb.0: +; CHECK-NOF16-NEXT: ld.param.f32 %f1, [maximum_float_param_0]; +; CHECK-NOF16-NEXT: mov.b32 %r1, %f1; +; CHECK-NOF16-NEXT: ld.param.f32 %f2, [maximum_float_param_1]; +; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %f1, %f2; +; CHECK-NOF16-NEXT: max.f32 %f3, %f1, %f2; +; CHECK-NOF16-NEXT: selp.f32 %f4, 0f7FC00000, %f3, %p1; +; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, 0; +; CHECK-NOF16-NEXT: selp.f32 %f5, %f1, %f4, %p2; +; CHECK-NOF16-NEXT: mov.b32 %r2, %f2; +; CHECK-NOF16-NEXT: setp.eq.s32 %p3, %r2, 0; +; CHECK-NOF16-NEXT: selp.f32 %f6, %f2, %f5, %p3; +; CHECK-NOF16-NEXT: setp.eq.f32 %p4, %f4, 0f00000000; +; CHECK-NOF16-NEXT: selp.f32 %f7, %f6, %f4, %p4; +; CHECK-NOF16-NEXT: st.param.f32 [func_retval0+0], %f7; +; CHECK-NOF16-NEXT: ret; +; +; CHECK-F16-LABEL: maximum_float( +; CHECK-F16: { +; CHECK-F16-NEXT: .reg .f32 %f<4>; +; CHECK-F16-EMPTY: +; CHECK-F16-NEXT: // %bb.0: +; CHECK-F16-NEXT: ld.param.f32 %f1, [maximum_float_param_0]; +; CHECK-F16-NEXT: ld.param.f32 %f2, [maximum_float_param_1]; +; CHECK-F16-NEXT: max.NaN.f32 %f3, %f1, %f2; +; CHECK-F16-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-F16-NEXT: ret; +; +; CHECK-SM80-NOF16-LABEL: maximum_float( +; CHECK-SM80-NOF16: { +; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<4>; +; CHECK-SM80-NOF16-EMPTY: +; CHECK-SM80-NOF16-NEXT: // %bb.0: +; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f1, [maximum_float_param_0]; +; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f2, [maximum_float_param_1]; +; CHECK-SM80-NOF16-NEXT: max.NaN.f32 %f3, %f1, %f2; +; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-SM80-NOF16-NEXT: ret; + %x = call float @llvm.maximum.f32(float %a, float %b) + ret float %x +} + +define float @maximum_float_ftz(float %a, float 
%b) #1 { +; CHECK-NOF16-LABEL: maximum_float_ftz( +; CHECK-NOF16: { +; CHECK-NOF16-NEXT: .reg .pred %p<5>; +; CHECK-NOF16-NEXT: .reg .b32 %r<3>; +; CHECK-NOF16-NEXT: .reg .f32 %f<8>; +; CHECK-NOF16-EMPTY: +; CHECK-NOF16-NEXT: // %bb.0: +; CHECK-NOF16-NEXT: ld.param.f32 %f1, [maximum_float_ftz_param_0]; +; CHECK-NOF16-NEXT: mov.b32 %r1, %f1; +; CHECK-NOF16-NEXT: ld.param.f32 %f2, [maximum_float_ftz_param_1]; +; CHECK-NOF16-NEXT: setp.nan.ftz.f32 %p1, %f1, %f2; +; CHECK-NOF16-NEXT: max.ftz.f32 %f3, %f1, %f2; +; CHECK-NOF16-NEXT: selp.f32 %f4, 0f7FC00000, %f3, %p1; +; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, 0; +; CHECK-NOF16-NEXT: selp.f32 %f5, %f1, %f4, %p2; +; CHECK-NOF16-NEXT: mov.b32 %r2, %f2; +; CHECK-NOF16-NEXT: setp.eq.s32 %p3, %r2, 0; +; CHECK-NOF16-NEXT: selp.f32 %f6, %f2, %f5, %p3; +; CHECK-NOF16-NEXT: setp.eq.ftz.f32 %p4, %f4, 0f00000000; +; CHECK-NOF16-NEXT: selp.f32 %f7, %f6, %f4, %p4; +; CHECK-NOF16-NEXT: st.param.f32 [func_retval0+0], %f7; +; CHECK-NOF16-NEXT: ret; +; +; CHECK-F16-LABEL: maximum_float_ftz( +; CHECK-F16: { +; CHECK-F16-NEXT: .reg .f32 %f<4>; +; CHECK-F16-EMPTY: +; CHECK-F16-NEXT: // %bb.0: +; CHECK-F16-NEXT: ld.param.f32 %f1, [maximum_float_ftz_param_0]; +; CHECK-F16-NEXT: ld.param.f32 %f2, [maximum_float_ftz_param_1]; +; CHECK-F16-NEXT: max.NaN.ftz.f32 %f3, %f1, %f2; +; CHECK-F16-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-F16-NEXT: ret; +; +; CHECK-SM80-NOF16-LABEL: maximum_float_ftz( +; CHECK-SM80-NOF16: { +; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<4>; +; CHECK-SM80-NOF16-EMPTY: +; CHECK-SM80-NOF16-NEXT: // %bb.0: +; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f1, [maximum_float_ftz_param_0]; +; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f2, [maximum_float_ftz_param_1]; +; CHECK-SM80-NOF16-NEXT: max.NaN.ftz.f32 %f3, %f1, %f2; +; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-SM80-NOF16-NEXT: ret; + %x = call float @llvm.maximum.f32(float %a, float %b) + ret float %x +} + +define double @maximum_double(double %a, double %b) { +; CHECK-LABEL: maximum_double( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<5>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-NEXT: .reg .f64 %fd<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f64 %fd1, [maximum_double_param_0]; +; CHECK-NEXT: mov.b64 %rd1, %fd1; +; CHECK-NEXT: ld.param.f64 %fd2, [maximum_double_param_1]; +; CHECK-NEXT: setp.nan.f64 %p1, %fd1, %fd2; +; CHECK-NEXT: max.f64 %fd3, %fd1, %fd2; +; CHECK-NEXT: selp.f64 %fd4, 0d7FF8000000000000, %fd3, %p1; +; CHECK-NEXT: setp.eq.s64 %p2, %rd1, 0; +; CHECK-NEXT: selp.f64 %fd5, %fd1, %fd4, %p2; +; CHECK-NEXT: mov.b64 %rd2, %fd2; +; CHECK-NEXT: setp.eq.s64 %p3, %rd2, 0; +; CHECK-NEXT: selp.f64 %fd6, %fd2, %fd5, %p3; +; CHECK-NEXT: setp.eq.f64 %p4, %fd4, 0d0000000000000000; +; CHECK-NEXT: selp.f64 %fd7, %fd6, %fd4, %p4; +; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd7; +; CHECK-NEXT: ret; + %x = call double @llvm.maximum.f64(double %a, double %b) + ret double %x +} + +define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) { +; CHECK-NOF16-LABEL: maximum_v2half( +; CHECK-NOF16: { +; CHECK-NOF16-NEXT: .reg .pred %p<11>; +; CHECK-NOF16-NEXT: .reg .b16 %rs<19>; +; CHECK-NOF16-NEXT: .reg .b32 %r<4>; +; CHECK-NOF16-NEXT: .reg .f32 %f<7>; +; CHECK-NOF16-EMPTY: +; CHECK-NOF16-NEXT: // %bb.0: +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [maximum_v2half_param_0]; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [maximum_v2half_param_1]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, 
%r1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; +; CHECK-NOF16-NEXT: setp.gt.f32 %p1, %f2, %f1; +; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1; +; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %f2, %f1; +; CHECK-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2; +; CHECK-NOF16-NEXT: setp.eq.s16 %p3, %rs4, 0; +; CHECK-NOF16-NEXT: selp.b16 %rs8, %rs4, %rs6, %p3; +; CHECK-NOF16-NEXT: setp.eq.s16 %p4, %rs2, 0; +; CHECK-NOF16-NEXT: selp.b16 %rs10, %rs2, %rs8, %p4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs6; +; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %f3, 0f00000000; +; CHECK-NOF16-NEXT: selp.b16 %rs11, %rs10, %rs6, %p5; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f5, %rs3; +; CHECK-NOF16-NEXT: setp.gt.f32 %p6, %f5, %f4; +; CHECK-NOF16-NEXT: selp.b16 %rs12, %rs3, %rs1, %p6; +; CHECK-NOF16-NEXT: setp.nan.f32 %p7, %f5, %f4; +; CHECK-NOF16-NEXT: selp.b16 %rs13, 0x7E00, %rs12, %p7; +; CHECK-NOF16-NEXT: setp.eq.s16 %p8, %rs3, 0; +; CHECK-NOF16-NEXT: selp.b16 %rs15, %rs3, %rs13, %p8; +; CHECK-NOF16-NEXT: setp.eq.s16 %p9, %rs1, 0; +; CHECK-NOF16-NEXT: selp.b16 %rs17, %rs1, %rs15, %p9; +; CHECK-NOF16-NEXT: cvt.f32.f16 %f6, %rs13; +; CHECK-NOF16-NEXT: setp.eq.f32 %p10, %f6, 0f00000000; +; CHECK-NOF16-NEXT: selp.b16 %rs18, %rs17, %rs13, %p10; +; CHECK-NOF16-NEXT: mov.b32 %r3, {%rs18, %rs11}; +; CHECK-NOF16-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-NOF16-NEXT: ret; +; +; CHECK-F16-LABEL: maximum_v2half( +; CHECK-F16: { +; CHECK-F16-NEXT: .reg .b32 %r<4>; +; CHECK-F16-EMPTY: +; CHECK-F16-NEXT: // %bb.0: +; CHECK-F16-NEXT: ld.param.b32 %r1, [maximum_v2half_param_1]; +; CHECK-F16-NEXT: ld.param.b32 %r2, [maximum_v2half_param_0]; +; CHECK-F16-NEXT: max.NaN.f16x2 %r3, %r2, %r1; +; CHECK-F16-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-F16-NEXT: ret; +; +; CHECK-SM80-NOF16-LABEL: maximum_v2half( +; CHECK-SM80-NOF16: { +; CHECK-SM80-NOF16-NEXT: .reg .pred %p<11>; +; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<19>; +; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<4>; +; CHECK-SM80-NOF16-NEXT: .reg .f32 %f<7>; +; CHECK-SM80-NOF16-EMPTY: +; CHECK-SM80-NOF16-NEXT: // %bb.0: +; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r1, [maximum_v2half_param_0]; +; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r2, [maximum_v2half_param_1]; +; CHECK-SM80-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; +; CHECK-SM80-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f2, %rs4; +; CHECK-SM80-NOF16-NEXT: setp.gt.f32 %p1, %f2, %f1; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1; +; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %f2, %f1; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2; +; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p3, %rs4, 0; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs8, %rs4, %rs6, %p3; +; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p4, %rs2, 0; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs10, %rs2, %rs8, %p4; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f3, %rs6; +; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %f3, 0f00000000; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs11, %rs10, %rs6, %p5; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f4, %rs1; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f5, %rs3; +; CHECK-SM80-NOF16-NEXT: setp.gt.f32 %p6, %f5, %f4; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs12, %rs3, %rs1, %p6; +; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p7, %f5, %f4; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs13, 0x7E00, %rs12, %p7; +; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p8, %rs3, 0; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs15, %rs3, %rs13, %p8; +; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p9, %rs1, 0; +; 
CHECK-SM80-NOF16-NEXT: selp.b16 %rs17, %rs1, %rs15, %p9; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f6, %rs13; +; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p10, %f6, 0f00000000; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs18, %rs17, %rs13, %p10; +; CHECK-SM80-NOF16-NEXT: mov.b32 %r3, {%rs18, %rs11}; +; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-SM80-NOF16-NEXT: ret; + %x = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b) + ret <2 x half> %x +} + ; ---- fma ---- -; CHECK-LABEL: @fma_float define float @fma_float(float %a, float %b, float %c) { - ; CHECK: fma.rn.f32 +; CHECK-LABEL: fma_float( +; CHECK: { +; CHECK-NEXT: .reg .f32 %f<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [fma_float_param_0]; +; CHECK-NEXT: ld.param.f32 %f2, [fma_float_param_1]; +; CHECK-NEXT: ld.param.f32 %f3, [fma_float_param_2]; +; CHECK-NEXT: fma.rn.f32 %f4, %f1, %f2, %f3; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f4; +; CHECK-NEXT: ret; %x = call float @llvm.fma.f32(float %a, float %b, float %c) ret float %x } -; CHECK-LABEL: @fma_float_ftz define float @fma_float_ftz(float %a, float %b, float %c) #1 { - ; CHECK: fma.rn.ftz.f32 +; CHECK-LABEL: fma_float_ftz( +; CHECK: { +; CHECK-NEXT: .reg .f32 %f<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [fma_float_ftz_param_0]; +; CHECK-NEXT: ld.param.f32 %f2, [fma_float_ftz_param_1]; +; CHECK-NEXT: ld.param.f32 %f3, [fma_float_ftz_param_2]; +; CHECK-NEXT: fma.rn.ftz.f32 %f4, %f1, %f2, %f3; +; CHECK-NEXT: st.param.f32 [func_retval0+0], %f4; +; CHECK-NEXT: ret; %x = call float @llvm.fma.f32(float %a, float %b, float %c) ret float %x } -; CHECK-LABEL: @fma_double define double @fma_double(double %a, double %b, double %c) { - ; CHECK: fma.rn.f64 +; CHECK-LABEL: fma_double( +; CHECK: { +; CHECK-NEXT: .reg .f64 %fd<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f64 %fd1, [fma_double_param_0]; +; CHECK-NEXT: ld.param.f64 %fd2, [fma_double_param_1]; +; CHECK-NEXT: ld.param.f64 %fd3, [fma_double_param_2]; +; CHECK-NEXT: fma.rn.f64 %fd4, %fd1, %fd2, %fd3; +; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd4; +; CHECK-NEXT: ret; %x = call double @llvm.fma.f64(double %a, double %b, double %c) ret double %x } From ee0f43af2b344a7cd603a8564871c357d8fb108a Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Wed, 31 Jul 2024 18:39:52 -0400 Subject: [PATCH 038/114] [SandboxIR][NFC] Move BasicBlock class definition up (#101422) To make future PRs smaller. --- llvm/include/llvm/SandboxIR/SandboxIR.h | 102 ++++++++++++------------ 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h index 3a23eb761f5cfd..38c2586f9d73c2 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIR.h +++ b/llvm/include/llvm/SandboxIR/SandboxIR.h @@ -536,6 +536,57 @@ class BBIterator { pointer get() const { return getInstr(It); } }; +/// Contains a list of sandboxir::Instruction's. +class BasicBlock : public Value { + /// Builds a graph that contains all values in \p BB in their original form + /// i.e., no vectorization is taking place here. + void buildBasicBlockFromLLVMIR(llvm::BasicBlock *LLVMBB); + friend class Context; // For `buildBasicBlockFromIR` + friend class Instruction; // For LLVM Val. + + BasicBlock(llvm::BasicBlock *BB, Context &SBCtx) + : Value(ClassID::Block, BB, SBCtx) { + buildBasicBlockFromLLVMIR(BB); + } + +public: + ~BasicBlock() = default; + /// For isa/dyn_cast. 
+  static bool classof(const Value *From) {
+    return From->getSubclassID() == Value::ClassID::Block;
+  }
+  Function *getParent() const;
+  using iterator = BBIterator;
+  iterator begin() const;
+  iterator end() const {
+    auto *BB = cast<llvm::BasicBlock>(Val);
+    return iterator(BB, BB->end(), &Ctx);
+  }
+  std::reverse_iterator<iterator> rbegin() const {
+    return std::make_reverse_iterator(end());
+  }
+  std::reverse_iterator<iterator> rend() const {
+    return std::make_reverse_iterator(begin());
+  }
+  Context &getContext() const { return Ctx; }
+  Instruction *getTerminator() const;
+  bool empty() const { return begin() == end(); }
+  Instruction &front() const;
+  Instruction &back() const;
+
+#ifndef NDEBUG
+  void verify() const final {
+    assert(isa<llvm::BasicBlock>(Val) && "Expected BasicBlock!");
+  }
+  friend raw_ostream &operator<<(raw_ostream &OS, const BasicBlock &SBBB) {
+    SBBB.dump(OS);
+    return OS;
+  }
+  void dump(raw_ostream &OS) const final;
+  LLVM_DUMP_METHOD void dump() const final;
+#endif
+};
+
 /// A sandboxir::User with operands, opcode and linked with previous/next
 /// instructions in an instruction list.
 class Instruction : public sandboxir::User {
@@ -1579,57 +1630,6 @@ class OpaqueInst : public sandboxir::Instruction {
 #endif
 };
 
-/// Contains a list of sandboxir::Instruction's.
-class BasicBlock : public Value {
-  /// Builds a graph that contains all values in \p BB in their original form
-  /// i.e., no vectorization is taking place here.
-  void buildBasicBlockFromLLVMIR(llvm::BasicBlock *LLVMBB);
-  friend class Context;     // For `buildBasicBlockFromIR`
-  friend class Instruction; // For LLVM Val.
-
-  BasicBlock(llvm::BasicBlock *BB, Context &SBCtx)
-      : Value(ClassID::Block, BB, SBCtx) {
-    buildBasicBlockFromLLVMIR(BB);
-  }
-
-public:
-  ~BasicBlock() = default;
-  /// For isa/dyn_cast.
-  static bool classof(const Value *From) {
-    return From->getSubclassID() == Value::ClassID::Block;
-  }
-  Function *getParent() const;
-  using iterator = BBIterator;
-  iterator begin() const;
-  iterator end() const {
-    auto *BB = cast<llvm::BasicBlock>(Val);
-    return iterator(BB, BB->end(), &Ctx);
-  }
-  std::reverse_iterator<iterator> rbegin() const {
-    return std::make_reverse_iterator(end());
-  }
-  std::reverse_iterator<iterator> rend() const {
-    return std::make_reverse_iterator(begin());
-  }
-  Context &getContext() const { return Ctx; }
-  Instruction *getTerminator() const;
-  bool empty() const { return begin() == end(); }
-  Instruction &front() const;
-  Instruction &back() const;
-
-#ifndef NDEBUG
-  void verify() const final {
-    assert(isa<llvm::BasicBlock>(Val) && "Expected BasicBlock!");
-  }
-  friend raw_ostream &operator<<(raw_ostream &OS, const BasicBlock &SBBB) {
-    SBBB.dump(OS);
-    return OS;
-  }
-  void dump(raw_ostream &OS) const final;
-  LLVM_DUMP_METHOD void dump() const final;
-#endif
-};
-
 class Context {
 protected:
   LLVMContext &LLVMCtx;

From a0d8fa5d3a7e05d30004dd4faeb45c1a96fd8769 Mon Sep 17 00:00:00 2001
From: Jiahan Xie <88367305+jiahanxie353@users.noreply.github.com>
Date: Wed, 31 Jul 2024 19:00:15 -0400
Subject: [PATCH 039/114] [RISCV][GlobalISel] Legalize Scalable Vector Loads and Stores (#84965)

This patch supports legalizing load and store instructions for scalable
vectors in RISCV
---
 .../CodeGen/GlobalISel/LegalityPredicates.cpp |    3 +-
 llvm/lib/CodeGen/MIRParser/MIParser.cpp       |    2 +-
 .../Target/RISCV/GISel/RISCVLegalizerInfo.cpp |  102 +-
 .../Target/RISCV/GISel/RISCVLegalizerInfo.h   |    3 +
 .../legalizer/rvv/legalize-load.mir           | 1043 +++++++++++++++++
 .../legalizer/rvv/legalize-store.mir          | 1043 +++++++++++++++++
 6 files changed, 2193 insertions(+), 3 deletions(-)
 create mode 100644
llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-load.mir create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-store.mir diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp index 2c77ed8b060088..8fe48195c610be 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp @@ -194,7 +194,8 @@ LegalityPredicate LegalityPredicates::memSizeNotByteSizePow2(unsigned MMOIdx) { return [=](const LegalityQuery &Query) { const LLT MemTy = Query.MMODescrs[MMOIdx].MemoryTy; return !MemTy.isByteSized() || - !llvm::has_single_bit(MemTy.getSizeInBytes()); + !llvm::has_single_bit( + MemTy.getSizeInBytes().getKnownMinValue()); }; } diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 1d16729aa33873..bf10794a100eb1 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -3388,7 +3388,7 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { if (expectAndConsume(MIToken::rparen)) return true; - Size = MemoryType.getSizeInBytes(); + Size = MemoryType.getSizeInBytes().getKnownMinValue(); } MachinePointerInfo Ptr = MachinePointerInfo(); diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index f033ea7250030d..4e583d96335d9f 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/ValueTypes.h" @@ -67,6 +68,17 @@ typeIsLegalBoolVec(unsigned TypeIdx, std::initializer_list BoolVecTys, return all(typeInSet(TypeIdx, BoolVecTys), P); } +static LegalityPredicate typeIsLegalPtrVec(unsigned TypeIdx, + std::initializer_list PtrVecTys, + const RISCVSubtarget &ST) { + LegalityPredicate P = [=, &ST](const LegalityQuery &Query) { + return ST.hasVInstructions() && + (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 || + ST.getELen() == 64); + }; + return all(typeInSet(TypeIdx, PtrVecTys), P); +} + RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) : STI(ST), XLen(STI.getXLen()), sXLen(LLT::scalar(XLen)) { const LLT sDoubleXLen = LLT::scalar(2 * XLen); @@ -111,6 +123,11 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) const LLT nxv4s64 = LLT::scalable_vector(4, s64); const LLT nxv8s64 = LLT::scalable_vector(8, s64); + const LLT nxv1p0 = LLT::scalable_vector(1, p0); + const LLT nxv2p0 = LLT::scalable_vector(2, p0); + const LLT nxv4p0 = LLT::scalable_vector(4, p0); + const LLT nxv8p0 = LLT::scalable_vector(8, p0); + using namespace TargetOpcode; auto BoolVecTys = {nxv1s1, nxv2s1, nxv4s1, nxv8s1, nxv16s1, nxv32s1, nxv64s1}; @@ -120,6 +137,8 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) nxv32s16, nxv1s32, nxv2s32, nxv4s32, nxv8s32, nxv16s32, nxv1s64, nxv2s64, nxv4s64, nxv8s64}; + auto PtrVecTys = {nxv1p0, nxv2p0, nxv4p0, nxv8p0}; + getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR}) .legalFor({s32, sXLen}) .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST)) @@ -266,6 +285,23 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) 
                                             {s32, p0, s16, 16},
                                             {s32, p0, s32, 32},
                                             {p0, p0, sXLen, XLen}});
+  if (ST.hasVInstructions())
+    LoadStoreActions.legalForTypesWithMemDesc({{nxv2s8, p0, nxv2s8, 8},
+                                               {nxv4s8, p0, nxv4s8, 8},
+                                               {nxv8s8, p0, nxv8s8, 8},
+                                               {nxv16s8, p0, nxv16s8, 8},
+                                               {nxv32s8, p0, nxv32s8, 8},
+                                               {nxv64s8, p0, nxv64s8, 8},
+                                               {nxv2s16, p0, nxv2s16, 16},
+                                               {nxv4s16, p0, nxv4s16, 16},
+                                               {nxv8s16, p0, nxv8s16, 16},
+                                               {nxv16s16, p0, nxv16s16, 16},
+                                               {nxv32s16, p0, nxv32s16, 16},
+                                               {nxv2s32, p0, nxv2s32, 32},
+                                               {nxv4s32, p0, nxv4s32, 32},
+                                               {nxv8s32, p0, nxv8s32, 32},
+                                               {nxv16s32, p0, nxv16s32, 32}});
+
   auto &ExtLoadActions =
       getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
           .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 16}});
@@ -279,7 +315,28 @@
   } else if (ST.hasStdExtD()) {
     LoadStoreActions.legalForTypesWithMemDesc({{s64, p0, s64, 64}});
   }
-  LoadStoreActions.clampScalar(0, s32, sXLen).lower();
+  if (ST.hasVInstructions() && ST.getELen() == 64)
+    LoadStoreActions.legalForTypesWithMemDesc({{nxv1s8, p0, nxv1s8, 8},
+                                               {nxv1s16, p0, nxv1s16, 16},
+                                               {nxv1s32, p0, nxv1s32, 32}});
+
+  if (ST.hasVInstructionsI64())
+    LoadStoreActions.legalForTypesWithMemDesc({{nxv1s64, p0, nxv1s64, 64},
+
+                                               {nxv2s64, p0, nxv2s64, 64},
+                                               {nxv4s64, p0, nxv4s64, 64},
+                                               {nxv8s64, p0, nxv8s64, 64}});
+
+  LoadStoreActions.widenScalarToNextPow2(0, /* MinSize = */ 8)
+      .lowerIfMemSizeNotByteSizePow2()
+      // we will take the custom lowering logic if we have scalable vector types
+      // with non-standard alignments
+      .customIf(LegalityPredicate(
+          LegalityPredicates::any(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
+                                  typeIsLegalPtrVec(0, PtrVecTys, ST))))
+      .clampScalar(0, s32, sXLen)
+      .lower();
+
   ExtLoadActions.widenScalarToNextPow2(0).clampScalar(0, s32, sXLen).lower();

   getActionDefinitionsBuilder({G_PTR_ADD, G_PTRMASK}).legalFor({{p0, sXLen}});
@@ -651,6 +708,46 @@ bool RISCVLegalizerInfo::legalizeExt(MachineInstr &MI,
   return true;
 }
 
+bool RISCVLegalizerInfo::legalizeLoadStore(MachineInstr &MI,
+                                           LegalizerHelper &Helper,
+                                           MachineIRBuilder &MIB) const {
+  assert((isa<GLoad>(MI) || isa<GStore>(MI)) &&
+         "Machine instructions must be Load/Store.");
+  MachineRegisterInfo &MRI = *MIB.getMRI();
+  MachineFunction *MF = MI.getMF();
+  const DataLayout &DL = MIB.getDataLayout();
+  LLVMContext &Ctx = MF->getFunction().getContext();
+
+  Register DstReg = MI.getOperand(0).getReg();
+  LLT DataTy = MRI.getType(DstReg);
+  if (!DataTy.isVector())
+    return false;
+
+  if (!MI.hasOneMemOperand())
+    return false;
+
+  MachineMemOperand *MMO = *MI.memoperands_begin();
+
+  const auto *TLI = STI.getTargetLowering();
+  EVT VT = EVT::getEVT(getTypeForLLT(DataTy, Ctx));
+
+  if (TLI->allowsMemoryAccessForAlignment(Ctx, DL, VT, *MMO))
+    return true;
+
+  unsigned EltSizeBits = DataTy.getScalarSizeInBits();
+  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
+         "Unexpected unaligned RVV load type");
+
+  // Calculate the new vector type with i8 elements
+  unsigned NumElements =
+      DataTy.getElementCount().getKnownMinValue() * (EltSizeBits / 8);
+  LLT NewDataTy = LLT::scalable_vector(NumElements, 8);
+
+  Helper.bitcast(MI, 0, NewDataTy);
+
+  return true;
+}
+
 /// Return the type of the mask type suitable for masking the provided
 /// vector type. This is simply an i1 element type vector of the same
 /// (possibly scalable) length.
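The customIf action above hands any scalable-vector load or store with a
non-standard alignment to legalizeLoadStore, which keeps the total size but
reinterprets the access as a vector of bytes. The snippet below is a minimal,
self-contained sketch of that size computation in plain C++; the struct and
function names are illustrative only and are not part of the patch or of the
LLVM API.

#include <cassert>
#include <cstdio>

// Sketch: an under-aligned <vscale x N x iBits> access is rewritten as
// <vscale x N * (Bits / 8) x i8>, which is always naturally aligned.
struct ScalableVecTy {
  unsigned MinNumElts; // the N in <vscale x N x iBits>
  unsigned EltBits;    // element width in bits: 16, 32 or 64
};

static ScalableVecTy bitcastToByteVector(ScalableVecTy Ty) {
  assert(Ty.EltBits % 8 == 0 && "element type must be byte sized");
  return {Ty.MinNumElts * (Ty.EltBits / 8), 8};
}

int main() {
  // e.g. an align-1 load of <vscale x 4 x i16> becomes <vscale x 8 x i8>.
  ScalableVecTy ByteTy = bitcastToByteVector({4, 16});
  std::printf("<vscale x %u x i%u>\n", ByteTy.MinNumElts, ByteTy.EltBits);
  return 0;
}

The legalize-load.mir and legalize-store.mir tests added further below
exercise these cases across element types and alignments.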
@@ -828,6 +925,9 @@ bool RISCVLegalizerInfo::legalizeCustom( return legalizeExt(MI, MIRBuilder); case TargetOpcode::G_SPLAT_VECTOR: return legalizeSplatVector(MI, MIRBuilder); + case TargetOpcode::G_LOAD: + case TargetOpcode::G_STORE: + return legalizeLoadStore(MI, Helper, MIRBuilder); } llvm_unreachable("expected switch to return"); diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h index 5bb1e7a7282788..2fc28615e7630d 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h @@ -13,6 +13,7 @@ #ifndef LLVM_LIB_TARGET_RISCV_RISCVMACHINELEGALIZER_H #define LLVM_LIB_TARGET_RISCV_RISCVMACHINELEGALIZER_H +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/Register.h" @@ -45,6 +46,8 @@ class RISCVLegalizerInfo : public LegalizerInfo { bool legalizeVScale(MachineInstr &MI, MachineIRBuilder &MIB) const; bool legalizeExt(MachineInstr &MI, MachineIRBuilder &MIRBuilder) const; bool legalizeSplatVector(MachineInstr &MI, MachineIRBuilder &MIB) const; + bool legalizeLoadStore(MachineInstr &MI, LegalizerHelper &Helper, + MachineIRBuilder &MIB) const; }; } // end namespace llvm #endif diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-load.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-load.mir new file mode 100644 index 00000000000000..12f218863e4002 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-load.mir @@ -0,0 +1,1043 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s +--- | + + define @vload_nxv1i8(ptr %pa) #0 { + %va = load , ptr %pa, align 1 + ret %va + } + + define @vload_nxv2i8(ptr %pa) #0 { + %va = load , ptr %pa, align 2 + ret %va + } + + define @vload_nxv4i8(ptr %pa) #0 { + %va = load , ptr %pa, align 4 + ret %va + } + + define @vload_nxv8i8(ptr %pa) #0 { + %va = load , ptr %pa, align 8 + ret %va + } + + define @vload_nxv16i8(ptr %pa) #0 { + %va = load , ptr %pa, align 16 + ret %va + } + + define @vload_nxv32i8(ptr %pa) #0 { + %va = load , ptr %pa, align 32 + ret %va + } + + define @vload_nxv64i8(ptr %pa) #0 { + %va = load , ptr %pa, align 64 + ret %va + } + + define @vload_nxv1i16(ptr %pa) #0 { + %va = load , ptr %pa, align 2 + ret %va + } + + define @vload_nxv2i16(ptr %pa) #0 { + %va = load , ptr %pa, align 4 + ret %va + } + + define @vload_nxv4i16(ptr %pa) #0 { + %va = load , ptr %pa, align 8 + ret %va + } + + define @vload_nxv8i16(ptr %pa) #0 { + %va = load , ptr %pa, align 16 + ret %va + } + + define @vload_nxv16i16(ptr %pa) #0 { + %va = load , ptr %pa, align 32 + ret %va + } + + define @vload_nxv32i16(ptr %pa) #0 { + %va = load , ptr %pa, align 64 + ret %va + } + + define @vload_nxv1i32(ptr %pa) #0 { + %va = load , ptr %pa, align 4 + ret %va + } + + define @vload_nxv2i32(ptr %pa) #0 { + %va = load , ptr %pa, align 8 + ret %va + } + + define @vload_nxv4i32(ptr %pa) #0 { + %va = load , ptr %pa, align 16 + ret %va + } + + define @vload_nxv8i32(ptr %pa) #0 { + %va = load , ptr %pa, align 32 + ret %va + } + + define @vload_nxv16i32(ptr %pa) #0 { + %va = load , ptr %pa, align 64 + ret %va + } + + define @vload_nxv1i64(ptr %pa) #0 { + %va = load , ptr %pa, align 8 + ret %va + } + + define @vload_nxv2i64(ptr %pa) #0 { + %va = load , ptr %pa, 
align 16 + ret %va + } + + define @vload_nxv4i64(ptr %pa) #0 { + %va = load , ptr %pa, align 32 + ret %va + } + + define @vload_nxv8i64(ptr %pa) #0 { + %va = load , ptr %pa, align 64 + ret %va + } + + define @vload_nxv16i8_align1(ptr %pa) #0 { + %va = load , ptr %pa, align 1 + ret %va + } + + define @vload_nxv16i8_align2(ptr %pa) #0 { + %va = load , ptr %pa, align 2 + ret %va + } + + define @vload_nxv16i8_align16(ptr %pa) #0 { + %va = load , ptr %pa, align 16 + ret %va + } + + define @vload_nxv16i8_align64(ptr %pa) #0 { + %va = load , ptr %pa, align 64 + ret %va + } + + define @vload_nxv4i16_align1(ptr %pa) #0 { + %va = load , ptr %pa, align 1 + ret %va + } + + define @vload_nxv4i16_align2(ptr %pa) #0 { + %va = load , ptr %pa, align 2 + ret %va + } + + define @vload_nxv4i16_align4(ptr %pa) #0 { + %va = load , ptr %pa, align 4 + ret %va + } + + define @vload_nxv4i16_align8(ptr %pa) #0 { + %va = load , ptr %pa, align 8 + ret %va + } + + define @vload_nxv4i16_align16(ptr %pa) #0 { + %va = load , ptr %pa, align 16 + ret %va + } + + define @vload_nxv2i32_align2(ptr %pa) #0 { + %va = load , ptr %pa, align 2 + ret %va + } + + define @vload_nxv2i32_align4(ptr %pa) #0 { + %va = load , ptr %pa, align 4 + ret %va + } + + define @vload_nxv2i32_align8(ptr %pa) #0 { + %va = load , ptr %pa, align 8 + ret %va + } + + define @vload_nxv2i32_align16(ptr %pa) #0 { + %va = load , ptr %pa, align 16 + ret %va + } + + define @vload_nxv2i32_align256(ptr %pa) #0 { + %va = load , ptr %pa, align 256 + ret %va + } + + define @vload_nxv2i64_align4(ptr %pa) #0 { + %va = load , ptr %pa, align 4 + ret %va + } + + define @vload_nxv2i64_align8(ptr %pa) #0 { + %va = load , ptr %pa, align 8 + ret %va + } + + define @vload_nxv2i64_align16(ptr %pa) #0 { + %va = load , ptr %pa, align 16 + ret %va + } + + define @vload_nxv2i64_align32(ptr %pa) #0 { + %va = load , ptr %pa, align 32 + ret %va + } + + define @vload_nxv1ptr(ptr %pa) #0 { + %va = load , ptr %pa, align 4 + ret %va + } + + define @vload_nxv2ptr(ptr %pa) #0 { + %va = load , ptr %pa, align 8 + ret %va + } + + define @vload_nxv8ptr(ptr %pa) #0 { + %va = load , ptr %pa, align 32 + ret %va + } + + attributes #0 = { "target-features"="+v" } + +... +--- +name: vload_nxv1i8 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv1i8 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nxv2i8 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv2i8 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: vload_nxv4i8 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv4i8 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nxv8i8 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv8i8 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nxv16i8 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv16i8 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8m2 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: vload_nxv32i8 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv32i8 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8m4 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: vload_nxv64i8 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv64i8 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8m8 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: vload_nxv1i16 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv1i16 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nxv2i16 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv2i16 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: vload_nxv4i16 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv4i16 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nxv8i16 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv8i16 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8m2 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: vload_nxv16i16 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv16i16 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8m4 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: vload_nxv32i16 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv32i16 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8m8 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: vload_nxv1i32 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv1i32 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nxv2i32 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv2i32 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nxv4i32 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv4i32 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8m2 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... 
+--- +name: vload_nxv8i32 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv8i32 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8m4 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: vload_nxv16i32 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv16i32 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8m8 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: vload_nxv1i64 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv1i64 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nxv2i64 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv2i64 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8m2 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: vload_nxv4i64 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv4i64 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8m4 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: vload_nxv8i64 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv8i64 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8m8 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: vload_nxv16i8_align1 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv16i8_align1 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 1) + ; CHECK-NEXT: $v8m2 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 1) + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... 
+--- +name: vload_nxv16i8_align2 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv16i8_align2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 2) + ; CHECK-NEXT: $v8m2 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 2) + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: vload_nxv16i8_align16 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv16i8_align16 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8m2 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: vload_nxv16i8_align64 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv16i8_align64 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 64) + ; CHECK-NEXT: $v8m2 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 64) + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: vload_nxv4i16_align1 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv4i16_align1 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 1) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_() = G_BITCAST [[LOAD]]() + ; CHECK-NEXT: $v8 = COPY [[BITCAST]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 1) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nxv4i16_align2 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv4i16_align2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 2) + ; CHECK-NEXT: $v8 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 2) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nxv4i16_align4 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv4i16_align4 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 4) + ; CHECK-NEXT: $v8 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 4) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: vload_nxv4i16_align8 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv4i16_align8 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nxv4i16_align16 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv4i16_align16 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 16) + ; CHECK-NEXT: $v8 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 16) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nxv2i32_align2 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv2i32_align2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 2) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_() = G_BITCAST [[LOAD]]() + ; CHECK-NEXT: $v8 = COPY [[BITCAST]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 2) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nxv2i32_align4 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv2i32_align4 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 4) + ; CHECK-NEXT: $v8 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 4) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nxv2i32_align8 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv2i32_align8 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nxv2i32_align16 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv2i32_align16 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 16) + ; CHECK-NEXT: $v8 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 16) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: vload_nxv2i32_align256 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv2i32_align256 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 256) + ; CHECK-NEXT: $v8 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 256) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nxv2i64_align4 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv2i64_align4 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 4) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_() = G_BITCAST [[LOAD]]() + ; CHECK-NEXT: $v8m2 = COPY [[BITCAST]]() + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 4) + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: vload_nxv2i64_align8 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv2i64_align8 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 8) + ; CHECK-NEXT: $v8m2 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 8) + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: vload_nxv2i64_align16 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv2i64_align16 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8m2 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: vload_nxv2i64_align32 +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv2i64_align32 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 32) + ; CHECK-NEXT: $v8m2 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 32) + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: vload_nxv1ptr +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv1ptr + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: vload_nxv2ptr +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv2ptr + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nxv8ptr +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vload_nxv8ptr + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; CHECK-NEXT: $v8m4 = COPY [[LOAD]]() + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-store.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-store.mir new file mode 100644 index 00000000000000..b91d25509646f5 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-store.mir @@ -0,0 +1,1043 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s +--- | + + define void @vstore_nx1i8(ptr %pa, %b) #0 { + store %b, ptr %pa, align 1 + ret void + } + + define void @vstore_nx2i8(ptr %pa, %b) #0 { + store %b, ptr %pa, align 2 + ret void + } + + define void @vstore_nx4i8(ptr %pa, %b) #0 { + store %b, ptr %pa, align 4 + ret void + } + + define void @vstore_nx8i8(ptr %pa, %b) #0 { + store %b, ptr %pa, align 8 + ret void + } + + define void @vstore_nx16i8(ptr %pa, %b) #0 { + store %b, ptr %pa, align 16 + ret void + } + + define void @vstore_nx32i8(ptr %pa, %b) #0 { + store %b, ptr %pa, align 32 + ret void + } + + define void @vstore_nx64i8(ptr %pa, %b) #0 { + store %b, ptr %pa, align 64 + ret void + } + + define void @vstore_nx1i16(ptr %pa, %b) #0 { + store %b, ptr %pa, align 2 + ret void + } + + define void @vstore_nx2i16(ptr %pa, %b) #0 { + store %b, ptr %pa, align 4 + ret void + } + + define void @vstore_nx4i16(ptr %pa, %b) #0 { + store %b, ptr %pa, align 8 + ret void + } + + define void @vstore_nx8i16(ptr %pa, %b) #0 { + store %b, ptr %pa, align 16 + ret void + } + + define void @vstore_nx16i16(ptr %pa, %b) #0 { + store %b, ptr %pa, align 32 + ret void + } + + define void @vstore_nx32i16(ptr %pa, %b) #0 { + store %b, ptr %pa, align 64 + ret void + } + + define void @vstore_nx1i32(ptr %pa, %b) #0 { + store %b, ptr %pa, align 4 + ret void + } + + define void @vstore_nx2i32(ptr %pa, %b) #0 { + store %b, ptr %pa, align 8 + ret void + } + + define void @vstore_nx4i32(ptr %pa, %b) #0 { + store %b, ptr %pa, align 16 + ret void + } + + define void @vstore_nx8i32(ptr %pa, %b) #0 { + store %b, ptr %pa, align 32 + ret void + } + + define void @vstore_nx16i32(ptr %pa, %b) #0 { + store %b, ptr %pa, align 64 + ret void + } + + define void @vstore_nx1i64(ptr %pa, %b) #0 { + store %b, ptr %pa, align 8 + ret void + } + + define void @vstore_nx2i64(ptr %pa, %b) #0 { + store %b, ptr %pa, align 16 + ret void + } + + define void @vstore_nx4i64(ptr %pa, %b) #0 { + store %b, ptr %pa, align 32 + ret void + } + + define void 
@vstore_nx8i64(ptr %pa, %b) #0 { + store %b, ptr %pa, align 64 + ret void + } + + define void @vstore_nx16i8_align1(ptr %pa, %b) #0 { + store %b, ptr %pa, align 1 + ret void + } + + define void @vstore_nx16i8_align2(ptr %pa, %b) #0 { + store %b, ptr %pa, align 2 + ret void + } + + define void @vstore_nx16i8_align16(ptr %pa, %b) #0 { + store %b, ptr %pa, align 16 + ret void + } + + define void @vstore_nx16i8_align64(ptr %pa, %b) #0 { + store %b, ptr %pa, align 64 + ret void + } + + define void @vstore_nx4i16_align1(ptr %pa, %b) #0 { + store %b, ptr %pa, align 1 + ret void + } + + define void @vstore_nx4i16_align2(ptr %pa, %b) #0 { + store %b, ptr %pa, align 2 + ret void + } + + define void @vstore_nx4i16_align4(ptr %pa, %b) #0 { + store %b, ptr %pa, align 4 + ret void + } + + define void @vstore_nx4i16_align8(ptr %pa, %b) #0 { + store %b, ptr %pa, align 8 + ret void + } + + define void @vstore_nx4i16_align16(ptr %pa, %b) #0 { + store %b, ptr %pa, align 16 + ret void + } + + define void @vstore_nx2i32_align2(ptr %pa, %b) #0 { + store %b, ptr %pa, align 2 + ret void + } + + define void @vstore_nx2i32_align4(ptr %pa, %b) #0 { + store %b, ptr %pa, align 4 + ret void + } + + define void @vstore_nx2i32_align8(ptr %pa, %b) #0 { + store %b, ptr %pa, align 8 + ret void + } + + define void @vstore_nx2i32_align16(ptr %pa, %b) #0 { + store %b, ptr %pa, align 16 + ret void + } + + define void @vstore_nx2i32_align256(ptr %pa, %b) #0 { + store %b, ptr %pa, align 256 + ret void + } + + define void @vstore_nx2i64_align4(ptr %pa, %b) #0 { + store %b, ptr %pa, align 4 + ret void + } + + define void @vstore_nx2i64_align8(ptr %pa, %b) #0 { + store %b, ptr %pa, align 8 + ret void + } + + define void @vstore_nx2i64_align16(ptr %pa, %b) #0 { + store %b, ptr %pa, align 16 + ret void + } + + define void @vstore_nx2i64_align32(ptr %pa, %b) #0 { + store %b, ptr %pa, align 32 + ret void + } + + define void @vstore_nx1ptr(ptr %pa, %b) #0 { + store %b, ptr %pa, align 4 + ret void + } + + define void @vstore_nx2ptr(ptr %pa, %b) #0 { + store %b, ptr %pa, align 8 + ret void + } + + define void @vstore_nx8ptr(ptr %pa, %b) #0 { + store %b, ptr %pa, align 32 + ret void + } + + attributes #0 = { "target-features"="+v" } + +... +--- +name: vstore_nx1i8 +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; CHECK-LABEL: name: vstore_nx1i8 + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx2i8 +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; CHECK-LABEL: name: vstore_nx2i8 + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... 
+--- +name: vstore_nx4i8 +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; CHECK-LABEL: name: vstore_nx4i8 + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx8i8 +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; CHECK-LABEL: name: vstore_nx8i8 + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx16i8 +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m2 + + ; CHECK-LABEL: name: vstore_nx16i8 + ; CHECK: liveins: $x10, $v8m2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8m2 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m2 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx32i8 +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m4 + + ; CHECK-LABEL: name: vstore_nx32i8 + ; CHECK: liveins: $x10, $v8m4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8m4 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m4 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx64i8 +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m8 + + ; CHECK-LABEL: name: vstore_nx64i8 + ; CHECK: liveins: $x10, $v8m8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8m8 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx1i16 +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; CHECK-LABEL: name: vstore_nx1i16 + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx2i16 +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; CHECK-LABEL: name: vstore_nx2i16 + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... 
+--- +name: vstore_nx4i16 +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; CHECK-LABEL: name: vstore_nx4i16 + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx8i16 +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m2 + + ; CHECK-LABEL: name: vstore_nx8i16 + ; CHECK: liveins: $x10, $v8m2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8m2 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m2 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx16i16 +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m4 + + ; CHECK-LABEL: name: vstore_nx16i16 + ; CHECK: liveins: $x10, $v8m4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8m4 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m4 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx32i16 +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m8 + + ; CHECK-LABEL: name: vstore_nx32i16 + ; CHECK: liveins: $x10, $v8m8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8m8 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx1i32 +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; CHECK-LABEL: name: vstore_nx1i32 + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx2i32 +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; CHECK-LABEL: name: vstore_nx2i32 + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx4i32 +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m2 + + ; CHECK-LABEL: name: vstore_nx4i32 + ; CHECK: liveins: $x10, $v8m2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8m2 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m2 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... 
+--- +name: vstore_nx8i32 +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m4 + + ; CHECK-LABEL: name: vstore_nx8i32 + ; CHECK: liveins: $x10, $v8m4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8m4 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m4 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx16i32 +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m8 + + ; CHECK-LABEL: name: vstore_nx16i32 + ; CHECK: liveins: $x10, $v8m8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8m8 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx1i64 +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; CHECK-LABEL: name: vstore_nx1i64 + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx2i64 +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m2 + + ; CHECK-LABEL: name: vstore_nx2i64 + ; CHECK: liveins: $x10, $v8m2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8m2 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m2 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx4i64 +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m4 + + ; CHECK-LABEL: name: vstore_nx4i64 + ; CHECK: liveins: $x10, $v8m4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8m4 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m4 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx8i64 +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m8 + + ; CHECK-LABEL: name: vstore_nx8i64 + ; CHECK: liveins: $x10, $v8m8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8m8 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx16i8_align1 +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m2 + + ; CHECK-LABEL: name: vstore_nx16i8_align1 + ; CHECK: liveins: $x10, $v8m2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8m2 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 1) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m2 + G_STORE %1(), %0(p0) :: (store () into %ir.pa, align 1) + PseudoRET + +... 
+--- +name: vstore_nx16i8_align2 +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m2 + + ; CHECK-LABEL: name: vstore_nx16i8_align2 + ; CHECK: liveins: $x10, $v8m2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8m2 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 2) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m2 + G_STORE %1(), %0(p0) :: (store () into %ir.pa, align 2) + PseudoRET + +... +--- +name: vstore_nx16i8_align16 +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m2 + + ; CHECK-LABEL: name: vstore_nx16i8_align16 + ; CHECK: liveins: $x10, $v8m2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8m2 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m2 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx16i8_align64 +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m2 + + ; CHECK-LABEL: name: vstore_nx16i8_align64 + ; CHECK: liveins: $x10, $v8m2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8m2 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 64) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m2 + G_STORE %1(), %0(p0) :: (store () into %ir.pa, align 64) + PseudoRET + +... +--- +name: vstore_nx4i16_align1 +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; CHECK-LABEL: name: vstore_nx4i16_align1 + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_() = G_BITCAST [[COPY1]]() + ; CHECK-NEXT: G_STORE [[BITCAST]](), [[COPY]](p0) :: (store () into %ir.pa, align 1) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa, align 1) + PseudoRET + +... +--- +name: vstore_nx4i16_align2 +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; CHECK-LABEL: name: vstore_nx4i16_align2 + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 2) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa, align 2) + PseudoRET + +... +--- +name: vstore_nx4i16_align4 +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; CHECK-LABEL: name: vstore_nx4i16_align4 + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 4) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa, align 4) + PseudoRET + +... 
+--- +name: vstore_nx4i16_align8 +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; CHECK-LABEL: name: vstore_nx4i16_align8 + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx4i16_align16 +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; CHECK-LABEL: name: vstore_nx4i16_align16 + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 16) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa, align 16) + PseudoRET + +... +--- +name: vstore_nx2i32_align2 +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; CHECK-LABEL: name: vstore_nx2i32_align2 + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_() = G_BITCAST [[COPY1]]() + ; CHECK-NEXT: G_STORE [[BITCAST]](), [[COPY]](p0) :: (store () into %ir.pa, align 2) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa, align 2) + PseudoRET + +... +--- +name: vstore_nx2i32_align4 +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; CHECK-LABEL: name: vstore_nx2i32_align4 + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 4) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa, align 4) + PseudoRET + +... +--- +name: vstore_nx2i32_align8 +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; CHECK-LABEL: name: vstore_nx2i32_align8 + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx2i32_align16 +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; CHECK-LABEL: name: vstore_nx2i32_align16 + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 16) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa, align 16) + PseudoRET + +... 
+--- +name: vstore_nx2i32_align256 +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; CHECK-LABEL: name: vstore_nx2i32_align256 + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 256) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa, align 256) + PseudoRET + +... +--- +name: vstore_nx2i64_align4 +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m2 + + ; CHECK-LABEL: name: vstore_nx2i64_align4 + ; CHECK: liveins: $x10, $v8m2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8m2 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_() = G_BITCAST [[COPY1]]() + ; CHECK-NEXT: G_STORE [[BITCAST]](), [[COPY]](p0) :: (store () into %ir.pa, align 4) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m2 + G_STORE %1(), %0(p0) :: (store () into %ir.pa, align 4) + PseudoRET + +... +--- +name: vstore_nx2i64_align8 +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m2 + + ; CHECK-LABEL: name: vstore_nx2i64_align8 + ; CHECK: liveins: $x10, $v8m2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8m2 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 8) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m2 + G_STORE %1(), %0(p0) :: (store () into %ir.pa, align 8) + PseudoRET + +... +--- +name: vstore_nx2i64_align16 +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m2 + + ; CHECK-LABEL: name: vstore_nx2i64_align16 + ; CHECK: liveins: $x10, $v8m2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8m2 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m2 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx2i64_align32 +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m2 + + ; CHECK-LABEL: name: vstore_nx2i64_align32 + ; CHECK: liveins: $x10, $v8m2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8m2 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 32) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m2 + G_STORE %1(), %0(p0) :: (store () into %ir.pa, align 32) + PseudoRET + +... +--- +name: vstore_nx1ptr +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; CHECK-LABEL: name: vstore_nx1ptr + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... 
+--- +name: vstore_nx2ptr +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m2 + + ; CHECK-LABEL: name: vstore_nx2ptr + ; CHECK: liveins: $x10, $v8m2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8m2 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m2 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx8ptr +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m8 + + ; CHECK-LABEL: name: vstore_nx8ptr + ; CHECK: liveins: $x10, $v8m8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v8m8 + ; CHECK-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; CHECK-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... From 1c66ef915710fd4450f85ebb0486695e9bbc4dfc Mon Sep 17 00:00:00 2001 From: Jiahan Xie <88367305+jiahanxie353@users.noreply.github.com> Date: Wed, 31 Jul 2024 19:18:42 -0400 Subject: [PATCH 040/114] [GISEL][RISCV] RegBank Select for Scalable Vector Load/Store (#99932) This patch supports GlobalISel for register bank selection for scalable vector load and store instructions in RISC-V --- llvm/lib/CodeGen/RegisterBankInfo.cpp | 5 +- .../RISCV/GISel/RISCVRegisterBankInfo.cpp | 25 +- .../GlobalISel/regbankselect/rvv/load.mir | 1481 +++++++++++++++++ .../GlobalISel/regbankselect/rvv/store.mir | 1481 +++++++++++++++++ 4 files changed, 2986 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/load.mir create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/store.mir diff --git a/llvm/lib/CodeGen/RegisterBankInfo.cpp b/llvm/lib/CodeGen/RegisterBankInfo.cpp index 72b07eb1902d9b..00dcc1fbcd0c77 100644 --- a/llvm/lib/CodeGen/RegisterBankInfo.cpp +++ b/llvm/lib/CodeGen/RegisterBankInfo.cpp @@ -215,8 +215,9 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const { } } - unsigned Size = getSizeInBits(Reg, MRI, TRI); - const ValueMapping *ValMapping = &getValueMapping(0, Size, *CurRegBank); + TypeSize Size = getSizeInBits(Reg, MRI, TRI); + const ValueMapping *ValMapping = + &getValueMapping(0, Size.getKnownMinValue(), *CurRegBank); if (IsCopyLike) { if (!OperandsMapping[0]) { if (MI.isRegSequence()) { diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp index 43bbc8589e7e21..2b1df0cd4670a6 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp @@ -310,10 +310,18 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { switch (Opc) { case TargetOpcode::G_LOAD: { LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - OpdsMapping[0] = GPRValueMapping; + TypeSize Size = Ty.getSizeInBits(); + if (Ty.isVector()) + OpdsMapping[0] = getVRBValueMapping(Size.getKnownMinValue()); + else + OpdsMapping[0] = GPRValueMapping; + OpdsMapping[1] = GPRValueMapping; + + if (Ty.isVector()) + break; // Use FPR64 for s64 loads on rv32. 
- if (GPRSize == 32 && Ty.getSizeInBits() == 64) { + if (GPRSize == 32 && Size.getFixedValue() == 64) { assert(MF.getSubtarget().hasStdExtD()); OpdsMapping[0] = getFPValueMapping(Ty.getSizeInBits()); break; @@ -333,10 +341,19 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } case TargetOpcode::G_STORE: { LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - OpdsMapping[0] = GPRValueMapping; + TypeSize Size = Ty.getSizeInBits(); + if (Ty.isVector()) + OpdsMapping[0] = getVRBValueMapping(Size.getKnownMinValue()); + else + OpdsMapping[0] = GPRValueMapping; + OpdsMapping[1] = GPRValueMapping; + + if (Ty.isVector()) + break; + // Use FPR64 for s64 stores on rv32. - if (GPRSize == 32 && Ty.getSizeInBits() == 64) { + if (GPRSize == 32 && Size.getFixedValue() == 64) { assert(MF.getSubtarget().hasStdExtD()); OpdsMapping[0] = getFPValueMapping(Ty.getSizeInBits()); break; diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/load.mir b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/load.mir new file mode 100644 index 00000000000000..5c02c720822b18 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/load.mir @@ -0,0 +1,1481 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+m,+v -run-pass=regbankselect \ +# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \ +# RUN: -o - | FileCheck -check-prefix=RV32I %s +# RUN: llc -mtriple=riscv64 -mattr=+m,+v -run-pass=regbankselect \ +# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \ +# RUN: -o - | FileCheck -check-prefix=RV64I %s +--- | + + define @vload_nx1i8(ptr %pa) #0 { + %va = load , ptr %pa, align 1 + ret %va + } + + define @vload_nx2i8(ptr %pa) #0 { + %va = load , ptr %pa, align 2 + ret %va + } + + define @vload_nx4i8(ptr %pa) #0 { + %va = load , ptr %pa, align 4 + ret %va + } + + define @vload_nx8i8(ptr %pa) #0 { + %va = load , ptr %pa, align 8 + ret %va + } + + define @vload_nx16i8(ptr %pa) #0 { + %va = load , ptr %pa, align 16 + ret %va + } + + define @vload_nx32i8(ptr %pa) #0 { + %va = load , ptr %pa, align 32 + ret %va + } + + define @vload_nx64i8(ptr %pa) #0 { + %va = load , ptr %pa, align 64 + ret %va + } + + define @vload_nx1i16(ptr %pa) #0 { + %va = load , ptr %pa, align 2 + ret %va + } + + define @vload_nx2i16(ptr %pa) #0 { + %va = load , ptr %pa, align 4 + ret %va + } + + define @vload_nx4i16(ptr %pa) #0 { + %va = load , ptr %pa, align 8 + ret %va + } + + define @vload_nx8i16(ptr %pa) #0 { + %va = load , ptr %pa, align 16 + ret %va + } + + define @vload_nx16i16(ptr %pa) #0 { + %va = load , ptr %pa, align 32 + ret %va + } + + define @vload_nx32i16(ptr %pa) #0 { + %va = load , ptr %pa, align 64 + ret %va + } + + define @vload_nx1i32(ptr %pa) #0 { + %va = load , ptr %pa, align 4 + ret %va + } + + define @vload_nx2i32(ptr %pa) #0 { + %va = load , ptr %pa, align 8 + ret %va + } + + define @vload_nx4i32(ptr %pa) #0 { + %va = load , ptr %pa, align 16 + ret %va + } + + define @vload_nx8i32(ptr %pa) #0 { + %va = load , ptr %pa, align 32 + ret %va + } + + define @vload_nx16i32(ptr %pa) #0 { + %va = load , ptr %pa, align 64 + ret %va + } + + define @vload_nx1i64(ptr %pa) #0 { + %va = load , ptr %pa, align 8 + ret %va + } + + define @vload_nx2i64(ptr %pa) #0 { + %va = load , ptr %pa, align 16 + ret %va + } + + define @vload_nx4i64(ptr %pa) #0 { + %va = load , ptr %pa, align 32 + ret %va + } + + define @vload_nx8i64(ptr %pa) #0 { + %va = load , ptr %pa, align 64 + ret %va + } + 
+  define <vscale x 16 x i8> @vload_nx16i8_align1(ptr %pa) #0 {
+    %va = load <vscale x 16 x i8>, ptr %pa, align 1
+    ret <vscale x 16 x i8> %va
+  }
+
+  define <vscale x 16 x i8> @vload_nx16i8_align2(ptr %pa) #0 {
+    %va = load <vscale x 16 x i8>, ptr %pa, align 2
+    ret <vscale x 16 x i8> %va
+  }
+
+  define <vscale x 16 x i8> @vload_nx16i8_align16(ptr %pa) #0 {
+    %va = load <vscale x 16 x i8>, ptr %pa, align 16
+    ret <vscale x 16 x i8> %va
+  }
+
+  define <vscale x 16 x i8> @vload_nx16i8_align64(ptr %pa) #0 {
+    %va = load <vscale x 16 x i8>, ptr %pa, align 64
+    ret <vscale x 16 x i8> %va
+  }
+
+  define <vscale x 4 x i16> @vload_nx4i16_align1(ptr %pa) #0 {
+    %va = load <vscale x 4 x i16>, ptr %pa, align 1
+    ret <vscale x 4 x i16> %va
+  }
+
+  define <vscale x 4 x i16> @vload_nx4i16_align2(ptr %pa) #0 {
+    %va = load <vscale x 4 x i16>, ptr %pa, align 2
+    ret <vscale x 4 x i16> %va
+  }
+
+  define <vscale x 4 x i16> @vload_nx4i16_align4(ptr %pa) #0 {
+    %va = load <vscale x 4 x i16>, ptr %pa, align 4
+    ret <vscale x 4 x i16> %va
+  }
+
+  define <vscale x 4 x i16> @vload_nx4i16_align8(ptr %pa) #0 {
+    %va = load <vscale x 4 x i16>, ptr %pa, align 8
+    ret <vscale x 4 x i16> %va
+  }
+
+  define <vscale x 4 x i16> @vload_nx4i16_align16(ptr %pa) #0 {
+    %va = load <vscale x 4 x i16>, ptr %pa, align 16
+    ret <vscale x 4 x i16> %va
+  }
+
+  define <vscale x 2 x i32> @vload_nx2i32_align2(ptr %pa) #0 {
+    %va = load <vscale x 2 x i32>, ptr %pa, align 2
+    ret <vscale x 2 x i32> %va
+  }
+
+  define <vscale x 2 x i32> @vload_nx2i32_align4(ptr %pa) #0 {
+    %va = load <vscale x 2 x i32>, ptr %pa, align 4
+    ret <vscale x 2 x i32> %va
+  }
+
+  define <vscale x 2 x i32> @vload_nx2i32_align8(ptr %pa) #0 {
+    %va = load <vscale x 2 x i32>, ptr %pa, align 8
+    ret <vscale x 2 x i32> %va
+  }
+
+  define <vscale x 2 x i32> @vload_nx2i32_align16(ptr %pa) #0 {
+    %va = load <vscale x 2 x i32>, ptr %pa, align 16
+    ret <vscale x 2 x i32> %va
+  }
+
+  define <vscale x 2 x i32> @vload_nx2i32_align256(ptr %pa) #0 {
+    %va = load <vscale x 2 x i32>, ptr %pa, align 256
+    ret <vscale x 2 x i32> %va
+  }
+
+  define <vscale x 2 x i64> @vload_nx2i64_align4(ptr %pa) #0 {
+    %va = load <vscale x 2 x i64>, ptr %pa, align 4
+    ret <vscale x 2 x i64> %va
+  }
+
+  define <vscale x 2 x i64> @vload_nx2i64_align8(ptr %pa) #0 {
+    %va = load <vscale x 2 x i64>, ptr %pa, align 8
+    ret <vscale x 2 x i64> %va
+  }
+
+  define <vscale x 2 x i64> @vload_nx2i64_align16(ptr %pa) #0 {
+    %va = load <vscale x 2 x i64>, ptr %pa, align 16
+    ret <vscale x 2 x i64> %va
+  }
+
+  define <vscale x 2 x i64> @vload_nx2i64_align32(ptr %pa) #0 {
+    %va = load <vscale x 2 x i64>, ptr %pa, align 32
+    ret <vscale x 2 x i64> %va
+  }
+
+  define <vscale x 1 x ptr> @vload_nx1ptr(ptr %pa) #0 {
+    %va = load <vscale x 1 x ptr>, ptr %pa, align 4
+    ret <vscale x 1 x ptr> %va
+  }
+
+  define <vscale x 2 x ptr> @vload_nx2ptr(ptr %pa) #0 {
+    %va = load <vscale x 2 x ptr>, ptr %pa, align 8
+    ret <vscale x 2 x ptr> %va
+  }
+
+  define <vscale x 8 x ptr> @vload_nx8ptr(ptr %pa) #0 {
+    %va = load <vscale x 8 x ptr>, ptr %pa, align 32
+    ret <vscale x 8 x ptr> %va
+  }
+
+...
+---
+name: vload_nx1i8
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1 (%ir-block.0):
+    liveins: $x10
+
+    ; RV32I-LABEL: name: vload_nx1i8
+    ; RV32I: liveins: $x10
+    ; RV32I-NEXT: {{  $}}
+    ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10
+    ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb(<vscale x 1 x s8>) = G_LOAD [[COPY]](p0) :: (load (<vscale x 1 x s8>) from %ir.pa)
+    ; RV32I-NEXT: $v8 = COPY [[LOAD]](<vscale x 1 x s8>)
+    ; RV32I-NEXT: PseudoRET implicit $v8
+    ;
+    ; RV64I-LABEL: name: vload_nx1i8
+    ; RV64I: liveins: $x10
+    ; RV64I-NEXT: {{  $}}
+    ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10
+    ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb(<vscale x 1 x s8>) = G_LOAD [[COPY]](p0) :: (load (<vscale x 1 x s8>) from %ir.pa)
+    ; RV64I-NEXT: $v8 = COPY [[LOAD]](<vscale x 1 x s8>)
+    ; RV64I-NEXT: PseudoRET implicit $v8
+    %0:_(p0) = COPY $x10
+    %1:_(<vscale x 1 x s8>) = G_LOAD %0(p0) :: (load (<vscale x 1 x s8>) from %ir.pa)
+    $v8 = COPY %1(<vscale x 1 x s8>)
+    PseudoRET implicit $v8
+
+...
+--- +name: vload_nx2i8 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx2i8 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: vload_nx2i8 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nx4i8 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx4i8 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: vload_nx4i8 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nx8i8 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx8i8 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: vload_nx8i8 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nx16i8 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx16i8 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8m2 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: vload_nx16i8 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8m2 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... 
+--- +name: vload_nx32i8 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx32i8 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8m4 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: vload_nx32i8 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8m4 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: vload_nx64i8 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx64i8 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8m8 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: vload_nx64i8 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8m8 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: vload_nx1i16 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx1i16 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: vload_nx1i16 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nx2i16 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx2i16 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: vload_nx2i16 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: vload_nx4i16 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx4i16 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: vload_nx4i16 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nx8i16 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx8i16 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8m2 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: vload_nx8i16 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8m2 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: vload_nx16i16 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx16i16 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8m4 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: vload_nx16i16 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8m4 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: vload_nx32i16 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx32i16 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8m8 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: vload_nx32i16 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8m8 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... 
+--- +name: vload_nx1i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx1i32 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: vload_nx1i32 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nx2i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx2i32 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: vload_nx2i32 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nx4i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx4i32 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8m2 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: vload_nx4i32 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8m2 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: vload_nx8i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx8i32 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8m4 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: vload_nx8i32 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8m4 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... 
+--- +name: vload_nx16i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx16i32 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8m8 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: vload_nx16i32 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8m8 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: vload_nx1i64 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx1i64 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: vload_nx1i64 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nx2i64 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx2i64 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8m2 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: vload_nx2i64 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8m2 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: vload_nx4i64 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx4i64 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8m4 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: vload_nx4i64 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8m4 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... 
+--- +name: vload_nx8i64 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx8i64 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8m8 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: vload_nx8i64 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8m8 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: vload_nx16i8_align1 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx16i8_align1 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 1) + ; RV32I-NEXT: $v8m2 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: vload_nx16i8_align1 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 1) + ; RV64I-NEXT: $v8m2 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 1) + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: vload_nx16i8_align2 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx16i8_align2 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 2) + ; RV32I-NEXT: $v8m2 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: vload_nx16i8_align2 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 2) + ; RV64I-NEXT: $v8m2 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 2) + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: vload_nx16i8_align16 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx16i8_align16 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8m2 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: vload_nx16i8_align16 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8m2 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... 
+--- +name: vload_nx16i8_align64 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx16i8_align64 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 64) + ; RV32I-NEXT: $v8m2 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: vload_nx16i8_align64 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 64) + ; RV64I-NEXT: $v8m2 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 64) + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: vload_nx4i16_align1 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx4i16_align1 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 1) + ; RV32I-NEXT: [[BITCAST:%[0-9]+]]:vrb() = G_BITCAST [[LOAD]]() + ; RV32I-NEXT: $v8 = COPY [[BITCAST]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: vload_nx4i16_align1 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 1) + ; RV64I-NEXT: [[BITCAST:%[0-9]+]]:vrb() = G_BITCAST [[LOAD]]() + ; RV64I-NEXT: $v8 = COPY [[BITCAST]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %2:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 1) + %1:_() = G_BITCAST %2() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nx4i16_align2 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx4i16_align2 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 2) + ; RV32I-NEXT: $v8 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: vload_nx4i16_align2 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 2) + ; RV64I-NEXT: $v8 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 2) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: vload_nx4i16_align4 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx4i16_align4 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 4) + ; RV32I-NEXT: $v8 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: vload_nx4i16_align4 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 4) + ; RV64I-NEXT: $v8 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 4) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nx4i16_align8 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx4i16_align8 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: vload_nx4i16_align8 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nx4i16_align16 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx4i16_align16 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 16) + ; RV32I-NEXT: $v8 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: vload_nx4i16_align16 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 16) + ; RV64I-NEXT: $v8 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 16) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: vload_nx2i32_align2 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx2i32_align2 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 2) + ; RV32I-NEXT: [[BITCAST:%[0-9]+]]:vrb() = G_BITCAST [[LOAD]]() + ; RV32I-NEXT: $v8 = COPY [[BITCAST]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: vload_nx2i32_align2 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 2) + ; RV64I-NEXT: [[BITCAST:%[0-9]+]]:vrb() = G_BITCAST [[LOAD]]() + ; RV64I-NEXT: $v8 = COPY [[BITCAST]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %2:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 2) + %1:_() = G_BITCAST %2() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nx2i32_align4 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx2i32_align4 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 4) + ; RV32I-NEXT: $v8 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: vload_nx2i32_align4 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 4) + ; RV64I-NEXT: $v8 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 4) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nx2i32_align8 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx2i32_align8 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: vload_nx2i32_align8 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: vload_nx2i32_align16 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx2i32_align16 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 16) + ; RV32I-NEXT: $v8 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: vload_nx2i32_align16 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 16) + ; RV64I-NEXT: $v8 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 16) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nx2i32_align256 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx2i32_align256 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 256) + ; RV32I-NEXT: $v8 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: vload_nx2i32_align256 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 256) + ; RV64I-NEXT: $v8 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 256) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nx2i64_align4 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx2i64_align4 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 4) + ; RV32I-NEXT: [[BITCAST:%[0-9]+]]:vrb() = G_BITCAST [[LOAD]]() + ; RV32I-NEXT: $v8m2 = COPY [[BITCAST]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: vload_nx2i64_align4 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 4) + ; RV64I-NEXT: [[BITCAST:%[0-9]+]]:vrb() = G_BITCAST [[LOAD]]() + ; RV64I-NEXT: $v8m2 = COPY [[BITCAST]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(p0) = COPY $x10 + %2:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 4) + %1:_() = G_BITCAST %2() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... 
+--- +name: vload_nx2i64_align8 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx2i64_align8 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 8) + ; RV32I-NEXT: $v8m2 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: vload_nx2i64_align8 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 8) + ; RV64I-NEXT: $v8m2 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 8) + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: vload_nx2i64_align16 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx2i64_align16 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8m2 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: vload_nx2i64_align16 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8m2 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: vload_nx2i64_align32 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx2i64_align32 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 32) + ; RV32I-NEXT: $v8m2 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: vload_nx2i64_align32 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa, align 32) + ; RV64I-NEXT: $v8m2 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa, align 32) + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: vload_nx1ptr +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx1ptr + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: vload_nx1ptr + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: vload_nx2ptr +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx2ptr + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: vload_nx2ptr + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: vload_nx8ptr +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; RV32I-LABEL: name: vload_nx8ptr + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV32I-NEXT: $v8m4 = COPY [[LOAD]]() + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: vload_nx8ptr + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:vrb() = G_LOAD [[COPY]](p0) :: (load () from %ir.pa) + ; RV64I-NEXT: $v8m4 = COPY [[LOAD]]() + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_(p0) = COPY $x10 + %1:_() = G_LOAD %0(p0) :: (load () from %ir.pa) + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/store.mir b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/store.mir new file mode 100644 index 00000000000000..0bcef4efea36c8 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/store.mir @@ -0,0 +1,1481 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+m,+v -run-pass=regbankselect \ +# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \ +# RUN: -o - | FileCheck -check-prefix=RV32I %s +# RUN: llc -mtriple=riscv64 -mattr=+m,+v -run-pass=regbankselect \ +# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \ +# RUN: -o - | FileCheck -check-prefix=RV64I %s +--- | + + define void @vstore_nx1i8(ptr %pa, %b) #0 { + store %b, ptr %pa, align 1 + ret void + } + + define void @vstore_nx2i8(ptr %pa, %b) #0 { + store %b, ptr %pa, align 2 + ret void + } + + define void @vstore_nx4i8(ptr %pa, %b) #0 { + store %b, ptr %pa, align 4 + ret void + } + + define void @vstore_nx8i8(ptr %pa, %b) #0 { + store %b, ptr %pa, align 8 + ret void + } + + define void @vstore_nx16i8(ptr %pa, %b) #0 { + store %b, ptr %pa, align 16 + ret void + } + + define void @vstore_nx32i8(ptr %pa, %b) #0 { + store %b, ptr %pa, align 32 + ret void + } + + define void @vstore_nx64i8(ptr %pa, %b) #0 { + store %b, ptr %pa, align 64 + ret void + } + + define void @vstore_nx1i16(ptr %pa, %b) #0 { + store %b, ptr %pa, align 2 + ret void + } + + define void @vstore_nx2i16(ptr %pa, %b) #0 { + store %b, ptr %pa, align 4 + ret void + } + + define void @vstore_nx4i16(ptr %pa, %b) #0 { + store %b, ptr %pa, align 8 + ret void + } + + define void @vstore_nx8i16(ptr %pa, %b) #0 { + store %b, ptr %pa, align 16 + ret void + } + + define void @vstore_nx16i16(ptr %pa, %b) #0 { + 
store %b, ptr %pa, align 32 + ret void + } + + define void @vstore_nx32i16(ptr %pa, %b) #0 { + store %b, ptr %pa, align 64 + ret void + } + + define void @vstore_nx1i32(ptr %pa, %b) #0 { + store %b, ptr %pa, align 4 + ret void + } + + define void @vstore_nx2i32(ptr %pa, %b) #0 { + store %b, ptr %pa, align 8 + ret void + } + + define void @vstore_nx4i32(ptr %pa, %b) #0 { + store %b, ptr %pa, align 16 + ret void + } + + define void @vstore_nx8i32(ptr %pa, %b) #0 { + store %b, ptr %pa, align 32 + ret void + } + + define void @vstore_nx16i32(ptr %pa, %b) #0 { + store %b, ptr %pa, align 64 + ret void + } + + define void @vstore_nx1i64(ptr %pa, %b) #0 { + store %b, ptr %pa, align 8 + ret void + } + + define void @vstore_nx2i64(ptr %pa, %b) #0 { + store %b, ptr %pa, align 16 + ret void + } + + define void @vstore_nx4i64(ptr %pa, %b) #0 { + store %b, ptr %pa, align 32 + ret void + } + + define void @vstore_nx8i64(ptr %pa, %b) #0 { + store %b, ptr %pa, align 64 + ret void + } + + define void @vstore_nx16i8_align1(ptr %pa, %b) #0 { + store %b, ptr %pa, align 1 + ret void + } + + define void @vstore_nx16i8_align2(ptr %pa, %b) #0 { + store %b, ptr %pa, align 2 + ret void + } + + define void @vstore_nx16i8_align16(ptr %pa, %b) #0 { + store %b, ptr %pa, align 16 + ret void + } + + define void @vstore_nx16i8_align64(ptr %pa, %b) #0 { + store %b, ptr %pa, align 64 + ret void + } + + define void @vstore_nx4i16_align1(ptr %pa, %b) #0 { + store %b, ptr %pa, align 1 + ret void + } + + define void @vstore_nx4i16_align2(ptr %pa, %b) #0 { + store %b, ptr %pa, align 2 + ret void + } + + define void @vstore_nx4i16_align4(ptr %pa, %b) #0 { + store %b, ptr %pa, align 4 + ret void + } + + define void @vstore_nx4i16_align8(ptr %pa, %b) #0 { + store %b, ptr %pa, align 8 + ret void + } + + define void @vstore_nx4i16_align16(ptr %pa, %b) #0 { + store %b, ptr %pa, align 16 + ret void + } + + define void @vstore_nx2i32_align2(ptr %pa, %b) #0 { + store %b, ptr %pa, align 2 + ret void + } + + define void @vstore_nx2i32_align4(ptr %pa, %b) #0 { + store %b, ptr %pa, align 4 + ret void + } + + define void @vstore_nx2i32_align8(ptr %pa, %b) #0 { + store %b, ptr %pa, align 8 + ret void + } + + define void @vstore_nx2i32_align16(ptr %pa, %b) #0 { + store %b, ptr %pa, align 16 + ret void + } + + define void @vstore_nx2i32_align256(ptr %pa, %b) #0 { + store %b, ptr %pa, align 256 + ret void + } + + define void @vstore_nx2i64_align4(ptr %pa, %b) #0 { + store %b, ptr %pa, align 4 + ret void + } + + define void @vstore_nx2i64_align8(ptr %pa, %b) #0 { + store %b, ptr %pa, align 8 + ret void + } + + define void @vstore_nx2i64_align16(ptr %pa, %b) #0 { + store %b, ptr %pa, align 16 + ret void + } + + define void @vstore_nx2i64_align32(ptr %pa, %b) #0 { + store %b, ptr %pa, align 32 + ret void + } + + define void @vstore_nx1ptr(ptr %pa, %b) #0 { + store %b, ptr %pa, align 4 + ret void + } + + define void @vstore_nx2ptr(ptr %pa, %b) #0 { + store %b, ptr %pa, align 8 + ret void + } + + define void @vstore_nx8ptr(ptr %pa, %b) #0 { + store %b, ptr %pa, align 32 + ret void + } + +... 
+--- +name: vstore_nx1i8 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; RV32I-LABEL: name: vstore_nx1i8 + ; RV32I: liveins: $v8, $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx1i8 + ; RV64I: liveins: $v8, $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx2i8 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; RV32I-LABEL: name: vstore_nx2i8 + ; RV32I: liveins: $v8, $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx2i8 + ; RV64I: liveins: $v8, $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx4i8 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; RV32I-LABEL: name: vstore_nx4i8 + ; RV32I: liveins: $v8, $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx4i8 + ; RV64I: liveins: $v8, $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx8i8 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; RV32I-LABEL: name: vstore_nx8i8 + ; RV32I: liveins: $v8, $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx8i8 + ; RV64I: liveins: $v8, $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... 
+--- +name: vstore_nx16i8 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m2 + + ; RV32I-LABEL: name: vstore_nx16i8 + ; RV32I: liveins: $x10, $v8m2 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx16i8 + ; RV64I: liveins: $x10, $v8m2 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m2 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx32i8 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m4 + + ; RV32I-LABEL: name: vstore_nx32i8 + ; RV32I: liveins: $x10, $v8m4 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx32i8 + ; RV64I: liveins: $x10, $v8m4 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m4 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx64i8 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m8 + + ; RV32I-LABEL: name: vstore_nx64i8 + ; RV32I: liveins: $x10, $v8m8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m8 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx64i8 + ; RV64I: liveins: $x10, $v8m8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m8 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx1i16 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; RV32I-LABEL: name: vstore_nx1i16 + ; RV32I: liveins: $v8, $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx1i16 + ; RV64I: liveins: $v8, $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... 
+--- +name: vstore_nx2i16 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; RV32I-LABEL: name: vstore_nx2i16 + ; RV32I: liveins: $v8, $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx2i16 + ; RV64I: liveins: $v8, $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx4i16 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; RV32I-LABEL: name: vstore_nx4i16 + ; RV32I: liveins: $v8, $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx4i16 + ; RV64I: liveins: $v8, $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx8i16 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m2 + + ; RV32I-LABEL: name: vstore_nx8i16 + ; RV32I: liveins: $x10, $v8m2 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx8i16 + ; RV64I: liveins: $x10, $v8m2 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m2 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx16i16 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m4 + + ; RV32I-LABEL: name: vstore_nx16i16 + ; RV32I: liveins: $x10, $v8m4 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx16i16 + ; RV64I: liveins: $x10, $v8m4 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m4 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... 
+--- +name: vstore_nx32i16 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m8 + + ; RV32I-LABEL: name: vstore_nx32i16 + ; RV32I: liveins: $x10, $v8m8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m8 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx32i16 + ; RV64I: liveins: $x10, $v8m8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m8 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx1i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; RV32I-LABEL: name: vstore_nx1i32 + ; RV32I: liveins: $v8, $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx1i32 + ; RV64I: liveins: $v8, $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx2i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; RV32I-LABEL: name: vstore_nx2i32 + ; RV32I: liveins: $v8, $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx2i32 + ; RV64I: liveins: $v8, $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx4i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m2 + + ; RV32I-LABEL: name: vstore_nx4i32 + ; RV32I: liveins: $x10, $v8m2 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx4i32 + ; RV64I: liveins: $x10, $v8m2 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m2 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... 
+--- +name: vstore_nx8i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m4 + + ; RV32I-LABEL: name: vstore_nx8i32 + ; RV32I: liveins: $x10, $v8m4 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx8i32 + ; RV64I: liveins: $x10, $v8m4 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m4 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx16i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m8 + + ; RV32I-LABEL: name: vstore_nx16i32 + ; RV32I: liveins: $x10, $v8m8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m8 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx16i32 + ; RV64I: liveins: $x10, $v8m8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m8 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx1i64 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; RV32I-LABEL: name: vstore_nx1i64 + ; RV32I: liveins: $v8, $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx1i64 + ; RV64I: liveins: $v8, $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx2i64 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m2 + + ; RV32I-LABEL: name: vstore_nx2i64 + ; RV32I: liveins: $x10, $v8m2 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx2i64 + ; RV64I: liveins: $x10, $v8m2 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m2 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... 
+--- +name: vstore_nx4i64 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m4 + + ; RV32I-LABEL: name: vstore_nx4i64 + ; RV32I: liveins: $x10, $v8m4 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx4i64 + ; RV64I: liveins: $x10, $v8m4 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m4 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx8i64 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m8 + + ; RV32I-LABEL: name: vstore_nx8i64 + ; RV32I: liveins: $x10, $v8m8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m8 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx8i64 + ; RV64I: liveins: $x10, $v8m8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m8 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx16i8_align1 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m2 + + ; RV32I-LABEL: name: vstore_nx16i8_align1 + ; RV32I: liveins: $x10, $v8m2 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 1) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx16i8_align1 + ; RV64I: liveins: $x10, $v8m2 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 1) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m2 + G_STORE %1(), %0(p0) :: (store () into %ir.pa, align 1) + PseudoRET + +... +--- +name: vstore_nx16i8_align2 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m2 + + ; RV32I-LABEL: name: vstore_nx16i8_align2 + ; RV32I: liveins: $x10, $v8m2 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 2) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx16i8_align2 + ; RV64I: liveins: $x10, $v8m2 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 2) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m2 + G_STORE %1(), %0(p0) :: (store () into %ir.pa, align 2) + PseudoRET + +... 
+--- +name: vstore_nx16i8_align16 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m2 + + ; RV32I-LABEL: name: vstore_nx16i8_align16 + ; RV32I: liveins: $x10, $v8m2 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx16i8_align16 + ; RV64I: liveins: $x10, $v8m2 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m2 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx16i8_align64 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m2 + + ; RV32I-LABEL: name: vstore_nx16i8_align64 + ; RV32I: liveins: $x10, $v8m2 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 64) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx16i8_align64 + ; RV64I: liveins: $x10, $v8m2 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 64) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m2 + G_STORE %1(), %0(p0) :: (store () into %ir.pa, align 64) + PseudoRET + +... +--- +name: vstore_nx4i16_align1 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; RV32I-LABEL: name: vstore_nx4i16_align1 + ; RV32I: liveins: $v8, $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[BITCAST:%[0-9]+]]:vrb() = G_BITCAST [[COPY1]]() + ; RV32I-NEXT: G_STORE [[BITCAST]](), [[COPY]](p0) :: (store () into %ir.pa, align 1) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx4i16_align1 + ; RV64I: liveins: $v8, $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[BITCAST:%[0-9]+]]:vrb() = G_BITCAST [[COPY1]]() + ; RV64I-NEXT: G_STORE [[BITCAST]](), [[COPY]](p0) :: (store () into %ir.pa, align 1) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + %2:_() = G_BITCAST %1() + G_STORE %2(), %0(p0) :: (store () into %ir.pa, align 1) + PseudoRET + +... 
+--- +name: vstore_nx4i16_align2 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; RV32I-LABEL: name: vstore_nx4i16_align2 + ; RV32I: liveins: $v8, $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 2) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx4i16_align2 + ; RV64I: liveins: $v8, $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 2) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa, align 2) + PseudoRET + +... +--- +name: vstore_nx4i16_align4 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; RV32I-LABEL: name: vstore_nx4i16_align4 + ; RV32I: liveins: $v8, $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 4) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx4i16_align4 + ; RV64I: liveins: $v8, $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 4) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa, align 4) + PseudoRET + +... +--- +name: vstore_nx4i16_align8 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; RV32I-LABEL: name: vstore_nx4i16_align8 + ; RV32I: liveins: $v8, $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx4i16_align8 + ; RV64I: liveins: $v8, $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx4i16_align16 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; RV32I-LABEL: name: vstore_nx4i16_align16 + ; RV32I: liveins: $v8, $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 16) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx4i16_align16 + ; RV64I: liveins: $v8, $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 16) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa, align 16) + PseudoRET + +... 
+--- +name: vstore_nx2i32_align2 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; RV32I-LABEL: name: vstore_nx2i32_align2 + ; RV32I: liveins: $v8, $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[BITCAST:%[0-9]+]]:vrb() = G_BITCAST [[COPY1]]() + ; RV32I-NEXT: G_STORE [[BITCAST]](), [[COPY]](p0) :: (store () into %ir.pa, align 2) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx2i32_align2 + ; RV64I: liveins: $v8, $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[BITCAST:%[0-9]+]]:vrb() = G_BITCAST [[COPY1]]() + ; RV64I-NEXT: G_STORE [[BITCAST]](), [[COPY]](p0) :: (store () into %ir.pa, align 2) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + %2:_() = G_BITCAST %1() + G_STORE %2(), %0(p0) :: (store () into %ir.pa, align 2) + PseudoRET + +... +--- +name: vstore_nx2i32_align4 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; RV32I-LABEL: name: vstore_nx2i32_align4 + ; RV32I: liveins: $v8, $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 4) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx2i32_align4 + ; RV64I: liveins: $v8, $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 4) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa, align 4) + PseudoRET + +... +--- +name: vstore_nx2i32_align8 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; RV32I-LABEL: name: vstore_nx2i32_align8 + ; RV32I: liveins: $v8, $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx2i32_align8 + ; RV64I: liveins: $v8, $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... 
+--- +name: vstore_nx2i32_align16 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; RV32I-LABEL: name: vstore_nx2i32_align16 + ; RV32I: liveins: $v8, $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 16) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx2i32_align16 + ; RV64I: liveins: $v8, $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 16) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa, align 16) + PseudoRET + +... +--- +name: vstore_nx2i32_align256 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; RV32I-LABEL: name: vstore_nx2i32_align256 + ; RV32I: liveins: $v8, $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 256) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx2i32_align256 + ; RV64I: liveins: $v8, $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 256) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa, align 256) + PseudoRET + +... +--- +name: vstore_nx2i64_align4 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m2 + + ; RV32I-LABEL: name: vstore_nx2i64_align4 + ; RV32I: liveins: $x10, $v8m2 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV32I-NEXT: [[BITCAST:%[0-9]+]]:vrb() = G_BITCAST [[COPY1]]() + ; RV32I-NEXT: G_STORE [[BITCAST]](), [[COPY]](p0) :: (store () into %ir.pa, align 4) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx2i64_align4 + ; RV64I: liveins: $x10, $v8m2 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV64I-NEXT: [[BITCAST:%[0-9]+]]:vrb() = G_BITCAST [[COPY1]]() + ; RV64I-NEXT: G_STORE [[BITCAST]](), [[COPY]](p0) :: (store () into %ir.pa, align 4) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m2 + %2:_() = G_BITCAST %1() + G_STORE %2(), %0(p0) :: (store () into %ir.pa, align 4) + PseudoRET + +... 
+--- +name: vstore_nx2i64_align8 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m2 + + ; RV32I-LABEL: name: vstore_nx2i64_align8 + ; RV32I: liveins: $x10, $v8m2 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 8) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx2i64_align8 + ; RV64I: liveins: $x10, $v8m2 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 8) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m2 + G_STORE %1(), %0(p0) :: (store () into %ir.pa, align 8) + PseudoRET + +... +--- +name: vstore_nx2i64_align16 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m2 + + ; RV32I-LABEL: name: vstore_nx2i64_align16 + ; RV32I: liveins: $x10, $v8m2 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx2i64_align16 + ; RV64I: liveins: $x10, $v8m2 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m2 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx2i64_align32 +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m2 + + ; RV32I-LABEL: name: vstore_nx2i64_align32 + ; RV32I: liveins: $x10, $v8m2 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 32) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx2i64_align32 + ; RV64I: liveins: $x10, $v8m2 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa, align 32) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m2 + G_STORE %1(), %0(p0) :: (store () into %ir.pa, align 32) + PseudoRET + +... +--- +name: vstore_nx1ptr +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; RV32I-LABEL: name: vstore_nx1ptr + ; RV32I: liveins: $v8, $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx1ptr + ; RV64I: liveins: $v8, $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... 
+--- +name: vstore_nx2ptr +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $v8, $x10 + + ; RV32I-LABEL: name: vstore_nx2ptr + ; RV32I: liveins: $v8, $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx2ptr + ; RV64I: liveins: $v8, $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... +--- +name: vstore_nx8ptr +legalized: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $x10, $v8m4 + + ; RV32I-LABEL: name: vstore_nx8ptr + ; RV32I: liveins: $x10, $v8m4 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV32I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV32I-NEXT: PseudoRET + ; + ; RV64I-LABEL: name: vstore_nx8ptr + ; RV64I: liveins: $x10, $v8m4 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV64I-NEXT: G_STORE [[COPY1]](), [[COPY]](p0) :: (store () into %ir.pa) + ; RV64I-NEXT: PseudoRET + %0:_(p0) = COPY $x10 + %1:_() = COPY $v8m4 + G_STORE %1(), %0(p0) :: (store () into %ir.pa) + PseudoRET + +... From d5d1cf05499ba9a0adc192c723f622fc13ef9c0b Mon Sep 17 00:00:00 2001 From: smanna12 Date: Wed, 31 Jul 2024 16:25:52 -0700 Subject: [PATCH 041/114] [NFC][Clang] Clean up VisitUnaryPlus by removing unused FP feature check (#101412) This commit removes an unnecessary call to `E->hasStoredFPFeatures()` within the `VisitUnaryPlus` function. The method's return value was not being used, leading to a redundant operation. The removal of this line streamlines the function and eliminates an unneeded check for stored floating-point features. --- clang/lib/CodeGen/CGExprComplex.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index 4d45f6d64c1cd9..828a09856099ac 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -649,7 +649,6 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, Expr *Op, ComplexPairTy ComplexExprEmitter::VisitUnaryPlus(const UnaryOperator *E, QualType PromotionType) { - E->hasStoredFPFeatures(); QualType promotionTy = PromotionType.isNull() ? getPromotionType(E->getStoredFPFeaturesOrDefault(), From 65d3c220a184b11e41d6fc5853d865f3eb92b02e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 31 Jul 2024 16:26:08 -0700 Subject: [PATCH 042/114] [RISCV] Merge more rv32/rv64 intrinsic tests that have the same content. 
NFC --- llvm/test/CodeGen/RISCV/rvv/vsadd-rv64.ll | 2801 ---------------- .../RISCV/rvv/{vsadd-rv32.ll => vsadd.ll} | 1038 +++--- llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll | 2849 ----------------- .../RISCV/rvv/{vsaddu-rv64.ll => vsaddu.ll} | 990 +++--- llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll | 2166 ------------- .../RISCV/rvv/{vsmul-rv64.ll => vsmul.ll} | 758 +++-- llvm/test/CodeGen/RISCV/rvv/vssub-rv64.ll | 2075 ------------ .../RISCV/rvv/{vssub-rv32.ll => vssub.ll} | 862 ++--- llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll | 2123 ------------ .../RISCV/rvv/{vssubu-rv64.ll => vssubu.ll} | 814 ++--- 10 files changed, 2434 insertions(+), 14042 deletions(-) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vsadd-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vsadd-rv32.ll => vsadd.ll} (83%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll rename llvm/test/CodeGen/RISCV/rvv/{vsaddu-rv64.ll => vsaddu.ll} (83%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll rename llvm/test/CodeGen/RISCV/rvv/{vsmul-rv64.ll => vsmul.ll} (79%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vssub-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vssub-rv32.ll => vssub.ll} (80%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll rename llvm/test/CodeGen/RISCV/rvv/{vssubu-rv64.ll => vssubu.ll} (80%) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd-rv64.ll deleted file mode 100644 index ca56ad2122c1fe..00000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vsadd-rv64.ll +++ /dev/null @@ -1,2801 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ -; RUN: < %s | FileCheck %s - -declare @llvm.riscv.vsadd.nxv1i8.nxv1i8( - , - , - , - i64); - -define @intrinsic_vsadd_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vv_nxv1i8_nxv1i8_nxv1i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vsadd.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv1i8.nxv1i8( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv1i8.nxv1i8( - , - , - , - , - i64, - i64); - -define @intrinsic_vsadd_mask_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv1i8_nxv1i8_nxv1i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vsadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv1i8.nxv1i8( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv2i8.nxv2i8( - , - , - , - i64); - -define @intrinsic_vsadd_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vv_nxv2i8_nxv2i8_nxv2i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vsadd.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv2i8.nxv2i8( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv2i8.nxv2i8( - , - , - , - , - i64, - i64); - -define @intrinsic_vsadd_mask_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv2i8_nxv2i8_nxv2i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vsadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv2i8.nxv2i8( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret 
%a -} - -declare @llvm.riscv.vsadd.nxv4i8.nxv4i8( - , - , - , - i64); - -define @intrinsic_vsadd_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vv_nxv4i8_nxv4i8_nxv4i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vsadd.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv4i8.nxv4i8( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv4i8.nxv4i8( - , - , - , - , - i64, - i64); - -define @intrinsic_vsadd_mask_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv4i8_nxv4i8_nxv4i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vsadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv4i8.nxv4i8( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv8i8.nxv8i8( - , - , - , - i64); - -define @intrinsic_vsadd_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vv_nxv8i8_nxv8i8_nxv8i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vsadd.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv8i8.nxv8i8( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv8i8.nxv8i8( - , - , - , - , - i64, - i64); - -define @intrinsic_vsadd_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv8i8_nxv8i8_nxv8i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vsadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv8i8.nxv8i8( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv16i8.nxv16i8( - , - , - , - i64); - -define @intrinsic_vsadd_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vv_nxv16i8_nxv16i8_nxv16i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vsadd.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv16i8.nxv16i8( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv16i8.nxv16i8( - , - , - , - , - i64, - i64); - -define @intrinsic_vsadd_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv16i8_nxv16i8_nxv16i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vsadd.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv16i8.nxv16i8( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv32i8.nxv32i8( - , - , - , - i64); - -define @intrinsic_vsadd_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vv_nxv32i8_nxv32i8_nxv32i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vsadd.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv32i8.nxv32i8( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv32i8.nxv32i8( - , - , - , - , - i64, - i64); - -define @intrinsic_vsadd_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv32i8_nxv32i8_nxv32i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vsadd.vv v8, 
v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv32i8.nxv32i8( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv64i8.nxv64i8( - , - , - , - i64); - -define @intrinsic_vsadd_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vv_nxv64i8_nxv64i8_nxv64i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vsadd.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv64i8.nxv64i8( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv64i8.nxv64i8( - , - , - , - , - i64, - i64); - -define @intrinsic_vsadd_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv64i8_nxv64i8_nxv64i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8r.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu -; CHECK-NEXT: vsadd.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv64i8.nxv64i8( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv1i16.nxv1i16( - , - , - , - i64); - -define @intrinsic_vsadd_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vv_nxv1i16_nxv1i16_nxv1i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vsadd.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv1i16.nxv1i16( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv1i16.nxv1i16( - , - , - , - , - i64, - i64); - -define @intrinsic_vsadd_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv1i16_nxv1i16_nxv1i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vsadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv1i16.nxv1i16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv2i16.nxv2i16( - , - , - , - i64); - -define @intrinsic_vsadd_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vv_nxv2i16_nxv2i16_nxv2i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vsadd.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv2i16.nxv2i16( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv2i16.nxv2i16( - , - , - , - , - i64, - i64); - -define @intrinsic_vsadd_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv2i16_nxv2i16_nxv2i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vsadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv2i16.nxv2i16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv4i16.nxv4i16( - , - , - , - i64); - -define @intrinsic_vsadd_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vv_nxv4i16_nxv4i16_nxv4i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vsadd.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv4i16.nxv4i16( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv4i16.nxv4i16( - , - , - , - , - i64, - i64); - -define 
@intrinsic_vsadd_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv4i16_nxv4i16_nxv4i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vsadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv4i16.nxv4i16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv8i16.nxv8i16( - , - , - , - i64); - -define @intrinsic_vsadd_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vv_nxv8i16_nxv8i16_nxv8i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vsadd.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv8i16.nxv8i16( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv8i16.nxv8i16( - , - , - , - , - i64, - i64); - -define @intrinsic_vsadd_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv8i16_nxv8i16_nxv8i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vsadd.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv8i16.nxv8i16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv16i16.nxv16i16( - , - , - , - i64); - -define @intrinsic_vsadd_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vv_nxv16i16_nxv16i16_nxv16i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vsadd.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv16i16.nxv16i16( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv16i16.nxv16i16( - , - , - , - , - i64, - i64); - -define @intrinsic_vsadd_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv16i16_nxv16i16_nxv16i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vsadd.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv16i16.nxv16i16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv32i16.nxv32i16( - , - , - , - i64); - -define @intrinsic_vsadd_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vv_nxv32i16_nxv32i16_nxv32i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vsadd.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv32i16.nxv32i16( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv32i16.nxv32i16( - , - , - , - , - i64, - i64); - -define @intrinsic_vsadd_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv32i16_nxv32i16_nxv32i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vsadd.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv32i16.nxv32i16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv1i32.nxv1i32( - , - , - , - i64); - -define @intrinsic_vsadd_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vv_nxv1i32_nxv1i32_nxv1i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, 
e32, mf2, ta, ma -; CHECK-NEXT: vsadd.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv1i32.nxv1i32( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv1i32.nxv1i32( - , - , - , - , - i64, - i64); - -define @intrinsic_vsadd_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv1i32_nxv1i32_nxv1i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vsadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv1i32.nxv1i32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv2i32.nxv2i32( - , - , - , - i64); - -define @intrinsic_vsadd_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vv_nxv2i32_nxv2i32_nxv2i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vsadd.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv2i32.nxv2i32( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv2i32.nxv2i32( - , - , - , - , - i64, - i64); - -define @intrinsic_vsadd_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv2i32_nxv2i32_nxv2i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vsadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv2i32.nxv2i32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv4i32.nxv4i32( - , - , - , - i64); - -define @intrinsic_vsadd_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vv_nxv4i32_nxv4i32_nxv4i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vsadd.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv4i32.nxv4i32( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv4i32.nxv4i32( - , - , - , - , - i64, - i64); - -define @intrinsic_vsadd_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv4i32_nxv4i32_nxv4i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vsadd.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv4i32.nxv4i32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv8i32.nxv8i32( - , - , - , - i64); - -define @intrinsic_vsadd_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vv_nxv8i32_nxv8i32_nxv8i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vsadd.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv8i32.nxv8i32( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv8i32.nxv8i32( - , - , - , - , - i64, - i64); - -define @intrinsic_vsadd_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv8i32_nxv8i32_nxv8i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vsadd.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv8i32.nxv8i32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv16i32.nxv16i32( - , - , - , - i64); - -define 
@intrinsic_vsadd_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vv_nxv16i32_nxv16i32_nxv16i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vsadd.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv16i32.nxv16i32( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv16i32.nxv16i32( - , - , - , - , - i64, - i64); - -define @intrinsic_vsadd_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv16i32_nxv16i32_nxv16i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vsadd.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv16i32.nxv16i32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv1i64.nxv1i64( - , - , - , - i64); - -define @intrinsic_vsadd_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vv_nxv1i64_nxv1i64_nxv1i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vsadd.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv1i64.nxv1i64( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv1i64.nxv1i64( - , - , - , - , - i64, - i64); - -define @intrinsic_vsadd_mask_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv1i64_nxv1i64_nxv1i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vsadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv1i64.nxv1i64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv2i64.nxv2i64( - , - , - , - i64); - -define @intrinsic_vsadd_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vv_nxv2i64_nxv2i64_nxv2i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vsadd.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv2i64.nxv2i64( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv2i64.nxv2i64( - , - , - , - , - i64, - i64); - -define @intrinsic_vsadd_mask_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv2i64_nxv2i64_nxv2i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vsadd.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv2i64.nxv2i64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv4i64.nxv4i64( - , - , - , - i64); - -define @intrinsic_vsadd_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vv_nxv4i64_nxv4i64_nxv4i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vsadd.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv4i64.nxv4i64( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv4i64.nxv4i64( - , - , - , - , - i64, - i64); - -define @intrinsic_vsadd_mask_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv4i64_nxv4i64_nxv4i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; 
CHECK-NEXT: vsadd.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv4i64.nxv4i64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv8i64.nxv8i64( - , - , - , - i64); - -define @intrinsic_vsadd_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vv_nxv8i64_nxv8i64_nxv8i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vsadd.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv8i64.nxv8i64( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv8i64.nxv8i64( - , - , - , - , - i64, - i64); - -define @intrinsic_vsadd_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv8i64_nxv8i64_nxv8i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vsadd.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv8i64.nxv8i64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv1i8.i8( - , - , - i8, - i64); - -define @intrinsic_vsadd_vx_nxv1i8_nxv1i8_i8( %0, i8 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv1i8_nxv1i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vsadd.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv1i8.i8( - undef, - %0, - i8 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv1i8.i8( - , - , - i8, - , - i64, - i64); - -define @intrinsic_vsadd_mask_vx_nxv1i8_nxv1i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv1i8_nxv1i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu -; CHECK-NEXT: vsadd.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv1i8.i8( - %0, - %1, - i8 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv2i8.i8( - , - , - i8, - i64); - -define @intrinsic_vsadd_vx_nxv2i8_nxv2i8_i8( %0, i8 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv2i8_nxv2i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vsadd.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv2i8.i8( - undef, - %0, - i8 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv2i8.i8( - , - , - i8, - , - i64, - i64); - -define @intrinsic_vsadd_mask_vx_nxv2i8_nxv2i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv2i8_nxv2i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu -; CHECK-NEXT: vsadd.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv2i8.i8( - %0, - %1, - i8 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv4i8.i8( - , - , - i8, - i64); - -define @intrinsic_vsadd_vx_nxv4i8_nxv4i8_i8( %0, i8 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv4i8_nxv4i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vsadd.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv4i8.i8( - undef, - %0, - i8 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv4i8.i8( - , - , - i8, - , - i64, - i64); - -define @intrinsic_vsadd_mask_vx_nxv4i8_nxv4i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vsadd_mask_vx_nxv4i8_nxv4i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu -; CHECK-NEXT: vsadd.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv4i8.i8( - %0, - %1, - i8 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv8i8.i8( - , - , - i8, - i64); - -define @intrinsic_vsadd_vx_nxv8i8_nxv8i8_i8( %0, i8 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv8i8_nxv8i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vsadd.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv8i8.i8( - undef, - %0, - i8 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv8i8.i8( - , - , - i8, - , - i64, - i64); - -define @intrinsic_vsadd_mask_vx_nxv8i8_nxv8i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv8i8_nxv8i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu -; CHECK-NEXT: vsadd.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv8i8.i8( - %0, - %1, - i8 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv16i8.i8( - , - , - i8, - i64); - -define @intrinsic_vsadd_vx_nxv16i8_nxv16i8_i8( %0, i8 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv16i8_nxv16i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vsadd.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv16i8.i8( - undef, - %0, - i8 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv16i8.i8( - , - , - i8, - , - i64, - i64); - -define @intrinsic_vsadd_mask_vx_nxv16i8_nxv16i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv16i8_nxv16i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu -; CHECK-NEXT: vsadd.vx v8, v10, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv16i8.i8( - %0, - %1, - i8 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv32i8.i8( - , - , - i8, - i64); - -define @intrinsic_vsadd_vx_nxv32i8_nxv32i8_i8( %0, i8 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv32i8_nxv32i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vsadd.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv32i8.i8( - undef, - %0, - i8 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv32i8.i8( - , - , - i8, - , - i64, - i64); - -define @intrinsic_vsadd_mask_vx_nxv32i8_nxv32i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv32i8_nxv32i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu -; CHECK-NEXT: vsadd.vx v8, v12, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv32i8.i8( - %0, - %1, - i8 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv64i8.i8( - , - , - i8, - i64); - -define @intrinsic_vsadd_vx_nxv64i8_nxv64i8_i8( %0, i8 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv64i8_nxv64i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vsadd.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv64i8.i8( - undef, - %0, - i8 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv64i8.i8( - , - , - i8, - , - i64, - i64); - -define 
@intrinsic_vsadd_mask_vx_nxv64i8_nxv64i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv64i8_nxv64i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu -; CHECK-NEXT: vsadd.vx v8, v16, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv64i8.i8( - %0, - %1, - i8 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv1i16.i16( - , - , - i16, - i64); - -define @intrinsic_vsadd_vx_nxv1i16_nxv1i16_i16( %0, i16 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv1i16_nxv1i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vsadd.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv1i16.i16( - undef, - %0, - i16 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv1i16.i16( - , - , - i16, - , - i64, - i64); - -define @intrinsic_vsadd_mask_vx_nxv1i16_nxv1i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv1i16_nxv1i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vsadd.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv1i16.i16( - %0, - %1, - i16 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv2i16.i16( - , - , - i16, - i64); - -define @intrinsic_vsadd_vx_nxv2i16_nxv2i16_i16( %0, i16 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv2i16_nxv2i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vsadd.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv2i16.i16( - undef, - %0, - i16 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv2i16.i16( - , - , - i16, - , - i64, - i64); - -define @intrinsic_vsadd_mask_vx_nxv2i16_nxv2i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv2i16_nxv2i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vsadd.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv2i16.i16( - %0, - %1, - i16 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv4i16.i16( - , - , - i16, - i64); - -define @intrinsic_vsadd_vx_nxv4i16_nxv4i16_i16( %0, i16 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv4i16_nxv4i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vsadd.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv4i16.i16( - undef, - %0, - i16 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv4i16.i16( - , - , - i16, - , - i64, - i64); - -define @intrinsic_vsadd_mask_vx_nxv4i16_nxv4i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv4i16_nxv4i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vsadd.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv4i16.i16( - %0, - %1, - i16 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv8i16.i16( - , - , - i16, - i64); - -define @intrinsic_vsadd_vx_nxv8i16_nxv8i16_i16( %0, i16 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv8i16_nxv8i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vsadd.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv8i16.i16( - 
undef, - %0, - i16 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv8i16.i16( - , - , - i16, - , - i64, - i64); - -define @intrinsic_vsadd_mask_vx_nxv8i16_nxv8i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv8i16_nxv8i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vsadd.vx v8, v10, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv8i16.i16( - %0, - %1, - i16 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv16i16.i16( - , - , - i16, - i64); - -define @intrinsic_vsadd_vx_nxv16i16_nxv16i16_i16( %0, i16 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv16i16_nxv16i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vsadd.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv16i16.i16( - undef, - %0, - i16 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv16i16.i16( - , - , - i16, - , - i64, - i64); - -define @intrinsic_vsadd_mask_vx_nxv16i16_nxv16i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv16i16_nxv16i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vsadd.vx v8, v12, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv16i16.i16( - %0, - %1, - i16 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv32i16.i16( - , - , - i16, - i64); - -define @intrinsic_vsadd_vx_nxv32i16_nxv32i16_i16( %0, i16 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv32i16_nxv32i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vsadd.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv32i16.i16( - undef, - %0, - i16 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv32i16.i16( - , - , - i16, - , - i64, - i64); - -define @intrinsic_vsadd_mask_vx_nxv32i16_nxv32i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv32i16_nxv32i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vsadd.vx v8, v16, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv32i16.i16( - %0, - %1, - i16 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv1i32.i32( - , - , - i32, - i64); - -define @intrinsic_vsadd_vx_nxv1i32_nxv1i32_i32( %0, i32 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv1i32_nxv1i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vsadd.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv1i32.i32( - undef, - %0, - i32 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv1i32.i32( - , - , - i32, - , - i64, - i64); - -define @intrinsic_vsadd_mask_vx_nxv1i32_nxv1i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv1i32_nxv1i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vsadd.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv1i32.i32( - %0, - %1, - i32 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv2i32.i32( - , - , - i32, - i64); - -define @intrinsic_vsadd_vx_nxv2i32_nxv2i32_i32( %0, i32 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv2i32_nxv2i32_i32: -; CHECK: # 
%bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vsadd.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv2i32.i32( - undef, - %0, - i32 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv2i32.i32( - , - , - i32, - , - i64, - i64); - -define @intrinsic_vsadd_mask_vx_nxv2i32_nxv2i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv2i32_nxv2i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vsadd.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv2i32.i32( - %0, - %1, - i32 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv4i32.i32( - , - , - i32, - i64); - -define @intrinsic_vsadd_vx_nxv4i32_nxv4i32_i32( %0, i32 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv4i32_nxv4i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vsadd.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv4i32.i32( - undef, - %0, - i32 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv4i32.i32( - , - , - i32, - , - i64, - i64); - -define @intrinsic_vsadd_mask_vx_nxv4i32_nxv4i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv4i32_nxv4i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vsadd.vx v8, v10, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv4i32.i32( - %0, - %1, - i32 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv8i32.i32( - , - , - i32, - i64); - -define @intrinsic_vsadd_vx_nxv8i32_nxv8i32_i32( %0, i32 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv8i32_nxv8i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vsadd.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv8i32.i32( - undef, - %0, - i32 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv8i32.i32( - , - , - i32, - , - i64, - i64); - -define @intrinsic_vsadd_mask_vx_nxv8i32_nxv8i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv8i32_nxv8i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vsadd.vx v8, v12, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv8i32.i32( - %0, - %1, - i32 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv16i32.i32( - , - , - i32, - i64); - -define @intrinsic_vsadd_vx_nxv16i32_nxv16i32_i32( %0, i32 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv16i32_nxv16i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vsadd.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv16i32.i32( - undef, - %0, - i32 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv16i32.i32( - , - , - i32, - , - i64, - i64); - -define @intrinsic_vsadd_mask_vx_nxv16i32_nxv16i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv16i32_nxv16i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vsadd.vx v8, v16, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv16i32.i32( - %0, - %1, - i32 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv1i64.i64( - , - , - i64, - 
i64); - -define @intrinsic_vsadd_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv1i64_nxv1i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vsadd.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv1i64.i64( - undef, - %0, - i64 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv1i64.i64( - , - , - i64, - , - i64, - i64); - -define @intrinsic_vsadd_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv1i64_nxv1i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vsadd.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv1i64.i64( - %0, - %1, - i64 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv2i64.i64( - , - , - i64, - i64); - -define @intrinsic_vsadd_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv2i64_nxv2i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; CHECK-NEXT: vsadd.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv2i64.i64( - undef, - %0, - i64 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv2i64.i64( - , - , - i64, - , - i64, - i64); - -define @intrinsic_vsadd_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv2i64_nxv2i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vsadd.vx v8, v10, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv2i64.i64( - %0, - %1, - i64 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv4i64.i64( - , - , - i64, - i64); - -define @intrinsic_vsadd_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv4i64_nxv4i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; CHECK-NEXT: vsadd.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv4i64.i64( - undef, - %0, - i64 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv4i64.i64( - , - , - i64, - , - i64, - i64); - -define @intrinsic_vsadd_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv4i64_nxv4i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vsadd.vx v8, v12, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv4i64.i64( - %0, - %1, - i64 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vsadd.nxv8i64.i64( - , - , - i64, - i64); - -define @intrinsic_vsadd_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv8i64_nxv8i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vsadd.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv8i64.i64( - undef, - %0, - i64 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vsadd.mask.nxv8i64.i64( - , - , - i64, - , - i64, - i64); - -define @intrinsic_vsadd_mask_vx_nxv8i64_nxv8i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv8i64_nxv8i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vsadd.vx v8, v16, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vsadd.mask.nxv8i64.i64( - %0, - %1, - i64 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -define @intrinsic_vsadd_vi_nxv1i8_nxv1i8_i8( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vi_nxv1i8_nxv1i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vsadd.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv1i8.i8( - undef, - %0, - i8 9, - i64 %1) - - ret %a -} - -define @intrinsic_vsadd_mask_vi_nxv1i8_nxv1i8_i8( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv1i8_nxv1i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vsadd.vi v8, v9, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv1i8.i8( - %0, - %1, - i8 9, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vsadd_vi_nxv2i8_nxv2i8_i8( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vi_nxv2i8_nxv2i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vsadd.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv2i8.i8( - undef, - %0, - i8 9, - i64 %1) - - ret %a -} - -define @intrinsic_vsadd_mask_vi_nxv2i8_nxv2i8_i8( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv2i8_nxv2i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vsadd.vi v8, v9, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv2i8.i8( - %0, - %1, - i8 9, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vsadd_vi_nxv4i8_nxv4i8_i8( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vi_nxv4i8_nxv4i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vsadd.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv4i8.i8( - undef, - %0, - i8 9, - i64 %1) - - ret %a -} - -define @intrinsic_vsadd_mask_vi_nxv4i8_nxv4i8_i8( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv4i8_nxv4i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vsadd.vi v8, v9, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv4i8.i8( - %0, - %1, - i8 9, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vsadd_vi_nxv8i8_nxv8i8_i8( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vi_nxv8i8_nxv8i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vsadd.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv8i8.i8( - undef, - %0, - i8 9, - i64 %1) - - ret %a -} - -define @intrinsic_vsadd_mask_vi_nxv8i8_nxv8i8_i8( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv8i8_nxv8i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vsadd.vi v8, v9, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv8i8.i8( - %0, - %1, - i8 9, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vsadd_vi_nxv16i8_nxv16i8_i8( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vi_nxv16i8_nxv16i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vsadd.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv16i8.i8( - undef, - %0, - i8 9, - i64 %1) - - ret %a -} - -define @intrinsic_vsadd_mask_vi_nxv16i8_nxv16i8_i8( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv16i8_nxv16i8_i8: -; 
CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vsadd.vi v8, v10, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv16i8.i8( - %0, - %1, - i8 9, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vsadd_vi_nxv32i8_nxv32i8_i8( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vi_nxv32i8_nxv32i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vsadd.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv32i8.i8( - undef, - %0, - i8 9, - i64 %1) - - ret %a -} - -define @intrinsic_vsadd_mask_vi_nxv32i8_nxv32i8_i8( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv32i8_nxv32i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vsadd.vi v8, v12, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv32i8.i8( - %0, - %1, - i8 9, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vsadd_vi_nxv64i8_nxv64i8_i8( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vi_nxv64i8_nxv64i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vsadd.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv64i8.i8( - undef, - %0, - i8 9, - i64 %1) - - ret %a -} - -define @intrinsic_vsadd_mask_vi_nxv64i8_nxv64i8_i8( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv64i8_nxv64i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vsadd.vi v8, v16, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv64i8.i8( - %0, - %1, - i8 9, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vsadd_vi_nxv1i16_nxv1i16_i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vi_nxv1i16_nxv1i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vsadd.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv1i16.i16( - undef, - %0, - i16 9, - i64 %1) - - ret %a -} - -define @intrinsic_vsadd_mask_vi_nxv1i16_nxv1i16_i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv1i16_nxv1i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vsadd.vi v8, v9, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv1i16.i16( - %0, - %1, - i16 9, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vsadd_vi_nxv2i16_nxv2i16_i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vi_nxv2i16_nxv2i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vsadd.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv2i16.i16( - undef, - %0, - i16 9, - i64 %1) - - ret %a -} - -define @intrinsic_vsadd_mask_vi_nxv2i16_nxv2i16_i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv2i16_nxv2i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vsadd.vi v8, v9, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv2i16.i16( - %0, - %1, - i16 9, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vsadd_vi_nxv4i16_nxv4i16_i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vi_nxv4i16_nxv4i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vsadd.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vsadd.nxv4i16.i16( - undef, - %0, - i16 9, - i64 %1) - - ret %a -} - -define @intrinsic_vsadd_mask_vi_nxv4i16_nxv4i16_i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv4i16_nxv4i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vsadd.vi v8, v9, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv4i16.i16( - %0, - %1, - i16 9, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vsadd_vi_nxv8i16_nxv8i16_i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vi_nxv8i16_nxv8i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vsadd.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv8i16.i16( - undef, - %0, - i16 9, - i64 %1) - - ret %a -} - -define @intrinsic_vsadd_mask_vi_nxv8i16_nxv8i16_i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv8i16_nxv8i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vsadd.vi v8, v10, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv8i16.i16( - %0, - %1, - i16 9, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vsadd_vi_nxv16i16_nxv16i16_i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vi_nxv16i16_nxv16i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vsadd.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv16i16.i16( - undef, - %0, - i16 9, - i64 %1) - - ret %a -} - -define @intrinsic_vsadd_mask_vi_nxv16i16_nxv16i16_i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv16i16_nxv16i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vsadd.vi v8, v12, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv16i16.i16( - %0, - %1, - i16 9, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vsadd_vi_nxv32i16_nxv32i16_i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vi_nxv32i16_nxv32i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vsadd.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv32i16.i16( - undef, - %0, - i16 9, - i64 %1) - - ret %a -} - -define @intrinsic_vsadd_mask_vi_nxv32i16_nxv32i16_i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv32i16_nxv32i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vsadd.vi v8, v16, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv32i16.i16( - %0, - %1, - i16 9, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vsadd_vi_nxv1i32_nxv1i32_i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vi_nxv1i32_nxv1i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vsadd.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv1i32.i32( - undef, - %0, - i32 9, - i64 %1) - - ret %a -} - -define @intrinsic_vsadd_mask_vi_nxv1i32_nxv1i32_i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv1i32_nxv1i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vsadd.vi v8, v9, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv1i32.i32( - %0, - %1, - i32 9, - %2, - i64 %3, i64 1) - - ret %a -} - -define 
@intrinsic_vsadd_vi_nxv2i32_nxv2i32_i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vi_nxv2i32_nxv2i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vsadd.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv2i32.i32( - undef, - %0, - i32 9, - i64 %1) - - ret %a -} - -define @intrinsic_vsadd_mask_vi_nxv2i32_nxv2i32_i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv2i32_nxv2i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vsadd.vi v8, v9, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv2i32.i32( - %0, - %1, - i32 9, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vsadd_vi_nxv4i32_nxv4i32_i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vi_nxv4i32_nxv4i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vsadd.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv4i32.i32( - undef, - %0, - i32 9, - i64 %1) - - ret %a -} - -define @intrinsic_vsadd_mask_vi_nxv4i32_nxv4i32_i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv4i32_nxv4i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vsadd.vi v8, v10, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv4i32.i32( - %0, - %1, - i32 9, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vsadd_vi_nxv8i32_nxv8i32_i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vi_nxv8i32_nxv8i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vsadd.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv8i32.i32( - undef, - %0, - i32 9, - i64 %1) - - ret %a -} - -define @intrinsic_vsadd_mask_vi_nxv8i32_nxv8i32_i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv8i32_nxv8i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vsadd.vi v8, v12, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv8i32.i32( - %0, - %1, - i32 9, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vsadd_vi_nxv16i32_nxv16i32_i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vi_nxv16i32_nxv16i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vsadd.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv16i32.i32( - undef, - %0, - i32 9, - i64 %1) - - ret %a -} - -define @intrinsic_vsadd_mask_vi_nxv16i32_nxv16i32_i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv16i32_nxv16i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vsadd.vi v8, v16, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv16i32.i32( - %0, - %1, - i32 9, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vsadd_vi_nxv1i64_nxv1i64_i64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vi_nxv1i64_nxv1i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vsadd.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv1i64.i64( - undef, - %0, - i64 9, - i64 %1) - - ret %a -} - -define @intrinsic_vsadd_mask_vi_nxv1i64_nxv1i64_i64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv1i64_nxv1i64_i64: -; CHECK: # 
%bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vsadd.vi v8, v9, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv1i64.i64( - %0, - %1, - i64 9, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vsadd_vi_nxv2i64_nxv2i64_i64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vi_nxv2i64_nxv2i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vsadd.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv2i64.i64( - undef, - %0, - i64 9, - i64 %1) - - ret %a -} - -define @intrinsic_vsadd_mask_vi_nxv2i64_nxv2i64_i64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv2i64_nxv2i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vsadd.vi v8, v10, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv2i64.i64( - %0, - %1, - i64 9, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vsadd_vi_nxv4i64_nxv4i64_i64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vi_nxv4i64_nxv4i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vsadd.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv4i64.i64( - undef, - %0, - i64 9, - i64 %1) - - ret %a -} - -define @intrinsic_vsadd_mask_vi_nxv4i64_nxv4i64_i64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv4i64_nxv4i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vsadd.vi v8, v12, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv4i64.i64( - %0, - %1, - i64 9, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vsadd_vi_nxv8i64_nxv8i64_i64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vi_nxv8i64_nxv8i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vsadd.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.nxv8i64.i64( - undef, - %0, - i64 9, - i64 %1) - - ret %a -} - -define @intrinsic_vsadd_mask_vi_nxv8i64_nxv8i64_i64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv8i64_nxv8i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vsadd.vi v8, v16, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsadd.mask.nxv8i64.i64( - %0, - %1, - i64 9, - %2, - i64 %3, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd.ll similarity index 83% rename from llvm/test/CodeGen/RISCV/rvv/vsadd-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vsadd.ll index c2586e4bc2d84b..a108d98c1731b3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsadd-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsadd.ll @@ -1,14 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.riscv.vsadd.nxv1i8.nxv1i8( , , , - i32); + iXLen) -define @intrinsic_vsadd_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, i32 %2) nounwind { +define @intrinsic_vsadd_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, iXLen %2) nounwind { ; 
CHECK-LABEL: intrinsic_vsadd_vv_nxv1i8_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma @@ -19,7 +21,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -29,10 +31,10 @@ declare @llvm.riscv.vsadd.mask.nxv1i8.nxv1i8( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv1i8_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -44,7 +46,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -53,9 +55,9 @@ declare @llvm.riscv.vsadd.nxv2i8.nxv2i8( , , , - i32); + iXLen) -define @intrinsic_vsadd_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, i32 %2) nounwind { +define @intrinsic_vsadd_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vv_nxv2i8_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma @@ -66,7 +68,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -76,10 +78,10 @@ declare @llvm.riscv.vsadd.mask.nxv2i8.nxv2i8( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv2i8_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -91,7 +93,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -100,9 +102,9 @@ declare @llvm.riscv.vsadd.nxv4i8.nxv4i8( , , , - i32); + iXLen) -define @intrinsic_vsadd_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, i32 %2) nounwind { +define @intrinsic_vsadd_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vv_nxv4i8_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma @@ -113,7 +115,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -123,10 +125,10 @@ declare @llvm.riscv.vsadd.mask.nxv4i8.nxv4i8( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv4i8_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -138,7 +140,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -147,9 +149,9 @@ declare @llvm.riscv.vsadd.nxv8i8.nxv8i8( , , , - i32); + iXLen) -define @intrinsic_vsadd_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, i32 %2) nounwind { +define @intrinsic_vsadd_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vv_nxv8i8_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma @@ -160,7 +162,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -170,10 +172,10 @@ declare @llvm.riscv.vsadd.mask.nxv8i8.nxv8i8( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv8i8_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -185,7 +187,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) 
ret %a } @@ -194,9 +196,9 @@ declare @llvm.riscv.vsadd.nxv16i8.nxv16i8( , , , - i32); + iXLen) -define @intrinsic_vsadd_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, i32 %2) nounwind { +define @intrinsic_vsadd_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vv_nxv16i8_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma @@ -207,7 +209,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -217,10 +219,10 @@ declare @llvm.riscv.vsadd.mask.nxv16i8.nxv16i8( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv16i8_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -232,7 +234,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -241,9 +243,9 @@ declare @llvm.riscv.vsadd.nxv32i8.nxv32i8( , , , - i32); + iXLen) -define @intrinsic_vsadd_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, i32 %2) nounwind { +define @intrinsic_vsadd_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vv_nxv32i8_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma @@ -254,7 +256,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -264,10 +266,10 @@ declare @llvm.riscv.vsadd.mask.nxv32i8.nxv32i8( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv32i8_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -279,7 +281,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -288,9 +290,9 @@ declare @llvm.riscv.vsadd.nxv64i8.nxv64i8( , , , - i32); + iXLen) -define @intrinsic_vsadd_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, i32 %2) nounwind { +define @intrinsic_vsadd_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma @@ -301,7 +303,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -311,10 +313,10 @@ declare @llvm.riscv.vsadd.mask.nxv64i8.nxv64i8( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8r.v v24, (a0) @@ -327,7 +329,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -336,9 +338,9 @@ declare @llvm.riscv.vsadd.nxv1i16.nxv1i16( , , , - i32); + iXLen) -define @intrinsic_vsadd_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, i32 %2) nounwind { +define @intrinsic_vsadd_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vv_nxv1i16_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma @@ -349,7 +351,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -359,10 +361,10 @@ declare @llvm.riscv.vsadd.mask.nxv1i16.nxv1i16( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, 
%3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv1i16_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -374,7 +376,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -383,9 +385,9 @@ declare @llvm.riscv.vsadd.nxv2i16.nxv2i16( , , , - i32); + iXLen) -define @intrinsic_vsadd_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, i32 %2) nounwind { +define @intrinsic_vsadd_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vv_nxv2i16_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma @@ -396,7 +398,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -406,10 +408,10 @@ declare @llvm.riscv.vsadd.mask.nxv2i16.nxv2i16( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv2i16_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -421,7 +423,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -430,9 +432,9 @@ declare @llvm.riscv.vsadd.nxv4i16.nxv4i16( , , , - i32); + iXLen) -define @intrinsic_vsadd_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, i32 %2) nounwind { +define @intrinsic_vsadd_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vv_nxv4i16_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma @@ -443,7 +445,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -453,10 +455,10 @@ declare @llvm.riscv.vsadd.mask.nxv4i16.nxv4i16( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv4i16_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -468,7 +470,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -477,9 +479,9 @@ declare @llvm.riscv.vsadd.nxv8i16.nxv8i16( , , , - i32); + iXLen) -define @intrinsic_vsadd_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, i32 %2) nounwind { +define @intrinsic_vsadd_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vv_nxv8i16_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma @@ -490,7 +492,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -500,10 +502,10 @@ declare @llvm.riscv.vsadd.mask.nxv8i16.nxv8i16( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv8i16_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -515,7 +517,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -524,9 +526,9 @@ declare @llvm.riscv.vsadd.nxv16i16.nxv16i16( , , , - i32); + iXLen) -define @intrinsic_vsadd_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, i32 %2) nounwind { +define @intrinsic_vsadd_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: 
intrinsic_vsadd_vv_nxv16i16_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma @@ -537,7 +539,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -547,10 +549,10 @@ declare @llvm.riscv.vsadd.mask.nxv16i16.nxv16i16( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv16i16_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -562,7 +564,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -571,9 +573,9 @@ declare @llvm.riscv.vsadd.nxv32i16.nxv32i16( , , , - i32); + iXLen) -define @intrinsic_vsadd_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, i32 %2) nounwind { +define @intrinsic_vsadd_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma @@ -584,7 +586,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -594,10 +596,10 @@ declare @llvm.riscv.vsadd.mask.nxv32i16.nxv32i16( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re16.v v24, (a0) @@ -610,7 +612,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -619,9 +621,9 @@ declare @llvm.riscv.vsadd.nxv1i32.nxv1i32( , , , - i32); + iXLen) -define @intrinsic_vsadd_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, i32 %2) nounwind { +define @intrinsic_vsadd_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vv_nxv1i32_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma @@ -632,7 +634,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -642,10 +644,10 @@ declare @llvm.riscv.vsadd.mask.nxv1i32.nxv1i32( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv1i32_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -657,7 +659,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -666,9 +668,9 @@ declare @llvm.riscv.vsadd.nxv2i32.nxv2i32( , , , - i32); + iXLen) -define @intrinsic_vsadd_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, i32 %2) nounwind { +define @intrinsic_vsadd_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vv_nxv2i32_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma @@ -679,7 +681,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -689,10 +691,10 @@ declare @llvm.riscv.vsadd.mask.nxv2i32.nxv2i32( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv2i32_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: 
vsetvli zero, a0, e32, m1, ta, mu @@ -704,7 +706,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -713,9 +715,9 @@ declare @llvm.riscv.vsadd.nxv4i32.nxv4i32( , , , - i32); + iXLen) -define @intrinsic_vsadd_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, i32 %2) nounwind { +define @intrinsic_vsadd_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vv_nxv4i32_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma @@ -726,7 +728,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -736,10 +738,10 @@ declare @llvm.riscv.vsadd.mask.nxv4i32.nxv4i32( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv4i32_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -751,7 +753,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -760,9 +762,9 @@ declare @llvm.riscv.vsadd.nxv8i32.nxv8i32( , , , - i32); + iXLen) -define @intrinsic_vsadd_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, i32 %2) nounwind { +define @intrinsic_vsadd_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vv_nxv8i32_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma @@ -773,7 +775,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -783,10 +785,10 @@ declare @llvm.riscv.vsadd.mask.nxv8i32.nxv8i32( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv8i32_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -798,7 +800,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -807,9 +809,9 @@ declare @llvm.riscv.vsadd.nxv16i32.nxv16i32( , , , - i32); + iXLen) -define @intrinsic_vsadd_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, i32 %2) nounwind { +define @intrinsic_vsadd_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma @@ -820,7 +822,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -830,10 +832,10 @@ declare @llvm.riscv.vsadd.mask.nxv16i32.nxv16i32( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re32.v v24, (a0) @@ -846,7 +848,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -855,9 +857,9 @@ declare @llvm.riscv.vsadd.nxv1i64.nxv1i64( , , , - i32); + iXLen) -define @intrinsic_vsadd_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, i32 %2) nounwind { +define @intrinsic_vsadd_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vv_nxv1i64_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -868,7 +870,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -878,10 +880,10 @@ declare 
@llvm.riscv.vsadd.mask.nxv1i64.nxv1i64( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv1i64_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -893,7 +895,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -902,9 +904,9 @@ declare @llvm.riscv.vsadd.nxv2i64.nxv2i64( , , , - i32); + iXLen) -define @intrinsic_vsadd_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, i32 %2) nounwind { +define @intrinsic_vsadd_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vv_nxv2i64_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma @@ -915,7 +917,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -925,10 +927,10 @@ declare @llvm.riscv.vsadd.mask.nxv2i64.nxv2i64( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv2i64_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -940,7 +942,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -949,9 +951,9 @@ declare @llvm.riscv.vsadd.nxv4i64.nxv4i64( , , , - i32); + iXLen) -define @intrinsic_vsadd_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, i32 %2) nounwind { +define @intrinsic_vsadd_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vv_nxv4i64_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -962,7 +964,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -972,10 +974,10 @@ declare @llvm.riscv.vsadd.mask.nxv4i64.nxv4i64( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv4i64_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -987,7 +989,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -996,9 +998,9 @@ declare @llvm.riscv.vsadd.nxv8i64.nxv8i64( , , , - i32); + iXLen) -define @intrinsic_vsadd_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, i32 %2) nounwind { +define @intrinsic_vsadd_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -1009,7 +1011,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -1019,10 +1021,10 @@ declare @llvm.riscv.vsadd.mask.nxv8i64.nxv8i64( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re64.v v24, (a0) @@ -1035,7 +1037,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1044,9 +1046,9 @@ declare @llvm.riscv.vsadd.nxv1i8.i8( , , i8, - i32); + iXLen) -define @intrinsic_vsadd_vx_nxv1i8_nxv1i8_i8( %0, i8 
%1, i32 %2) nounwind { +define @intrinsic_vsadd_vx_nxv1i8_nxv1i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vx_nxv1i8_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma @@ -1057,7 +1059,7 @@ entry: undef, %0, i8 %1, - i32 %2) + iXLen %2) ret %a } @@ -1067,10 +1069,10 @@ declare @llvm.riscv.vsadd.mask.nxv1i8.i8( , i8, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vx_nxv1i8_nxv1i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vx_nxv1i8_nxv1i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv1i8_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu @@ -1082,7 +1084,7 @@ entry: %1, i8 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1091,9 +1093,9 @@ declare @llvm.riscv.vsadd.nxv2i8.i8( , , i8, - i32); + iXLen) -define @intrinsic_vsadd_vx_nxv2i8_nxv2i8_i8( %0, i8 %1, i32 %2) nounwind { +define @intrinsic_vsadd_vx_nxv2i8_nxv2i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vx_nxv2i8_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma @@ -1104,7 +1106,7 @@ entry: undef, %0, i8 %1, - i32 %2) + iXLen %2) ret %a } @@ -1114,10 +1116,10 @@ declare @llvm.riscv.vsadd.mask.nxv2i8.i8( , i8, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vx_nxv2i8_nxv2i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vx_nxv2i8_nxv2i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv2i8_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu @@ -1129,7 +1131,7 @@ entry: %1, i8 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1138,9 +1140,9 @@ declare @llvm.riscv.vsadd.nxv4i8.i8( , , i8, - i32); + iXLen) -define @intrinsic_vsadd_vx_nxv4i8_nxv4i8_i8( %0, i8 %1, i32 %2) nounwind { +define @intrinsic_vsadd_vx_nxv4i8_nxv4i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vx_nxv4i8_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma @@ -1151,7 +1153,7 @@ entry: undef, %0, i8 %1, - i32 %2) + iXLen %2) ret %a } @@ -1161,10 +1163,10 @@ declare @llvm.riscv.vsadd.mask.nxv4i8.i8( , i8, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vx_nxv4i8_nxv4i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vx_nxv4i8_nxv4i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv4i8_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu @@ -1176,7 +1178,7 @@ entry: %1, i8 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1185,9 +1187,9 @@ declare @llvm.riscv.vsadd.nxv8i8.i8( , , i8, - i32); + iXLen) -define @intrinsic_vsadd_vx_nxv8i8_nxv8i8_i8( %0, i8 %1, i32 %2) nounwind { +define @intrinsic_vsadd_vx_nxv8i8_nxv8i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vx_nxv8i8_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma @@ -1198,7 +1200,7 @@ entry: undef, %0, i8 %1, - i32 %2) + iXLen %2) ret %a } @@ -1208,10 +1210,10 @@ declare @llvm.riscv.vsadd.mask.nxv8i8.i8( , i8, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vx_nxv8i8_nxv8i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vx_nxv8i8_nxv8i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv8i8_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: 
vsetvli zero, a1, e8, m1, ta, mu @@ -1223,7 +1225,7 @@ entry: %1, i8 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1232,9 +1234,9 @@ declare @llvm.riscv.vsadd.nxv16i8.i8( , , i8, - i32); + iXLen) -define @intrinsic_vsadd_vx_nxv16i8_nxv16i8_i8( %0, i8 %1, i32 %2) nounwind { +define @intrinsic_vsadd_vx_nxv16i8_nxv16i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vx_nxv16i8_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma @@ -1245,7 +1247,7 @@ entry: undef, %0, i8 %1, - i32 %2) + iXLen %2) ret %a } @@ -1255,10 +1257,10 @@ declare @llvm.riscv.vsadd.mask.nxv16i8.i8( , i8, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vx_nxv16i8_nxv16i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vx_nxv16i8_nxv16i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv16i8_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu @@ -1270,7 +1272,7 @@ entry: %1, i8 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1279,9 +1281,9 @@ declare @llvm.riscv.vsadd.nxv32i8.i8( , , i8, - i32); + iXLen) -define @intrinsic_vsadd_vx_nxv32i8_nxv32i8_i8( %0, i8 %1, i32 %2) nounwind { +define @intrinsic_vsadd_vx_nxv32i8_nxv32i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vx_nxv32i8_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma @@ -1292,7 +1294,7 @@ entry: undef, %0, i8 %1, - i32 %2) + iXLen %2) ret %a } @@ -1302,10 +1304,10 @@ declare @llvm.riscv.vsadd.mask.nxv32i8.i8( , i8, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vx_nxv32i8_nxv32i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vx_nxv32i8_nxv32i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv32i8_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu @@ -1317,7 +1319,7 @@ entry: %1, i8 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1326,9 +1328,9 @@ declare @llvm.riscv.vsadd.nxv64i8.i8( , , i8, - i32); + iXLen) -define @intrinsic_vsadd_vx_nxv64i8_nxv64i8_i8( %0, i8 %1, i32 %2) nounwind { +define @intrinsic_vsadd_vx_nxv64i8_nxv64i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vx_nxv64i8_nxv64i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma @@ -1339,7 +1341,7 @@ entry: undef, %0, i8 %1, - i32 %2) + iXLen %2) ret %a } @@ -1349,10 +1351,10 @@ declare @llvm.riscv.vsadd.mask.nxv64i8.i8( , i8, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vx_nxv64i8_nxv64i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vx_nxv64i8_nxv64i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv64i8_nxv64i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu @@ -1364,7 +1366,7 @@ entry: %1, i8 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1373,9 +1375,9 @@ declare @llvm.riscv.vsadd.nxv1i16.i16( , , i16, - i32); + iXLen) -define @intrinsic_vsadd_vx_nxv1i16_nxv1i16_i16( %0, i16 %1, i32 %2) nounwind { +define @intrinsic_vsadd_vx_nxv1i16_nxv1i16_i16( %0, i16 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vx_nxv1i16_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma @@ -1386,7 +1388,7 @@ entry: undef, %0, i16 %1, - i32 %2) + iXLen %2) ret %a } @@ -1396,10 +1398,10 @@ declare @llvm.riscv.vsadd.mask.nxv1i16.i16( , i16, , 
- i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vx_nxv1i16_nxv1i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vx_nxv1i16_nxv1i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv1i16_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu @@ -1411,7 +1413,7 @@ entry: %1, i16 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1420,9 +1422,9 @@ declare @llvm.riscv.vsadd.nxv2i16.i16( , , i16, - i32); + iXLen) -define @intrinsic_vsadd_vx_nxv2i16_nxv2i16_i16( %0, i16 %1, i32 %2) nounwind { +define @intrinsic_vsadd_vx_nxv2i16_nxv2i16_i16( %0, i16 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vx_nxv2i16_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma @@ -1433,7 +1435,7 @@ entry: undef, %0, i16 %1, - i32 %2) + iXLen %2) ret %a } @@ -1443,10 +1445,10 @@ declare @llvm.riscv.vsadd.mask.nxv2i16.i16( , i16, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vx_nxv2i16_nxv2i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vx_nxv2i16_nxv2i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv2i16_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu @@ -1458,7 +1460,7 @@ entry: %1, i16 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1467,9 +1469,9 @@ declare @llvm.riscv.vsadd.nxv4i16.i16( , , i16, - i32); + iXLen) -define @intrinsic_vsadd_vx_nxv4i16_nxv4i16_i16( %0, i16 %1, i32 %2) nounwind { +define @intrinsic_vsadd_vx_nxv4i16_nxv4i16_i16( %0, i16 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vx_nxv4i16_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma @@ -1480,7 +1482,7 @@ entry: undef, %0, i16 %1, - i32 %2) + iXLen %2) ret %a } @@ -1490,10 +1492,10 @@ declare @llvm.riscv.vsadd.mask.nxv4i16.i16( , i16, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vx_nxv4i16_nxv4i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vx_nxv4i16_nxv4i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv4i16_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu @@ -1505,7 +1507,7 @@ entry: %1, i16 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1514,9 +1516,9 @@ declare @llvm.riscv.vsadd.nxv8i16.i16( , , i16, - i32); + iXLen) -define @intrinsic_vsadd_vx_nxv8i16_nxv8i16_i16( %0, i16 %1, i32 %2) nounwind { +define @intrinsic_vsadd_vx_nxv8i16_nxv8i16_i16( %0, i16 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vx_nxv8i16_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma @@ -1527,7 +1529,7 @@ entry: undef, %0, i16 %1, - i32 %2) + iXLen %2) ret %a } @@ -1537,10 +1539,10 @@ declare @llvm.riscv.vsadd.mask.nxv8i16.i16( , i16, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vx_nxv8i16_nxv8i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vx_nxv8i16_nxv8i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv8i16_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu @@ -1552,7 +1554,7 @@ entry: %1, i16 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1561,9 +1563,9 @@ declare @llvm.riscv.vsadd.nxv16i16.i16( , , i16, - i32); + iXLen) -define @intrinsic_vsadd_vx_nxv16i16_nxv16i16_i16( %0, i16 %1, i32 
%2) nounwind { +define @intrinsic_vsadd_vx_nxv16i16_nxv16i16_i16( %0, i16 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vx_nxv16i16_nxv16i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma @@ -1574,7 +1576,7 @@ entry: undef, %0, i16 %1, - i32 %2) + iXLen %2) ret %a } @@ -1584,10 +1586,10 @@ declare @llvm.riscv.vsadd.mask.nxv16i16.i16( , i16, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vx_nxv16i16_nxv16i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vx_nxv16i16_nxv16i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv16i16_nxv16i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu @@ -1599,7 +1601,7 @@ entry: %1, i16 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1608,9 +1610,9 @@ declare @llvm.riscv.vsadd.nxv32i16.i16( , , i16, - i32); + iXLen) -define @intrinsic_vsadd_vx_nxv32i16_nxv32i16_i16( %0, i16 %1, i32 %2) nounwind { +define @intrinsic_vsadd_vx_nxv32i16_nxv32i16_i16( %0, i16 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vx_nxv32i16_nxv32i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma @@ -1621,7 +1623,7 @@ entry: undef, %0, i16 %1, - i32 %2) + iXLen %2) ret %a } @@ -1631,10 +1633,10 @@ declare @llvm.riscv.vsadd.mask.nxv32i16.i16( , i16, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vx_nxv32i16_nxv32i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vx_nxv32i16_nxv32i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv32i16_nxv32i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu @@ -1646,7 +1648,7 @@ entry: %1, i16 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1655,9 +1657,9 @@ declare @llvm.riscv.vsadd.nxv1i32.i32( , , i32, - i32); + iXLen) -define @intrinsic_vsadd_vx_nxv1i32_nxv1i32_i32( %0, i32 %1, i32 %2) nounwind { +define @intrinsic_vsadd_vx_nxv1i32_nxv1i32_i32( %0, i32 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vx_nxv1i32_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma @@ -1668,7 +1670,7 @@ entry: undef, %0, i32 %1, - i32 %2) + iXLen %2) ret %a } @@ -1678,10 +1680,10 @@ declare @llvm.riscv.vsadd.mask.nxv1i32.i32( , i32, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vx_nxv1i32_nxv1i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vx_nxv1i32_nxv1i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv1i32_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu @@ -1693,7 +1695,7 @@ entry: %1, i32 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1702,9 +1704,9 @@ declare @llvm.riscv.vsadd.nxv2i32.i32( , , i32, - i32); + iXLen) -define @intrinsic_vsadd_vx_nxv2i32_nxv2i32_i32( %0, i32 %1, i32 %2) nounwind { +define @intrinsic_vsadd_vx_nxv2i32_nxv2i32_i32( %0, i32 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vx_nxv2i32_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma @@ -1715,7 +1717,7 @@ entry: undef, %0, i32 %1, - i32 %2) + iXLen %2) ret %a } @@ -1725,10 +1727,10 @@ declare @llvm.riscv.vsadd.mask.nxv2i32.i32( , i32, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vx_nxv2i32_nxv2i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vx_nxv2i32_nxv2i32_i32( %0, %1, i32 %2, 
%3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv2i32_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu @@ -1740,7 +1742,7 @@ entry: %1, i32 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1749,9 +1751,9 @@ declare @llvm.riscv.vsadd.nxv4i32.i32( , , i32, - i32); + iXLen) -define @intrinsic_vsadd_vx_nxv4i32_nxv4i32_i32( %0, i32 %1, i32 %2) nounwind { +define @intrinsic_vsadd_vx_nxv4i32_nxv4i32_i32( %0, i32 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vx_nxv4i32_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma @@ -1762,7 +1764,7 @@ entry: undef, %0, i32 %1, - i32 %2) + iXLen %2) ret %a } @@ -1772,10 +1774,10 @@ declare @llvm.riscv.vsadd.mask.nxv4i32.i32( , i32, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vx_nxv4i32_nxv4i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vx_nxv4i32_nxv4i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv4i32_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu @@ -1787,7 +1789,7 @@ entry: %1, i32 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1796,9 +1798,9 @@ declare @llvm.riscv.vsadd.nxv8i32.i32( , , i32, - i32); + iXLen) -define @intrinsic_vsadd_vx_nxv8i32_nxv8i32_i32( %0, i32 %1, i32 %2) nounwind { +define @intrinsic_vsadd_vx_nxv8i32_nxv8i32_i32( %0, i32 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vx_nxv8i32_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma @@ -1809,7 +1811,7 @@ entry: undef, %0, i32 %1, - i32 %2) + iXLen %2) ret %a } @@ -1819,10 +1821,10 @@ declare @llvm.riscv.vsadd.mask.nxv8i32.i32( , i32, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vx_nxv8i32_nxv8i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vx_nxv8i32_nxv8i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv8i32_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu @@ -1834,7 +1836,7 @@ entry: %1, i32 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1843,9 +1845,9 @@ declare @llvm.riscv.vsadd.nxv16i32.i32( , , i32, - i32); + iXLen) -define @intrinsic_vsadd_vx_nxv16i32_nxv16i32_i32( %0, i32 %1, i32 %2) nounwind { +define @intrinsic_vsadd_vx_nxv16i32_nxv16i32_i32( %0, i32 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vx_nxv16i32_nxv16i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma @@ -1856,7 +1858,7 @@ entry: undef, %0, i32 %1, - i32 %2) + iXLen %2) ret %a } @@ -1866,10 +1868,10 @@ declare @llvm.riscv.vsadd.mask.nxv16i32.i32( , i32, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vsadd_mask_vx_nxv16i32_nxv16i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +define @intrinsic_vsadd_mask_vx_nxv16i32_nxv16i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv16i32_nxv16i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu @@ -1881,7 +1883,7 @@ entry: %1, i32 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1890,26 +1892,32 @@ declare @llvm.riscv.vsadd.nxv1i64.i64( , , i64, - i32); - -define @intrinsic_vsadd_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv1i64_nxv1i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; 
CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v9, (a0), zero -; CHECK-NEXT: vsadd.vv v8, v8, v9 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret + iXLen) + +define @intrinsic_vsadd_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vsadd_vx_nxv1i64_nxv1i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsadd.vv v8, v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsadd_vx_nxv1i64_nxv1i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vsadd.vx v8, v8, a0 +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vsadd.nxv1i64.i64( undef, %0, i64 %1, - i32 %2) + iXLen %2) ret %a } @@ -1919,28 +1927,34 @@ declare @llvm.riscv.vsadd.mask.nxv1i64.i64( , i64, , - i32, - i32); - -define @intrinsic_vsadd_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv1i64_nxv1i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vsadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret + iXLen, + iXLen) + +define @intrinsic_vsadd_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vsadd_mask_vx_nxv1i64_nxv1i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsadd.vv v8, v9, v10, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsadd_mask_vx_nxv1i64_nxv1i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; RV64-NEXT: vsadd.vx v8, v9, a0, v0.t +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vsadd.mask.nxv1i64.i64( %0, %1, i64 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1949,26 +1963,32 @@ declare @llvm.riscv.vsadd.nxv2i64.i64( , , i64, - i32); - -define @intrinsic_vsadd_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv2i64_nxv2i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma -; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vsadd.vv v8, v8, v10 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret + iXLen) + +define @intrinsic_vsadd_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vsadd_vx_nxv2i64_nxv2i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsadd.vv v8, v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsadd_vx_nxv2i64_nxv2i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vsadd.vx v8, v8, a0 +; RV64-NEXT: ret entry: %a = call 
@llvm.riscv.vsadd.nxv2i64.i64( undef, %0, i64 %1, - i32 %2) + iXLen %2) ret %a } @@ -1978,28 +1998,34 @@ declare @llvm.riscv.vsadd.mask.nxv2i64.i64( , i64, , - i32, - i32); - -define @intrinsic_vsadd_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv2i64_nxv2i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vlse64.v v12, (a0), zero -; CHECK-NEXT: vsadd.vv v8, v10, v12, v0.t -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret + iXLen, + iXLen) + +define @intrinsic_vsadd_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vsadd_mask_vx_nxv2i64_nxv2i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsadd.vv v8, v10, v12, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsadd_mask_vx_nxv2i64_nxv2i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; RV64-NEXT: vsadd.vx v8, v10, a0, v0.t +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vsadd.mask.nxv2i64.i64( %0, %1, i64 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -2008,26 +2034,32 @@ declare @llvm.riscv.vsadd.nxv4i64.i64( , , i64, - i32); - -define @intrinsic_vsadd_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv4i64_nxv4i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; CHECK-NEXT: vlse64.v v12, (a0), zero -; CHECK-NEXT: vsadd.vv v8, v8, v12 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret + iXLen) + +define @intrinsic_vsadd_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vsadd_vx_nxv4i64_nxv4i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsadd.vv v8, v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsadd_vx_nxv4i64_nxv4i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vsadd.vx v8, v8, a0 +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vsadd.nxv4i64.i64( undef, %0, i64 %1, - i32 %2) + iXLen %2) ret %a } @@ -2037,28 +2069,34 @@ declare @llvm.riscv.vsadd.mask.nxv4i64.i64( , i64, , - i32, - i32); - -define @intrinsic_vsadd_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv4i64_nxv4i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vlse64.v v16, (a0), zero -; CHECK-NEXT: vsadd.vv v8, v12, v16, v0.t -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret + iXLen, + iXLen) + +define @intrinsic_vsadd_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vsadd_mask_vx_nxv4i64_nxv4i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, 
sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsadd.vv v8, v12, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsadd_mask_vx_nxv4i64_nxv4i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV64-NEXT: vsadd.vx v8, v12, a0, v0.t +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vsadd.mask.nxv4i64.i64( %0, %1, i64 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -2067,26 +2105,32 @@ declare @llvm.riscv.vsadd.nxv8i64.i64( , , i64, - i32); - -define @intrinsic_vsadd_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsadd_vx_nxv8i64_nxv8i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vlse64.v v16, (a0), zero -; CHECK-NEXT: vsadd.vv v8, v8, v16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret + iXLen) + +define @intrinsic_vsadd_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vsadd_vx_nxv8i64_nxv8i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsadd.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsadd_vx_nxv8i64_nxv8i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsadd.vx v8, v8, a0 +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vsadd.nxv8i64.i64( undef, %0, i64 %1, - i32 %2) + iXLen %2) ret %a } @@ -2096,33 +2140,39 @@ declare @llvm.riscv.vsadd.mask.nxv8i64.i64( , i64, , - i32, - i32); - -define @intrinsic_vsadd_mask_vx_nxv8i64_nxv8i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv8i64_nxv8i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vlse64.v v24, (a0), zero -; CHECK-NEXT: vsadd.vv v8, v16, v24, v0.t -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret + iXLen, + iXLen) + +define @intrinsic_vsadd_mask_vx_nxv8i64_nxv8i64_i64( %0, %1, i64 %2, %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vsadd_mask_vx_nxv8i64_nxv8i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vsadd.vv v8, v16, v24, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsadd_mask_vx_nxv8i64_nxv8i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vsadd.vx v8, v16, a0, v0.t +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vsadd.mask.nxv8i64.i64( %0, %1, i64 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } -define @intrinsic_vsadd_vi_nxv1i8_nxv1i8_i8( %0, i32 %1) nounwind { +define @intrinsic_vsadd_vi_nxv1i8_nxv1i8_i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vi_nxv1i8_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma @@ -2133,12 +2183,12 @@ entry: 
undef, %0, i8 9, - i32 %1) + iXLen %1) ret %a } -define @intrinsic_vsadd_mask_vi_nxv1i8_nxv1i8_i8( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vsadd_mask_vi_nxv1i8_nxv1i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv1i8_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -2150,12 +2200,12 @@ entry: %1, i8 9, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsadd_vi_nxv2i8_nxv2i8_i8( %0, i32 %1) nounwind { +define @intrinsic_vsadd_vi_nxv2i8_nxv2i8_i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vi_nxv2i8_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma @@ -2166,12 +2216,12 @@ entry: undef, %0, i8 9, - i32 %1) + iXLen %1) ret %a } -define @intrinsic_vsadd_mask_vi_nxv2i8_nxv2i8_i8( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vsadd_mask_vi_nxv2i8_nxv2i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv2i8_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -2183,12 +2233,12 @@ entry: %1, i8 9, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsadd_vi_nxv4i8_nxv4i8_i8( %0, i32 %1) nounwind { +define @intrinsic_vsadd_vi_nxv4i8_nxv4i8_i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vi_nxv4i8_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma @@ -2199,12 +2249,12 @@ entry: undef, %0, i8 9, - i32 %1) + iXLen %1) ret %a } -define @intrinsic_vsadd_mask_vi_nxv4i8_nxv4i8_i8( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vsadd_mask_vi_nxv4i8_nxv4i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv4i8_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -2216,12 +2266,12 @@ entry: %1, i8 9, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsadd_vi_nxv8i8_nxv8i8_i8( %0, i32 %1) nounwind { +define @intrinsic_vsadd_vi_nxv8i8_nxv8i8_i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vi_nxv8i8_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma @@ -2232,12 +2282,12 @@ entry: undef, %0, i8 9, - i32 %1) + iXLen %1) ret %a } -define @intrinsic_vsadd_mask_vi_nxv8i8_nxv8i8_i8( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vsadd_mask_vi_nxv8i8_nxv8i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv8i8_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -2249,12 +2299,12 @@ entry: %1, i8 9, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsadd_vi_nxv16i8_nxv16i8_i8( %0, i32 %1) nounwind { +define @intrinsic_vsadd_vi_nxv16i8_nxv16i8_i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vi_nxv16i8_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma @@ -2265,12 +2315,12 @@ entry: undef, %0, i8 9, - i32 %1) + iXLen %1) ret %a } -define @intrinsic_vsadd_mask_vi_nxv16i8_nxv16i8_i8( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vsadd_mask_vi_nxv16i8_nxv16i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv16i8_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -2282,12 +2332,12 @@ entry: %1, i8 9, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsadd_vi_nxv32i8_nxv32i8_i8( %0, i32 %1) nounwind { +define @intrinsic_vsadd_vi_nxv32i8_nxv32i8_i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: 
intrinsic_vsadd_vi_nxv32i8_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma @@ -2298,12 +2348,12 @@ entry: undef, %0, i8 9, - i32 %1) + iXLen %1) ret %a } -define @intrinsic_vsadd_mask_vi_nxv32i8_nxv32i8_i8( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vsadd_mask_vi_nxv32i8_nxv32i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv32i8_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -2315,12 +2365,12 @@ entry: %1, i8 9, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsadd_vi_nxv64i8_nxv64i8_i8( %0, i32 %1) nounwind { +define @intrinsic_vsadd_vi_nxv64i8_nxv64i8_i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vi_nxv64i8_nxv64i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma @@ -2331,12 +2381,12 @@ entry: undef, %0, i8 9, - i32 %1) + iXLen %1) ret %a } -define @intrinsic_vsadd_mask_vi_nxv64i8_nxv64i8_i8( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vsadd_mask_vi_nxv64i8_nxv64i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv64i8_nxv64i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu @@ -2348,12 +2398,12 @@ entry: %1, i8 9, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsadd_vi_nxv1i16_nxv1i16_i16( %0, i32 %1) nounwind { +define @intrinsic_vsadd_vi_nxv1i16_nxv1i16_i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vi_nxv1i16_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma @@ -2364,12 +2414,12 @@ entry: undef, %0, i16 9, - i32 %1) + iXLen %1) ret %a } -define @intrinsic_vsadd_mask_vi_nxv1i16_nxv1i16_i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vsadd_mask_vi_nxv1i16_nxv1i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv1i16_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -2381,12 +2431,12 @@ entry: %1, i16 9, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsadd_vi_nxv2i16_nxv2i16_i16( %0, i32 %1) nounwind { +define @intrinsic_vsadd_vi_nxv2i16_nxv2i16_i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vi_nxv2i16_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma @@ -2397,12 +2447,12 @@ entry: undef, %0, i16 9, - i32 %1) + iXLen %1) ret %a } -define @intrinsic_vsadd_mask_vi_nxv2i16_nxv2i16_i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vsadd_mask_vi_nxv2i16_nxv2i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv2i16_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -2414,12 +2464,12 @@ entry: %1, i16 9, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsadd_vi_nxv4i16_nxv4i16_i16( %0, i32 %1) nounwind { +define @intrinsic_vsadd_vi_nxv4i16_nxv4i16_i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vi_nxv4i16_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma @@ -2430,12 +2480,12 @@ entry: undef, %0, i16 9, - i32 %1) + iXLen %1) ret %a } -define @intrinsic_vsadd_mask_vi_nxv4i16_nxv4i16_i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vsadd_mask_vi_nxv4i16_nxv4i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv4i16_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -2447,12 +2497,12 @@ entry: %1, i16 9, 
%2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsadd_vi_nxv8i16_nxv8i16_i16( %0, i32 %1) nounwind { +define @intrinsic_vsadd_vi_nxv8i16_nxv8i16_i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vi_nxv8i16_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma @@ -2463,12 +2513,12 @@ entry: undef, %0, i16 9, - i32 %1) + iXLen %1) ret %a } -define @intrinsic_vsadd_mask_vi_nxv8i16_nxv8i16_i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vsadd_mask_vi_nxv8i16_nxv8i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv8i16_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -2480,12 +2530,12 @@ entry: %1, i16 9, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsadd_vi_nxv16i16_nxv16i16_i16( %0, i32 %1) nounwind { +define @intrinsic_vsadd_vi_nxv16i16_nxv16i16_i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vi_nxv16i16_nxv16i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma @@ -2496,12 +2546,12 @@ entry: undef, %0, i16 9, - i32 %1) + iXLen %1) ret %a } -define @intrinsic_vsadd_mask_vi_nxv16i16_nxv16i16_i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vsadd_mask_vi_nxv16i16_nxv16i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv16i16_nxv16i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -2513,12 +2563,12 @@ entry: %1, i16 9, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsadd_vi_nxv32i16_nxv32i16_i16( %0, i32 %1) nounwind { +define @intrinsic_vsadd_vi_nxv32i16_nxv32i16_i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vi_nxv32i16_nxv32i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma @@ -2529,12 +2579,12 @@ entry: undef, %0, i16 9, - i32 %1) + iXLen %1) ret %a } -define @intrinsic_vsadd_mask_vi_nxv32i16_nxv32i16_i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vsadd_mask_vi_nxv32i16_nxv32i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv32i16_nxv32i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -2546,12 +2596,12 @@ entry: %1, i16 9, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsadd_vi_nxv1i32_nxv1i32_i32( %0, i32 %1) nounwind { +define @intrinsic_vsadd_vi_nxv1i32_nxv1i32_i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vi_nxv1i32_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma @@ -2562,12 +2612,12 @@ entry: undef, %0, i32 9, - i32 %1) + iXLen %1) ret %a } -define @intrinsic_vsadd_mask_vi_nxv1i32_nxv1i32_i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vsadd_mask_vi_nxv1i32_nxv1i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv1i32_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -2579,12 +2629,12 @@ entry: %1, i32 9, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsadd_vi_nxv2i32_nxv2i32_i32( %0, i32 %1) nounwind { +define @intrinsic_vsadd_vi_nxv2i32_nxv2i32_i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vi_nxv2i32_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma @@ -2595,12 +2645,12 @@ entry: undef, %0, i32 9, - i32 %1) + iXLen %1) ret %a } -define @intrinsic_vsadd_mask_vi_nxv2i32_nxv2i32_i32( %0, %1, %2, i32 %3) nounwind { +define 
@intrinsic_vsadd_mask_vi_nxv2i32_nxv2i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv2i32_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -2612,12 +2662,12 @@ entry: %1, i32 9, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsadd_vi_nxv4i32_nxv4i32_i32( %0, i32 %1) nounwind { +define @intrinsic_vsadd_vi_nxv4i32_nxv4i32_i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vi_nxv4i32_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma @@ -2628,12 +2678,12 @@ entry: undef, %0, i32 9, - i32 %1) + iXLen %1) ret %a } -define @intrinsic_vsadd_mask_vi_nxv4i32_nxv4i32_i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vsadd_mask_vi_nxv4i32_nxv4i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv4i32_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -2645,12 +2695,12 @@ entry: %1, i32 9, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsadd_vi_nxv8i32_nxv8i32_i32( %0, i32 %1) nounwind { +define @intrinsic_vsadd_vi_nxv8i32_nxv8i32_i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vi_nxv8i32_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma @@ -2661,12 +2711,12 @@ entry: undef, %0, i32 9, - i32 %1) + iXLen %1) ret %a } -define @intrinsic_vsadd_mask_vi_nxv8i32_nxv8i32_i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vsadd_mask_vi_nxv8i32_nxv8i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv8i32_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -2678,12 +2728,12 @@ entry: %1, i32 9, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsadd_vi_nxv16i32_nxv16i32_i32( %0, i32 %1) nounwind { +define @intrinsic_vsadd_vi_nxv16i32_nxv16i32_i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vi_nxv16i32_nxv16i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma @@ -2694,12 +2744,12 @@ entry: undef, %0, i32 9, - i32 %1) + iXLen %1) ret %a } -define @intrinsic_vsadd_mask_vi_nxv16i32_nxv16i32_i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vsadd_mask_vi_nxv16i32_nxv16i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv16i32_nxv16i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -2711,12 +2761,12 @@ entry: %1, i32 9, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsadd_vi_nxv1i64_nxv1i64_i64( %0, i32 %1) nounwind { +define @intrinsic_vsadd_vi_nxv1i64_nxv1i64_i64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vi_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -2727,12 +2777,12 @@ entry: undef, %0, i64 9, - i32 %1) + iXLen %1) ret %a } -define @intrinsic_vsadd_mask_vi_nxv1i64_nxv1i64_i64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vsadd_mask_vi_nxv1i64_nxv1i64_i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -2744,12 +2794,12 @@ entry: %1, i64 9, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsadd_vi_nxv2i64_nxv2i64_i64( %0, i32 %1) nounwind { +define @intrinsic_vsadd_vi_nxv2i64_nxv2i64_i64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vi_nxv2i64_nxv2i64_i64: ; CHECK: # 
%bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma @@ -2760,12 +2810,12 @@ entry: undef, %0, i64 9, - i32 %1) + iXLen %1) ret %a } -define @intrinsic_vsadd_mask_vi_nxv2i64_nxv2i64_i64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vsadd_mask_vi_nxv2i64_nxv2i64_i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -2777,12 +2827,12 @@ entry: %1, i64 9, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsadd_vi_nxv4i64_nxv4i64_i64( %0, i32 %1) nounwind { +define @intrinsic_vsadd_vi_nxv4i64_nxv4i64_i64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vi_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -2793,12 +2843,12 @@ entry: undef, %0, i64 9, - i32 %1) + iXLen %1) ret %a } -define @intrinsic_vsadd_mask_vi_nxv4i64_nxv4i64_i64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vsadd_mask_vi_nxv4i64_nxv4i64_i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -2810,12 +2860,12 @@ entry: %1, i64 9, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsadd_vi_nxv8i64_nxv8i64_i64( %0, i32 %1) nounwind { +define @intrinsic_vsadd_vi_nxv8i64_nxv8i64_i64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsadd_vi_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2826,12 +2876,12 @@ entry: undef, %0, i64 9, - i32 %1) + iXLen %1) ret %a } -define @intrinsic_vsadd_mask_vi_nxv8i64_nxv8i64_i64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vsadd_mask_vi_nxv8i64_nxv8i64_i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vi_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -2843,7 +2893,7 @@ entry: %1, i64 9, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll deleted file mode 100644 index b5fa9a921d46c2..00000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll +++ /dev/null @@ -1,2849 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ -; RUN: < %s | FileCheck %s - -declare @llvm.riscv.vsaddu.nxv1i8.nxv1i8( - , - , - , - i32); - -define @intrinsic_vsaddu_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vv_nxv1i8_nxv1i8_nxv1i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vsaddu.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv1i8.nxv1i8( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv1i8.nxv1i8( - , - , - , - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv1i8_nxv1i8_nxv1i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vsaddu.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv1i8.nxv1i8( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv2i8.nxv2i8( - , - , - , - i32); - -define @intrinsic_vsaddu_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: 
intrinsic_vsaddu_vv_nxv2i8_nxv2i8_nxv2i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vsaddu.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv2i8.nxv2i8( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv2i8.nxv2i8( - , - , - , - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv2i8_nxv2i8_nxv2i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vsaddu.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv2i8.nxv2i8( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv4i8.nxv4i8( - , - , - , - i32); - -define @intrinsic_vsaddu_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vv_nxv4i8_nxv4i8_nxv4i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vsaddu.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv4i8.nxv4i8( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv4i8.nxv4i8( - , - , - , - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv4i8_nxv4i8_nxv4i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vsaddu.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv4i8.nxv4i8( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv8i8.nxv8i8( - , - , - , - i32); - -define @intrinsic_vsaddu_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vv_nxv8i8_nxv8i8_nxv8i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vsaddu.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv8i8.nxv8i8( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv8i8.nxv8i8( - , - , - , - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv8i8_nxv8i8_nxv8i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vsaddu.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv8i8.nxv8i8( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv16i8.nxv16i8( - , - , - , - i32); - -define @intrinsic_vsaddu_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vv_nxv16i8_nxv16i8_nxv16i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vsaddu.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv16i8.nxv16i8( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv16i8.nxv16i8( - , - , - , - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv16i8_nxv16i8_nxv16i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vsaddu.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv16i8.nxv16i8( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - 
-declare @llvm.riscv.vsaddu.nxv32i8.nxv32i8( - , - , - , - i32); - -define @intrinsic_vsaddu_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vv_nxv32i8_nxv32i8_nxv32i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vsaddu.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv32i8.nxv32i8( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv32i8.nxv32i8( - , - , - , - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv32i8_nxv32i8_nxv32i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vsaddu.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv32i8.nxv32i8( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv64i8.nxv64i8( - , - , - , - i32); - -define @intrinsic_vsaddu_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vv_nxv64i8_nxv64i8_nxv64i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vsaddu.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv64i8.nxv64i8( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv64i8.nxv64i8( - , - , - , - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv64i8_nxv64i8_nxv64i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8r.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu -; CHECK-NEXT: vsaddu.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv64i8.nxv64i8( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv1i16.nxv1i16( - , - , - , - i32); - -define @intrinsic_vsaddu_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vv_nxv1i16_nxv1i16_nxv1i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vsaddu.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv1i16.nxv1i16( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv1i16.nxv1i16( - , - , - , - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv1i16_nxv1i16_nxv1i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vsaddu.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv1i16.nxv1i16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv2i16.nxv2i16( - , - , - , - i32); - -define @intrinsic_vsaddu_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vv_nxv2i16_nxv2i16_nxv2i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vsaddu.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv2i16.nxv2i16( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv2i16.nxv2i16( - , - , - , - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv2i16_nxv2i16_nxv2i16: 
-; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vsaddu.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv2i16.nxv2i16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv4i16.nxv4i16( - , - , - , - i32); - -define @intrinsic_vsaddu_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vv_nxv4i16_nxv4i16_nxv4i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vsaddu.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv4i16.nxv4i16( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv4i16.nxv4i16( - , - , - , - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv4i16_nxv4i16_nxv4i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vsaddu.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv4i16.nxv4i16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv8i16.nxv8i16( - , - , - , - i32); - -define @intrinsic_vsaddu_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vv_nxv8i16_nxv8i16_nxv8i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vsaddu.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv8i16.nxv8i16( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv8i16.nxv8i16( - , - , - , - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv8i16_nxv8i16_nxv8i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vsaddu.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv8i16.nxv8i16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv16i16.nxv16i16( - , - , - , - i32); - -define @intrinsic_vsaddu_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vv_nxv16i16_nxv16i16_nxv16i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vsaddu.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv16i16.nxv16i16( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv16i16.nxv16i16( - , - , - , - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv16i16_nxv16i16_nxv16i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vsaddu.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv16i16.nxv16i16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv32i16.nxv32i16( - , - , - , - i32); - -define @intrinsic_vsaddu_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vv_nxv32i16_nxv32i16_nxv32i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vsaddu.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv32i16.nxv32i16( - undef, - %0, - %1, - i32 %2) - 
- ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv32i16.nxv32i16( - , - , - , - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv32i16_nxv32i16_nxv32i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vsaddu.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv32i16.nxv32i16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv1i32.nxv1i32( - , - , - , - i32); - -define @intrinsic_vsaddu_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vv_nxv1i32_nxv1i32_nxv1i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vsaddu.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv1i32.nxv1i32( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv1i32.nxv1i32( - , - , - , - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv1i32_nxv1i32_nxv1i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vsaddu.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv1i32.nxv1i32( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv2i32.nxv2i32( - , - , - , - i32); - -define @intrinsic_vsaddu_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vv_nxv2i32_nxv2i32_nxv2i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vsaddu.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv2i32.nxv2i32( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv2i32.nxv2i32( - , - , - , - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv2i32_nxv2i32_nxv2i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vsaddu.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv2i32.nxv2i32( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv4i32.nxv4i32( - , - , - , - i32); - -define @intrinsic_vsaddu_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vv_nxv4i32_nxv4i32_nxv4i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vsaddu.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv4i32.nxv4i32( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv4i32.nxv4i32( - , - , - , - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv4i32_nxv4i32_nxv4i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vsaddu.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv4i32.nxv4i32( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv8i32.nxv8i32( - , - , - , - i32); - -define @intrinsic_vsaddu_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, i32 %2) nounwind { -; 
CHECK-LABEL: intrinsic_vsaddu_vv_nxv8i32_nxv8i32_nxv8i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vsaddu.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv8i32.nxv8i32( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv8i32.nxv8i32( - , - , - , - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv8i32_nxv8i32_nxv8i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vsaddu.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv8i32.nxv8i32( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv16i32.nxv16i32( - , - , - , - i32); - -define @intrinsic_vsaddu_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vv_nxv16i32_nxv16i32_nxv16i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vsaddu.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv16i32.nxv16i32( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv16i32.nxv16i32( - , - , - , - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv16i32_nxv16i32_nxv16i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vsaddu.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv16i32.nxv16i32( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv1i64.nxv1i64( - , - , - , - i32); - -define @intrinsic_vsaddu_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vv_nxv1i64_nxv1i64_nxv1i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vsaddu.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv1i64.nxv1i64( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv1i64.nxv1i64( - , - , - , - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv1i64_nxv1i64_nxv1i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vsaddu.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv1i64.nxv1i64( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv2i64.nxv2i64( - , - , - , - i32); - -define @intrinsic_vsaddu_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vv_nxv2i64_nxv2i64_nxv2i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vsaddu.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv2i64.nxv2i64( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv2i64.nxv2i64( - , - , - , - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv2i64_nxv2i64_nxv2i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vsaddu.vv v8, v10, v12, v0.t -; 
CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv2i64.nxv2i64( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv4i64.nxv4i64( - , - , - , - i32); - -define @intrinsic_vsaddu_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vv_nxv4i64_nxv4i64_nxv4i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vsaddu.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv4i64.nxv4i64( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv4i64.nxv4i64( - , - , - , - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv4i64_nxv4i64_nxv4i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vsaddu.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv4i64.nxv4i64( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv8i64.nxv8i64( - , - , - , - i32); - -define @intrinsic_vsaddu_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vv_nxv8i64_nxv8i64_nxv8i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vsaddu.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv8i64.nxv8i64( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv8i64.nxv8i64( - , - , - , - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv8i64_nxv8i64_nxv8i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vsaddu.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv8i64.nxv8i64( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv1i8.i8( - , - , - i8, - i32); - -define @intrinsic_vsaddu_vx_nxv1i8_nxv1i8_i8( %0, i8 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv1i8_nxv1i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vsaddu.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv1i8.i8( - undef, - %0, - i8 %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv1i8.i8( - , - , - i8, - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vx_nxv1i8_nxv1i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv1i8_nxv1i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu -; CHECK-NEXT: vsaddu.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv1i8.i8( - %0, - %1, - i8 %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv2i8.i8( - , - , - i8, - i32); - -define @intrinsic_vsaddu_vx_nxv2i8_nxv2i8_i8( %0, i8 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv2i8_nxv2i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vsaddu.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv2i8.i8( - undef, - %0, - i8 %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv2i8.i8( - , - , - i8, - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vx_nxv2i8_nxv2i8_i8( %0, %1, i8 %2, %3, i32 %4) 
nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv2i8_nxv2i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu -; CHECK-NEXT: vsaddu.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv2i8.i8( - %0, - %1, - i8 %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv4i8.i8( - , - , - i8, - i32); - -define @intrinsic_vsaddu_vx_nxv4i8_nxv4i8_i8( %0, i8 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv4i8_nxv4i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vsaddu.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv4i8.i8( - undef, - %0, - i8 %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv4i8.i8( - , - , - i8, - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vx_nxv4i8_nxv4i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv4i8_nxv4i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu -; CHECK-NEXT: vsaddu.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv4i8.i8( - %0, - %1, - i8 %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv8i8.i8( - , - , - i8, - i32); - -define @intrinsic_vsaddu_vx_nxv8i8_nxv8i8_i8( %0, i8 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv8i8_nxv8i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vsaddu.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv8i8.i8( - undef, - %0, - i8 %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv8i8.i8( - , - , - i8, - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vx_nxv8i8_nxv8i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv8i8_nxv8i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu -; CHECK-NEXT: vsaddu.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv8i8.i8( - %0, - %1, - i8 %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv16i8.i8( - , - , - i8, - i32); - -define @intrinsic_vsaddu_vx_nxv16i8_nxv16i8_i8( %0, i8 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv16i8_nxv16i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vsaddu.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv16i8.i8( - undef, - %0, - i8 %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv16i8.i8( - , - , - i8, - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vx_nxv16i8_nxv16i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv16i8_nxv16i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu -; CHECK-NEXT: vsaddu.vx v8, v10, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv16i8.i8( - %0, - %1, - i8 %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv32i8.i8( - , - , - i8, - i32); - -define @intrinsic_vsaddu_vx_nxv32i8_nxv32i8_i8( %0, i8 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv32i8_nxv32i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vsaddu.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv32i8.i8( - undef, - %0, - i8 %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv32i8.i8( - , - , - i8, - , - i32, 
- i32); - -define @intrinsic_vsaddu_mask_vx_nxv32i8_nxv32i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv32i8_nxv32i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu -; CHECK-NEXT: vsaddu.vx v8, v12, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv32i8.i8( - %0, - %1, - i8 %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv64i8.i8( - , - , - i8, - i32); - -define @intrinsic_vsaddu_vx_nxv64i8_nxv64i8_i8( %0, i8 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv64i8_nxv64i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vsaddu.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv64i8.i8( - undef, - %0, - i8 %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv64i8.i8( - , - , - i8, - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vx_nxv64i8_nxv64i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv64i8_nxv64i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu -; CHECK-NEXT: vsaddu.vx v8, v16, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv64i8.i8( - %0, - %1, - i8 %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv1i16.i16( - , - , - i16, - i32); - -define @intrinsic_vsaddu_vx_nxv1i16_nxv1i16_i16( %0, i16 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv1i16_nxv1i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vsaddu.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv1i16.i16( - undef, - %0, - i16 %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv1i16.i16( - , - , - i16, - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vx_nxv1i16_nxv1i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv1i16_nxv1i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vsaddu.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv1i16.i16( - %0, - %1, - i16 %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv2i16.i16( - , - , - i16, - i32); - -define @intrinsic_vsaddu_vx_nxv2i16_nxv2i16_i16( %0, i16 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv2i16_nxv2i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vsaddu.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv2i16.i16( - undef, - %0, - i16 %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv2i16.i16( - , - , - i16, - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vx_nxv2i16_nxv2i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv2i16_nxv2i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vsaddu.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv2i16.i16( - %0, - %1, - i16 %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv4i16.i16( - , - , - i16, - i32); - -define @intrinsic_vsaddu_vx_nxv4i16_nxv4i16_i16( %0, i16 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv4i16_nxv4i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vsaddu.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a 
= call @llvm.riscv.vsaddu.nxv4i16.i16( - undef, - %0, - i16 %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv4i16.i16( - , - , - i16, - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vx_nxv4i16_nxv4i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv4i16_nxv4i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vsaddu.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv4i16.i16( - %0, - %1, - i16 %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv8i16.i16( - , - , - i16, - i32); - -define @intrinsic_vsaddu_vx_nxv8i16_nxv8i16_i16( %0, i16 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv8i16_nxv8i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vsaddu.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv8i16.i16( - undef, - %0, - i16 %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv8i16.i16( - , - , - i16, - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vx_nxv8i16_nxv8i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv8i16_nxv8i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vsaddu.vx v8, v10, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv8i16.i16( - %0, - %1, - i16 %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv16i16.i16( - , - , - i16, - i32); - -define @intrinsic_vsaddu_vx_nxv16i16_nxv16i16_i16( %0, i16 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv16i16_nxv16i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vsaddu.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv16i16.i16( - undef, - %0, - i16 %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv16i16.i16( - , - , - i16, - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vx_nxv16i16_nxv16i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv16i16_nxv16i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vsaddu.vx v8, v12, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv16i16.i16( - %0, - %1, - i16 %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv32i16.i16( - , - , - i16, - i32); - -define @intrinsic_vsaddu_vx_nxv32i16_nxv32i16_i16( %0, i16 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv32i16_nxv32i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vsaddu.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv32i16.i16( - undef, - %0, - i16 %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv32i16.i16( - , - , - i16, - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vx_nxv32i16_nxv32i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv32i16_nxv32i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vsaddu.vx v8, v16, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv32i16.i16( - %0, - %1, - i16 %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv1i32.i32( - , - , - i32, - i32); - -define @intrinsic_vsaddu_vx_nxv1i32_nxv1i32_i32( %0, i32 %1, i32 %2) nounwind 
{ -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv1i32_nxv1i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vsaddu.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv1i32.i32( - undef, - %0, - i32 %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv1i32.i32( - , - , - i32, - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vx_nxv1i32_nxv1i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv1i32_nxv1i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vsaddu.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv1i32.i32( - %0, - %1, - i32 %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv2i32.i32( - , - , - i32, - i32); - -define @intrinsic_vsaddu_vx_nxv2i32_nxv2i32_i32( %0, i32 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv2i32_nxv2i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vsaddu.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv2i32.i32( - undef, - %0, - i32 %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv2i32.i32( - , - , - i32, - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vx_nxv2i32_nxv2i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv2i32_nxv2i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vsaddu.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv2i32.i32( - %0, - %1, - i32 %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv4i32.i32( - , - , - i32, - i32); - -define @intrinsic_vsaddu_vx_nxv4i32_nxv4i32_i32( %0, i32 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv4i32_nxv4i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vsaddu.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv4i32.i32( - undef, - %0, - i32 %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv4i32.i32( - , - , - i32, - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vx_nxv4i32_nxv4i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv4i32_nxv4i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vsaddu.vx v8, v10, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv4i32.i32( - %0, - %1, - i32 %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv8i32.i32( - , - , - i32, - i32); - -define @intrinsic_vsaddu_vx_nxv8i32_nxv8i32_i32( %0, i32 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv8i32_nxv8i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vsaddu.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv8i32.i32( - undef, - %0, - i32 %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv8i32.i32( - , - , - i32, - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vx_nxv8i32_nxv8i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv8i32_nxv8i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vsaddu.vx v8, v12, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv8i32.i32( - %0, - %1, - i32 %2, - 
%3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv16i32.i32( - , - , - i32, - i32); - -define @intrinsic_vsaddu_vx_nxv16i32_nxv16i32_i32( %0, i32 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv16i32_nxv16i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vsaddu.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv16i32.i32( - undef, - %0, - i32 %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv16i32.i32( - , - , - i32, - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vx_nxv16i32_nxv16i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv16i32_nxv16i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vsaddu.vx v8, v16, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv16i32.i32( - %0, - %1, - i32 %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv1i64.i64( - , - , - i64, - i32); - -define @intrinsic_vsaddu_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv1i64_nxv1i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v9, (a0), zero -; CHECK-NEXT: vsaddu.vv v8, v8, v9 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv1i64.i64( - undef, - %0, - i64 %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv1i64.i64( - , - , - i64, - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv1i64_nxv1i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vsaddu.vv v8, v9, v10, v0.t -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv1i64.i64( - %0, - %1, - i64 %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv2i64.i64( - , - , - i64, - i32); - -define @intrinsic_vsaddu_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv2i64_nxv2i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma -; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vsaddu.vv v8, v8, v10 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv2i64.i64( - undef, - %0, - i64 %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv2i64.i64( - , - , - i64, - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv2i64_nxv2i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vlse64.v v12, (a0), zero -; CHECK-NEXT: vsaddu.vv v8, v10, v12, v0.t -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv2i64.i64( - 
%0, - %1, - i64 %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv4i64.i64( - , - , - i64, - i32); - -define @intrinsic_vsaddu_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv4i64_nxv4i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; CHECK-NEXT: vlse64.v v12, (a0), zero -; CHECK-NEXT: vsaddu.vv v8, v8, v12 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv4i64.i64( - undef, - %0, - i64 %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv4i64.i64( - , - , - i64, - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv4i64_nxv4i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vlse64.v v16, (a0), zero -; CHECK-NEXT: vsaddu.vv v8, v12, v16, v0.t -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv4i64.i64( - %0, - %1, - i64 %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsaddu.nxv8i64.i64( - , - , - i64, - i32); - -define @intrinsic_vsaddu_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv8i64_nxv8i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vlse64.v v16, (a0), zero -; CHECK-NEXT: vsaddu.vv v8, v8, v16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv8i64.i64( - undef, - %0, - i64 %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vsaddu.mask.nxv8i64.i64( - , - , - i64, - , - i32, - i32); - -define @intrinsic_vsaddu_mask_vx_nxv8i64_nxv8i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv8i64_nxv8i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vlse64.v v24, (a0), zero -; CHECK-NEXT: vsaddu.vv v8, v16, v24, v0.t -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv8i64.i64( - %0, - %1, - i64 %2, - %3, - i32 %4, i32 1) - - ret %a -} - -define @intrinsic_vsaddu_vi_nxv1i8_nxv1i8_i8( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vi_nxv1i8_nxv1i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vsaddu.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv1i8.i8( - undef, - %0, - i8 9, - i32 %1) - - ret %a -} - -define @intrinsic_vsaddu_mask_vi_nxv1i8_nxv1i8_i8( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv1i8_nxv1i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vsaddu.vi v8, v9, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv1i8.i8( - %0, - %1, - i8 9, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vsaddu_vi_nxv2i8_nxv2i8_i8( %0, i32 %1) nounwind { -; CHECK-LABEL: 
intrinsic_vsaddu_vi_nxv2i8_nxv2i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vsaddu.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv2i8.i8( - undef, - %0, - i8 9, - i32 %1) - - ret %a -} - -define @intrinsic_vsaddu_mask_vi_nxv2i8_nxv2i8_i8( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv2i8_nxv2i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vsaddu.vi v8, v9, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv2i8.i8( - %0, - %1, - i8 9, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vsaddu_vi_nxv4i8_nxv4i8_i8( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vi_nxv4i8_nxv4i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vsaddu.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv4i8.i8( - undef, - %0, - i8 9, - i32 %1) - - ret %a -} - -define @intrinsic_vsaddu_mask_vi_nxv4i8_nxv4i8_i8( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv4i8_nxv4i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vsaddu.vi v8, v9, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv4i8.i8( - %0, - %1, - i8 9, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vsaddu_vi_nxv8i8_nxv8i8_i8( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vi_nxv8i8_nxv8i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vsaddu.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv8i8.i8( - undef, - %0, - i8 9, - i32 %1) - - ret %a -} - -define @intrinsic_vsaddu_mask_vi_nxv8i8_nxv8i8_i8( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv8i8_nxv8i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vsaddu.vi v8, v9, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv8i8.i8( - %0, - %1, - i8 9, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vsaddu_vi_nxv16i8_nxv16i8_i8( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vi_nxv16i8_nxv16i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vsaddu.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv16i8.i8( - undef, - %0, - i8 9, - i32 %1) - - ret %a -} - -define @intrinsic_vsaddu_mask_vi_nxv16i8_nxv16i8_i8( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv16i8_nxv16i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vsaddu.vi v8, v10, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv16i8.i8( - %0, - %1, - i8 9, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vsaddu_vi_nxv32i8_nxv32i8_i8( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vi_nxv32i8_nxv32i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vsaddu.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv32i8.i8( - undef, - %0, - i8 9, - i32 %1) - - ret %a -} - -define @intrinsic_vsaddu_mask_vi_nxv32i8_nxv32i8_i8( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv32i8_nxv32i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vsaddu.vi v8, v12, 9, v0.t -; CHECK-NEXT: ret 
-entry: - %a = call @llvm.riscv.vsaddu.mask.nxv32i8.i8( - %0, - %1, - i8 9, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vsaddu_vi_nxv64i8_nxv64i8_i8( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vi_nxv64i8_nxv64i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vsaddu.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv64i8.i8( - undef, - %0, - i8 9, - i32 %1) - - ret %a -} - -define @intrinsic_vsaddu_mask_vi_nxv64i8_nxv64i8_i8( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv64i8_nxv64i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vsaddu.vi v8, v16, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv64i8.i8( - %0, - %1, - i8 9, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vsaddu_vi_nxv1i16_nxv1i16_i16( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vi_nxv1i16_nxv1i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vsaddu.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv1i16.i16( - undef, - %0, - i16 9, - i32 %1) - - ret %a -} - -define @intrinsic_vsaddu_mask_vi_nxv1i16_nxv1i16_i16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv1i16_nxv1i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vsaddu.vi v8, v9, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv1i16.i16( - %0, - %1, - i16 9, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vsaddu_vi_nxv2i16_nxv2i16_i16( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vi_nxv2i16_nxv2i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vsaddu.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv2i16.i16( - undef, - %0, - i16 9, - i32 %1) - - ret %a -} - -define @intrinsic_vsaddu_mask_vi_nxv2i16_nxv2i16_i16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv2i16_nxv2i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vsaddu.vi v8, v9, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv2i16.i16( - %0, - %1, - i16 9, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vsaddu_vi_nxv4i16_nxv4i16_i16( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vi_nxv4i16_nxv4i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vsaddu.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv4i16.i16( - undef, - %0, - i16 9, - i32 %1) - - ret %a -} - -define @intrinsic_vsaddu_mask_vi_nxv4i16_nxv4i16_i16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv4i16_nxv4i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vsaddu.vi v8, v9, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv4i16.i16( - %0, - %1, - i16 9, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vsaddu_vi_nxv8i16_nxv8i16_i16( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vi_nxv8i16_nxv8i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vsaddu.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv8i16.i16( - undef, - %0, - i16 9, - i32 %1) - - ret %a -} - -define 
@intrinsic_vsaddu_mask_vi_nxv8i16_nxv8i16_i16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv8i16_nxv8i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vsaddu.vi v8, v10, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv8i16.i16( - %0, - %1, - i16 9, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vsaddu_vi_nxv16i16_nxv16i16_i16( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vi_nxv16i16_nxv16i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vsaddu.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv16i16.i16( - undef, - %0, - i16 9, - i32 %1) - - ret %a -} - -define @intrinsic_vsaddu_mask_vi_nxv16i16_nxv16i16_i16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv16i16_nxv16i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vsaddu.vi v8, v12, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv16i16.i16( - %0, - %1, - i16 9, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vsaddu_vi_nxv32i16_nxv32i16_i16( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vi_nxv32i16_nxv32i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vsaddu.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv32i16.i16( - undef, - %0, - i16 9, - i32 %1) - - ret %a -} - -define @intrinsic_vsaddu_mask_vi_nxv32i16_nxv32i16_i16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv32i16_nxv32i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vsaddu.vi v8, v16, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv32i16.i16( - %0, - %1, - i16 9, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vsaddu_vi_nxv1i32_nxv1i32_i32( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vi_nxv1i32_nxv1i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vsaddu.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv1i32.i32( - undef, - %0, - i32 9, - i32 %1) - - ret %a -} - -define @intrinsic_vsaddu_mask_vi_nxv1i32_nxv1i32_i32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv1i32_nxv1i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vsaddu.vi v8, v9, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv1i32.i32( - %0, - %1, - i32 9, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vsaddu_vi_nxv2i32_nxv2i32_i32( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vi_nxv2i32_nxv2i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vsaddu.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv2i32.i32( - undef, - %0, - i32 9, - i32 %1) - - ret %a -} - -define @intrinsic_vsaddu_mask_vi_nxv2i32_nxv2i32_i32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv2i32_nxv2i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vsaddu.vi v8, v9, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv2i32.i32( - %0, - %1, - i32 9, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vsaddu_vi_nxv4i32_nxv4i32_i32( %0, i32 %1) nounwind { -; 
CHECK-LABEL: intrinsic_vsaddu_vi_nxv4i32_nxv4i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vsaddu.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv4i32.i32( - undef, - %0, - i32 9, - i32 %1) - - ret %a -} - -define @intrinsic_vsaddu_mask_vi_nxv4i32_nxv4i32_i32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv4i32_nxv4i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vsaddu.vi v8, v10, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv4i32.i32( - %0, - %1, - i32 9, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vsaddu_vi_nxv8i32_nxv8i32_i32( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vi_nxv8i32_nxv8i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vsaddu.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv8i32.i32( - undef, - %0, - i32 9, - i32 %1) - - ret %a -} - -define @intrinsic_vsaddu_mask_vi_nxv8i32_nxv8i32_i32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv8i32_nxv8i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vsaddu.vi v8, v12, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv8i32.i32( - %0, - %1, - i32 9, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vsaddu_vi_nxv16i32_nxv16i32_i32( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vi_nxv16i32_nxv16i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vsaddu.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv16i32.i32( - undef, - %0, - i32 9, - i32 %1) - - ret %a -} - -define @intrinsic_vsaddu_mask_vi_nxv16i32_nxv16i32_i32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv16i32_nxv16i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vsaddu.vi v8, v16, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv16i32.i32( - %0, - %1, - i32 9, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vsaddu_vi_nxv1i64_nxv1i64_i64( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vi_nxv1i64_nxv1i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vsaddu.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv1i64.i64( - undef, - %0, - i64 9, - i32 %1) - - ret %a -} - -define @intrinsic_vsaddu_mask_vi_nxv1i64_nxv1i64_i64( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv1i64_nxv1i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vsaddu.vi v8, v9, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv1i64.i64( - %0, - %1, - i64 9, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vsaddu_vi_nxv2i64_nxv2i64_i64( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vi_nxv2i64_nxv2i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vsaddu.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv2i64.i64( - undef, - %0, - i64 9, - i32 %1) - - ret %a -} - -define @intrinsic_vsaddu_mask_vi_nxv2i64_nxv2i64_i64( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv2i64_nxv2i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: 
vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vsaddu.vi v8, v10, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv2i64.i64( - %0, - %1, - i64 9, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vsaddu_vi_nxv4i64_nxv4i64_i64( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vi_nxv4i64_nxv4i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vsaddu.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv4i64.i64( - undef, - %0, - i64 9, - i32 %1) - - ret %a -} - -define @intrinsic_vsaddu_mask_vi_nxv4i64_nxv4i64_i64( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv4i64_nxv4i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vsaddu.vi v8, v12, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv4i64.i64( - %0, - %1, - i64 9, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vsaddu_vi_nxv8i64_nxv8i64_i64( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vi_nxv8i64_nxv8i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vsaddu.vi v8, v8, 9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.nxv8i64.i64( - undef, - %0, - i64 9, - i32 %1) - - ret %a -} - -define @intrinsic_vsaddu_mask_vi_nxv8i64_nxv8i64_i64( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv8i64_nxv8i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vsaddu.vi v8, v16, 9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsaddu.mask.nxv8i64.i64( - %0, - %1, - i64 9, - %2, - i32 %3, i32 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu.ll similarity index 83% rename from llvm/test/CodeGen/RISCV/rvv/vsaddu-rv64.ll rename to llvm/test/CodeGen/RISCV/rvv/vsaddu.ll index 077e45f6408d96..57a89d6fe7d234 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu.ll @@ -1,14 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.riscv.vsaddu.nxv1i8.nxv1i8( , , , - i64); + iXLen) -define @intrinsic_vsaddu_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vv_nxv1i8_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma @@ -19,7 +21,7 @@ entry: undef, %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -29,10 +31,10 @@ declare @llvm.riscv.vsaddu.mask.nxv1i8.nxv1i8( , , , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv1i8_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -44,7 +46,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -53,9 +55,9 @@ declare 
@llvm.riscv.vsaddu.nxv2i8.nxv2i8( , , , - i64); + iXLen) -define @intrinsic_vsaddu_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vv_nxv2i8_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma @@ -66,7 +68,7 @@ entry: undef, %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -76,10 +78,10 @@ declare @llvm.riscv.vsaddu.mask.nxv2i8.nxv2i8( , , , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv2i8_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -91,7 +93,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -100,9 +102,9 @@ declare @llvm.riscv.vsaddu.nxv4i8.nxv4i8( , , , - i64); + iXLen) -define @intrinsic_vsaddu_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vv_nxv4i8_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma @@ -113,7 +115,7 @@ entry: undef, %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -123,10 +125,10 @@ declare @llvm.riscv.vsaddu.mask.nxv4i8.nxv4i8( , , , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv4i8_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -138,7 +140,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -147,9 +149,9 @@ declare @llvm.riscv.vsaddu.nxv8i8.nxv8i8( , , , - i64); + iXLen) -define @intrinsic_vsaddu_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vv_nxv8i8_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma @@ -160,7 +162,7 @@ entry: undef, %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -170,10 +172,10 @@ declare @llvm.riscv.vsaddu.mask.nxv8i8.nxv8i8( , , , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv8i8_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -185,7 +187,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -194,9 +196,9 @@ declare @llvm.riscv.vsaddu.nxv16i8.nxv16i8( , , , - i64); + iXLen) -define @intrinsic_vsaddu_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vv_nxv16i8_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma @@ -207,7 +209,7 @@ entry: undef, %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -217,10 +219,10 @@ declare @llvm.riscv.vsaddu.mask.nxv16i8.nxv16i8( , , , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, i64 %4) nounwind { +define 
@intrinsic_vsaddu_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv16i8_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -232,7 +234,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -241,9 +243,9 @@ declare @llvm.riscv.vsaddu.nxv32i8.nxv32i8( , , , - i64); + iXLen) -define @intrinsic_vsaddu_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vv_nxv32i8_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma @@ -254,7 +256,7 @@ entry: undef, %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -264,10 +266,10 @@ declare @llvm.riscv.vsaddu.mask.nxv32i8.nxv32i8( , , , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv32i8_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -279,7 +281,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -288,9 +290,9 @@ declare @llvm.riscv.vsaddu.nxv64i8.nxv64i8( , , , - i64); + iXLen) -define @intrinsic_vsaddu_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma @@ -301,7 +303,7 @@ entry: undef, %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -311,10 +313,10 @@ declare @llvm.riscv.vsaddu.mask.nxv64i8.nxv64i8( , , , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8r.v v24, (a0) @@ -327,7 +329,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -336,9 +338,9 @@ declare @llvm.riscv.vsaddu.nxv1i16.nxv1i16( , , , - i64); + iXLen) -define @intrinsic_vsaddu_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vv_nxv1i16_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma @@ -349,7 +351,7 @@ entry: undef, %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -359,10 +361,10 @@ declare @llvm.riscv.vsaddu.mask.nxv1i16.nxv1i16( , , , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv1i16_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -374,7 +376,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -383,9 +385,9 @@ declare @llvm.riscv.vsaddu.nxv2i16.nxv2i16( , , , - i64); + iXLen) -define @intrinsic_vsaddu_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: 
intrinsic_vsaddu_vv_nxv2i16_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma @@ -396,7 +398,7 @@ entry: undef, %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -406,10 +408,10 @@ declare @llvm.riscv.vsaddu.mask.nxv2i16.nxv2i16( , , , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv2i16_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -421,7 +423,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -430,9 +432,9 @@ declare @llvm.riscv.vsaddu.nxv4i16.nxv4i16( , , , - i64); + iXLen) -define @intrinsic_vsaddu_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vv_nxv4i16_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma @@ -443,7 +445,7 @@ entry: undef, %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -453,10 +455,10 @@ declare @llvm.riscv.vsaddu.mask.nxv4i16.nxv4i16( , , , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv4i16_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -468,7 +470,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -477,9 +479,9 @@ declare @llvm.riscv.vsaddu.nxv8i16.nxv8i16( , , , - i64); + iXLen) -define @intrinsic_vsaddu_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vv_nxv8i16_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma @@ -490,7 +492,7 @@ entry: undef, %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -500,10 +502,10 @@ declare @llvm.riscv.vsaddu.mask.nxv8i16.nxv8i16( , , , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv8i16_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -515,7 +517,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -524,9 +526,9 @@ declare @llvm.riscv.vsaddu.nxv16i16.nxv16i16( , , , - i64); + iXLen) -define @intrinsic_vsaddu_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vv_nxv16i16_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma @@ -537,7 +539,7 @@ entry: undef, %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -547,10 +549,10 @@ declare @llvm.riscv.vsaddu.mask.nxv16i16.nxv16i16( , , , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv16i16_nxv16i16_nxv16i16: ; CHECK: # %bb.0: 
# %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -562,7 +564,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -571,9 +573,9 @@ declare @llvm.riscv.vsaddu.nxv32i16.nxv32i16( , , , - i64); + iXLen) -define @intrinsic_vsaddu_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma @@ -584,7 +586,7 @@ entry: undef, %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -594,10 +596,10 @@ declare @llvm.riscv.vsaddu.mask.nxv32i16.nxv32i16( , , , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re16.v v24, (a0) @@ -610,7 +612,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -619,9 +621,9 @@ declare @llvm.riscv.vsaddu.nxv1i32.nxv1i32( , , , - i64); + iXLen) -define @intrinsic_vsaddu_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vv_nxv1i32_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma @@ -632,7 +634,7 @@ entry: undef, %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -642,10 +644,10 @@ declare @llvm.riscv.vsaddu.mask.nxv1i32.nxv1i32( , , , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv1i32_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -657,7 +659,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -666,9 +668,9 @@ declare @llvm.riscv.vsaddu.nxv2i32.nxv2i32( , , , - i64); + iXLen) -define @intrinsic_vsaddu_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vv_nxv2i32_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma @@ -679,7 +681,7 @@ entry: undef, %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -689,10 +691,10 @@ declare @llvm.riscv.vsaddu.mask.nxv2i32.nxv2i32( , , , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv2i32_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -704,7 +706,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -713,9 +715,9 @@ declare @llvm.riscv.vsaddu.nxv4i32.nxv4i32( , , , - i64); + iXLen) -define @intrinsic_vsaddu_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vv_nxv4i32_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma @@ -726,7 +728,7 @@ entry: undef, %0, %1, - i64 %2) + iXLen 
%2) ret %a } @@ -736,10 +738,10 @@ declare @llvm.riscv.vsaddu.mask.nxv4i32.nxv4i32( , , , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv4i32_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -751,7 +753,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -760,9 +762,9 @@ declare @llvm.riscv.vsaddu.nxv8i32.nxv8i32( , , , - i64); + iXLen) -define @intrinsic_vsaddu_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vv_nxv8i32_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma @@ -773,7 +775,7 @@ entry: undef, %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -783,10 +785,10 @@ declare @llvm.riscv.vsaddu.mask.nxv8i32.nxv8i32( , , , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv8i32_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -798,7 +800,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -807,9 +809,9 @@ declare @llvm.riscv.vsaddu.nxv16i32.nxv16i32( , , , - i64); + iXLen) -define @intrinsic_vsaddu_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma @@ -820,7 +822,7 @@ entry: undef, %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -830,10 +832,10 @@ declare @llvm.riscv.vsaddu.mask.nxv16i32.nxv16i32( , , , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re32.v v24, (a0) @@ -846,7 +848,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -855,9 +857,9 @@ declare @llvm.riscv.vsaddu.nxv1i64.nxv1i64( , , , - i64); + iXLen) -define @intrinsic_vsaddu_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vv_nxv1i64_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -868,7 +870,7 @@ entry: undef, %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -878,10 +880,10 @@ declare @llvm.riscv.vsaddu.mask.nxv1i64.nxv1i64( , , , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv1i64_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -893,7 +895,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -902,9 +904,9 @@ declare 
@llvm.riscv.vsaddu.nxv2i64.nxv2i64( , , , - i64); + iXLen) -define @intrinsic_vsaddu_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vv_nxv2i64_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma @@ -915,7 +917,7 @@ entry: undef, %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -925,10 +927,10 @@ declare @llvm.riscv.vsaddu.mask.nxv2i64.nxv2i64( , , , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv2i64_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -940,7 +942,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -949,9 +951,9 @@ declare @llvm.riscv.vsaddu.nxv4i64.nxv4i64( , , , - i64); + iXLen) -define @intrinsic_vsaddu_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vv_nxv4i64_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -962,7 +964,7 @@ entry: undef, %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -972,10 +974,10 @@ declare @llvm.riscv.vsaddu.mask.nxv4i64.nxv4i64( , , , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv4i64_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -987,7 +989,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -996,9 +998,9 @@ declare @llvm.riscv.vsaddu.nxv8i64.nxv8i64( , , , - i64); + iXLen) -define @intrinsic_vsaddu_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -1009,7 +1011,7 @@ entry: undef, %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -1019,10 +1021,10 @@ declare @llvm.riscv.vsaddu.mask.nxv8i64.nxv8i64( , , , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re64.v v24, (a0) @@ -1035,7 +1037,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1044,9 +1046,9 @@ declare @llvm.riscv.vsaddu.nxv1i8.i8( , , i8, - i64); + iXLen) -define @intrinsic_vsaddu_vx_nxv1i8_nxv1i8_i8( %0, i8 %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vx_nxv1i8_nxv1i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vx_nxv1i8_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma @@ -1057,7 +1059,7 @@ entry: undef, %0, i8 %1, - i64 %2) + iXLen %2) ret %a } @@ -1067,10 +1069,10 @@ declare @llvm.riscv.vsaddu.mask.nxv1i8.i8( , i8, , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vx_nxv1i8_nxv1i8_i8( %0, %1, i8 %2, %3, i64 %4) 
nounwind { +define @intrinsic_vsaddu_mask_vx_nxv1i8_nxv1i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv1i8_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu @@ -1082,7 +1084,7 @@ entry: %1, i8 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1091,9 +1093,9 @@ declare @llvm.riscv.vsaddu.nxv2i8.i8( , , i8, - i64); + iXLen) -define @intrinsic_vsaddu_vx_nxv2i8_nxv2i8_i8( %0, i8 %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vx_nxv2i8_nxv2i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vx_nxv2i8_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma @@ -1104,7 +1106,7 @@ entry: undef, %0, i8 %1, - i64 %2) + iXLen %2) ret %a } @@ -1114,10 +1116,10 @@ declare @llvm.riscv.vsaddu.mask.nxv2i8.i8( , i8, , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vx_nxv2i8_nxv2i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vx_nxv2i8_nxv2i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv2i8_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu @@ -1129,7 +1131,7 @@ entry: %1, i8 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1138,9 +1140,9 @@ declare @llvm.riscv.vsaddu.nxv4i8.i8( , , i8, - i64); + iXLen) -define @intrinsic_vsaddu_vx_nxv4i8_nxv4i8_i8( %0, i8 %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vx_nxv4i8_nxv4i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vx_nxv4i8_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma @@ -1151,7 +1153,7 @@ entry: undef, %0, i8 %1, - i64 %2) + iXLen %2) ret %a } @@ -1161,10 +1163,10 @@ declare @llvm.riscv.vsaddu.mask.nxv4i8.i8( , i8, , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vx_nxv4i8_nxv4i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vx_nxv4i8_nxv4i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv4i8_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu @@ -1176,7 +1178,7 @@ entry: %1, i8 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1185,9 +1187,9 @@ declare @llvm.riscv.vsaddu.nxv8i8.i8( , , i8, - i64); + iXLen) -define @intrinsic_vsaddu_vx_nxv8i8_nxv8i8_i8( %0, i8 %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vx_nxv8i8_nxv8i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vx_nxv8i8_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma @@ -1198,7 +1200,7 @@ entry: undef, %0, i8 %1, - i64 %2) + iXLen %2) ret %a } @@ -1208,10 +1210,10 @@ declare @llvm.riscv.vsaddu.mask.nxv8i8.i8( , i8, , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vx_nxv8i8_nxv8i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vx_nxv8i8_nxv8i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv8i8_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu @@ -1223,7 +1225,7 @@ entry: %1, i8 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1232,9 +1234,9 @@ declare @llvm.riscv.vsaddu.nxv16i8.i8( , , i8, - i64); + iXLen) -define @intrinsic_vsaddu_vx_nxv16i8_nxv16i8_i8( %0, i8 %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vx_nxv16i8_nxv16i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vx_nxv16i8_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: 
vsetvli zero, a1, e8, m2, ta, ma @@ -1245,7 +1247,7 @@ entry: undef, %0, i8 %1, - i64 %2) + iXLen %2) ret %a } @@ -1255,10 +1257,10 @@ declare @llvm.riscv.vsaddu.mask.nxv16i8.i8( , i8, , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vx_nxv16i8_nxv16i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vx_nxv16i8_nxv16i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv16i8_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu @@ -1270,7 +1272,7 @@ entry: %1, i8 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1279,9 +1281,9 @@ declare @llvm.riscv.vsaddu.nxv32i8.i8( , , i8, - i64); + iXLen) -define @intrinsic_vsaddu_vx_nxv32i8_nxv32i8_i8( %0, i8 %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vx_nxv32i8_nxv32i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vx_nxv32i8_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma @@ -1292,7 +1294,7 @@ entry: undef, %0, i8 %1, - i64 %2) + iXLen %2) ret %a } @@ -1302,10 +1304,10 @@ declare @llvm.riscv.vsaddu.mask.nxv32i8.i8( , i8, , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vx_nxv32i8_nxv32i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vx_nxv32i8_nxv32i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv32i8_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu @@ -1317,7 +1319,7 @@ entry: %1, i8 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1326,9 +1328,9 @@ declare @llvm.riscv.vsaddu.nxv64i8.i8( , , i8, - i64); + iXLen) -define @intrinsic_vsaddu_vx_nxv64i8_nxv64i8_i8( %0, i8 %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vx_nxv64i8_nxv64i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vx_nxv64i8_nxv64i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma @@ -1339,7 +1341,7 @@ entry: undef, %0, i8 %1, - i64 %2) + iXLen %2) ret %a } @@ -1349,10 +1351,10 @@ declare @llvm.riscv.vsaddu.mask.nxv64i8.i8( , i8, , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vx_nxv64i8_nxv64i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vx_nxv64i8_nxv64i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv64i8_nxv64i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu @@ -1364,7 +1366,7 @@ entry: %1, i8 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1373,9 +1375,9 @@ declare @llvm.riscv.vsaddu.nxv1i16.i16( , , i16, - i64); + iXLen) -define @intrinsic_vsaddu_vx_nxv1i16_nxv1i16_i16( %0, i16 %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vx_nxv1i16_nxv1i16_i16( %0, i16 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vx_nxv1i16_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma @@ -1386,7 +1388,7 @@ entry: undef, %0, i16 %1, - i64 %2) + iXLen %2) ret %a } @@ -1396,10 +1398,10 @@ declare @llvm.riscv.vsaddu.mask.nxv1i16.i16( , i16, , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vx_nxv1i16_nxv1i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vx_nxv1i16_nxv1i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv1i16_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu @@ -1411,7 +1413,7 @@ entry: %1, i16 %2, %3, - i64 %4, i64 1) + iXLen %4, 
iXLen 1) ret %a } @@ -1420,9 +1422,9 @@ declare @llvm.riscv.vsaddu.nxv2i16.i16( , , i16, - i64); + iXLen) -define @intrinsic_vsaddu_vx_nxv2i16_nxv2i16_i16( %0, i16 %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vx_nxv2i16_nxv2i16_i16( %0, i16 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vx_nxv2i16_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma @@ -1433,7 +1435,7 @@ entry: undef, %0, i16 %1, - i64 %2) + iXLen %2) ret %a } @@ -1443,10 +1445,10 @@ declare @llvm.riscv.vsaddu.mask.nxv2i16.i16( , i16, , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vx_nxv2i16_nxv2i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vx_nxv2i16_nxv2i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv2i16_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu @@ -1458,7 +1460,7 @@ entry: %1, i16 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1467,9 +1469,9 @@ declare @llvm.riscv.vsaddu.nxv4i16.i16( , , i16, - i64); + iXLen) -define @intrinsic_vsaddu_vx_nxv4i16_nxv4i16_i16( %0, i16 %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vx_nxv4i16_nxv4i16_i16( %0, i16 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vx_nxv4i16_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma @@ -1480,7 +1482,7 @@ entry: undef, %0, i16 %1, - i64 %2) + iXLen %2) ret %a } @@ -1490,10 +1492,10 @@ declare @llvm.riscv.vsaddu.mask.nxv4i16.i16( , i16, , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vx_nxv4i16_nxv4i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vx_nxv4i16_nxv4i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv4i16_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu @@ -1505,7 +1507,7 @@ entry: %1, i16 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1514,9 +1516,9 @@ declare @llvm.riscv.vsaddu.nxv8i16.i16( , , i16, - i64); + iXLen) -define @intrinsic_vsaddu_vx_nxv8i16_nxv8i16_i16( %0, i16 %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vx_nxv8i16_nxv8i16_i16( %0, i16 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vx_nxv8i16_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma @@ -1527,7 +1529,7 @@ entry: undef, %0, i16 %1, - i64 %2) + iXLen %2) ret %a } @@ -1537,10 +1539,10 @@ declare @llvm.riscv.vsaddu.mask.nxv8i16.i16( , i16, , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vx_nxv8i16_nxv8i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vx_nxv8i16_nxv8i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv8i16_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu @@ -1552,7 +1554,7 @@ entry: %1, i16 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1561,9 +1563,9 @@ declare @llvm.riscv.vsaddu.nxv16i16.i16( , , i16, - i64); + iXLen) -define @intrinsic_vsaddu_vx_nxv16i16_nxv16i16_i16( %0, i16 %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vx_nxv16i16_nxv16i16_i16( %0, i16 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vx_nxv16i16_nxv16i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma @@ -1574,7 +1576,7 @@ entry: undef, %0, i16 %1, - i64 %2) + iXLen %2) ret %a } @@ -1584,10 +1586,10 @@ declare @llvm.riscv.vsaddu.mask.nxv16i16.i16( , i16, , - i64, - 
i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vx_nxv16i16_nxv16i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vx_nxv16i16_nxv16i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv16i16_nxv16i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu @@ -1599,7 +1601,7 @@ entry: %1, i16 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1608,9 +1610,9 @@ declare @llvm.riscv.vsaddu.nxv32i16.i16( , , i16, - i64); + iXLen) -define @intrinsic_vsaddu_vx_nxv32i16_nxv32i16_i16( %0, i16 %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vx_nxv32i16_nxv32i16_i16( %0, i16 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vx_nxv32i16_nxv32i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma @@ -1621,7 +1623,7 @@ entry: undef, %0, i16 %1, - i64 %2) + iXLen %2) ret %a } @@ -1631,10 +1633,10 @@ declare @llvm.riscv.vsaddu.mask.nxv32i16.i16( , i16, , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vx_nxv32i16_nxv32i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vx_nxv32i16_nxv32i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv32i16_nxv32i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu @@ -1646,7 +1648,7 @@ entry: %1, i16 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1655,9 +1657,9 @@ declare @llvm.riscv.vsaddu.nxv1i32.i32( , , i32, - i64); + iXLen) -define @intrinsic_vsaddu_vx_nxv1i32_nxv1i32_i32( %0, i32 %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vx_nxv1i32_nxv1i32_i32( %0, i32 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vx_nxv1i32_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma @@ -1668,7 +1670,7 @@ entry: undef, %0, i32 %1, - i64 %2) + iXLen %2) ret %a } @@ -1678,10 +1680,10 @@ declare @llvm.riscv.vsaddu.mask.nxv1i32.i32( , i32, , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vx_nxv1i32_nxv1i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vx_nxv1i32_nxv1i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv1i32_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu @@ -1693,7 +1695,7 @@ entry: %1, i32 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1702,9 +1704,9 @@ declare @llvm.riscv.vsaddu.nxv2i32.i32( , , i32, - i64); + iXLen) -define @intrinsic_vsaddu_vx_nxv2i32_nxv2i32_i32( %0, i32 %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vx_nxv2i32_nxv2i32_i32( %0, i32 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vx_nxv2i32_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma @@ -1715,7 +1717,7 @@ entry: undef, %0, i32 %1, - i64 %2) + iXLen %2) ret %a } @@ -1725,10 +1727,10 @@ declare @llvm.riscv.vsaddu.mask.nxv2i32.i32( , i32, , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vx_nxv2i32_nxv2i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vx_nxv2i32_nxv2i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv2i32_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu @@ -1740,7 +1742,7 @@ entry: %1, i32 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1749,9 +1751,9 @@ declare @llvm.riscv.vsaddu.nxv4i32.i32( , , i32, - i64); + iXLen) -define 
@intrinsic_vsaddu_vx_nxv4i32_nxv4i32_i32( %0, i32 %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vx_nxv4i32_nxv4i32_i32( %0, i32 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vx_nxv4i32_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma @@ -1762,7 +1764,7 @@ entry: undef, %0, i32 %1, - i64 %2) + iXLen %2) ret %a } @@ -1772,10 +1774,10 @@ declare @llvm.riscv.vsaddu.mask.nxv4i32.i32( , i32, , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vx_nxv4i32_nxv4i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vx_nxv4i32_nxv4i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv4i32_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu @@ -1787,7 +1789,7 @@ entry: %1, i32 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1796,9 +1798,9 @@ declare @llvm.riscv.vsaddu.nxv8i32.i32( , , i32, - i64); + iXLen) -define @intrinsic_vsaddu_vx_nxv8i32_nxv8i32_i32( %0, i32 %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vx_nxv8i32_nxv8i32_i32( %0, i32 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vx_nxv8i32_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma @@ -1809,7 +1811,7 @@ entry: undef, %0, i32 %1, - i64 %2) + iXLen %2) ret %a } @@ -1819,10 +1821,10 @@ declare @llvm.riscv.vsaddu.mask.nxv8i32.i32( , i32, , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vx_nxv8i32_nxv8i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vx_nxv8i32_nxv8i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv8i32_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu @@ -1834,7 +1836,7 @@ entry: %1, i32 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1843,9 +1845,9 @@ declare @llvm.riscv.vsaddu.nxv16i32.i32( , , i32, - i64); + iXLen) -define @intrinsic_vsaddu_vx_nxv16i32_nxv16i32_i32( %0, i32 %1, i64 %2) nounwind { +define @intrinsic_vsaddu_vx_nxv16i32_nxv16i32_i32( %0, i32 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vx_nxv16i32_nxv16i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma @@ -1856,7 +1858,7 @@ entry: undef, %0, i32 %1, - i64 %2) + iXLen %2) ret %a } @@ -1866,10 +1868,10 @@ declare @llvm.riscv.vsaddu.mask.nxv16i32.i32( , i32, , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vsaddu_mask_vx_nxv16i32_nxv16i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +define @intrinsic_vsaddu_mask_vx_nxv16i32_nxv16i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv16i32_nxv16i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu @@ -1881,7 +1883,7 @@ entry: %1, i32 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1890,20 +1892,32 @@ declare @llvm.riscv.vsaddu.nxv1i64.i64( , , i64, - i64); - -define @intrinsic_vsaddu_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv1i64_nxv1i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vsaddu.vx v8, v8, a0 -; CHECK-NEXT: ret + iXLen) + +define @intrinsic_vsaddu_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vsaddu_vx_nxv1i64_nxv1i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: 
vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsaddu.vv v8, v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsaddu_vx_nxv1i64_nxv1i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vsaddu.vx v8, v8, a0 +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vsaddu.nxv1i64.i64( undef, %0, i64 %1, - i64 %2) + iXLen %2) ret %a } @@ -1913,22 +1927,34 @@ declare @llvm.riscv.vsaddu.mask.nxv1i64.i64( , i64, , - i64, - i64); - -define @intrinsic_vsaddu_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv1i64_nxv1i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vsaddu.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret + iXLen, + iXLen) + +define @intrinsic_vsaddu_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vsaddu_mask_vx_nxv1i64_nxv1i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsaddu.vv v8, v9, v10, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsaddu_mask_vx_nxv1i64_nxv1i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; RV64-NEXT: vsaddu.vx v8, v9, a0, v0.t +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vsaddu.mask.nxv1i64.i64( %0, %1, i64 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1937,20 +1963,32 @@ declare @llvm.riscv.vsaddu.nxv2i64.i64( , , i64, - i64); - -define @intrinsic_vsaddu_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv2i64_nxv2i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; CHECK-NEXT: vsaddu.vx v8, v8, a0 -; CHECK-NEXT: ret + iXLen) + +define @intrinsic_vsaddu_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vsaddu_vx_nxv2i64_nxv2i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsaddu.vv v8, v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsaddu_vx_nxv2i64_nxv2i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vsaddu.vx v8, v8, a0 +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vsaddu.nxv2i64.i64( undef, %0, i64 %1, - i64 %2) + iXLen %2) ret %a } @@ -1960,22 +1998,34 @@ declare @llvm.riscv.vsaddu.mask.nxv2i64.i64( , i64, , - i64, - i64); - -define @intrinsic_vsaddu_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv2i64_nxv2i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vsaddu.vx v8, v10, a0, v0.t -; CHECK-NEXT: ret + iXLen, + iXLen) + +define @intrinsic_vsaddu_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vsaddu_mask_vx_nxv2i64_nxv2i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vlse64.v v12, (a0), zero +; 
RV32-NEXT: vsaddu.vv v8, v10, v12, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsaddu_mask_vx_nxv2i64_nxv2i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; RV64-NEXT: vsaddu.vx v8, v10, a0, v0.t +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vsaddu.mask.nxv2i64.i64( %0, %1, i64 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1984,20 +2034,32 @@ declare @llvm.riscv.vsaddu.nxv4i64.i64( , , i64, - i64); - -define @intrinsic_vsaddu_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv4i64_nxv4i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; CHECK-NEXT: vsaddu.vx v8, v8, a0 -; CHECK-NEXT: ret + iXLen) + +define @intrinsic_vsaddu_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vsaddu_vx_nxv4i64_nxv4i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsaddu.vv v8, v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsaddu_vx_nxv4i64_nxv4i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vsaddu.vx v8, v8, a0 +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vsaddu.nxv4i64.i64( undef, %0, i64 %1, - i64 %2) + iXLen %2) ret %a } @@ -2007,22 +2069,34 @@ declare @llvm.riscv.vsaddu.mask.nxv4i64.i64( , i64, , - i64, - i64); - -define @intrinsic_vsaddu_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv4i64_nxv4i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vsaddu.vx v8, v12, a0, v0.t -; CHECK-NEXT: ret + iXLen, + iXLen) + +define @intrinsic_vsaddu_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vsaddu_mask_vx_nxv4i64_nxv4i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsaddu.vv v8, v12, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsaddu_mask_vx_nxv4i64_nxv4i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV64-NEXT: vsaddu.vx v8, v12, a0, v0.t +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vsaddu.mask.nxv4i64.i64( %0, %1, i64 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -2031,20 +2105,32 @@ declare @llvm.riscv.vsaddu.nxv8i64.i64( , , i64, - i64); - -define @intrinsic_vsaddu_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_vx_nxv8i64_nxv8i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vsaddu.vx v8, v8, a0 -; CHECK-NEXT: ret + iXLen) + +define @intrinsic_vsaddu_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vsaddu_vx_nxv8i64_nxv8i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsaddu.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: 
intrinsic_vsaddu_vx_nxv8i64_nxv8i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsaddu.vx v8, v8, a0 +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vsaddu.nxv8i64.i64( undef, %0, i64 %1, - i64 %2) + iXLen %2) ret %a } @@ -2054,27 +2140,39 @@ declare @llvm.riscv.vsaddu.mask.nxv8i64.i64( , i64, , - i64, - i64); - -define @intrinsic_vsaddu_mask_vx_nxv8i64_nxv8i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv8i64_nxv8i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vsaddu.vx v8, v16, a0, v0.t -; CHECK-NEXT: ret + iXLen, + iXLen) + +define @intrinsic_vsaddu_mask_vx_nxv8i64_nxv8i64_i64( %0, %1, i64 %2, %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vsaddu_mask_vx_nxv8i64_nxv8i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vsaddu.vv v8, v16, v24, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsaddu_mask_vx_nxv8i64_nxv8i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vsaddu.vx v8, v16, a0, v0.t +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vsaddu.mask.nxv8i64.i64( %0, %1, i64 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } -define @intrinsic_vsaddu_vi_nxv1i8_nxv1i8_i8( %0, i64 %1) nounwind { +define @intrinsic_vsaddu_vi_nxv1i8_nxv1i8_i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vi_nxv1i8_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma @@ -2085,12 +2183,12 @@ entry: undef, %0, i8 9, - i64 %1) + iXLen %1) ret %a } -define @intrinsic_vsaddu_mask_vi_nxv1i8_nxv1i8_i8( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vsaddu_mask_vi_nxv1i8_nxv1i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv1i8_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -2102,12 +2200,12 @@ entry: %1, i8 9, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsaddu_vi_nxv2i8_nxv2i8_i8( %0, i64 %1) nounwind { +define @intrinsic_vsaddu_vi_nxv2i8_nxv2i8_i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vi_nxv2i8_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma @@ -2118,12 +2216,12 @@ entry: undef, %0, i8 9, - i64 %1) + iXLen %1) ret %a } -define @intrinsic_vsaddu_mask_vi_nxv2i8_nxv2i8_i8( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vsaddu_mask_vi_nxv2i8_nxv2i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv2i8_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -2135,12 +2233,12 @@ entry: %1, i8 9, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsaddu_vi_nxv4i8_nxv4i8_i8( %0, i64 %1) nounwind { +define @intrinsic_vsaddu_vi_nxv4i8_nxv4i8_i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vi_nxv4i8_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma @@ -2151,12 +2249,12 @@ entry: undef, %0, i8 9, - i64 %1) + iXLen %1) ret %a } -define @intrinsic_vsaddu_mask_vi_nxv4i8_nxv4i8_i8( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vsaddu_mask_vi_nxv4i8_nxv4i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv4i8_nxv4i8_i8: ; CHECK: # %bb.0: # 
%entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -2168,12 +2266,12 @@ entry: %1, i8 9, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsaddu_vi_nxv8i8_nxv8i8_i8( %0, i64 %1) nounwind { +define @intrinsic_vsaddu_vi_nxv8i8_nxv8i8_i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vi_nxv8i8_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma @@ -2184,12 +2282,12 @@ entry: undef, %0, i8 9, - i64 %1) + iXLen %1) ret %a } -define @intrinsic_vsaddu_mask_vi_nxv8i8_nxv8i8_i8( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vsaddu_mask_vi_nxv8i8_nxv8i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv8i8_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -2201,12 +2299,12 @@ entry: %1, i8 9, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsaddu_vi_nxv16i8_nxv16i8_i8( %0, i64 %1) nounwind { +define @intrinsic_vsaddu_vi_nxv16i8_nxv16i8_i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vi_nxv16i8_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma @@ -2217,12 +2315,12 @@ entry: undef, %0, i8 9, - i64 %1) + iXLen %1) ret %a } -define @intrinsic_vsaddu_mask_vi_nxv16i8_nxv16i8_i8( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vsaddu_mask_vi_nxv16i8_nxv16i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv16i8_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -2234,12 +2332,12 @@ entry: %1, i8 9, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsaddu_vi_nxv32i8_nxv32i8_i8( %0, i64 %1) nounwind { +define @intrinsic_vsaddu_vi_nxv32i8_nxv32i8_i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vi_nxv32i8_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma @@ -2250,12 +2348,12 @@ entry: undef, %0, i8 9, - i64 %1) + iXLen %1) ret %a } -define @intrinsic_vsaddu_mask_vi_nxv32i8_nxv32i8_i8( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vsaddu_mask_vi_nxv32i8_nxv32i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv32i8_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -2267,12 +2365,12 @@ entry: %1, i8 9, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsaddu_vi_nxv64i8_nxv64i8_i8( %0, i64 %1) nounwind { +define @intrinsic_vsaddu_vi_nxv64i8_nxv64i8_i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vi_nxv64i8_nxv64i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma @@ -2283,12 +2381,12 @@ entry: undef, %0, i8 9, - i64 %1) + iXLen %1) ret %a } -define @intrinsic_vsaddu_mask_vi_nxv64i8_nxv64i8_i8( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vsaddu_mask_vi_nxv64i8_nxv64i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv64i8_nxv64i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu @@ -2300,12 +2398,12 @@ entry: %1, i8 9, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsaddu_vi_nxv1i16_nxv1i16_i16( %0, i64 %1) nounwind { +define @intrinsic_vsaddu_vi_nxv1i16_nxv1i16_i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vi_nxv1i16_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma @@ -2316,12 +2414,12 @@ entry: undef, %0, i16 9, - i64 %1) + iXLen %1) ret %a } -define 
@intrinsic_vsaddu_mask_vi_nxv1i16_nxv1i16_i16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vsaddu_mask_vi_nxv1i16_nxv1i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv1i16_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -2333,12 +2431,12 @@ entry: %1, i16 9, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsaddu_vi_nxv2i16_nxv2i16_i16( %0, i64 %1) nounwind { +define @intrinsic_vsaddu_vi_nxv2i16_nxv2i16_i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vi_nxv2i16_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma @@ -2349,12 +2447,12 @@ entry: undef, %0, i16 9, - i64 %1) + iXLen %1) ret %a } -define @intrinsic_vsaddu_mask_vi_nxv2i16_nxv2i16_i16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vsaddu_mask_vi_nxv2i16_nxv2i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv2i16_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -2366,12 +2464,12 @@ entry: %1, i16 9, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsaddu_vi_nxv4i16_nxv4i16_i16( %0, i64 %1) nounwind { +define @intrinsic_vsaddu_vi_nxv4i16_nxv4i16_i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vi_nxv4i16_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma @@ -2382,12 +2480,12 @@ entry: undef, %0, i16 9, - i64 %1) + iXLen %1) ret %a } -define @intrinsic_vsaddu_mask_vi_nxv4i16_nxv4i16_i16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vsaddu_mask_vi_nxv4i16_nxv4i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv4i16_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -2399,12 +2497,12 @@ entry: %1, i16 9, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsaddu_vi_nxv8i16_nxv8i16_i16( %0, i64 %1) nounwind { +define @intrinsic_vsaddu_vi_nxv8i16_nxv8i16_i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vi_nxv8i16_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma @@ -2415,12 +2513,12 @@ entry: undef, %0, i16 9, - i64 %1) + iXLen %1) ret %a } -define @intrinsic_vsaddu_mask_vi_nxv8i16_nxv8i16_i16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vsaddu_mask_vi_nxv8i16_nxv8i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv8i16_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -2432,12 +2530,12 @@ entry: %1, i16 9, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsaddu_vi_nxv16i16_nxv16i16_i16( %0, i64 %1) nounwind { +define @intrinsic_vsaddu_vi_nxv16i16_nxv16i16_i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vi_nxv16i16_nxv16i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma @@ -2448,12 +2546,12 @@ entry: undef, %0, i16 9, - i64 %1) + iXLen %1) ret %a } -define @intrinsic_vsaddu_mask_vi_nxv16i16_nxv16i16_i16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vsaddu_mask_vi_nxv16i16_nxv16i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv16i16_nxv16i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -2465,12 +2563,12 @@ entry: %1, i16 9, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsaddu_vi_nxv32i16_nxv32i16_i16( %0, i64 %1) nounwind { +define 
@intrinsic_vsaddu_vi_nxv32i16_nxv32i16_i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vi_nxv32i16_nxv32i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma @@ -2481,12 +2579,12 @@ entry: undef, %0, i16 9, - i64 %1) + iXLen %1) ret %a } -define @intrinsic_vsaddu_mask_vi_nxv32i16_nxv32i16_i16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vsaddu_mask_vi_nxv32i16_nxv32i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv32i16_nxv32i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -2498,12 +2596,12 @@ entry: %1, i16 9, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsaddu_vi_nxv1i32_nxv1i32_i32( %0, i64 %1) nounwind { +define @intrinsic_vsaddu_vi_nxv1i32_nxv1i32_i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vi_nxv1i32_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma @@ -2514,12 +2612,12 @@ entry: undef, %0, i32 9, - i64 %1) + iXLen %1) ret %a } -define @intrinsic_vsaddu_mask_vi_nxv1i32_nxv1i32_i32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vsaddu_mask_vi_nxv1i32_nxv1i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv1i32_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -2531,12 +2629,12 @@ entry: %1, i32 9, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsaddu_vi_nxv2i32_nxv2i32_i32( %0, i64 %1) nounwind { +define @intrinsic_vsaddu_vi_nxv2i32_nxv2i32_i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vi_nxv2i32_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma @@ -2547,12 +2645,12 @@ entry: undef, %0, i32 9, - i64 %1) + iXLen %1) ret %a } -define @intrinsic_vsaddu_mask_vi_nxv2i32_nxv2i32_i32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vsaddu_mask_vi_nxv2i32_nxv2i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv2i32_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -2564,12 +2662,12 @@ entry: %1, i32 9, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsaddu_vi_nxv4i32_nxv4i32_i32( %0, i64 %1) nounwind { +define @intrinsic_vsaddu_vi_nxv4i32_nxv4i32_i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vi_nxv4i32_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma @@ -2580,12 +2678,12 @@ entry: undef, %0, i32 9, - i64 %1) + iXLen %1) ret %a } -define @intrinsic_vsaddu_mask_vi_nxv4i32_nxv4i32_i32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vsaddu_mask_vi_nxv4i32_nxv4i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv4i32_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -2597,12 +2695,12 @@ entry: %1, i32 9, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsaddu_vi_nxv8i32_nxv8i32_i32( %0, i64 %1) nounwind { +define @intrinsic_vsaddu_vi_nxv8i32_nxv8i32_i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vi_nxv8i32_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma @@ -2613,12 +2711,12 @@ entry: undef, %0, i32 9, - i64 %1) + iXLen %1) ret %a } -define @intrinsic_vsaddu_mask_vi_nxv8i32_nxv8i32_i32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vsaddu_mask_vi_nxv8i32_nxv8i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: 
intrinsic_vsaddu_mask_vi_nxv8i32_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -2630,12 +2728,12 @@ entry: %1, i32 9, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsaddu_vi_nxv16i32_nxv16i32_i32( %0, i64 %1) nounwind { +define @intrinsic_vsaddu_vi_nxv16i32_nxv16i32_i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vi_nxv16i32_nxv16i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma @@ -2646,12 +2744,12 @@ entry: undef, %0, i32 9, - i64 %1) + iXLen %1) ret %a } -define @intrinsic_vsaddu_mask_vi_nxv16i32_nxv16i32_i32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vsaddu_mask_vi_nxv16i32_nxv16i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv16i32_nxv16i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -2663,12 +2761,12 @@ entry: %1, i32 9, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsaddu_vi_nxv1i64_nxv1i64_i64( %0, i64 %1) nounwind { +define @intrinsic_vsaddu_vi_nxv1i64_nxv1i64_i64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vi_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -2679,12 +2777,12 @@ entry: undef, %0, i64 9, - i64 %1) + iXLen %1) ret %a } -define @intrinsic_vsaddu_mask_vi_nxv1i64_nxv1i64_i64( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vsaddu_mask_vi_nxv1i64_nxv1i64_i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -2696,12 +2794,12 @@ entry: %1, i64 9, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsaddu_vi_nxv2i64_nxv2i64_i64( %0, i64 %1) nounwind { +define @intrinsic_vsaddu_vi_nxv2i64_nxv2i64_i64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vi_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma @@ -2712,12 +2810,12 @@ entry: undef, %0, i64 9, - i64 %1) + iXLen %1) ret %a } -define @intrinsic_vsaddu_mask_vi_nxv2i64_nxv2i64_i64( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vsaddu_mask_vi_nxv2i64_nxv2i64_i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -2729,12 +2827,12 @@ entry: %1, i64 9, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsaddu_vi_nxv4i64_nxv4i64_i64( %0, i64 %1) nounwind { +define @intrinsic_vsaddu_vi_nxv4i64_nxv4i64_i64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vi_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -2745,12 +2843,12 @@ entry: undef, %0, i64 9, - i64 %1) + iXLen %1) ret %a } -define @intrinsic_vsaddu_mask_vi_nxv4i64_nxv4i64_i64( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vsaddu_mask_vi_nxv4i64_nxv4i64_i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -2762,12 +2860,12 @@ entry: %1, i64 9, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vsaddu_vi_nxv8i64_nxv8i64_i64( %0, i64 %1) nounwind { +define @intrinsic_vsaddu_vi_nxv8i64_nxv8i64_i64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_vi_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma 
@@ -2778,12 +2876,12 @@ entry: undef, %0, i64 9, - i64 %1) + iXLen %1) ret %a } -define @intrinsic_vsaddu_mask_vi_nxv8i64_nxv8i64_i64( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vsaddu_mask_vi_nxv8i64_nxv8i64_i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vi_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -2795,7 +2893,7 @@ entry: %1, i64 9, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll deleted file mode 100644 index e7d8ae635f75c2..00000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll +++ /dev/null @@ -1,2166 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -; RUN: not --crash llc -mtriple=riscv32 -mattr=+zve64d 2>&1 \ -; RUN: < %s | FileCheck %s --check-prefixes=ZVE64D - -; ZVE64D: LLVM ERROR: Cannot select: intrinsic %llvm.riscv.vsmul - -declare @llvm.riscv.vsmul.nxv1i8.nxv1i8( - , - , - , - i32, i32); - -define @intrinsic_vsmul_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vv_nxv1i8_nxv1i8_nxv1i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vsmul.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv1i8.nxv1i8( - undef, - %0, - %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv1i8.nxv1i8( - , - , - , - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv1i8_nxv1i8_nxv1i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vsmul.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv1i8.nxv1i8( - %0, - %1, - %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv2i8.nxv2i8( - , - , - , - i32, i32); - -define @intrinsic_vsmul_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vv_nxv2i8_nxv2i8_nxv2i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vsmul.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv2i8.nxv2i8( - undef, - %0, - %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv2i8.nxv2i8( - , - , - , - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv2i8_nxv2i8_nxv2i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vsmul.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv2i8.nxv2i8( - %0, - %1, - %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv4i8.nxv4i8( - , - , - , - i32, i32); - -define @intrinsic_vsmul_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vv_nxv4i8_nxv4i8_nxv4i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vsmul.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv4i8.nxv4i8( - undef, - %0, - %1, - i32 0, i32 %2) - - ret %a -} - -declare 
@llvm.riscv.vsmul.mask.nxv4i8.nxv4i8( - , - , - , - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv4i8_nxv4i8_nxv4i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vsmul.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv4i8.nxv4i8( - %0, - %1, - %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv8i8.nxv8i8( - , - , - , - i32, i32); - -define @intrinsic_vsmul_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vv_nxv8i8_nxv8i8_nxv8i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vsmul.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv8i8.nxv8i8( - undef, - %0, - %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv8i8.nxv8i8( - , - , - , - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv8i8_nxv8i8_nxv8i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vsmul.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv8i8.nxv8i8( - %0, - %1, - %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv16i8.nxv16i8( - , - , - , - i32, i32); - -define @intrinsic_vsmul_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vv_nxv16i8_nxv16i8_nxv16i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vsmul.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv16i8.nxv16i8( - undef, - %0, - %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv16i8.nxv16i8( - , - , - , - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv16i8_nxv16i8_nxv16i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vsmul.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv16i8.nxv16i8( - %0, - %1, - %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv32i8.nxv32i8( - , - , - , - i32, i32); - -define @intrinsic_vsmul_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vv_nxv32i8_nxv32i8_nxv32i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vsmul.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv32i8.nxv32i8( - undef, - %0, - %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv32i8.nxv32i8( - , - , - , - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv32i8_nxv32i8_nxv32i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vsmul.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv32i8.nxv32i8( - %0, - %1, - %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare 
@llvm.riscv.vsmul.nxv64i8.nxv64i8( - , - , - , - i32, i32); - -define @intrinsic_vsmul_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vv_nxv64i8_nxv64i8_nxv64i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vsmul.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv64i8.nxv64i8( - undef, - %0, - %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv64i8.nxv64i8( - , - , - , - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv64i8_nxv64i8_nxv64i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8r.v v24, (a0) -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu -; CHECK-NEXT: vsmul.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv64i8.nxv64i8( - %0, - %1, - %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv1i16.nxv1i16( - , - , - , - i32, i32); - -define @intrinsic_vsmul_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vv_nxv1i16_nxv1i16_nxv1i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vsmul.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv1i16.nxv1i16( - undef, - %0, - %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv1i16.nxv1i16( - , - , - , - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv1i16_nxv1i16_nxv1i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vsmul.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv1i16.nxv1i16( - %0, - %1, - %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv2i16.nxv2i16( - , - , - , - i32, i32); - -define @intrinsic_vsmul_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vv_nxv2i16_nxv2i16_nxv2i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vsmul.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv2i16.nxv2i16( - undef, - %0, - %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv2i16.nxv2i16( - , - , - , - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv2i16_nxv2i16_nxv2i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vsmul.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv2i16.nxv2i16( - %0, - %1, - %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv4i16.nxv4i16( - , - , - , - i32, i32); - -define @intrinsic_vsmul_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vv_nxv4i16_nxv4i16_nxv4i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vsmul.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv4i16.nxv4i16( - undef, - %0, - %1, - i32 0, i32 %2) - - ret %a -} - 
-declare @llvm.riscv.vsmul.mask.nxv4i16.nxv4i16( - , - , - , - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv4i16_nxv4i16_nxv4i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vsmul.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv4i16.nxv4i16( - %0, - %1, - %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv8i16.nxv8i16( - , - , - , - i32, i32); - -define @intrinsic_vsmul_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vv_nxv8i16_nxv8i16_nxv8i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vsmul.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv8i16.nxv8i16( - undef, - %0, - %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv8i16.nxv8i16( - , - , - , - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv8i16_nxv8i16_nxv8i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vsmul.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv8i16.nxv8i16( - %0, - %1, - %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv16i16.nxv16i16( - , - , - , - i32, i32); - -define @intrinsic_vsmul_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vv_nxv16i16_nxv16i16_nxv16i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vsmul.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv16i16.nxv16i16( - undef, - %0, - %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv16i16.nxv16i16( - , - , - , - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv16i16_nxv16i16_nxv16i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vsmul.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv16i16.nxv16i16( - %0, - %1, - %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv32i16.nxv32i16( - , - , - , - i32, i32); - -define @intrinsic_vsmul_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vv_nxv32i16_nxv32i16_nxv32i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vsmul.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv32i16.nxv32i16( - undef, - %0, - %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv32i16.nxv32i16( - , - , - , - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv32i16_nxv32i16_nxv32i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vsmul.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vsmul.mask.nxv32i16.nxv32i16( - %0, - %1, - %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv1i32.nxv1i32( - , - , - , - i32, i32); - -define @intrinsic_vsmul_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vv_nxv1i32_nxv1i32_nxv1i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vsmul.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv1i32.nxv1i32( - undef, - %0, - %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv1i32.nxv1i32( - , - , - , - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv1i32_nxv1i32_nxv1i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vsmul.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv1i32.nxv1i32( - %0, - %1, - %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv2i32.nxv2i32( - , - , - , - i32, i32); - -define @intrinsic_vsmul_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vv_nxv2i32_nxv2i32_nxv2i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vsmul.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv2i32.nxv2i32( - undef, - %0, - %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv2i32.nxv2i32( - , - , - , - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv2i32_nxv2i32_nxv2i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vsmul.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv2i32.nxv2i32( - %0, - %1, - %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv4i32.nxv4i32( - , - , - , - i32, i32); - -define @intrinsic_vsmul_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vv_nxv4i32_nxv4i32_nxv4i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vsmul.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv4i32.nxv4i32( - undef, - %0, - %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv4i32.nxv4i32( - , - , - , - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv4i32_nxv4i32_nxv4i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vsmul.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv4i32.nxv4i32( - %0, - %1, - %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv8i32.nxv8i32( - , - , - , - i32, i32); - -define @intrinsic_vsmul_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vv_nxv8i32_nxv8i32_nxv8i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vsmul.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vsmul.nxv8i32.nxv8i32( - undef, - %0, - %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv8i32.nxv8i32( - , - , - , - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv8i32_nxv8i32_nxv8i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vsmul.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv8i32.nxv8i32( - %0, - %1, - %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv16i32.nxv16i32( - , - , - , - i32, i32); - -define @intrinsic_vsmul_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vv_nxv16i32_nxv16i32_nxv16i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vsmul.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv16i32.nxv16i32( - undef, - %0, - %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv16i32.nxv16i32( - , - , - , - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv16i32_nxv16i32_nxv16i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vsmul.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv16i32.nxv16i32( - %0, - %1, - %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv1i64.nxv1i64( - , - , - , - i32, i32); - -define @intrinsic_vsmul_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vv_nxv1i64_nxv1i64_nxv1i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vsmul.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv1i64.nxv1i64( - undef, - %0, - %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv1i64.nxv1i64( - , - , - , - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv1i64_nxv1i64_nxv1i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vsmul.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv1i64.nxv1i64( - %0, - %1, - %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv2i64.nxv2i64( - , - , - , - i32, i32); - -define @intrinsic_vsmul_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vv_nxv2i64_nxv2i64_nxv2i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vsmul.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv2i64.nxv2i64( - undef, - %0, - %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv2i64.nxv2i64( - , - , - , - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv2i64_nxv2i64_nxv2i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: 
vsmul.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv2i64.nxv2i64( - %0, - %1, - %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv4i64.nxv4i64( - , - , - , - i32, i32); - -define @intrinsic_vsmul_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vv_nxv4i64_nxv4i64_nxv4i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vsmul.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv4i64.nxv4i64( - undef, - %0, - %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv4i64.nxv4i64( - , - , - , - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv4i64_nxv4i64_nxv4i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vsmul.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv4i64.nxv4i64( - %0, - %1, - %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv8i64.nxv8i64( - , - , - , - i32, i32); - -define @intrinsic_vsmul_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vv_nxv8i64_nxv8i64_nxv8i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vsmul.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv8i64.nxv8i64( - undef, - %0, - %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv8i64.nxv8i64( - , - , - , - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv8i64_nxv8i64_nxv8i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vsmul.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv8i64.nxv8i64( - %0, - %1, - %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv1i8.i8( - , - , - i8, - i32, i32); - -define @intrinsic_vsmul_vx_nxv1i8_nxv1i8_i8( %0, i8 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv1i8_nxv1i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vsmul.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv1i8.i8( - undef, - %0, - i8 %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv1i8.i8( - , - , - i8, - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vx_nxv1i8_nxv1i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv1i8_nxv1i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu -; CHECK-NEXT: vsmul.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv1i8.i8( - %0, - %1, - i8 %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv2i8.i8( - , - , - i8, - i32, i32); - -define @intrinsic_vsmul_vx_nxv2i8_nxv2i8_i8( %0, i8 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv2i8_nxv2i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vsmul.vx v8, v8, a0 -; 
CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv2i8.i8( - undef, - %0, - i8 %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv2i8.i8( - , - , - i8, - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vx_nxv2i8_nxv2i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv2i8_nxv2i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu -; CHECK-NEXT: vsmul.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv2i8.i8( - %0, - %1, - i8 %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv4i8.i8( - , - , - i8, - i32, i32); - -define @intrinsic_vsmul_vx_nxv4i8_nxv4i8_i8( %0, i8 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv4i8_nxv4i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vsmul.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv4i8.i8( - undef, - %0, - i8 %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv4i8.i8( - , - , - i8, - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vx_nxv4i8_nxv4i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv4i8_nxv4i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu -; CHECK-NEXT: vsmul.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv4i8.i8( - %0, - %1, - i8 %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv8i8.i8( - , - , - i8, - i32, i32); - -define @intrinsic_vsmul_vx_nxv8i8_nxv8i8_i8( %0, i8 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv8i8_nxv8i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vsmul.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv8i8.i8( - undef, - %0, - i8 %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv8i8.i8( - , - , - i8, - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vx_nxv8i8_nxv8i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv8i8_nxv8i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu -; CHECK-NEXT: vsmul.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv8i8.i8( - %0, - %1, - i8 %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv16i8.i8( - , - , - i8, - i32, i32); - -define @intrinsic_vsmul_vx_nxv16i8_nxv16i8_i8( %0, i8 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv16i8_nxv16i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vsmul.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv16i8.i8( - undef, - %0, - i8 %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv16i8.i8( - , - , - i8, - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vx_nxv16i8_nxv16i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv16i8_nxv16i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu -; CHECK-NEXT: vsmul.vx v8, v10, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv16i8.i8( - %0, - %1, - i8 %2, - %3, - i32 0, i32 %4, 
i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv32i8.i8( - , - , - i8, - i32, i32); - -define @intrinsic_vsmul_vx_nxv32i8_nxv32i8_i8( %0, i8 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv32i8_nxv32i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vsmul.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv32i8.i8( - undef, - %0, - i8 %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv32i8.i8( - , - , - i8, - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vx_nxv32i8_nxv32i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv32i8_nxv32i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu -; CHECK-NEXT: vsmul.vx v8, v12, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv32i8.i8( - %0, - %1, - i8 %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv64i8.i8( - , - , - i8, - i32, i32); - -define @intrinsic_vsmul_vx_nxv64i8_nxv64i8_i8( %0, i8 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv64i8_nxv64i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vsmul.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv64i8.i8( - undef, - %0, - i8 %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv64i8.i8( - , - , - i8, - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vx_nxv64i8_nxv64i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv64i8_nxv64i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu -; CHECK-NEXT: vsmul.vx v8, v16, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv64i8.i8( - %0, - %1, - i8 %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv1i16.i16( - , - , - i16, - i32, i32); - -define @intrinsic_vsmul_vx_nxv1i16_nxv1i16_i16( %0, i16 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv1i16_nxv1i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vsmul.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv1i16.i16( - undef, - %0, - i16 %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv1i16.i16( - , - , - i16, - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vx_nxv1i16_nxv1i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv1i16_nxv1i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vsmul.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv1i16.i16( - %0, - %1, - i16 %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv2i16.i16( - , - , - i16, - i32, i32); - -define @intrinsic_vsmul_vx_nxv2i16_nxv2i16_i16( %0, i16 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv2i16_nxv2i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vsmul.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv2i16.i16( - undef, - %0, - i16 %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv2i16.i16( - , - , - i16, - , - i32, 
i32, i32); - -define @intrinsic_vsmul_mask_vx_nxv2i16_nxv2i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv2i16_nxv2i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vsmul.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv2i16.i16( - %0, - %1, - i16 %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv4i16.i16( - , - , - i16, - i32, i32); - -define @intrinsic_vsmul_vx_nxv4i16_nxv4i16_i16( %0, i16 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv4i16_nxv4i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vsmul.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv4i16.i16( - undef, - %0, - i16 %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv4i16.i16( - , - , - i16, - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vx_nxv4i16_nxv4i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv4i16_nxv4i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vsmul.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv4i16.i16( - %0, - %1, - i16 %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv8i16.i16( - , - , - i16, - i32, i32); - -define @intrinsic_vsmul_vx_nxv8i16_nxv8i16_i16( %0, i16 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv8i16_nxv8i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vsmul.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv8i16.i16( - undef, - %0, - i16 %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv8i16.i16( - , - , - i16, - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vx_nxv8i16_nxv8i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv8i16_nxv8i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vsmul.vx v8, v10, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv8i16.i16( - %0, - %1, - i16 %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv16i16.i16( - , - , - i16, - i32, i32); - -define @intrinsic_vsmul_vx_nxv16i16_nxv16i16_i16( %0, i16 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv16i16_nxv16i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vsmul.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv16i16.i16( - undef, - %0, - i16 %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv16i16.i16( - , - , - i16, - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vx_nxv16i16_nxv16i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv16i16_nxv16i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vsmul.vx v8, v12, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv16i16.i16( - %0, - %1, - i16 %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv32i16.i16( - , - , - i16, - i32, i32); - 
-define @intrinsic_vsmul_vx_nxv32i16_nxv32i16_i16( %0, i16 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv32i16_nxv32i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vsmul.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv32i16.i16( - undef, - %0, - i16 %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv32i16.i16( - , - , - i16, - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vx_nxv32i16_nxv32i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv32i16_nxv32i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vsmul.vx v8, v16, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv32i16.i16( - %0, - %1, - i16 %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv1i32.i32( - , - , - i32, i32, i32); - -define @intrinsic_vsmul_vx_nxv1i32_nxv1i32_i32( %0, i32 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv1i32_nxv1i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vsmul.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv1i32.i32( - undef, - %0, - i32 %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv1i32.i32( - , - , - i32, - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vx_nxv1i32_nxv1i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv1i32_nxv1i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vsmul.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv1i32.i32( - %0, - %1, - i32 %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv2i32.i32( - , - , - i32, i32, i32); - -define @intrinsic_vsmul_vx_nxv2i32_nxv2i32_i32( %0, i32 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv2i32_nxv2i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vsmul.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv2i32.i32( - undef, - %0, - i32 %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv2i32.i32( - , - , - i32, - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vx_nxv2i32_nxv2i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv2i32_nxv2i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vsmul.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv2i32.i32( - %0, - %1, - i32 %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv4i32.i32( - , - , - i32, i32, i32); - -define @intrinsic_vsmul_vx_nxv4i32_nxv4i32_i32( %0, i32 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv4i32_nxv4i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vsmul.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv4i32.i32( - undef, - %0, - i32 %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv4i32.i32( - , - , - i32, - , - i32, i32, i32); - -define 
@intrinsic_vsmul_mask_vx_nxv4i32_nxv4i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv4i32_nxv4i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vsmul.vx v8, v10, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv4i32.i32( - %0, - %1, - i32 %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv8i32.i32( - , - , - i32, i32, i32); - -define @intrinsic_vsmul_vx_nxv8i32_nxv8i32_i32( %0, i32 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv8i32_nxv8i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vsmul.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv8i32.i32( - undef, - %0, - i32 %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv8i32.i32( - , - , - i32, - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vx_nxv8i32_nxv8i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv8i32_nxv8i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vsmul.vx v8, v12, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv8i32.i32( - %0, - %1, - i32 %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv16i32.i32( - , - , - i32, i32, i32); - -define @intrinsic_vsmul_vx_nxv16i32_nxv16i32_i32( %0, i32 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv16i32_nxv16i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vsmul.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv16i32.i32( - undef, - %0, - i32 %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv16i32.i32( - , - , - i32, - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vx_nxv16i32_nxv16i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv16i32_nxv16i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vsmul.vx v8, v16, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv16i32.i32( - %0, - %1, - i32 %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv1i64.i64( - , - , - i64, - i32, i32); - -define @intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v9, (a0), zero -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsmul.vv v8, v8, v9 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv1i64.i64( - undef, - %0, - i64 %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv1i64.i64( - , - , - i64, - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv1i64_nxv1i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; 
CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsmul.vv v8, v9, v10, v0.t -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv1i64.i64( - %0, - %1, - i64 %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv2i64.i64( - , - , - i64, - i32, i32); - -define @intrinsic_vsmul_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv2i64_nxv2i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma -; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsmul.vv v8, v8, v10 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv2i64.i64( - undef, - %0, - i64 %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv2i64.i64( - , - , - i64, - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv2i64_nxv2i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vlse64.v v12, (a0), zero -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsmul.vv v8, v10, v12, v0.t -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv2i64.i64( - %0, - %1, - i64 %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv4i64.i64( - , - , - i64, - i32, i32); - -define @intrinsic_vsmul_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv4i64_nxv4i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; CHECK-NEXT: vlse64.v v12, (a0), zero -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsmul.vv v8, v8, v12 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv4i64.i64( - undef, - %0, - i64 %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv4i64.i64( - , - , - i64, - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv4i64_nxv4i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vlse64.v v16, (a0), zero -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsmul.vv v8, v12, v16, v0.t -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv4i64.i64( - %0, - %1, - i64 %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vsmul.nxv8i64.i64( - , - , - i64, - i32, i32); - -define @intrinsic_vsmul_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv8i64_nxv8i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vlse64.v v16, (a0), zero -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsmul.vv v8, 
v8, v16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.nxv8i64.i64( - undef, - %0, - i64 %1, - i32 0, i32 %2) - - ret %a -} - -declare @llvm.riscv.vsmul.mask.nxv8i64.i64( - , - , - i64, - , - i32, i32, i32); - -define @intrinsic_vsmul_mask_vx_nxv8i64_nxv8i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv8i64_nxv8i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vlse64.v v24, (a0), zero -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsmul.vv v8, v16, v24, v0.t -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vsmul.mask.nxv8i64.i64( - %0, - %1, - i64 %2, - %3, - i32 0, i32 %4, i32 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vsmul-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsmul.ll similarity index 79% rename from llvm/test/CodeGen/RISCV/rvv/vsmul-rv64.ll rename to llvm/test/CodeGen/RISCV/rvv/vsmul.ll index 66bc5c9103a482..bc53bce889ddb7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsmul-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsmul.ll @@ -1,8 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -; RUN: not --crash llc -mtriple=riscv64 -mattr=+zve64d 2>&1 \ -; RUN: < %s | FileCheck %s --check-prefixes=ZVE64D +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: sed 's/iXLen/i64/g' %s | not --crash llc -mtriple=riscv64 \ +; RUN: -mattr=+zve64d 2>&1 | FileCheck %s --check-prefixes=ZVE64D ; ZVE64D: LLVM ERROR: Cannot select: intrinsic %llvm.riscv.vsmul @@ -10,9 +12,9 @@ declare @llvm.riscv.vsmul.nxv1i8.nxv1i8( , , , - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, i64 %2) nounwind { +define @intrinsic_vsmul_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vv_nxv1i8_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -24,7 +26,7 @@ entry: undef, %0, %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -34,9 +36,9 @@ declare @llvm.riscv.vsmul.mask.nxv1i8.nxv1i8( , , , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv1i8_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -49,7 +51,7 @@ entry: %1, %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -58,9 +60,9 @@ declare @llvm.riscv.vsmul.nxv2i8.nxv2i8( , , , - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, i64 %2) nounwind { +define @intrinsic_vsmul_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vv_nxv2i8_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -72,7 +74,7 @@ entry: undef, %0, %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -82,9 +84,9 @@ declare @llvm.riscv.vsmul.mask.nxv2i8.nxv2i8( , , , - i64, i64, i64); + iXLen, iXLen, iXLen) -define 
@intrinsic_vsmul_mask_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv2i8_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -97,7 +99,7 @@ entry: %1, %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -106,9 +108,9 @@ declare @llvm.riscv.vsmul.nxv4i8.nxv4i8( , , , - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, i64 %2) nounwind { +define @intrinsic_vsmul_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vv_nxv4i8_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -120,7 +122,7 @@ entry: undef, %0, %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -130,9 +132,9 @@ declare @llvm.riscv.vsmul.mask.nxv4i8.nxv4i8( , , , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv4i8_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -145,7 +147,7 @@ entry: %1, %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -154,9 +156,9 @@ declare @llvm.riscv.vsmul.nxv8i8.nxv8i8( , , , - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, i64 %2) nounwind { +define @intrinsic_vsmul_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vv_nxv8i8_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -168,7 +170,7 @@ entry: undef, %0, %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -178,9 +180,9 @@ declare @llvm.riscv.vsmul.mask.nxv8i8.nxv8i8( , , , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv8i8_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -193,7 +195,7 @@ entry: %1, %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -202,9 +204,9 @@ declare @llvm.riscv.vsmul.nxv16i8.nxv16i8( , , , - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, i64 %2) nounwind { +define @intrinsic_vsmul_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vv_nxv16i8_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -216,7 +218,7 @@ entry: undef, %0, %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -226,9 +228,9 @@ declare @llvm.riscv.vsmul.mask.nxv16i8.nxv16i8( , , , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv16i8_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -241,7 +243,7 @@ entry: %1, %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -250,9 +252,9 @@ declare @llvm.riscv.vsmul.nxv32i8.nxv32i8( , , , - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, i64 %2) nounwind { +define @intrinsic_vsmul_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, iXLen 
%2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vv_nxv32i8_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -264,7 +266,7 @@ entry: undef, %0, %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -274,9 +276,9 @@ declare @llvm.riscv.vsmul.mask.nxv32i8.nxv32i8( , , , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv32i8_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -289,7 +291,7 @@ entry: %1, %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -298,9 +300,9 @@ declare @llvm.riscv.vsmul.nxv64i8.nxv64i8( , , , - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, i64 %2) nounwind { +define @intrinsic_vsmul_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -312,7 +314,7 @@ entry: undef, %0, %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -322,9 +324,9 @@ declare @llvm.riscv.vsmul.mask.nxv64i8.nxv64i8( , , , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8r.v v24, (a0) @@ -338,7 +340,7 @@ entry: %1, %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -347,9 +349,9 @@ declare @llvm.riscv.vsmul.nxv1i16.nxv1i16( , , , - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, i64 %2) nounwind { +define @intrinsic_vsmul_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vv_nxv1i16_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -361,7 +363,7 @@ entry: undef, %0, %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -371,9 +373,9 @@ declare @llvm.riscv.vsmul.mask.nxv1i16.nxv1i16( , , , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv1i16_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -386,7 +388,7 @@ entry: %1, %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -395,9 +397,9 @@ declare @llvm.riscv.vsmul.nxv2i16.nxv2i16( , , , - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, i64 %2) nounwind { +define @intrinsic_vsmul_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vv_nxv2i16_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -409,7 +411,7 @@ entry: undef, %0, %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -419,9 +421,9 @@ declare @llvm.riscv.vsmul.mask.nxv2i16.nxv2i16( , , , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv2i16_nxv2i16_nxv2i16: ; 
CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -434,7 +436,7 @@ entry: %1, %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -443,9 +445,9 @@ declare @llvm.riscv.vsmul.nxv4i16.nxv4i16( , , , - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, i64 %2) nounwind { +define @intrinsic_vsmul_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vv_nxv4i16_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -457,7 +459,7 @@ entry: undef, %0, %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -467,9 +469,9 @@ declare @llvm.riscv.vsmul.mask.nxv4i16.nxv4i16( , , , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv4i16_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -482,7 +484,7 @@ entry: %1, %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -491,9 +493,9 @@ declare @llvm.riscv.vsmul.nxv8i16.nxv8i16( , , , - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, i64 %2) nounwind { +define @intrinsic_vsmul_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vv_nxv8i16_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -505,7 +507,7 @@ entry: undef, %0, %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -515,9 +517,9 @@ declare @llvm.riscv.vsmul.mask.nxv8i16.nxv8i16( , , , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv8i16_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -530,7 +532,7 @@ entry: %1, %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -539,9 +541,9 @@ declare @llvm.riscv.vsmul.nxv16i16.nxv16i16( , , , - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, i64 %2) nounwind { +define @intrinsic_vsmul_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vv_nxv16i16_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -553,7 +555,7 @@ entry: undef, %0, %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -563,9 +565,9 @@ declare @llvm.riscv.vsmul.mask.nxv16i16.nxv16i16( , , , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv16i16_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -578,7 +580,7 @@ entry: %1, %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -587,9 +589,9 @@ declare @llvm.riscv.vsmul.nxv32i16.nxv32i16( , , , - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, i64 %2) nounwind { +define @intrinsic_vsmul_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -601,7 +603,7 @@ entry: 
undef, %0, %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -611,9 +613,9 @@ declare @llvm.riscv.vsmul.mask.nxv32i16.nxv32i16( , , , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re16.v v24, (a0) @@ -627,7 +629,7 @@ entry: %1, %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -636,9 +638,9 @@ declare @llvm.riscv.vsmul.nxv1i32.nxv1i32( , , , - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, i64 %2) nounwind { +define @intrinsic_vsmul_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vv_nxv1i32_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -650,7 +652,7 @@ entry: undef, %0, %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -660,9 +662,9 @@ declare @llvm.riscv.vsmul.mask.nxv1i32.nxv1i32( , , , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv1i32_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -675,7 +677,7 @@ entry: %1, %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -684,9 +686,9 @@ declare @llvm.riscv.vsmul.nxv2i32.nxv2i32( , , , - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, i64 %2) nounwind { +define @intrinsic_vsmul_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vv_nxv2i32_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -698,7 +700,7 @@ entry: undef, %0, %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -708,9 +710,9 @@ declare @llvm.riscv.vsmul.mask.nxv2i32.nxv2i32( , , , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv2i32_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -723,7 +725,7 @@ entry: %1, %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -732,9 +734,9 @@ declare @llvm.riscv.vsmul.nxv4i32.nxv4i32( , , , - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, i64 %2) nounwind { +define @intrinsic_vsmul_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vv_nxv4i32_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -746,7 +748,7 @@ entry: undef, %0, %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -756,9 +758,9 @@ declare @llvm.riscv.vsmul.mask.nxv4i32.nxv4i32( , , , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv4i32_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -771,7 +773,7 @@ entry: %1, %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) 
ret %a } @@ -780,9 +782,9 @@ declare @llvm.riscv.vsmul.nxv8i32.nxv8i32( , , , - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, i64 %2) nounwind { +define @intrinsic_vsmul_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vv_nxv8i32_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -794,7 +796,7 @@ entry: undef, %0, %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -804,9 +806,9 @@ declare @llvm.riscv.vsmul.mask.nxv8i32.nxv8i32( , , , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv8i32_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -819,7 +821,7 @@ entry: %1, %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -828,9 +830,9 @@ declare @llvm.riscv.vsmul.nxv16i32.nxv16i32( , , , - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, i64 %2) nounwind { +define @intrinsic_vsmul_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -842,7 +844,7 @@ entry: undef, %0, %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -852,9 +854,9 @@ declare @llvm.riscv.vsmul.mask.nxv16i32.nxv16i32( , , , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re32.v v24, (a0) @@ -868,7 +870,7 @@ entry: %1, %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -877,9 +879,9 @@ declare @llvm.riscv.vsmul.nxv1i64.nxv1i64( , , , - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, i64 %2) nounwind { +define @intrinsic_vsmul_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vv_nxv1i64_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -891,7 +893,7 @@ entry: undef, %0, %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -901,9 +903,9 @@ declare @llvm.riscv.vsmul.mask.nxv1i64.nxv1i64( , , , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv1i64_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -916,7 +918,7 @@ entry: %1, %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -925,9 +927,9 @@ declare @llvm.riscv.vsmul.nxv2i64.nxv2i64( , , , - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, i64 %2) nounwind { +define @intrinsic_vsmul_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vv_nxv2i64_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -939,7 +941,7 @@ entry: undef, %0, %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -949,9 +951,9 @@ declare @llvm.riscv.vsmul.mask.nxv2i64.nxv2i64( , , , - i64, i64, i64); + 
iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv2i64_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -964,7 +966,7 @@ entry: %1, %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -973,9 +975,9 @@ declare @llvm.riscv.vsmul.nxv4i64.nxv4i64( , , , - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, i64 %2) nounwind { +define @intrinsic_vsmul_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vv_nxv4i64_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -987,7 +989,7 @@ entry: undef, %0, %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -997,9 +999,9 @@ declare @llvm.riscv.vsmul.mask.nxv4i64.nxv4i64( , , , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv4i64_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1012,7 +1014,7 @@ entry: %1, %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -1021,9 +1023,9 @@ declare @llvm.riscv.vsmul.nxv8i64.nxv8i64( , , , - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, i64 %2) nounwind { +define @intrinsic_vsmul_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1035,7 +1037,7 @@ entry: undef, %0, %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -1045,9 +1047,9 @@ declare @llvm.riscv.vsmul.mask.nxv8i64.nxv8i64( , , , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re64.v v24, (a0) @@ -1061,7 +1063,7 @@ entry: %1, %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -1070,9 +1072,9 @@ declare @llvm.riscv.vsmul.nxv1i8.i8( , , i8, - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vx_nxv1i8_nxv1i8_i8( %0, i8 %1, i64 %2) nounwind { +define @intrinsic_vsmul_vx_nxv1i8_nxv1i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vx_nxv1i8_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1084,7 +1086,7 @@ entry: undef, %0, i8 %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -1094,9 +1096,9 @@ declare @llvm.riscv.vsmul.mask.nxv1i8.i8( , i8, , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vx_nxv1i8_nxv1i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vx_nxv1i8_nxv1i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv1i8_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1109,7 +1111,7 @@ entry: %1, i8 %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -1118,9 +1120,9 @@ declare @llvm.riscv.vsmul.nxv2i8.i8( , , i8, - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vx_nxv2i8_nxv2i8_i8( %0, i8 %1, i64 %2) nounwind { 
+define @intrinsic_vsmul_vx_nxv2i8_nxv2i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vx_nxv2i8_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1132,7 +1134,7 @@ entry: undef, %0, i8 %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -1142,9 +1144,9 @@ declare @llvm.riscv.vsmul.mask.nxv2i8.i8( , i8, , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vx_nxv2i8_nxv2i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vx_nxv2i8_nxv2i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv2i8_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1157,7 +1159,7 @@ entry: %1, i8 %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -1166,9 +1168,9 @@ declare @llvm.riscv.vsmul.nxv4i8.i8( , , i8, - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vx_nxv4i8_nxv4i8_i8( %0, i8 %1, i64 %2) nounwind { +define @intrinsic_vsmul_vx_nxv4i8_nxv4i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vx_nxv4i8_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1180,7 +1182,7 @@ entry: undef, %0, i8 %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -1190,9 +1192,9 @@ declare @llvm.riscv.vsmul.mask.nxv4i8.i8( , i8, , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vx_nxv4i8_nxv4i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vx_nxv4i8_nxv4i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv4i8_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1205,7 +1207,7 @@ entry: %1, i8 %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -1214,9 +1216,9 @@ declare @llvm.riscv.vsmul.nxv8i8.i8( , , i8, - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vx_nxv8i8_nxv8i8_i8( %0, i8 %1, i64 %2) nounwind { +define @intrinsic_vsmul_vx_nxv8i8_nxv8i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vx_nxv8i8_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1228,7 +1230,7 @@ entry: undef, %0, i8 %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -1238,9 +1240,9 @@ declare @llvm.riscv.vsmul.mask.nxv8i8.i8( , i8, , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vx_nxv8i8_nxv8i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vx_nxv8i8_nxv8i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv8i8_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1253,7 +1255,7 @@ entry: %1, i8 %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -1262,9 +1264,9 @@ declare @llvm.riscv.vsmul.nxv16i8.i8( , , i8, - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vx_nxv16i8_nxv16i8_i8( %0, i8 %1, i64 %2) nounwind { +define @intrinsic_vsmul_vx_nxv16i8_nxv16i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vx_nxv16i8_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1276,7 +1278,7 @@ entry: undef, %0, i8 %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -1286,9 +1288,9 @@ declare @llvm.riscv.vsmul.mask.nxv16i8.i8( , i8, , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vx_nxv16i8_nxv16i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vx_nxv16i8_nxv16i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv16i8_nxv16i8_i8: ; CHECK: # %bb.0: # 
%entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1301,7 +1303,7 @@ entry: %1, i8 %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -1310,9 +1312,9 @@ declare @llvm.riscv.vsmul.nxv32i8.i8( , , i8, - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vx_nxv32i8_nxv32i8_i8( %0, i8 %1, i64 %2) nounwind { +define @intrinsic_vsmul_vx_nxv32i8_nxv32i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vx_nxv32i8_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1324,7 +1326,7 @@ entry: undef, %0, i8 %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -1334,9 +1336,9 @@ declare @llvm.riscv.vsmul.mask.nxv32i8.i8( , i8, , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vx_nxv32i8_nxv32i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vx_nxv32i8_nxv32i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv32i8_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1349,7 +1351,7 @@ entry: %1, i8 %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -1358,9 +1360,9 @@ declare @llvm.riscv.vsmul.nxv64i8.i8( , , i8, - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vx_nxv64i8_nxv64i8_i8( %0, i8 %1, i64 %2) nounwind { +define @intrinsic_vsmul_vx_nxv64i8_nxv64i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vx_nxv64i8_nxv64i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1372,7 +1374,7 @@ entry: undef, %0, i8 %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -1382,9 +1384,9 @@ declare @llvm.riscv.vsmul.mask.nxv64i8.i8( , i8, , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vx_nxv64i8_nxv64i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vx_nxv64i8_nxv64i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv64i8_nxv64i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1397,7 +1399,7 @@ entry: %1, i8 %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -1406,9 +1408,9 @@ declare @llvm.riscv.vsmul.nxv1i16.i16( , , i16, - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vx_nxv1i16_nxv1i16_i16( %0, i16 %1, i64 %2) nounwind { +define @intrinsic_vsmul_vx_nxv1i16_nxv1i16_i16( %0, i16 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vx_nxv1i16_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1420,7 +1422,7 @@ entry: undef, %0, i16 %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -1430,9 +1432,9 @@ declare @llvm.riscv.vsmul.mask.nxv1i16.i16( , i16, , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vx_nxv1i16_nxv1i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vx_nxv1i16_nxv1i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv1i16_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1445,7 +1447,7 @@ entry: %1, i16 %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -1454,9 +1456,9 @@ declare @llvm.riscv.vsmul.nxv2i16.i16( , , i16, - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vx_nxv2i16_nxv2i16_i16( %0, i16 %1, i64 %2) nounwind { +define @intrinsic_vsmul_vx_nxv2i16_nxv2i16_i16( %0, i16 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vx_nxv2i16_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1468,7 +1470,7 @@ entry: undef, %0, i16 %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } 
@@ -1478,9 +1480,9 @@ declare @llvm.riscv.vsmul.mask.nxv2i16.i16( , i16, , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vx_nxv2i16_nxv2i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vx_nxv2i16_nxv2i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv2i16_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1493,7 +1495,7 @@ entry: %1, i16 %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -1502,9 +1504,9 @@ declare @llvm.riscv.vsmul.nxv4i16.i16( , , i16, - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vx_nxv4i16_nxv4i16_i16( %0, i16 %1, i64 %2) nounwind { +define @intrinsic_vsmul_vx_nxv4i16_nxv4i16_i16( %0, i16 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vx_nxv4i16_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1516,7 +1518,7 @@ entry: undef, %0, i16 %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -1526,9 +1528,9 @@ declare @llvm.riscv.vsmul.mask.nxv4i16.i16( , i16, , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vx_nxv4i16_nxv4i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vx_nxv4i16_nxv4i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv4i16_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1541,7 +1543,7 @@ entry: %1, i16 %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -1550,9 +1552,9 @@ declare @llvm.riscv.vsmul.nxv8i16.i16( , , i16, - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vx_nxv8i16_nxv8i16_i16( %0, i16 %1, i64 %2) nounwind { +define @intrinsic_vsmul_vx_nxv8i16_nxv8i16_i16( %0, i16 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vx_nxv8i16_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1564,7 +1566,7 @@ entry: undef, %0, i16 %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -1574,9 +1576,9 @@ declare @llvm.riscv.vsmul.mask.nxv8i16.i16( , i16, , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vx_nxv8i16_nxv8i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vx_nxv8i16_nxv8i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv8i16_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1589,7 +1591,7 @@ entry: %1, i16 %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -1598,9 +1600,9 @@ declare @llvm.riscv.vsmul.nxv16i16.i16( , , i16, - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vx_nxv16i16_nxv16i16_i16( %0, i16 %1, i64 %2) nounwind { +define @intrinsic_vsmul_vx_nxv16i16_nxv16i16_i16( %0, i16 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vx_nxv16i16_nxv16i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1612,7 +1614,7 @@ entry: undef, %0, i16 %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -1622,9 +1624,9 @@ declare @llvm.riscv.vsmul.mask.nxv16i16.i16( , i16, , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vx_nxv16i16_nxv16i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vx_nxv16i16_nxv16i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv16i16_nxv16i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1637,7 +1639,7 @@ entry: %1, i16 %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -1646,9 +1648,9 @@ declare 
@llvm.riscv.vsmul.nxv32i16.i16( , , i16, - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vx_nxv32i16_nxv32i16_i16( %0, i16 %1, i64 %2) nounwind { +define @intrinsic_vsmul_vx_nxv32i16_nxv32i16_i16( %0, i16 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vx_nxv32i16_nxv32i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1660,7 +1662,7 @@ entry: undef, %0, i16 %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -1670,9 +1672,9 @@ declare @llvm.riscv.vsmul.mask.nxv32i16.i16( , i16, , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vx_nxv32i16_nxv32i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vx_nxv32i16_nxv32i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv32i16_nxv32i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1685,7 +1687,7 @@ entry: %1, i16 %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -1694,9 +1696,9 @@ declare @llvm.riscv.vsmul.nxv1i32.i32( , , i32, - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vx_nxv1i32_nxv1i32_i32( %0, i32 %1, i64 %2) nounwind { +define @intrinsic_vsmul_vx_nxv1i32_nxv1i32_i32( %0, i32 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vx_nxv1i32_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1708,7 +1710,7 @@ entry: undef, %0, i32 %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -1718,9 +1720,9 @@ declare @llvm.riscv.vsmul.mask.nxv1i32.i32( , i32, , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vx_nxv1i32_nxv1i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vx_nxv1i32_nxv1i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv1i32_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1733,7 +1735,7 @@ entry: %1, i32 %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -1742,9 +1744,9 @@ declare @llvm.riscv.vsmul.nxv2i32.i32( , , i32, - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vx_nxv2i32_nxv2i32_i32( %0, i32 %1, i64 %2) nounwind { +define @intrinsic_vsmul_vx_nxv2i32_nxv2i32_i32( %0, i32 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vx_nxv2i32_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1756,7 +1758,7 @@ entry: undef, %0, i32 %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -1766,9 +1768,9 @@ declare @llvm.riscv.vsmul.mask.nxv2i32.i32( , i32, , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vx_nxv2i32_nxv2i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vx_nxv2i32_nxv2i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv2i32_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1781,7 +1783,7 @@ entry: %1, i32 %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -1790,9 +1792,9 @@ declare @llvm.riscv.vsmul.nxv4i32.i32( , , i32, - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vx_nxv4i32_nxv4i32_i32( %0, i32 %1, i64 %2) nounwind { +define @intrinsic_vsmul_vx_nxv4i32_nxv4i32_i32( %0, i32 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vx_nxv4i32_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1804,7 +1806,7 @@ entry: undef, %0, i32 %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -1814,9 +1816,9 @@ declare @llvm.riscv.vsmul.mask.nxv4i32.i32( , i32, , - i64, i64, i64); + iXLen, iXLen, iXLen) -define 
@intrinsic_vsmul_mask_vx_nxv4i32_nxv4i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vx_nxv4i32_nxv4i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv4i32_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1829,7 +1831,7 @@ entry: %1, i32 %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -1838,9 +1840,9 @@ declare @llvm.riscv.vsmul.nxv8i32.i32( , , i32, - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vx_nxv8i32_nxv8i32_i32( %0, i32 %1, i64 %2) nounwind { +define @intrinsic_vsmul_vx_nxv8i32_nxv8i32_i32( %0, i32 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vx_nxv8i32_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1852,7 +1854,7 @@ entry: undef, %0, i32 %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -1862,9 +1864,9 @@ declare @llvm.riscv.vsmul.mask.nxv8i32.i32( , i32, , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vx_nxv8i32_nxv8i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vx_nxv8i32_nxv8i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv8i32_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1877,7 +1879,7 @@ entry: %1, i32 %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -1886,9 +1888,9 @@ declare @llvm.riscv.vsmul.nxv16i32.i32( , , i32, - i64, i64); + iXLen, iXLen) -define @intrinsic_vsmul_vx_nxv16i32_nxv16i32_i32( %0, i32 %1, i64 %2) nounwind { +define @intrinsic_vsmul_vx_nxv16i32_nxv16i32_i32( %0, i32 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsmul_vx_nxv16i32_nxv16i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1900,7 +1902,7 @@ entry: undef, %0, i32 %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -1910,9 +1912,9 @@ declare @llvm.riscv.vsmul.mask.nxv16i32.i32( , i32, , - i64, i64, i64); + iXLen, iXLen, iXLen) -define @intrinsic_vsmul_mask_vx_nxv16i32_nxv16i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +define @intrinsic_vsmul_mask_vx_nxv16i32_nxv16i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv16i32_nxv16i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrwi vxrm, 0 @@ -1925,7 +1927,7 @@ entry: %1, i32 %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -1933,21 +1935,35 @@ entry: declare @llvm.riscv.vsmul.nxv1i64.i64( , , - i64, i64, i64); - -define @intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vsmul.vx v8, v8, a0 -; CHECK-NEXT: ret + i64, + iXLen, iXLen) + +define @intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: csrwi vxrm, 0 +; RV32-NEXT: vsmul.vv v8, v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: csrwi vxrm, 0 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vsmul.vx v8, v8, a0 +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vsmul.nxv1i64.i64( 
undef, %0, i64 %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -1957,22 +1973,35 @@ declare @llvm.riscv.vsmul.mask.nxv1i64.i64( , i64, , - i64, i64, i64); - -define @intrinsic_vsmul_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv1i64_nxv1i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vsmul.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret + iXLen, iXLen, iXLen) + +define @intrinsic_vsmul_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vsmul_mask_vx_nxv1i64_nxv1i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: csrwi vxrm, 0 +; RV32-NEXT: vsmul.vv v8, v9, v10, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsmul_mask_vx_nxv1i64_nxv1i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: csrwi vxrm, 0 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; RV64-NEXT: vsmul.vx v8, v9, a0, v0.t +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vsmul.mask.nxv1i64.i64( %0, %1, i64 %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -1980,21 +2009,35 @@ entry: declare @llvm.riscv.vsmul.nxv2i64.i64( , , - i64, i64, i64); - -define @intrinsic_vsmul_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv2i64_nxv2i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; CHECK-NEXT: vsmul.vx v8, v8, a0 -; CHECK-NEXT: ret + i64, + iXLen, iXLen) + +define @intrinsic_vsmul_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vsmul_vx_nxv2i64_nxv2i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: csrwi vxrm, 0 +; RV32-NEXT: vsmul.vv v8, v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsmul_vx_nxv2i64_nxv2i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: csrwi vxrm, 0 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vsmul.vx v8, v8, a0 +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vsmul.nxv2i64.i64( undef, %0, i64 %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -2004,22 +2047,35 @@ declare @llvm.riscv.vsmul.mask.nxv2i64.i64( , i64, , - i64, i64, i64); - -define @intrinsic_vsmul_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv2i64_nxv2i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vsmul.vx v8, v10, a0, v0.t -; CHECK-NEXT: ret + iXLen, iXLen, iXLen) + +define @intrinsic_vsmul_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vsmul_mask_vx_nxv2i64_nxv2i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: csrwi vxrm, 0 +; RV32-NEXT: vsmul.vv v8, v10, v12, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: 
intrinsic_vsmul_mask_vx_nxv2i64_nxv2i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: csrwi vxrm, 0 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; RV64-NEXT: vsmul.vx v8, v10, a0, v0.t +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vsmul.mask.nxv2i64.i64( %0, %1, i64 %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -2027,21 +2083,35 @@ entry: declare @llvm.riscv.vsmul.nxv4i64.i64( , , - i64, i64, i64); - -define @intrinsic_vsmul_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv4i64_nxv4i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; CHECK-NEXT: vsmul.vx v8, v8, a0 -; CHECK-NEXT: ret + i64, + iXLen, iXLen) + +define @intrinsic_vsmul_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vsmul_vx_nxv4i64_nxv4i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: csrwi vxrm, 0 +; RV32-NEXT: vsmul.vv v8, v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsmul_vx_nxv4i64_nxv4i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: csrwi vxrm, 0 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vsmul.vx v8, v8, a0 +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vsmul.nxv4i64.i64( undef, %0, i64 %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -2051,22 +2121,35 @@ declare @llvm.riscv.vsmul.mask.nxv4i64.i64( , i64, , - i64, i64, i64); - -define @intrinsic_vsmul_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv4i64_nxv4i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vsmul.vx v8, v12, a0, v0.t -; CHECK-NEXT: ret + iXLen, iXLen, iXLen) + +define @intrinsic_vsmul_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vsmul_mask_vx_nxv4i64_nxv4i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: csrwi vxrm, 0 +; RV32-NEXT: vsmul.vv v8, v12, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsmul_mask_vx_nxv4i64_nxv4i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: csrwi vxrm, 0 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV64-NEXT: vsmul.vx v8, v12, a0, v0.t +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vsmul.mask.nxv4i64.i64( %0, %1, i64 %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } @@ -2074,21 +2157,35 @@ entry: declare @llvm.riscv.vsmul.nxv8i64.i64( , , - i64, i64, i64); - -define @intrinsic_vsmul_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsmul_vx_nxv8i64_nxv8i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vsmul.vx v8, v8, a0 -; CHECK-NEXT: ret + i64, + iXLen, iXLen) + +define @intrinsic_vsmul_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vsmul_vx_nxv8i64_nxv8i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi 
a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: csrwi vxrm, 0 +; RV32-NEXT: vsmul.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsmul_vx_nxv8i64_nxv8i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: csrwi vxrm, 0 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsmul.vx v8, v8, a0 +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vsmul.nxv8i64.i64( undef, %0, i64 %1, - i64 0, i64 %2) + iXLen 0, iXLen %2) ret %a } @@ -2098,22 +2195,35 @@ declare @llvm.riscv.vsmul.mask.nxv8i64.i64( , i64, , - i64, i64, i64); - -define @intrinsic_vsmul_mask_vx_nxv8i64_nxv8i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv8i64_nxv8i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vsmul.vx v8, v16, a0, v0.t -; CHECK-NEXT: ret + iXLen, iXLen, iXLen) + +define @intrinsic_vsmul_mask_vx_nxv8i64_nxv8i64_i64( %0, %1, i64 %2, %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vsmul_mask_vx_nxv8i64_nxv8i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: csrwi vxrm, 0 +; RV32-NEXT: vsmul.vv v8, v16, v24, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsmul_mask_vx_nxv8i64_nxv8i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: csrwi vxrm, 0 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vsmul.vx v8, v16, a0, v0.t +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vsmul.mask.nxv8i64.i64( %0, %1, i64 %2, %3, - i64 0, i64 %4, i64 1) + iXLen 0, iXLen %4, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-rv64.ll deleted file mode 100644 index 3928e6fbd1f756..00000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vssub-rv64.ll +++ /dev/null @@ -1,2075 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ -; RUN: < %s | FileCheck %s - -declare @llvm.riscv.vssub.nxv1i8.nxv1i8( - , - , - , - i64); - -define @intrinsic_vssub_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vv_nxv1i8_nxv1i8_nxv1i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vssub.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv1i8.nxv1i8( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv1i8.nxv1i8( - , - , - , - , - i64, - i64); - -define @intrinsic_vssub_mask_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv1i8_nxv1i8_nxv1i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vssub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv1i8.nxv1i8( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv2i8.nxv2i8( - , - , - , - i64); - -define @intrinsic_vssub_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vv_nxv2i8_nxv2i8_nxv2i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vssub.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv2i8.nxv2i8( - 
undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv2i8.nxv2i8( - , - , - , - , - i64, - i64); - -define @intrinsic_vssub_mask_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv2i8_nxv2i8_nxv2i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vssub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv2i8.nxv2i8( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv4i8.nxv4i8( - , - , - , - i64); - -define @intrinsic_vssub_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vv_nxv4i8_nxv4i8_nxv4i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vssub.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv4i8.nxv4i8( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv4i8.nxv4i8( - , - , - , - , - i64, - i64); - -define @intrinsic_vssub_mask_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv4i8_nxv4i8_nxv4i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vssub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv4i8.nxv4i8( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv8i8.nxv8i8( - , - , - , - i64); - -define @intrinsic_vssub_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vv_nxv8i8_nxv8i8_nxv8i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vssub.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv8i8.nxv8i8( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv8i8.nxv8i8( - , - , - , - , - i64, - i64); - -define @intrinsic_vssub_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv8i8_nxv8i8_nxv8i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vssub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv8i8.nxv8i8( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv16i8.nxv16i8( - , - , - , - i64); - -define @intrinsic_vssub_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vv_nxv16i8_nxv16i8_nxv16i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vssub.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv16i8.nxv16i8( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv16i8.nxv16i8( - , - , - , - , - i64, - i64); - -define @intrinsic_vssub_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv16i8_nxv16i8_nxv16i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vssub.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv16i8.nxv16i8( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv32i8.nxv32i8( - , - , - , - i64); - -define @intrinsic_vssub_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vv_nxv32i8_nxv32i8_nxv32i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, 
a0, e8, m4, ta, ma -; CHECK-NEXT: vssub.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv32i8.nxv32i8( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv32i8.nxv32i8( - , - , - , - , - i64, - i64); - -define @intrinsic_vssub_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv32i8_nxv32i8_nxv32i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vssub.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv32i8.nxv32i8( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv64i8.nxv64i8( - , - , - , - i64); - -define @intrinsic_vssub_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vv_nxv64i8_nxv64i8_nxv64i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vssub.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv64i8.nxv64i8( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv64i8.nxv64i8( - , - , - , - , - i64, - i64); - -define @intrinsic_vssub_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv64i8_nxv64i8_nxv64i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8r.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu -; CHECK-NEXT: vssub.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv64i8.nxv64i8( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv1i16.nxv1i16( - , - , - , - i64); - -define @intrinsic_vssub_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vv_nxv1i16_nxv1i16_nxv1i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vssub.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv1i16.nxv1i16( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv1i16.nxv1i16( - , - , - , - , - i64, - i64); - -define @intrinsic_vssub_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv1i16_nxv1i16_nxv1i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vssub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv1i16.nxv1i16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv2i16.nxv2i16( - , - , - , - i64); - -define @intrinsic_vssub_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vv_nxv2i16_nxv2i16_nxv2i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vssub.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv2i16.nxv2i16( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv2i16.nxv2i16( - , - , - , - , - i64, - i64); - -define @intrinsic_vssub_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv2i16_nxv2i16_nxv2i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vssub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv2i16.nxv2i16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv4i16.nxv4i16( - 
, - , - , - i64); - -define @intrinsic_vssub_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vv_nxv4i16_nxv4i16_nxv4i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vssub.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv4i16.nxv4i16( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv4i16.nxv4i16( - , - , - , - , - i64, - i64); - -define @intrinsic_vssub_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv4i16_nxv4i16_nxv4i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vssub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv4i16.nxv4i16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv8i16.nxv8i16( - , - , - , - i64); - -define @intrinsic_vssub_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vv_nxv8i16_nxv8i16_nxv8i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vssub.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv8i16.nxv8i16( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv8i16.nxv8i16( - , - , - , - , - i64, - i64); - -define @intrinsic_vssub_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv8i16_nxv8i16_nxv8i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vssub.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv8i16.nxv8i16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv16i16.nxv16i16( - , - , - , - i64); - -define @intrinsic_vssub_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vv_nxv16i16_nxv16i16_nxv16i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vssub.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv16i16.nxv16i16( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv16i16.nxv16i16( - , - , - , - , - i64, - i64); - -define @intrinsic_vssub_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv16i16_nxv16i16_nxv16i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vssub.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv16i16.nxv16i16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv32i16.nxv32i16( - , - , - , - i64); - -define @intrinsic_vssub_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vv_nxv32i16_nxv32i16_nxv32i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vssub.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv32i16.nxv32i16( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv32i16.nxv32i16( - , - , - , - , - i64, - i64); - -define @intrinsic_vssub_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv32i16_nxv32i16_nxv32i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re16.v v24, (a0) -; 
CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vssub.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv32i16.nxv32i16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv1i32.nxv1i32( - , - , - , - i64); - -define @intrinsic_vssub_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vv_nxv1i32_nxv1i32_nxv1i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vssub.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv1i32.nxv1i32( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv1i32.nxv1i32( - , - , - , - , - i64, - i64); - -define @intrinsic_vssub_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv1i32_nxv1i32_nxv1i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vssub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv1i32.nxv1i32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv2i32.nxv2i32( - , - , - , - i64); - -define @intrinsic_vssub_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vv_nxv2i32_nxv2i32_nxv2i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vssub.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv2i32.nxv2i32( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv2i32.nxv2i32( - , - , - , - , - i64, - i64); - -define @intrinsic_vssub_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv2i32_nxv2i32_nxv2i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vssub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv2i32.nxv2i32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv4i32.nxv4i32( - , - , - , - i64); - -define @intrinsic_vssub_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vv_nxv4i32_nxv4i32_nxv4i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vssub.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv4i32.nxv4i32( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv4i32.nxv4i32( - , - , - , - , - i64, - i64); - -define @intrinsic_vssub_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv4i32_nxv4i32_nxv4i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vssub.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv4i32.nxv4i32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv8i32.nxv8i32( - , - , - , - i64); - -define @intrinsic_vssub_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vv_nxv8i32_nxv8i32_nxv8i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vssub.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv8i32.nxv8i32( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv8i32.nxv8i32( - , - , - , - , - i64, - i64); - 
-define @intrinsic_vssub_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv8i32_nxv8i32_nxv8i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vssub.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv8i32.nxv8i32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv16i32.nxv16i32( - , - , - , - i64); - -define @intrinsic_vssub_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vv_nxv16i32_nxv16i32_nxv16i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vssub.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv16i32.nxv16i32( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv16i32.nxv16i32( - , - , - , - , - i64, - i64); - -define @intrinsic_vssub_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv16i32_nxv16i32_nxv16i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vssub.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv16i32.nxv16i32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv1i64.nxv1i64( - , - , - , - i64); - -define @intrinsic_vssub_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vv_nxv1i64_nxv1i64_nxv1i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vssub.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv1i64.nxv1i64( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv1i64.nxv1i64( - , - , - , - , - i64, - i64); - -define @intrinsic_vssub_mask_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv1i64_nxv1i64_nxv1i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vssub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv1i64.nxv1i64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv2i64.nxv2i64( - , - , - , - i64); - -define @intrinsic_vssub_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vv_nxv2i64_nxv2i64_nxv2i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vssub.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv2i64.nxv2i64( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv2i64.nxv2i64( - , - , - , - , - i64, - i64); - -define @intrinsic_vssub_mask_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv2i64_nxv2i64_nxv2i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vssub.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv2i64.nxv2i64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv4i64.nxv4i64( - , - , - , - i64); - -define @intrinsic_vssub_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vv_nxv4i64_nxv4i64_nxv4i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, 
ma -; CHECK-NEXT: vssub.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv4i64.nxv4i64( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv4i64.nxv4i64( - , - , - , - , - i64, - i64); - -define @intrinsic_vssub_mask_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv4i64_nxv4i64_nxv4i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vssub.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv4i64.nxv4i64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv8i64.nxv8i64( - , - , - , - i64); - -define @intrinsic_vssub_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vv_nxv8i64_nxv8i64_nxv8i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vssub.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv8i64.nxv8i64( - undef, - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv8i64.nxv8i64( - , - , - , - , - i64, - i64); - -define @intrinsic_vssub_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv8i64_nxv8i64_nxv8i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vssub.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv8i64.nxv8i64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv1i8.i8( - , - , - i8, - i64); - -define @intrinsic_vssub_vx_nxv1i8_nxv1i8_i8( %0, i8 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv1i8_nxv1i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vssub.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv1i8.i8( - undef, - %0, - i8 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv1i8.i8( - , - , - i8, - , - i64, - i64); - -define @intrinsic_vssub_mask_vx_nxv1i8_nxv1i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv1i8_nxv1i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu -; CHECK-NEXT: vssub.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv1i8.i8( - %0, - %1, - i8 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv2i8.i8( - , - , - i8, - i64); - -define @intrinsic_vssub_vx_nxv2i8_nxv2i8_i8( %0, i8 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv2i8_nxv2i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vssub.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv2i8.i8( - undef, - %0, - i8 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv2i8.i8( - , - , - i8, - , - i64, - i64); - -define @intrinsic_vssub_mask_vx_nxv2i8_nxv2i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv2i8_nxv2i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu -; CHECK-NEXT: vssub.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv2i8.i8( - %0, - %1, - i8 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv4i8.i8( - , - , - i8, - i64); - -define @intrinsic_vssub_vx_nxv4i8_nxv4i8_i8( %0, i8 %1, i64 %2) 
nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv4i8_nxv4i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vssub.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv4i8.i8( - undef, - %0, - i8 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv4i8.i8( - , - , - i8, - , - i64, - i64); - -define @intrinsic_vssub_mask_vx_nxv4i8_nxv4i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv4i8_nxv4i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu -; CHECK-NEXT: vssub.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv4i8.i8( - %0, - %1, - i8 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv8i8.i8( - , - , - i8, - i64); - -define @intrinsic_vssub_vx_nxv8i8_nxv8i8_i8( %0, i8 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv8i8_nxv8i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vssub.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv8i8.i8( - undef, - %0, - i8 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv8i8.i8( - , - , - i8, - , - i64, - i64); - -define @intrinsic_vssub_mask_vx_nxv8i8_nxv8i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv8i8_nxv8i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu -; CHECK-NEXT: vssub.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv8i8.i8( - %0, - %1, - i8 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv16i8.i8( - , - , - i8, - i64); - -define @intrinsic_vssub_vx_nxv16i8_nxv16i8_i8( %0, i8 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv16i8_nxv16i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vssub.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv16i8.i8( - undef, - %0, - i8 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv16i8.i8( - , - , - i8, - , - i64, - i64); - -define @intrinsic_vssub_mask_vx_nxv16i8_nxv16i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv16i8_nxv16i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu -; CHECK-NEXT: vssub.vx v8, v10, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv16i8.i8( - %0, - %1, - i8 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv32i8.i8( - , - , - i8, - i64); - -define @intrinsic_vssub_vx_nxv32i8_nxv32i8_i8( %0, i8 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv32i8_nxv32i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vssub.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv32i8.i8( - undef, - %0, - i8 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv32i8.i8( - , - , - i8, - , - i64, - i64); - -define @intrinsic_vssub_mask_vx_nxv32i8_nxv32i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv32i8_nxv32i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu -; CHECK-NEXT: vssub.vx v8, v12, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv32i8.i8( - %0, - %1, - i8 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv64i8.i8( - , - , - i8, - i64); - -define 
@intrinsic_vssub_vx_nxv64i8_nxv64i8_i8( %0, i8 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv64i8_nxv64i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vssub.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv64i8.i8( - undef, - %0, - i8 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv64i8.i8( - , - , - i8, - , - i64, - i64); - -define @intrinsic_vssub_mask_vx_nxv64i8_nxv64i8_i8( %0, %1, i8 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv64i8_nxv64i8_i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu -; CHECK-NEXT: vssub.vx v8, v16, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv64i8.i8( - %0, - %1, - i8 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv1i16.i16( - , - , - i16, - i64); - -define @intrinsic_vssub_vx_nxv1i16_nxv1i16_i16( %0, i16 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv1i16_nxv1i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vssub.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv1i16.i16( - undef, - %0, - i16 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv1i16.i16( - , - , - i16, - , - i64, - i64); - -define @intrinsic_vssub_mask_vx_nxv1i16_nxv1i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv1i16_nxv1i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vssub.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv1i16.i16( - %0, - %1, - i16 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv2i16.i16( - , - , - i16, - i64); - -define @intrinsic_vssub_vx_nxv2i16_nxv2i16_i16( %0, i16 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv2i16_nxv2i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vssub.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv2i16.i16( - undef, - %0, - i16 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv2i16.i16( - , - , - i16, - , - i64, - i64); - -define @intrinsic_vssub_mask_vx_nxv2i16_nxv2i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv2i16_nxv2i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vssub.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv2i16.i16( - %0, - %1, - i16 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv4i16.i16( - , - , - i16, - i64); - -define @intrinsic_vssub_vx_nxv4i16_nxv4i16_i16( %0, i16 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv4i16_nxv4i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vssub.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv4i16.i16( - undef, - %0, - i16 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv4i16.i16( - , - , - i16, - , - i64, - i64); - -define @intrinsic_vssub_mask_vx_nxv4i16_nxv4i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv4i16_nxv4i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vssub.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv4i16.i16( - %0, 
- %1, - i16 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv8i16.i16( - , - , - i16, - i64); - -define @intrinsic_vssub_vx_nxv8i16_nxv8i16_i16( %0, i16 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv8i16_nxv8i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vssub.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv8i16.i16( - undef, - %0, - i16 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv8i16.i16( - , - , - i16, - , - i64, - i64); - -define @intrinsic_vssub_mask_vx_nxv8i16_nxv8i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv8i16_nxv8i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vssub.vx v8, v10, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv8i16.i16( - %0, - %1, - i16 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv16i16.i16( - , - , - i16, - i64); - -define @intrinsic_vssub_vx_nxv16i16_nxv16i16_i16( %0, i16 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv16i16_nxv16i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vssub.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv16i16.i16( - undef, - %0, - i16 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv16i16.i16( - , - , - i16, - , - i64, - i64); - -define @intrinsic_vssub_mask_vx_nxv16i16_nxv16i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv16i16_nxv16i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vssub.vx v8, v12, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv16i16.i16( - %0, - %1, - i16 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv32i16.i16( - , - , - i16, - i64); - -define @intrinsic_vssub_vx_nxv32i16_nxv32i16_i16( %0, i16 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv32i16_nxv32i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vssub.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv32i16.i16( - undef, - %0, - i16 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv32i16.i16( - , - , - i16, - , - i64, - i64); - -define @intrinsic_vssub_mask_vx_nxv32i16_nxv32i16_i16( %0, %1, i16 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv32i16_nxv32i16_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vssub.vx v8, v16, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv32i16.i16( - %0, - %1, - i16 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv1i32.i32( - , - , - i32, - i64); - -define @intrinsic_vssub_vx_nxv1i32_nxv1i32_i32( %0, i32 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv1i32_nxv1i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vssub.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv1i32.i32( - undef, - %0, - i32 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv1i32.i32( - , - , - i32, - , - i64, - i64); - -define @intrinsic_vssub_mask_vx_nxv1i32_nxv1i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv1i32_nxv1i32_i32: -; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vssub.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv1i32.i32( - %0, - %1, - i32 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv2i32.i32( - , - , - i32, - i64); - -define @intrinsic_vssub_vx_nxv2i32_nxv2i32_i32( %0, i32 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv2i32_nxv2i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vssub.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv2i32.i32( - undef, - %0, - i32 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv2i32.i32( - , - , - i32, - , - i64, - i64); - -define @intrinsic_vssub_mask_vx_nxv2i32_nxv2i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv2i32_nxv2i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vssub.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv2i32.i32( - %0, - %1, - i32 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv4i32.i32( - , - , - i32, - i64); - -define @intrinsic_vssub_vx_nxv4i32_nxv4i32_i32( %0, i32 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv4i32_nxv4i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vssub.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv4i32.i32( - undef, - %0, - i32 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv4i32.i32( - , - , - i32, - , - i64, - i64); - -define @intrinsic_vssub_mask_vx_nxv4i32_nxv4i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv4i32_nxv4i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vssub.vx v8, v10, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv4i32.i32( - %0, - %1, - i32 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv8i32.i32( - , - , - i32, - i64); - -define @intrinsic_vssub_vx_nxv8i32_nxv8i32_i32( %0, i32 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv8i32_nxv8i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vssub.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv8i32.i32( - undef, - %0, - i32 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv8i32.i32( - , - , - i32, - , - i64, - i64); - -define @intrinsic_vssub_mask_vx_nxv8i32_nxv8i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv8i32_nxv8i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vssub.vx v8, v12, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv8i32.i32( - %0, - %1, - i32 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv16i32.i32( - , - , - i32, - i64); - -define @intrinsic_vssub_vx_nxv16i32_nxv16i32_i32( %0, i32 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv16i32_nxv16i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vssub.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv16i32.i32( - undef, - %0, - i32 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv16i32.i32( - , - , - i32, - , - i64, - i64); - -define 
@intrinsic_vssub_mask_vx_nxv16i32_nxv16i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv16i32_nxv16i32_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vssub.vx v8, v16, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv16i32.i32( - %0, - %1, - i32 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv1i64.i64( - , - , - i64, - i64); - -define @intrinsic_vssub_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv1i64_nxv1i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vssub.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv1i64.i64( - undef, - %0, - i64 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv1i64.i64( - , - , - i64, - , - i64, - i64); - -define @intrinsic_vssub_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv1i64_nxv1i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vssub.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv1i64.i64( - %0, - %1, - i64 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv2i64.i64( - , - , - i64, - i64); - -define @intrinsic_vssub_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv2i64_nxv2i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; CHECK-NEXT: vssub.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv2i64.i64( - undef, - %0, - i64 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv2i64.i64( - , - , - i64, - , - i64, - i64); - -define @intrinsic_vssub_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv2i64_nxv2i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vssub.vx v8, v10, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv2i64.i64( - %0, - %1, - i64 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv4i64.i64( - , - , - i64, - i64); - -define @intrinsic_vssub_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv4i64_nxv4i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; CHECK-NEXT: vssub.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.nxv4i64.i64( - undef, - %0, - i64 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv4i64.i64( - , - , - i64, - , - i64, - i64); - -define @intrinsic_vssub_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv4i64_nxv4i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vssub.vx v8, v12, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv4i64.i64( - %0, - %1, - i64 %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vssub.nxv8i64.i64( - , - , - i64, - i64); - -define @intrinsic_vssub_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv8i64_nxv8i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vssub.vx v8, v8, a0 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vssub.nxv8i64.i64( - undef, - %0, - i64 %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vssub.mask.nxv8i64.i64( - , - , - i64, - , - i64, - i64); - -define @intrinsic_vssub_mask_vx_nxv8i64_nxv8i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv8i64_nxv8i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vssub.vx v8, v16, a0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssub.mask.nxv8i64.i64( - %0, - %1, - i64 %2, - %3, - i64 %4, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vssub.ll similarity index 80% rename from llvm/test/CodeGen/RISCV/rvv/vssub-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vssub.ll index f3ba5daafb6838..50fca5e832af50 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssub-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssub.ll @@ -1,14 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.riscv.vssub.nxv1i8.nxv1i8( , , , - i32); + iXLen) -define @intrinsic_vssub_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, i32 %2) nounwind { +define @intrinsic_vssub_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vv_nxv1i8_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma @@ -19,7 +21,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -29,10 +31,10 @@ declare @llvm.riscv.vssub.mask.nxv1i8.nxv1i8( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv1i8_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -44,7 +46,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -53,9 +55,9 @@ declare @llvm.riscv.vssub.nxv2i8.nxv2i8( , , , - i32); + iXLen) -define @intrinsic_vssub_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, i32 %2) nounwind { +define @intrinsic_vssub_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vv_nxv2i8_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma @@ -66,7 +68,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -76,10 +78,10 @@ declare @llvm.riscv.vssub.mask.nxv2i8.nxv2i8( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv2i8_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -91,7 +93,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -100,9 +102,9 @@ declare @llvm.riscv.vssub.nxv4i8.nxv4i8( , , , - i32); + iXLen) -define @intrinsic_vssub_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, i32 %2) nounwind { +define @intrinsic_vssub_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: 
intrinsic_vssub_vv_nxv4i8_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma @@ -113,7 +115,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -123,10 +125,10 @@ declare @llvm.riscv.vssub.mask.nxv4i8.nxv4i8( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv4i8_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -138,7 +140,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -147,9 +149,9 @@ declare @llvm.riscv.vssub.nxv8i8.nxv8i8( , , , - i32); + iXLen) -define @intrinsic_vssub_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, i32 %2) nounwind { +define @intrinsic_vssub_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vv_nxv8i8_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma @@ -160,7 +162,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -170,10 +172,10 @@ declare @llvm.riscv.vssub.mask.nxv8i8.nxv8i8( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv8i8_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -185,7 +187,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -194,9 +196,9 @@ declare @llvm.riscv.vssub.nxv16i8.nxv16i8( , , , - i32); + iXLen) -define @intrinsic_vssub_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, i32 %2) nounwind { +define @intrinsic_vssub_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vv_nxv16i8_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma @@ -207,7 +209,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -217,10 +219,10 @@ declare @llvm.riscv.vssub.mask.nxv16i8.nxv16i8( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv16i8_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -232,7 +234,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -241,9 +243,9 @@ declare @llvm.riscv.vssub.nxv32i8.nxv32i8( , , , - i32); + iXLen) -define @intrinsic_vssub_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, i32 %2) nounwind { +define @intrinsic_vssub_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vv_nxv32i8_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma @@ -254,7 +256,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -264,10 +266,10 @@ declare @llvm.riscv.vssub.mask.nxv32i8.nxv32i8( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv32i8_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -279,7 +281,7 @@ entry: %1, %2, 
%3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -288,9 +290,9 @@ declare @llvm.riscv.vssub.nxv64i8.nxv64i8( , , , - i32); + iXLen) -define @intrinsic_vssub_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, i32 %2) nounwind { +define @intrinsic_vssub_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma @@ -301,7 +303,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -311,10 +313,10 @@ declare @llvm.riscv.vssub.mask.nxv64i8.nxv64i8( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8r.v v24, (a0) @@ -327,7 +329,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -336,9 +338,9 @@ declare @llvm.riscv.vssub.nxv1i16.nxv1i16( , , , - i32); + iXLen) -define @intrinsic_vssub_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, i32 %2) nounwind { +define @intrinsic_vssub_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vv_nxv1i16_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma @@ -349,7 +351,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -359,10 +361,10 @@ declare @llvm.riscv.vssub.mask.nxv1i16.nxv1i16( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv1i16_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -374,7 +376,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -383,9 +385,9 @@ declare @llvm.riscv.vssub.nxv2i16.nxv2i16( , , , - i32); + iXLen) -define @intrinsic_vssub_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, i32 %2) nounwind { +define @intrinsic_vssub_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vv_nxv2i16_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma @@ -396,7 +398,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -406,10 +408,10 @@ declare @llvm.riscv.vssub.mask.nxv2i16.nxv2i16( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv2i16_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -421,7 +423,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -430,9 +432,9 @@ declare @llvm.riscv.vssub.nxv4i16.nxv4i16( , , , - i32); + iXLen) -define @intrinsic_vssub_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, i32 %2) nounwind { +define @intrinsic_vssub_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vv_nxv4i16_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma @@ -443,7 +445,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -453,10 +455,10 @@ declare @llvm.riscv.vssub.mask.nxv4i16.nxv4i16( , , , - i32, - i32); + iXLen, + iXLen) -define 
@intrinsic_vssub_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv4i16_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -468,7 +470,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -477,9 +479,9 @@ declare @llvm.riscv.vssub.nxv8i16.nxv8i16( , , , - i32); + iXLen) -define @intrinsic_vssub_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, i32 %2) nounwind { +define @intrinsic_vssub_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vv_nxv8i16_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma @@ -490,7 +492,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -500,10 +502,10 @@ declare @llvm.riscv.vssub.mask.nxv8i16.nxv8i16( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv8i16_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -515,7 +517,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -524,9 +526,9 @@ declare @llvm.riscv.vssub.nxv16i16.nxv16i16( , , , - i32); + iXLen) -define @intrinsic_vssub_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, i32 %2) nounwind { +define @intrinsic_vssub_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vv_nxv16i16_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma @@ -537,7 +539,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -547,10 +549,10 @@ declare @llvm.riscv.vssub.mask.nxv16i16.nxv16i16( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv16i16_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -562,7 +564,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -571,9 +573,9 @@ declare @llvm.riscv.vssub.nxv32i16.nxv32i16( , , , - i32); + iXLen) -define @intrinsic_vssub_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, i32 %2) nounwind { +define @intrinsic_vssub_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma @@ -584,7 +586,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -594,10 +596,10 @@ declare @llvm.riscv.vssub.mask.nxv32i16.nxv32i16( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re16.v v24, (a0) @@ -610,7 +612,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -619,9 +621,9 @@ declare @llvm.riscv.vssub.nxv1i32.nxv1i32( , , , - i32); + iXLen) -define @intrinsic_vssub_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, i32 %2) nounwind { +define 
@intrinsic_vssub_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vv_nxv1i32_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma @@ -632,7 +634,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -642,10 +644,10 @@ declare @llvm.riscv.vssub.mask.nxv1i32.nxv1i32( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vv_nxv1i32_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv1i32_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -657,7 +659,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -666,9 +668,9 @@ declare @llvm.riscv.vssub.nxv2i32.nxv2i32( , , , - i32); + iXLen) -define @intrinsic_vssub_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, i32 %2) nounwind { +define @intrinsic_vssub_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vv_nxv2i32_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma @@ -679,7 +681,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -689,10 +691,10 @@ declare @llvm.riscv.vssub.mask.nxv2i32.nxv2i32( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv2i32_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -704,7 +706,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -713,9 +715,9 @@ declare @llvm.riscv.vssub.nxv4i32.nxv4i32( , , , - i32); + iXLen) -define @intrinsic_vssub_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, i32 %2) nounwind { +define @intrinsic_vssub_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vv_nxv4i32_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma @@ -726,7 +728,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -736,10 +738,10 @@ declare @llvm.riscv.vssub.mask.nxv4i32.nxv4i32( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv4i32_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -751,7 +753,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -760,9 +762,9 @@ declare @llvm.riscv.vssub.nxv8i32.nxv8i32( , , , - i32); + iXLen) -define @intrinsic_vssub_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, i32 %2) nounwind { +define @intrinsic_vssub_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vv_nxv8i32_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma @@ -773,7 +775,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -783,10 +785,10 @@ declare @llvm.riscv.vssub.mask.nxv8i32.nxv8i32( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vssub_mask_vv_nxv8i32_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -798,7 +800,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -807,9 +809,9 @@ declare @llvm.riscv.vssub.nxv16i32.nxv16i32( , , , - i32); + iXLen) -define @intrinsic_vssub_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, i32 %2) nounwind { +define @intrinsic_vssub_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma @@ -820,7 +822,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -830,10 +832,10 @@ declare @llvm.riscv.vssub.mask.nxv16i32.nxv16i32( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re32.v v24, (a0) @@ -846,7 +848,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -855,9 +857,9 @@ declare @llvm.riscv.vssub.nxv1i64.nxv1i64( , , , - i32); + iXLen) -define @intrinsic_vssub_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, i32 %2) nounwind { +define @intrinsic_vssub_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vv_nxv1i64_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -868,7 +870,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -878,10 +880,10 @@ declare @llvm.riscv.vssub.mask.nxv1i64.nxv1i64( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv1i64_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -893,7 +895,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -902,9 +904,9 @@ declare @llvm.riscv.vssub.nxv2i64.nxv2i64( , , , - i32); + iXLen) -define @intrinsic_vssub_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, i32 %2) nounwind { +define @intrinsic_vssub_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vv_nxv2i64_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma @@ -915,7 +917,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -925,10 +927,10 @@ declare @llvm.riscv.vssub.mask.nxv2i64.nxv2i64( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv2i64_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -940,7 +942,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -949,9 +951,9 @@ declare @llvm.riscv.vssub.nxv4i64.nxv4i64( , , , - i32); + iXLen) -define @intrinsic_vssub_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, i32 %2) nounwind { +define @intrinsic_vssub_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vv_nxv4i64_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -962,7 +964,7 @@ 
entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -972,10 +974,10 @@ declare @llvm.riscv.vssub.mask.nxv4i64.nxv4i64( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv4i64_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -987,7 +989,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -996,9 +998,9 @@ declare @llvm.riscv.vssub.nxv8i64.nxv8i64( , , , - i32); + iXLen) -define @intrinsic_vssub_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, i32 %2) nounwind { +define @intrinsic_vssub_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -1009,7 +1011,7 @@ entry: undef, %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -1019,10 +1021,10 @@ declare @llvm.riscv.vssub.mask.nxv8i64.nxv8i64( , , , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re64.v v24, (a0) @@ -1035,7 +1037,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1044,9 +1046,9 @@ declare @llvm.riscv.vssub.nxv1i8.i8( , , i8, - i32); + iXLen) -define @intrinsic_vssub_vx_nxv1i8_nxv1i8_i8( %0, i8 %1, i32 %2) nounwind { +define @intrinsic_vssub_vx_nxv1i8_nxv1i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vx_nxv1i8_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma @@ -1057,7 +1059,7 @@ entry: undef, %0, i8 %1, - i32 %2) + iXLen %2) ret %a } @@ -1067,10 +1069,10 @@ declare @llvm.riscv.vssub.mask.nxv1i8.i8( , i8, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vx_nxv1i8_nxv1i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vx_nxv1i8_nxv1i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv1i8_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu @@ -1082,7 +1084,7 @@ entry: %1, i8 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1091,9 +1093,9 @@ declare @llvm.riscv.vssub.nxv2i8.i8( , , i8, - i32); + iXLen) -define @intrinsic_vssub_vx_nxv2i8_nxv2i8_i8( %0, i8 %1, i32 %2) nounwind { +define @intrinsic_vssub_vx_nxv2i8_nxv2i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vx_nxv2i8_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma @@ -1104,7 +1106,7 @@ entry: undef, %0, i8 %1, - i32 %2) + iXLen %2) ret %a } @@ -1114,10 +1116,10 @@ declare @llvm.riscv.vssub.mask.nxv2i8.i8( , i8, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vx_nxv2i8_nxv2i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vx_nxv2i8_nxv2i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv2i8_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu @@ -1129,7 +1131,7 @@ entry: %1, i8 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1138,9 +1140,9 @@ declare @llvm.riscv.vssub.nxv4i8.i8( , , i8, - i32); + iXLen) -define 
@intrinsic_vssub_vx_nxv4i8_nxv4i8_i8( %0, i8 %1, i32 %2) nounwind { +define @intrinsic_vssub_vx_nxv4i8_nxv4i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vx_nxv4i8_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma @@ -1151,7 +1153,7 @@ entry: undef, %0, i8 %1, - i32 %2) + iXLen %2) ret %a } @@ -1161,10 +1163,10 @@ declare @llvm.riscv.vssub.mask.nxv4i8.i8( , i8, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vx_nxv4i8_nxv4i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vx_nxv4i8_nxv4i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv4i8_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu @@ -1176,7 +1178,7 @@ entry: %1, i8 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1185,9 +1187,9 @@ declare @llvm.riscv.vssub.nxv8i8.i8( , , i8, - i32); + iXLen) -define @intrinsic_vssub_vx_nxv8i8_nxv8i8_i8( %0, i8 %1, i32 %2) nounwind { +define @intrinsic_vssub_vx_nxv8i8_nxv8i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vx_nxv8i8_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma @@ -1198,7 +1200,7 @@ entry: undef, %0, i8 %1, - i32 %2) + iXLen %2) ret %a } @@ -1208,10 +1210,10 @@ declare @llvm.riscv.vssub.mask.nxv8i8.i8( , i8, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vx_nxv8i8_nxv8i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vx_nxv8i8_nxv8i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv8i8_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu @@ -1223,7 +1225,7 @@ entry: %1, i8 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1232,9 +1234,9 @@ declare @llvm.riscv.vssub.nxv16i8.i8( , , i8, - i32); + iXLen) -define @intrinsic_vssub_vx_nxv16i8_nxv16i8_i8( %0, i8 %1, i32 %2) nounwind { +define @intrinsic_vssub_vx_nxv16i8_nxv16i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vx_nxv16i8_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma @@ -1245,7 +1247,7 @@ entry: undef, %0, i8 %1, - i32 %2) + iXLen %2) ret %a } @@ -1255,10 +1257,10 @@ declare @llvm.riscv.vssub.mask.nxv16i8.i8( , i8, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vx_nxv16i8_nxv16i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vx_nxv16i8_nxv16i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv16i8_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu @@ -1270,7 +1272,7 @@ entry: %1, i8 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1279,9 +1281,9 @@ declare @llvm.riscv.vssub.nxv32i8.i8( , , i8, - i32); + iXLen) -define @intrinsic_vssub_vx_nxv32i8_nxv32i8_i8( %0, i8 %1, i32 %2) nounwind { +define @intrinsic_vssub_vx_nxv32i8_nxv32i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vx_nxv32i8_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma @@ -1292,7 +1294,7 @@ entry: undef, %0, i8 %1, - i32 %2) + iXLen %2) ret %a } @@ -1302,10 +1304,10 @@ declare @llvm.riscv.vssub.mask.nxv32i8.i8( , i8, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vx_nxv32i8_nxv32i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vx_nxv32i8_nxv32i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vssub_mask_vx_nxv32i8_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu @@ -1317,7 +1319,7 @@ entry: %1, i8 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1326,9 +1328,9 @@ declare @llvm.riscv.vssub.nxv64i8.i8( , , i8, - i32); + iXLen) -define @intrinsic_vssub_vx_nxv64i8_nxv64i8_i8( %0, i8 %1, i32 %2) nounwind { +define @intrinsic_vssub_vx_nxv64i8_nxv64i8_i8( %0, i8 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vx_nxv64i8_nxv64i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma @@ -1339,7 +1341,7 @@ entry: undef, %0, i8 %1, - i32 %2) + iXLen %2) ret %a } @@ -1349,10 +1351,10 @@ declare @llvm.riscv.vssub.mask.nxv64i8.i8( , i8, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vx_nxv64i8_nxv64i8_i8( %0, %1, i8 %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vx_nxv64i8_nxv64i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv64i8_nxv64i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu @@ -1364,7 +1366,7 @@ entry: %1, i8 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1373,9 +1375,9 @@ declare @llvm.riscv.vssub.nxv1i16.i16( , , i16, - i32); + iXLen) -define @intrinsic_vssub_vx_nxv1i16_nxv1i16_i16( %0, i16 %1, i32 %2) nounwind { +define @intrinsic_vssub_vx_nxv1i16_nxv1i16_i16( %0, i16 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vx_nxv1i16_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma @@ -1386,7 +1388,7 @@ entry: undef, %0, i16 %1, - i32 %2) + iXLen %2) ret %a } @@ -1396,10 +1398,10 @@ declare @llvm.riscv.vssub.mask.nxv1i16.i16( , i16, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vx_nxv1i16_nxv1i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vx_nxv1i16_nxv1i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv1i16_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu @@ -1411,7 +1413,7 @@ entry: %1, i16 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1420,9 +1422,9 @@ declare @llvm.riscv.vssub.nxv2i16.i16( , , i16, - i32); + iXLen) -define @intrinsic_vssub_vx_nxv2i16_nxv2i16_i16( %0, i16 %1, i32 %2) nounwind { +define @intrinsic_vssub_vx_nxv2i16_nxv2i16_i16( %0, i16 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vx_nxv2i16_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma @@ -1433,7 +1435,7 @@ entry: undef, %0, i16 %1, - i32 %2) + iXLen %2) ret %a } @@ -1443,10 +1445,10 @@ declare @llvm.riscv.vssub.mask.nxv2i16.i16( , i16, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vx_nxv2i16_nxv2i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vx_nxv2i16_nxv2i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv2i16_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu @@ -1458,7 +1460,7 @@ entry: %1, i16 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1467,9 +1469,9 @@ declare @llvm.riscv.vssub.nxv4i16.i16( , , i16, - i32); + iXLen) -define @intrinsic_vssub_vx_nxv4i16_nxv4i16_i16( %0, i16 %1, i32 %2) nounwind { +define @intrinsic_vssub_vx_nxv4i16_nxv4i16_i16( %0, i16 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vx_nxv4i16_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma @@ -1480,7 +1482,7 @@ entry: 
undef, %0, i16 %1, - i32 %2) + iXLen %2) ret %a } @@ -1490,10 +1492,10 @@ declare @llvm.riscv.vssub.mask.nxv4i16.i16( , i16, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vx_nxv4i16_nxv4i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vx_nxv4i16_nxv4i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv4i16_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu @@ -1505,7 +1507,7 @@ entry: %1, i16 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1514,9 +1516,9 @@ declare @llvm.riscv.vssub.nxv8i16.i16( , , i16, - i32); + iXLen) -define @intrinsic_vssub_vx_nxv8i16_nxv8i16_i16( %0, i16 %1, i32 %2) nounwind { +define @intrinsic_vssub_vx_nxv8i16_nxv8i16_i16( %0, i16 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vx_nxv8i16_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma @@ -1527,7 +1529,7 @@ entry: undef, %0, i16 %1, - i32 %2) + iXLen %2) ret %a } @@ -1537,10 +1539,10 @@ declare @llvm.riscv.vssub.mask.nxv8i16.i16( , i16, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vx_nxv8i16_nxv8i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vx_nxv8i16_nxv8i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv8i16_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu @@ -1552,7 +1554,7 @@ entry: %1, i16 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1561,9 +1563,9 @@ declare @llvm.riscv.vssub.nxv16i16.i16( , , i16, - i32); + iXLen) -define @intrinsic_vssub_vx_nxv16i16_nxv16i16_i16( %0, i16 %1, i32 %2) nounwind { +define @intrinsic_vssub_vx_nxv16i16_nxv16i16_i16( %0, i16 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vx_nxv16i16_nxv16i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma @@ -1574,7 +1576,7 @@ entry: undef, %0, i16 %1, - i32 %2) + iXLen %2) ret %a } @@ -1584,10 +1586,10 @@ declare @llvm.riscv.vssub.mask.nxv16i16.i16( , i16, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vx_nxv16i16_nxv16i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vx_nxv16i16_nxv16i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv16i16_nxv16i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu @@ -1599,7 +1601,7 @@ entry: %1, i16 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1608,9 +1610,9 @@ declare @llvm.riscv.vssub.nxv32i16.i16( , , i16, - i32); + iXLen) -define @intrinsic_vssub_vx_nxv32i16_nxv32i16_i16( %0, i16 %1, i32 %2) nounwind { +define @intrinsic_vssub_vx_nxv32i16_nxv32i16_i16( %0, i16 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vx_nxv32i16_nxv32i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma @@ -1621,7 +1623,7 @@ entry: undef, %0, i16 %1, - i32 %2) + iXLen %2) ret %a } @@ -1631,10 +1633,10 @@ declare @llvm.riscv.vssub.mask.nxv32i16.i16( , i16, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vx_nxv32i16_nxv32i16_i16( %0, %1, i16 %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vx_nxv32i16_nxv32i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv32i16_nxv32i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu @@ -1646,7 +1648,7 @@ entry: %1, i16 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ 
-1655,9 +1657,9 @@ declare @llvm.riscv.vssub.nxv1i32.i32( , , i32, - i32); + iXLen) -define @intrinsic_vssub_vx_nxv1i32_nxv1i32_i32( %0, i32 %1, i32 %2) nounwind { +define @intrinsic_vssub_vx_nxv1i32_nxv1i32_i32( %0, i32 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vx_nxv1i32_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma @@ -1668,7 +1670,7 @@ entry: undef, %0, i32 %1, - i32 %2) + iXLen %2) ret %a } @@ -1678,10 +1680,10 @@ declare @llvm.riscv.vssub.mask.nxv1i32.i32( , i32, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vx_nxv1i32_nxv1i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vx_nxv1i32_nxv1i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv1i32_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu @@ -1693,7 +1695,7 @@ entry: %1, i32 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1702,9 +1704,9 @@ declare @llvm.riscv.vssub.nxv2i32.i32( , , i32, - i32); + iXLen) -define @intrinsic_vssub_vx_nxv2i32_nxv2i32_i32( %0, i32 %1, i32 %2) nounwind { +define @intrinsic_vssub_vx_nxv2i32_nxv2i32_i32( %0, i32 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vx_nxv2i32_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma @@ -1715,7 +1717,7 @@ entry: undef, %0, i32 %1, - i32 %2) + iXLen %2) ret %a } @@ -1725,10 +1727,10 @@ declare @llvm.riscv.vssub.mask.nxv2i32.i32( , i32, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vx_nxv2i32_nxv2i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vx_nxv2i32_nxv2i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv2i32_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu @@ -1740,7 +1742,7 @@ entry: %1, i32 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1749,9 +1751,9 @@ declare @llvm.riscv.vssub.nxv4i32.i32( , , i32, - i32); + iXLen) -define @intrinsic_vssub_vx_nxv4i32_nxv4i32_i32( %0, i32 %1, i32 %2) nounwind { +define @intrinsic_vssub_vx_nxv4i32_nxv4i32_i32( %0, i32 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vx_nxv4i32_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma @@ -1762,7 +1764,7 @@ entry: undef, %0, i32 %1, - i32 %2) + iXLen %2) ret %a } @@ -1772,10 +1774,10 @@ declare @llvm.riscv.vssub.mask.nxv4i32.i32( , i32, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vx_nxv4i32_nxv4i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vx_nxv4i32_nxv4i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv4i32_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu @@ -1787,7 +1789,7 @@ entry: %1, i32 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1796,9 +1798,9 @@ declare @llvm.riscv.vssub.nxv8i32.i32( , , i32, - i32); + iXLen) -define @intrinsic_vssub_vx_nxv8i32_nxv8i32_i32( %0, i32 %1, i32 %2) nounwind { +define @intrinsic_vssub_vx_nxv8i32_nxv8i32_i32( %0, i32 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vx_nxv8i32_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma @@ -1809,7 +1811,7 @@ entry: undef, %0, i32 %1, - i32 %2) + iXLen %2) ret %a } @@ -1819,10 +1821,10 @@ declare @llvm.riscv.vssub.mask.nxv8i32.i32( , i32, , - i32, - i32); + iXLen, + iXLen) -define 
@intrinsic_vssub_mask_vx_nxv8i32_nxv8i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vx_nxv8i32_nxv8i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv8i32_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu @@ -1834,7 +1836,7 @@ entry: %1, i32 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1843,9 +1845,9 @@ declare @llvm.riscv.vssub.nxv16i32.i32( , , i32, - i32); + iXLen) -define @intrinsic_vssub_vx_nxv16i32_nxv16i32_i32( %0, i32 %1, i32 %2) nounwind { +define @intrinsic_vssub_vx_nxv16i32_nxv16i32_i32( %0, i32 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssub_vx_nxv16i32_nxv16i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma @@ -1856,7 +1858,7 @@ entry: undef, %0, i32 %1, - i32 %2) + iXLen %2) ret %a } @@ -1866,10 +1868,10 @@ declare @llvm.riscv.vssub.mask.nxv16i32.i32( , i32, , - i32, - i32); + iXLen, + iXLen) -define @intrinsic_vssub_mask_vx_nxv16i32_nxv16i32_i32( %0, %1, i32 %2, %3, i32 %4) nounwind { +define @intrinsic_vssub_mask_vx_nxv16i32_nxv16i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv16i32_nxv16i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu @@ -1881,7 +1883,7 @@ entry: %1, i32 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1890,26 +1892,32 @@ declare @llvm.riscv.vssub.nxv1i64.i64( , , i64, - i32); - -define @intrinsic_vssub_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv1i64_nxv1i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v9, (a0), zero -; CHECK-NEXT: vssub.vv v8, v8, v9 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret + iXLen) + +define @intrinsic_vssub_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vssub_vx_nxv1i64_nxv1i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vssub.vv v8, v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vssub_vx_nxv1i64_nxv1i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vssub.vx v8, v8, a0 +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vssub.nxv1i64.i64( undef, %0, i64 %1, - i32 %2) + iXLen %2) ret %a } @@ -1919,28 +1927,34 @@ declare @llvm.riscv.vssub.mask.nxv1i64.i64( , i64, , - i32, - i32); - -define @intrinsic_vssub_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv1i64_nxv1i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vssub.vv v8, v9, v10, v0.t -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret + iXLen, + iXLen) + +define @intrinsic_vssub_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vssub_mask_vx_nxv1i64_nxv1i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; 
RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vssub.vv v8, v9, v10, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vssub_mask_vx_nxv1i64_nxv1i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; RV64-NEXT: vssub.vx v8, v9, a0, v0.t +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vssub.mask.nxv1i64.i64( %0, %1, i64 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1949,26 +1963,32 @@ declare @llvm.riscv.vssub.nxv2i64.i64( , , i64, - i32); - -define @intrinsic_vssub_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv2i64_nxv2i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma -; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vssub.vv v8, v8, v10 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret + iXLen) + +define @intrinsic_vssub_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vssub_vx_nxv2i64_nxv2i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vssub.vv v8, v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vssub_vx_nxv2i64_nxv2i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vssub.vx v8, v8, a0 +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vssub.nxv2i64.i64( undef, %0, i64 %1, - i32 %2) + iXLen %2) ret %a } @@ -1978,28 +1998,34 @@ declare @llvm.riscv.vssub.mask.nxv2i64.i64( , i64, , - i32, - i32); - -define @intrinsic_vssub_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv2i64_nxv2i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vlse64.v v12, (a0), zero -; CHECK-NEXT: vssub.vv v8, v10, v12, v0.t -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret + iXLen, + iXLen) + +define @intrinsic_vssub_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vssub_mask_vx_nxv2i64_nxv2i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vssub.vv v8, v10, v12, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vssub_mask_vx_nxv2i64_nxv2i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; RV64-NEXT: vssub.vx v8, v10, a0, v0.t +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vssub.mask.nxv2i64.i64( %0, %1, i64 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -2008,26 +2034,32 @@ declare @llvm.riscv.vssub.nxv4i64.i64( , , i64, - i32); - -define @intrinsic_vssub_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv4i64_nxv4i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 
-; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; CHECK-NEXT: vlse64.v v12, (a0), zero -; CHECK-NEXT: vssub.vv v8, v8, v12 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret + iXLen) + +define @intrinsic_vssub_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vssub_vx_nxv4i64_nxv4i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vssub.vv v8, v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vssub_vx_nxv4i64_nxv4i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vssub.vx v8, v8, a0 +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vssub.nxv4i64.i64( undef, %0, i64 %1, - i32 %2) + iXLen %2) ret %a } @@ -2037,28 +2069,34 @@ declare @llvm.riscv.vssub.mask.nxv4i64.i64( , i64, , - i32, - i32); - -define @intrinsic_vssub_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv4i64_nxv4i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vlse64.v v16, (a0), zero -; CHECK-NEXT: vssub.vv v8, v12, v16, v0.t -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret + iXLen, + iXLen) + +define @intrinsic_vssub_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vssub_mask_vx_nxv4i64_nxv4i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vssub.vv v8, v12, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vssub_mask_vx_nxv4i64_nxv4i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV64-NEXT: vssub.vx v8, v12, a0, v0.t +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vssub.mask.nxv4i64.i64( %0, %1, i64 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -2067,26 +2105,32 @@ declare @llvm.riscv.vssub.nxv8i64.i64( , , i64, - i32); - -define @intrinsic_vssub_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vssub_vx_nxv8i64_nxv8i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vlse64.v v16, (a0), zero -; CHECK-NEXT: vssub.vv v8, v8, v16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret + iXLen) + +define @intrinsic_vssub_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vssub_vx_nxv8i64_nxv8i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vssub.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vssub_vx_nxv8i64_nxv8i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vssub.vx v8, v8, a0 +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vssub.nxv8i64.i64( undef, %0, i64 
%1, - i32 %2) + iXLen %2) ret %a } @@ -2096,28 +2140,34 @@ declare @llvm.riscv.vssub.mask.nxv8i64.i64( , i64, , - i32, - i32); - -define @intrinsic_vssub_mask_vx_nxv8i64_nxv8i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv8i64_nxv8i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vlse64.v v24, (a0), zero -; CHECK-NEXT: vssub.vv v8, v16, v24, v0.t -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret + iXLen, + iXLen) + +define @intrinsic_vssub_mask_vx_nxv8i64_nxv8i64_i64( %0, %1, i64 %2, %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vssub_mask_vx_nxv8i64_nxv8i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vssub.vv v8, v16, v24, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vssub_mask_vx_nxv8i64_nxv8i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vssub.vx v8, v16, a0, v0.t +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vssub.mask.nxv8i64.i64( %0, %1, i64 %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll deleted file mode 100644 index 5e7147158d0f15..00000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll +++ /dev/null @@ -1,2123 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ -; RUN: < %s | FileCheck %s - -declare @llvm.riscv.vssubu.nxv1i8.nxv1i8( - , - , - , - i32); - -define @intrinsic_vssubu_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vssubu_vv_nxv1i8_nxv1i8_nxv1i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vssubu.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssubu.nxv1i8.nxv1i8( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vssubu.mask.nxv1i8.nxv1i8( - , - , - , - , - i32, - i32); - -define @intrinsic_vssubu_mask_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vssubu_mask_vv_nxv1i8_nxv1i8_nxv1i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vssubu.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssubu.mask.nxv1i8.nxv1i8( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vssubu.nxv2i8.nxv2i8( - , - , - , - i32); - -define @intrinsic_vssubu_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vssubu_vv_nxv2i8_nxv2i8_nxv2i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vssubu.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssubu.nxv2i8.nxv2i8( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vssubu.mask.nxv2i8.nxv2i8( - , - , - , - , - i32, - i32); - -define @intrinsic_vssubu_mask_vv_nxv2i8_nxv2i8_nxv2i8( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vssubu_mask_vv_nxv2i8_nxv2i8_nxv2i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vssubu.vv v8, v9, v10, v0.t 
-; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssubu.mask.nxv2i8.nxv2i8( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vssubu.nxv4i8.nxv4i8( - , - , - , - i32); - -define @intrinsic_vssubu_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vssubu_vv_nxv4i8_nxv4i8_nxv4i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vssubu.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssubu.nxv4i8.nxv4i8( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vssubu.mask.nxv4i8.nxv4i8( - , - , - , - , - i32, - i32); - -define @intrinsic_vssubu_mask_vv_nxv4i8_nxv4i8_nxv4i8( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vssubu_mask_vv_nxv4i8_nxv4i8_nxv4i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vssubu.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssubu.mask.nxv4i8.nxv4i8( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vssubu.nxv8i8.nxv8i8( - , - , - , - i32); - -define @intrinsic_vssubu_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vssubu_vv_nxv8i8_nxv8i8_nxv8i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vssubu.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssubu.nxv8i8.nxv8i8( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vssubu.mask.nxv8i8.nxv8i8( - , - , - , - , - i32, - i32); - -define @intrinsic_vssubu_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vssubu_mask_vv_nxv8i8_nxv8i8_nxv8i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vssubu.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssubu.mask.nxv8i8.nxv8i8( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vssubu.nxv16i8.nxv16i8( - , - , - , - i32); - -define @intrinsic_vssubu_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vssubu_vv_nxv16i8_nxv16i8_nxv16i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vssubu.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssubu.nxv16i8.nxv16i8( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vssubu.mask.nxv16i8.nxv16i8( - , - , - , - , - i32, - i32); - -define @intrinsic_vssubu_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vssubu_mask_vv_nxv16i8_nxv16i8_nxv16i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vssubu.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssubu.mask.nxv16i8.nxv16i8( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vssubu.nxv32i8.nxv32i8( - , - , - , - i32); - -define @intrinsic_vssubu_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vssubu_vv_nxv32i8_nxv32i8_nxv32i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vssubu.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssubu.nxv32i8.nxv32i8( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vssubu.mask.nxv32i8.nxv32i8( - , - , - , - , - i32, - i32); - -define @intrinsic_vssubu_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, i32 %4) nounwind { -; 
CHECK-LABEL: intrinsic_vssubu_mask_vv_nxv32i8_nxv32i8_nxv32i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vssubu.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssubu.mask.nxv32i8.nxv32i8( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vssubu.nxv64i8.nxv64i8( - , - , - , - i32); - -define @intrinsic_vssubu_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vssubu_vv_nxv64i8_nxv64i8_nxv64i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vssubu.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssubu.nxv64i8.nxv64i8( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vssubu.mask.nxv64i8.nxv64i8( - , - , - , - , - i32, - i32); - -define @intrinsic_vssubu_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vssubu_mask_vv_nxv64i8_nxv64i8_nxv64i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8r.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu -; CHECK-NEXT: vssubu.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssubu.mask.nxv64i8.nxv64i8( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vssubu.nxv1i16.nxv1i16( - , - , - , - i32); - -define @intrinsic_vssubu_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vssubu_vv_nxv1i16_nxv1i16_nxv1i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vssubu.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssubu.nxv1i16.nxv1i16( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vssubu.mask.nxv1i16.nxv1i16( - , - , - , - , - i32, - i32); - -define @intrinsic_vssubu_mask_vv_nxv1i16_nxv1i16_nxv1i16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vssubu_mask_vv_nxv1i16_nxv1i16_nxv1i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vssubu.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssubu.mask.nxv1i16.nxv1i16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vssubu.nxv2i16.nxv2i16( - , - , - , - i32); - -define @intrinsic_vssubu_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vssubu_vv_nxv2i16_nxv2i16_nxv2i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vssubu.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssubu.nxv2i16.nxv2i16( - undef, - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vssubu.mask.nxv2i16.nxv2i16( - , - , - , - , - i32, - i32); - -define @intrinsic_vssubu_mask_vv_nxv2i16_nxv2i16_nxv2i16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vssubu_mask_vv_nxv2i16_nxv2i16_nxv2i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vssubu.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vssubu.mask.nxv2i16.nxv2i16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vssubu.nxv4i16.nxv4i16( - , - , - , - i32); - -define @intrinsic_vssubu_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vssubu_vv_nxv4i16_nxv4i16_nxv4i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vssubu.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call 
[... remainder of the deleted llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll elided: the unmasked and masked vssubu.vv test cases for nxv4i16 through nxv8i64 and the vssubu.vx test cases for the i8, i16, i32, and i64 scalar variants, all following the same declare/define/CHECK pattern and all removed by this patch ...]
[... final deleted vssubu.vx i64 test cases of vssubu-rv32.ll elided: on rv32 the i64 scalar is spilled to the stack and splat with vlse64.v before the vector-vector subtract ...]

diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu.ll
similarity index 80%
rename from llvm/test/CodeGen/RISCV/rvv/vssubu-rv64.ll
rename to llvm/test/CodeGen/RISCV/rvv/vssubu.ll
index 71b623c555a5f0..db1b4ce34e9b38 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssubu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssubu.ll
@@ -1,14 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <vscale x 1 x i8> @llvm.riscv.vssubu.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
   <vscale x 1 x i8>,
   <vscale x 1 x i8>,
-  i64);
+  iXLen)
 
-define <vscale x 1 x i8> @intrinsic_vssubu_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, i64 %2) nounwind {
+define <vscale x 1 x i8> @intrinsic_vssubu_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vssubu_vv_nxv1i8_nxv1i8_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
@@ -19,7 +21,7 @@ entry:
     <vscale x 1 x i8> undef,
     <vscale x 1 x i8> %0,
     <vscale x 1 x i8> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x i8> %a
 }

[... remaining hunks of the rename elided: every subsequent declare, define, and call site in the file receives the same mechanical i64 -> iXLen substitution (the trailing semicolons on the declares are also dropped), for both the masked and unmasked vector-vector and vector-scalar vssubu intrinsics across all element widths and LMULs; the <vscale x N x iM> vector types themselves are unchanged ...]
intrinsic_vssubu_mask_vx_nxv2i32_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu @@ -1740,7 +1742,7 @@ entry: %1, i32 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1749,9 +1751,9 @@ declare @llvm.riscv.vssubu.nxv4i32.i32( , , i32, - i64); + iXLen) -define @intrinsic_vssubu_vx_nxv4i32_nxv4i32_i32( %0, i32 %1, i64 %2) nounwind { +define @intrinsic_vssubu_vx_nxv4i32_nxv4i32_i32( %0, i32 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssubu_vx_nxv4i32_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma @@ -1762,7 +1764,7 @@ entry: undef, %0, i32 %1, - i64 %2) + iXLen %2) ret %a } @@ -1772,10 +1774,10 @@ declare @llvm.riscv.vssubu.mask.nxv4i32.i32( , i32, , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vssubu_mask_vx_nxv4i32_nxv4i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +define @intrinsic_vssubu_mask_vx_nxv4i32_nxv4i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssubu_mask_vx_nxv4i32_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu @@ -1787,7 +1789,7 @@ entry: %1, i32 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1796,9 +1798,9 @@ declare @llvm.riscv.vssubu.nxv8i32.i32( , , i32, - i64); + iXLen) -define @intrinsic_vssubu_vx_nxv8i32_nxv8i32_i32( %0, i32 %1, i64 %2) nounwind { +define @intrinsic_vssubu_vx_nxv8i32_nxv8i32_i32( %0, i32 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssubu_vx_nxv8i32_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma @@ -1809,7 +1811,7 @@ entry: undef, %0, i32 %1, - i64 %2) + iXLen %2) ret %a } @@ -1819,10 +1821,10 @@ declare @llvm.riscv.vssubu.mask.nxv8i32.i32( , i32, , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vssubu_mask_vx_nxv8i32_nxv8i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +define @intrinsic_vssubu_mask_vx_nxv8i32_nxv8i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssubu_mask_vx_nxv8i32_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu @@ -1834,7 +1836,7 @@ entry: %1, i32 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1843,9 +1845,9 @@ declare @llvm.riscv.vssubu.nxv16i32.i32( , , i32, - i64); + iXLen) -define @intrinsic_vssubu_vx_nxv16i32_nxv16i32_i32( %0, i32 %1, i64 %2) nounwind { +define @intrinsic_vssubu_vx_nxv16i32_nxv16i32_i32( %0, i32 %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vssubu_vx_nxv16i32_nxv16i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma @@ -1856,7 +1858,7 @@ entry: undef, %0, i32 %1, - i64 %2) + iXLen %2) ret %a } @@ -1866,10 +1868,10 @@ declare @llvm.riscv.vssubu.mask.nxv16i32.i32( , i32, , - i64, - i64); + iXLen, + iXLen) -define @intrinsic_vssubu_mask_vx_nxv16i32_nxv16i32_i32( %0, %1, i32 %2, %3, i64 %4) nounwind { +define @intrinsic_vssubu_mask_vx_nxv16i32_nxv16i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vssubu_mask_vx_nxv16i32_nxv16i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu @@ -1881,7 +1883,7 @@ entry: %1, i32 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1890,20 +1892,32 @@ declare @llvm.riscv.vssubu.nxv1i64.i64( , , i64, - i64); - -define @intrinsic_vssubu_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssubu_vx_nxv1i64_nxv1i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vssubu.vx v8, v8, a0 -; CHECK-NEXT: ret + 
iXLen) + +define @intrinsic_vssubu_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vssubu_vx_nxv1i64_nxv1i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vssubu.vv v8, v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vssubu_vx_nxv1i64_nxv1i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vssubu.vx v8, v8, a0 +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vssubu.nxv1i64.i64( undef, %0, i64 %1, - i64 %2) + iXLen %2) ret %a } @@ -1913,22 +1927,34 @@ declare @llvm.riscv.vssubu.mask.nxv1i64.i64( , i64, , - i64, - i64); - -define @intrinsic_vssubu_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssubu_mask_vx_nxv1i64_nxv1i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vssubu.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret + iXLen, + iXLen) + +define @intrinsic_vssubu_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vssubu_mask_vx_nxv1i64_nxv1i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vssubu.vv v8, v9, v10, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vssubu_mask_vx_nxv1i64_nxv1i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; RV64-NEXT: vssubu.vx v8, v9, a0, v0.t +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vssubu.mask.nxv1i64.i64( %0, %1, i64 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1937,20 +1963,32 @@ declare @llvm.riscv.vssubu.nxv2i64.i64( , , i64, - i64); - -define @intrinsic_vssubu_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssubu_vx_nxv2i64_nxv2i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; CHECK-NEXT: vssubu.vx v8, v8, a0 -; CHECK-NEXT: ret + iXLen) + +define @intrinsic_vssubu_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vssubu_vx_nxv2i64_nxv2i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vssubu.vv v8, v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vssubu_vx_nxv2i64_nxv2i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vssubu.vx v8, v8, a0 +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vssubu.nxv2i64.i64( undef, %0, i64 %1, - i64 %2) + iXLen %2) ret %a } @@ -1960,22 +1998,34 @@ declare @llvm.riscv.vssubu.mask.nxv2i64.i64( , i64, , - i64, - i64); - -define @intrinsic_vssubu_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssubu_mask_vx_nxv2i64_nxv2i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vssubu.vx v8, v10, a0, v0.t -; CHECK-NEXT: ret + iXLen, + iXLen) + +define @intrinsic_vssubu_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, iXLen %4) nounwind 
{ +; RV32-LABEL: intrinsic_vssubu_mask_vx_nxv2i64_nxv2i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vssubu.vv v8, v10, v12, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vssubu_mask_vx_nxv2i64_nxv2i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; RV64-NEXT: vssubu.vx v8, v10, a0, v0.t +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vssubu.mask.nxv2i64.i64( %0, %1, i64 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1984,20 +2034,32 @@ declare @llvm.riscv.vssubu.nxv4i64.i64( , , i64, - i64); - -define @intrinsic_vssubu_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssubu_vx_nxv4i64_nxv4i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; CHECK-NEXT: vssubu.vx v8, v8, a0 -; CHECK-NEXT: ret + iXLen) + +define @intrinsic_vssubu_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vssubu_vx_nxv4i64_nxv4i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vssubu.vv v8, v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vssubu_vx_nxv4i64_nxv4i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vssubu.vx v8, v8, a0 +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vssubu.nxv4i64.i64( undef, %0, i64 %1, - i64 %2) + iXLen %2) ret %a } @@ -2007,22 +2069,34 @@ declare @llvm.riscv.vssubu.mask.nxv4i64.i64( , i64, , - i64, - i64); - -define @intrinsic_vssubu_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssubu_mask_vx_nxv4i64_nxv4i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vssubu.vx v8, v12, a0, v0.t -; CHECK-NEXT: ret + iXLen, + iXLen) + +define @intrinsic_vssubu_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vssubu_mask_vx_nxv4i64_nxv4i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vssubu.vv v8, v12, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vssubu_mask_vx_nxv4i64_nxv4i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV64-NEXT: vssubu.vx v8, v12, a0, v0.t +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vssubu.mask.nxv4i64.i64( %0, %1, i64 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -2031,20 +2105,32 @@ declare @llvm.riscv.vssubu.nxv8i64.i64( , , i64, - i64); - -define @intrinsic_vssubu_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vssubu_vx_nxv8i64_nxv8i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vssubu.vx v8, v8, a0 -; CHECK-NEXT: ret + iXLen) + +define @intrinsic_vssubu_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vssubu_vx_nxv8i64_nxv8i64_i64: +; RV32: # %bb.0: # %entry +; 
RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vssubu.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vssubu_vx_nxv8i64_nxv8i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vssubu.vx v8, v8, a0 +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vssubu.nxv8i64.i64( undef, %0, i64 %1, - i64 %2) + iXLen %2) ret %a } @@ -2054,22 +2140,34 @@ declare @llvm.riscv.vssubu.mask.nxv8i64.i64( , i64, , - i64, - i64); - -define @intrinsic_vssubu_mask_vx_nxv8i64_nxv8i64_i64( %0, %1, i64 %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vssubu_mask_vx_nxv8i64_nxv8i64_i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vssubu.vx v8, v16, a0, v0.t -; CHECK-NEXT: ret + iXLen, + iXLen) + +define @intrinsic_vssubu_mask_vx_nxv8i64_nxv8i64_i64( %0, %1, i64 %2, %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vssubu_mask_vx_nxv8i64_nxv8i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vssubu.vv v8, v16, v24, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vssubu_mask_vx_nxv8i64_nxv8i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vssubu.vx v8, v16, a0, v0.t +; RV64-NEXT: ret entry: %a = call @llvm.riscv.vssubu.mask.nxv8i64.i64( %0, %1, i64 %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } From 3403b593ae11e624ace8918a7840e85198f2ca3c Mon Sep 17 00:00:00 2001 From: Sterling-Augustine <56981066+Sterling-Augustine@users.noreply.github.com> Date: Wed, 31 Jul 2024 16:41:15 -0700 Subject: [PATCH 043/114] [SandboxIR] Implement PHINodes (#101111) This patch implements sandboxir::PHINode which mirrors llvm::PHINode. Based almost entirely on work by vporpo. --- llvm/include/llvm/SandboxIR/SandboxIR.h | 98 ++++++++++++++ .../llvm/SandboxIR/SandboxIRValues.def | 2 + llvm/include/llvm/SandboxIR/Tracker.h | 58 ++++++++ llvm/include/llvm/SandboxIR/Use.h | 2 + llvm/lib/SandboxIR/SandboxIR.cpp | 108 +++++++++++++++ llvm/lib/SandboxIR/Tracker.cpp | 75 +++++++++++ llvm/unittests/SandboxIR/SandboxIRTest.cpp | 120 +++++++++++++++++ llvm/unittests/SandboxIR/TrackerTest.cpp | 124 ++++++++++++++++++ 8 files changed, 587 insertions(+) diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h index 38c2586f9d73c2..1fd9283313895c 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIR.h +++ b/llvm/include/llvm/SandboxIR/SandboxIR.h @@ -226,6 +226,7 @@ class Value { friend class CallBrInst; // For getting `Val`. friend class GetElementPtrInst; // For getting `Val`. friend class CastInst; // For getting `Val`. + friend class PHINode; // For getting `Val`. /// All values point to the context. Context &Ctx; @@ -618,6 +619,7 @@ class Instruction : public sandboxir::User { friend class CallBrInst; // For getTopmostLLVMInstruction(). friend class GetElementPtrInst; // For getTopmostLLVMInstruction(). friend class CastInst; // For getTopmostLLVMInstruction(). + friend class PHINode; // For getTopmostLLVMInstruction(). /// \Returns the LLVM IR Instructions that this SandboxIR maps to in program /// order. 
@@ -1515,6 +1517,100 @@ class IntToPtrInst final : public CastInst { #endif // NDEBUG }; +class PHINode final : public Instruction { + /// Use Context::createPHINode(). Don't call the constructor directly. + PHINode(llvm::PHINode *PHI, Context &Ctx) + : Instruction(ClassID::PHI, Opcode::PHI, PHI, Ctx) {} + friend Context; // for PHINode() + Use getOperandUseInternal(unsigned OpIdx, bool Verify) const final { + return getOperandUseDefault(OpIdx, Verify); + } + SmallVector getLLVMInstrs() const final { + return {cast(Val)}; + } + /// Helper for mapped_iterator. + struct LLVMBBToBB { + Context &Ctx; + LLVMBBToBB(Context &Ctx) : Ctx(Ctx) {} + BasicBlock *operator()(llvm::BasicBlock *LLVMBB) const; + }; + +public: + unsigned getUseOperandNo(const Use &Use) const final { + return getUseOperandNoDefault(Use); + } + unsigned getNumOfIRInstrs() const final { return 1u; } + static PHINode *create(Type *Ty, unsigned NumReservedValues, + Instruction *InsertBefore, Context &Ctx, + const Twine &Name = ""); + /// For isa/dyn_cast. + static bool classof(const Value *From); + + using const_block_iterator = + mapped_iterator; + + const_block_iterator block_begin() const { + LLVMBBToBB BBGetter(Ctx); + return const_block_iterator(cast(Val)->block_begin(), + BBGetter); + } + const_block_iterator block_end() const { + LLVMBBToBB BBGetter(Ctx); + return const_block_iterator(cast(Val)->block_end(), + BBGetter); + } + iterator_range blocks() const { + return make_range(block_begin(), block_end()); + } + + op_range incoming_values() { return operands(); } + + const_op_range incoming_values() const { return operands(); } + + unsigned getNumIncomingValues() const { + return cast(Val)->getNumIncomingValues(); + } + Value *getIncomingValue(unsigned Idx) const; + void setIncomingValue(unsigned Idx, Value *V); + static unsigned getOperandNumForIncomingValue(unsigned Idx) { + return llvm::PHINode::getOperandNumForIncomingValue(Idx); + } + static unsigned getIncomingValueNumForOperand(unsigned Idx) { + return llvm::PHINode::getIncomingValueNumForOperand(Idx); + } + BasicBlock *getIncomingBlock(unsigned Idx) const; + BasicBlock *getIncomingBlock(const Use &U) const; + + void setIncomingBlock(unsigned Idx, BasicBlock *BB); + + void addIncoming(Value *V, BasicBlock *BB); + + Value *removeIncomingValue(unsigned Idx); + Value *removeIncomingValue(BasicBlock *BB); + + int getBasicBlockIndex(const BasicBlock *BB) const; + Value *getIncomingValueForBlock(const BasicBlock *BB) const; + + Value *hasConstantValue() const; + + bool hasConstantOrUndefValue() const { + return cast(Val)->hasConstantOrUndefValue(); + } + bool isComplete() const { return cast(Val)->isComplete(); } + // TODO: Implement the below functions: + // void replaceIncomingBlockWith (const BasicBlock *Old, BasicBlock *New); + // void copyIncomingBlocks(iterator_range BBRange, + // uint32_t ToIdx = 0) + // void removeIncomingValueIf(function_ref< bool(unsigned)> Predicate, + // bool DeletePHIIfEmpty=true) +#ifndef NDEBUG + void verify() const final { + assert(isa(Val) && "Expected PHINode!"); + } + void dump(raw_ostream &OS) const override; + LLVM_DUMP_METHOD void dump() const override; +#endif +}; class PtrToIntInst final : public CastInst { public: static Value *create(Value *Src, Type *DestTy, BBIterator WhereIt, @@ -1700,6 +1796,8 @@ class Context { friend GetElementPtrInst; // For createGetElementPtrInst() CastInst *createCastInst(llvm::CastInst *I); friend CastInst; // For createCastInst() + PHINode *createPHINode(llvm::PHINode *I); + friend PHINode; // For 
createPHINode() public: Context(LLVMContext &LLVMCtx) diff --git a/llvm/include/llvm/SandboxIR/SandboxIRValues.def b/llvm/include/llvm/SandboxIR/SandboxIRValues.def index 243ce6b2c60a9f..4cb601128a507e 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIRValues.def +++ b/llvm/include/llvm/SandboxIR/SandboxIRValues.def @@ -58,6 +58,8 @@ DEF_INSTR(Cast, OPCODES(\ OP(BitCast) \ OP(AddrSpaceCast) \ ), CastInst) +DEF_INSTR(PHI, OP(PHI), PHINode) + // clang-format on #ifdef DEF_VALUE #undef DEF_VALUE diff --git a/llvm/include/llvm/SandboxIR/Tracker.h b/llvm/include/llvm/SandboxIR/Tracker.h index 64068461b94905..238e4e9dacd342 100644 --- a/llvm/include/llvm/SandboxIR/Tracker.h +++ b/llvm/include/llvm/SandboxIR/Tracker.h @@ -102,6 +102,64 @@ class UseSet : public IRChangeBase { #endif }; +class PHISetIncoming : public IRChangeBase { + PHINode &PHI; + unsigned Idx; + PointerUnion OrigValueOrBB; + +public: + enum class What { + Value, + Block, + }; + PHISetIncoming(PHINode &PHI, unsigned Idx, What What, Tracker &Tracker); + void revert() final; + void accept() final {} +#ifndef NDEBUG + void dump(raw_ostream &OS) const final { + dumpCommon(OS); + OS << "PHISetIncoming"; + } + LLVM_DUMP_METHOD void dump() const final; +#endif +}; + +class PHIRemoveIncoming : public IRChangeBase { + PHINode &PHI; + unsigned RemovedIdx; + Value *RemovedV; + BasicBlock *RemovedBB; + +public: + PHIRemoveIncoming(PHINode &PHI, unsigned RemovedIdx, Tracker &Tracker); + void revert() final; + void accept() final {} +#ifndef NDEBUG + void dump(raw_ostream &OS) const final { + dumpCommon(OS); + OS << "PHISetIncoming"; + } + LLVM_DUMP_METHOD void dump() const final; +#endif +}; + +class PHIAddIncoming : public IRChangeBase { + PHINode &PHI; + unsigned Idx; + +public: + PHIAddIncoming(PHINode &PHI, Tracker &Tracker); + void revert() final; + void accept() final {} +#ifndef NDEBUG + void dump(raw_ostream &OS) const final { + dumpCommon(OS); + OS << "PHISetIncoming"; + } + LLVM_DUMP_METHOD void dump() const final; +#endif +}; + /// Tracks swapping a Use with another Use. class UseSwap : public IRChangeBase { Use ThisUse; diff --git a/llvm/include/llvm/SandboxIR/Use.h b/llvm/include/llvm/SandboxIR/Use.h index ef728ea3878516..35d01daf39f6e1 100644 --- a/llvm/include/llvm/SandboxIR/Use.h +++ b/llvm/include/llvm/SandboxIR/Use.h @@ -22,6 +22,7 @@ class Context; class Value; class User; class CallBase; +class PHINode; /// Represents a Def-use/Use-def edge in SandboxIR. /// NOTE: Unlike llvm::Use, this is not an integral part of the use-def chains. 
@@ -43,6 +44,7 @@ class Use { friend class UserUseIterator; // For accessing members friend class CallBase; // For LLVMUse friend class CallBrInst; // For constructor + friend class PHINode; // For LLVMUse public: operator Value *() const { return get(); } diff --git a/llvm/lib/SandboxIR/SandboxIR.cpp b/llvm/lib/SandboxIR/SandboxIR.cpp index 1ea22c3a8b48e5..4f12985bb0e636 100644 --- a/llvm/lib/SandboxIR/SandboxIR.cpp +++ b/llvm/lib/SandboxIR/SandboxIR.cpp @@ -1062,6 +1062,95 @@ void GetElementPtrInst::dump() const { } #endif // NDEBUG +BasicBlock *PHINode::LLVMBBToBB::operator()(llvm::BasicBlock *LLVMBB) const { + return cast(Ctx.getValue(LLVMBB)); +} + +PHINode *PHINode::create(Type *Ty, unsigned NumReservedValues, + Instruction *InsertBefore, Context &Ctx, + const Twine &Name) { + llvm::PHINode *NewPHI = llvm::PHINode::Create( + Ty, NumReservedValues, Name, InsertBefore->getTopmostLLVMInstruction()); + return Ctx.createPHINode(NewPHI); +} + +bool PHINode::classof(const Value *From) { + return From->getSubclassID() == ClassID::PHI; +} + +Value *PHINode::getIncomingValue(unsigned Idx) const { + return Ctx.getValue(cast(Val)->getIncomingValue(Idx)); +} +void PHINode::setIncomingValue(unsigned Idx, Value *V) { + auto &Tracker = Ctx.getTracker(); + if (Tracker.isTracking()) + Tracker.track(std::make_unique( + *this, Idx, PHISetIncoming::What::Value, Tracker)); + + cast(Val)->setIncomingValue(Idx, V->Val); +} +BasicBlock *PHINode::getIncomingBlock(unsigned Idx) const { + return cast( + Ctx.getValue(cast(Val)->getIncomingBlock(Idx))); +} +BasicBlock *PHINode::getIncomingBlock(const Use &U) const { + llvm::Use *LLVMUse = U.LLVMUse; + llvm::BasicBlock *BB = cast(Val)->getIncomingBlock(*LLVMUse); + return cast(Ctx.getValue(BB)); +} +void PHINode::setIncomingBlock(unsigned Idx, BasicBlock *BB) { + auto &Tracker = Ctx.getTracker(); + if (Tracker.isTracking()) + Tracker.track(std::make_unique( + *this, Idx, PHISetIncoming::What::Block, Tracker)); + cast(Val)->setIncomingBlock(Idx, + cast(BB->Val)); +} +void PHINode::addIncoming(Value *V, BasicBlock *BB) { + auto &Tracker = Ctx.getTracker(); + if (Tracker.isTracking()) + Tracker.track(std::make_unique(*this, Tracker)); + + cast(Val)->addIncoming(V->Val, + cast(BB->Val)); +} +Value *PHINode::removeIncomingValue(unsigned Idx) { + auto &Tracker = Ctx.getTracker(); + if (Tracker.isTracking()) + Tracker.track(std::make_unique(*this, Idx, Tracker)); + + llvm::Value *LLVMV = + cast(Val)->removeIncomingValue(Idx, + /*DeletePHIIfEmpty=*/false); + return Ctx.getValue(LLVMV); +} +Value *PHINode::removeIncomingValue(BasicBlock *BB) { + auto &Tracker = Ctx.getTracker(); + if (Tracker.isTracking()) + Tracker.track(std::make_unique( + *this, getBasicBlockIndex(BB), Tracker)); + + auto *LLVMBB = cast(BB->Val); + llvm::Value *LLVMV = + cast(Val)->removeIncomingValue(LLVMBB, + /*DeletePHIIfEmpty=*/false); + return Ctx.getValue(LLVMV); +} +int PHINode::getBasicBlockIndex(const BasicBlock *BB) const { + auto *LLVMBB = cast(BB->Val); + return cast(Val)->getBasicBlockIndex(LLVMBB); +} +Value *PHINode::getIncomingValueForBlock(const BasicBlock *BB) const { + auto *LLVMBB = cast(BB->Val); + llvm::Value *LLVMV = + cast(Val)->getIncomingValueForBlock(LLVMBB); + return Ctx.getValue(LLVMV); +} +Value *PHINode::hasConstantValue() const { + llvm::Value *LLVMV = cast(Val)->hasConstantValue(); + return LLVMV != nullptr ? 
Ctx.getValue(LLVMV) : nullptr; +} + static llvm::Instruction::CastOps getLLVMCastOp(Instruction::Opcode Opc) { switch (Opc) { case Instruction::Opcode::ZExt: @@ -1272,6 +1361,16 @@ Value *PtrToIntInst::create(Value *Src, Type *DestTy, BasicBlock *InsertAtEnd, } #ifndef NDEBUG +void PHINode::dump(raw_ostream &OS) const { + dumpCommonPrefix(OS); + dumpCommonSuffix(OS); +} + +void PHINode::dump() const { + dump(dbgs()); + dbgs() << "\n"; +} + void PtrToIntInst::dump(raw_ostream &OS) const { dumpCommonPrefix(OS); dumpCommonSuffix(OS); @@ -1537,6 +1636,11 @@ Value *Context::getOrCreateValueInternal(llvm::Value *LLVMV, llvm::User *U) { It->second = std::unique_ptr(new CastInst(LLVMCast, *this)); return It->second.get(); } + case llvm::Instruction::PHI: { + auto *LLVMPhi = cast(LLVMV); + It->second = std::unique_ptr(new PHINode(LLVMPhi, *this)); + return It->second.get(); + } default: break; } @@ -1606,6 +1710,10 @@ CastInst *Context::createCastInst(llvm::CastInst *I) { auto NewPtr = std::unique_ptr(new CastInst(I, *this)); return cast(registerValue(std::move(NewPtr))); } +PHINode *Context::createPHINode(llvm::PHINode *I) { + auto NewPtr = std::unique_ptr(new PHINode(I, *this)); + return cast(registerValue(std::move(NewPtr))); +} Value *Context::getValue(llvm::Value *V) const { auto It = LLVMValueToValueMap.find(V); diff --git a/llvm/lib/SandboxIR/Tracker.cpp b/llvm/lib/SandboxIR/Tracker.cpp index eae55d7b3d962f..0310160e8bf35a 100644 --- a/llvm/lib/SandboxIR/Tracker.cpp +++ b/llvm/lib/SandboxIR/Tracker.cpp @@ -42,6 +42,81 @@ void UseSwap::dump() const { } #endif // NDEBUG +PHISetIncoming::PHISetIncoming(PHINode &PHI, unsigned Idx, What What, + Tracker &Tracker) + : IRChangeBase(Tracker), PHI(PHI), Idx(Idx) { + switch (What) { + case What::Value: + OrigValueOrBB = PHI.getIncomingValue(Idx); + break; + case What::Block: + OrigValueOrBB = PHI.getIncomingBlock(Idx); + break; + } +} + +void PHISetIncoming::revert() { + if (auto *V = OrigValueOrBB.dyn_cast()) + PHI.setIncomingValue(Idx, V); + else + PHI.setIncomingBlock(Idx, OrigValueOrBB.get()); +} + +#ifndef NDEBUG +void PHISetIncoming::dump() const { + dump(dbgs()); + dbgs() << "\n"; +} +#endif // NDEBUG + +PHIRemoveIncoming::PHIRemoveIncoming(PHINode &PHI, unsigned RemovedIdx, + Tracker &Tracker) + : IRChangeBase(Tracker), PHI(PHI), RemovedIdx(RemovedIdx) { + RemovedV = PHI.getIncomingValue(RemovedIdx); + RemovedBB = PHI.getIncomingBlock(RemovedIdx); +} + +void PHIRemoveIncoming::revert() { + // Special case: if the PHI is now empty, as we don't need to care about the + // order of the incoming values. + unsigned NumIncoming = PHI.getNumIncomingValues(); + if (NumIncoming == 0) { + PHI.addIncoming(RemovedV, RemovedBB); + return; + } + // Shift all incoming values by one starting from the end until `Idx`. + // Start by adding a copy of the last incoming values. 
+ unsigned LastIdx = NumIncoming - 1; + PHI.addIncoming(PHI.getIncomingValue(LastIdx), PHI.getIncomingBlock(LastIdx)); + for (unsigned Idx = LastIdx; Idx > RemovedIdx; --Idx) { + auto *PrevV = PHI.getIncomingValue(Idx - 1); + auto *PrevBB = PHI.getIncomingBlock(Idx - 1); + PHI.setIncomingValue(Idx, PrevV); + PHI.setIncomingBlock(Idx, PrevBB); + } + PHI.setIncomingValue(RemovedIdx, RemovedV); + PHI.setIncomingBlock(RemovedIdx, RemovedBB); +} + +#ifndef NDEBUG +void PHIRemoveIncoming::dump() const { + dump(dbgs()); + dbgs() << "\n"; +} +#endif // NDEBUG + +PHIAddIncoming::PHIAddIncoming(PHINode &PHI, Tracker &Tracker) + : IRChangeBase(Tracker), PHI(PHI), Idx(PHI.getNumIncomingValues()) {} + +void PHIAddIncoming::revert() { PHI.removeIncomingValue(Idx); } + +#ifndef NDEBUG +void PHIAddIncoming::dump() const { + dump(dbgs()); + dbgs() << "\n"; +} +#endif // NDEBUG + Tracker::~Tracker() { assert(Changes.empty() && "You must accept or revert changes!"); } diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp index 9d4fba404a43cf..31feb56a5272f8 100644 --- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp +++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp @@ -2112,3 +2112,123 @@ define void @foo(ptr %ptr) { EXPECT_EQ(NewI->getParent(), BB); } } + +TEST_F(SandboxIRTest, PHINode) { + parseIR(C, R"IR( +define void @foo(i32 %arg) { +bb1: + br label %bb2 + +bb2: + %phi = phi i32 [ %arg, %bb1 ], [ 0, %bb2 ] + br label %bb2 + +bb3: + ret void +} +)IR"); + Function &LLVMF = *M->getFunction("foo"); + auto *LLVMBB1 = getBasicBlockByName(LLVMF, "bb1"); + auto *LLVMBB2 = getBasicBlockByName(LLVMF, "bb2"); + auto *LLVMBB3 = getBasicBlockByName(LLVMF, "bb3"); + auto LLVMIt = LLVMBB2->begin(); + auto *LLVMPHI = cast(&*LLVMIt++); + sandboxir::Context Ctx(C); + sandboxir::Function *F = Ctx.createFunction(&LLVMF); + auto *Arg = F->getArg(0); + auto *BB1 = cast(Ctx.getValue(LLVMBB1)); + auto *BB2 = cast(Ctx.getValue(LLVMBB2)); + auto *BB3 = cast(Ctx.getValue(LLVMBB3)); + auto It = BB2->begin(); + // Check classof(). + auto *PHI = cast(&*It++); + auto *Br = cast(&*It++); + // Check blocks(). + EXPECT_EQ(range_size(PHI->blocks()), range_size(LLVMPHI->blocks())); + auto BlockIt = PHI->block_begin(); + for (llvm::BasicBlock *LLVMBB : LLVMPHI->blocks()) { + sandboxir::BasicBlock *BB = *BlockIt++; + EXPECT_EQ(BB, Ctx.getValue(LLVMBB)); + } + // Check incoming_values(). + EXPECT_EQ(range_size(PHI->incoming_values()), + range_size(LLVMPHI->incoming_values())); + auto IncIt = PHI->incoming_values().begin(); + for (llvm::Value *LLVMV : LLVMPHI->incoming_values()) { + sandboxir::Value *IncV = *IncIt++; + EXPECT_EQ(IncV, Ctx.getValue(LLVMV)); + } + // Check getNumIncomingValues(). + EXPECT_EQ(PHI->getNumIncomingValues(), LLVMPHI->getNumIncomingValues()); + // Check getIncomingValue(). + EXPECT_EQ(PHI->getIncomingValue(0), + Ctx.getValue(LLVMPHI->getIncomingValue(0))); + EXPECT_EQ(PHI->getIncomingValue(1), + Ctx.getValue(LLVMPHI->getIncomingValue(1))); + // Check setIncomingValue(). + auto *OrigV = PHI->getIncomingValue(0); + PHI->setIncomingValue(0, PHI); + EXPECT_EQ(PHI->getIncomingValue(0), PHI); + PHI->setIncomingValue(0, OrigV); + // Check getOperandNumForIncomingValue(). + EXPECT_EQ(sandboxir::PHINode::getOperandNumForIncomingValue(0), + llvm::PHINode::getOperandNumForIncomingValue(0)); + // Check getIncomingValueNumForOperand(). 
+ EXPECT_EQ(sandboxir::PHINode::getIncomingValueNumForOperand(0), + llvm::PHINode::getIncomingValueNumForOperand(0)); + // Check getIncomingBlock(unsigned). + EXPECT_EQ(PHI->getIncomingBlock(0), + Ctx.getValue(LLVMPHI->getIncomingBlock(0))); + // Check getIncomingBlock(Use). + llvm::Use &LLVMUse = LLVMPHI->getOperandUse(0); + sandboxir::Use Use = PHI->getOperandUse(0); + EXPECT_EQ(PHI->getIncomingBlock(Use), + Ctx.getValue(LLVMPHI->getIncomingBlock(LLVMUse))); + // Check setIncomingBlock(). + sandboxir::BasicBlock *OrigBB = PHI->getIncomingBlock(0); + EXPECT_NE(OrigBB, BB2); + PHI->setIncomingBlock(0, BB2); + EXPECT_EQ(PHI->getIncomingBlock(0), BB2); + PHI->setIncomingBlock(0, OrigBB); + EXPECT_EQ(PHI->getIncomingBlock(0), OrigBB); + // Check addIncoming(). + unsigned OrigNumIncoming = PHI->getNumIncomingValues(); + PHI->addIncoming(Arg, BB3); + EXPECT_EQ(PHI->getNumIncomingValues(), LLVMPHI->getNumIncomingValues()); + EXPECT_EQ(PHI->getNumIncomingValues(), OrigNumIncoming + 1); + EXPECT_EQ(PHI->getIncomingValue(OrigNumIncoming), Arg); + EXPECT_EQ(PHI->getIncomingBlock(OrigNumIncoming), BB3); + // Check removeIncomingValue(unsigned). + PHI->removeIncomingValue(OrigNumIncoming); + EXPECT_EQ(PHI->getNumIncomingValues(), OrigNumIncoming); + // Check removeIncomingValue(BasicBlock *). + PHI->addIncoming(Arg, BB3); + PHI->removeIncomingValue(BB3); + EXPECT_EQ(PHI->getNumIncomingValues(), OrigNumIncoming); + // Check getBasicBlockIndex(). + EXPECT_EQ(PHI->getBasicBlockIndex(BB1), LLVMPHI->getBasicBlockIndex(LLVMBB1)); + // Check getIncomingValueForBlock(). + EXPECT_EQ(PHI->getIncomingValueForBlock(BB1), + Ctx.getValue(LLVMPHI->getIncomingValueForBlock(LLVMBB1))); + // Check hasConstantValue(). + llvm::Value *ConstV = LLVMPHI->hasConstantValue(); + EXPECT_EQ(PHI->hasConstantValue(), + ConstV != nullptr ? Ctx.getValue(ConstV) : nullptr); + // Check hasConstantOrUndefValue(). + EXPECT_EQ(PHI->hasConstantOrUndefValue(), LLVMPHI->hasConstantOrUndefValue()); + // Check isComplete(). + EXPECT_EQ(PHI->isComplete(), LLVMPHI->isComplete()); + + // Check create(). 
+ auto *NewPHI = cast( + sandboxir::PHINode::create(PHI->getType(), 0, Br, Ctx, "NewPHI")); + EXPECT_EQ(NewPHI->getType(), PHI->getType()); + EXPECT_EQ(NewPHI->getNextNode(), Br); + EXPECT_EQ(NewPHI->getName(), "NewPHI"); + EXPECT_EQ(NewPHI->getNumIncomingValues(), 0u); + for (auto [Idx, V] : enumerate(PHI->incoming_values())) { + sandboxir::BasicBlock *IncBB = PHI->getIncomingBlock(Idx); + NewPHI->addIncoming(V, IncBB); + } + EXPECT_EQ(NewPHI->getNumIncomingValues(), PHI->getNumIncomingValues()); +} diff --git a/llvm/unittests/SandboxIR/TrackerTest.cpp b/llvm/unittests/SandboxIR/TrackerTest.cpp index cd737d33dd1937..d016c7793a52c0 100644 --- a/llvm/unittests/SandboxIR/TrackerTest.cpp +++ b/llvm/unittests/SandboxIR/TrackerTest.cpp @@ -584,3 +584,127 @@ define void @foo(i8 %arg) { Ctx.revert(); EXPECT_EQ(CallBr->getIndirectDest(0), OrigIndirectDest); } + +TEST_F(TrackerTest, PHINodeSetters) { + parseIR(C, R"IR( +define void @foo(i8 %arg0, i8 %arg1, i8 %arg2) { +bb0: + br label %bb2 + +bb1: + %phi = phi i8 [ %arg0, %bb0 ], [ %arg1, %bb1 ] + br label %bb1 + +bb2: + ret void +} +)IR"); + Function &LLVMF = *M->getFunction("foo"); + sandboxir::Context Ctx(C); + auto &F = *Ctx.createFunction(&LLVMF); + unsigned ArgIdx = 0; + auto *Arg0 = F.getArg(ArgIdx++); + auto *Arg1 = F.getArg(ArgIdx++); + auto *Arg2 = F.getArg(ArgIdx++); + auto *BB0 = cast( + Ctx.getValue(getBasicBlockByName(LLVMF, "bb0"))); + auto *BB1 = cast( + Ctx.getValue(getBasicBlockByName(LLVMF, "bb1"))); + auto *BB2 = cast( + Ctx.getValue(getBasicBlockByName(LLVMF, "bb2"))); + auto *PHI = cast(&*BB1->begin()); + + // Check setIncomingValue(). + Ctx.save(); + EXPECT_EQ(PHI->getIncomingValue(0), Arg0); + PHI->setIncomingValue(0, Arg2); + EXPECT_EQ(PHI->getIncomingValue(0), Arg2); + Ctx.revert(); + EXPECT_EQ(PHI->getIncomingValue(0), Arg0); + EXPECT_EQ(PHI->getNumIncomingValues(), 2u); + EXPECT_EQ(PHI->getIncomingBlock(0), BB0); + EXPECT_EQ(PHI->getIncomingValue(0), Arg0); + EXPECT_EQ(PHI->getIncomingBlock(1), BB1); + EXPECT_EQ(PHI->getIncomingValue(1), Arg1); + + // Check setIncomingBlock(). + Ctx.save(); + EXPECT_EQ(PHI->getIncomingBlock(0), BB0); + PHI->setIncomingBlock(0, BB2); + EXPECT_EQ(PHI->getIncomingBlock(0), BB2); + Ctx.revert(); + EXPECT_EQ(PHI->getIncomingBlock(0), BB0); + EXPECT_EQ(PHI->getNumIncomingValues(), 2u); + EXPECT_EQ(PHI->getIncomingBlock(0), BB0); + EXPECT_EQ(PHI->getIncomingValue(0), Arg0); + EXPECT_EQ(PHI->getIncomingBlock(1), BB1); + EXPECT_EQ(PHI->getIncomingValue(1), Arg1); + + // Check addIncoming(). + Ctx.save(); + EXPECT_EQ(PHI->getNumIncomingValues(), 2u); + PHI->addIncoming(Arg1, BB2); + EXPECT_EQ(PHI->getNumIncomingValues(), 3u); + EXPECT_EQ(PHI->getIncomingBlock(2), BB2); + EXPECT_EQ(PHI->getIncomingValue(2), Arg1); + Ctx.revert(); + EXPECT_EQ(PHI->getNumIncomingValues(), 2u); + EXPECT_EQ(PHI->getIncomingBlock(0), BB0); + EXPECT_EQ(PHI->getIncomingValue(0), Arg0); + EXPECT_EQ(PHI->getIncomingBlock(1), BB1); + EXPECT_EQ(PHI->getIncomingValue(1), Arg1); + + // Check removeIncomingValue(1). + Ctx.save(); + PHI->removeIncomingValue(1); + EXPECT_EQ(PHI->getNumIncomingValues(), 1u); + EXPECT_EQ(PHI->getIncomingBlock(0), BB0); + EXPECT_EQ(PHI->getIncomingValue(0), Arg0); + Ctx.revert(); + EXPECT_EQ(PHI->getNumIncomingValues(), 2u); + EXPECT_EQ(PHI->getIncomingBlock(0), BB0); + EXPECT_EQ(PHI->getIncomingValue(0), Arg0); + EXPECT_EQ(PHI->getIncomingBlock(1), BB1); + EXPECT_EQ(PHI->getIncomingValue(1), Arg1); + + // Check removeIncomingValue(0). 
+ Ctx.save(); + PHI->removeIncomingValue(0u); + EXPECT_EQ(PHI->getNumIncomingValues(), 1u); + EXPECT_EQ(PHI->getIncomingBlock(0), BB1); + EXPECT_EQ(PHI->getIncomingValue(0), Arg1); + Ctx.revert(); + EXPECT_EQ(PHI->getNumIncomingValues(), 2u); + EXPECT_EQ(PHI->getIncomingBlock(0), BB0); + EXPECT_EQ(PHI->getIncomingValue(0), Arg0); + EXPECT_EQ(PHI->getIncomingBlock(1), BB1); + EXPECT_EQ(PHI->getIncomingValue(1), Arg1); + + // Check removeIncomingValue() remove all. + Ctx.save(); + PHI->removeIncomingValue(0u); + EXPECT_EQ(PHI->getNumIncomingValues(), 1u); + EXPECT_EQ(PHI->getIncomingBlock(0), BB1); + EXPECT_EQ(PHI->getIncomingValue(0), Arg1); + PHI->removeIncomingValue(0u); + EXPECT_EQ(PHI->getNumIncomingValues(), 0u); + Ctx.revert(); + EXPECT_EQ(PHI->getNumIncomingValues(), 2u); + EXPECT_EQ(PHI->getIncomingBlock(0), BB0); + EXPECT_EQ(PHI->getIncomingValue(0), Arg0); + EXPECT_EQ(PHI->getIncomingBlock(1), BB1); + EXPECT_EQ(PHI->getIncomingValue(1), Arg1); + + // Check removeIncomingValue(BasicBlock *). + Ctx.save(); + PHI->removeIncomingValue(BB1); + EXPECT_EQ(PHI->getNumIncomingValues(), 1u); + EXPECT_EQ(PHI->getIncomingBlock(0), BB0); + EXPECT_EQ(PHI->getIncomingValue(0), Arg0); + Ctx.revert(); + EXPECT_EQ(PHI->getNumIncomingValues(), 2u); + EXPECT_EQ(PHI->getIncomingBlock(0), BB0); + EXPECT_EQ(PHI->getIncomingValue(0), Arg0); + EXPECT_EQ(PHI->getIncomingBlock(1), BB1); + EXPECT_EQ(PHI->getIncomingValue(1), Arg1); +} From 3a4c7cc56c07b2db9010c2228fc7cb2a43dd9b2d Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Wed, 31 Jul 2024 16:51:45 -0700 Subject: [PATCH 044/114] Forward declare OSSpinLockLock on MacOS since it's not shipped on the system. (#101392) Fixes build errors on some SDKs. rdar://132607572 --- compiler-rt/lib/rtsan/rtsan_interceptors.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/compiler-rt/lib/rtsan/rtsan_interceptors.cpp b/compiler-rt/lib/rtsan/rtsan_interceptors.cpp index 4d5423ec629d22..b63040446e53c7 100644 --- a/compiler-rt/lib/rtsan/rtsan_interceptors.cpp +++ b/compiler-rt/lib/rtsan/rtsan_interceptors.cpp @@ -21,6 +21,18 @@ #include "rtsan/rtsan_context.h" #if SANITIZER_APPLE + +#if TARGET_OS_MAC +// On MacOS OSSpinLockLock is deprecated and no longer present in the headers, +// but the symbol still exists on the system. Forward declare here so we +// don't get compilation errors. +#include +extern "C" { +typedef int32_t OSSpinLock; +void OSSpinLockLock(volatile OSSpinLock *__lock); +} +#endif + #include #include #endif From 307d1249ea635a78fcd347a65ddaa395cf64130e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 31 Jul 2024 17:18:27 -0700 Subject: [PATCH 045/114] [LegalizeTypes][RISCV][LoongArch] Optimize promotion of ucmp. (#101366) ucmp can be promoted with either sext or zext. RISC-V and LoongArch prefer sext for promoting i32 to i64 unless the inputs are known to be zero extended already. This patch uses the existing SExtOrZExtPromotedOperands function that is used by SETCC promotion to intelligently handle this. 
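
For illustration (a sketch distilled from the updated RISC-V tests below, not an
exhaustive description of the lowering): on RV64, zero-extending an i32 value held
in a GPR costs two instructions while sign-extending costs one, and an unsigned
compare gives the same answer as long as both operands are widened the same way:

  zext i32 -> i64:  slli a1, a1, 32 ; srli a1, a1, 32
  sext i32 -> i64:  sext.w a1, a1

When the operands are already known to be zero extended (e.g. zeroext arguments),
no extension instructions are needed at all, which SExtOrZExtPromotedOperands
already accounts for.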
--- .../SelectionDAG/LegalizeIntegerTypes.cpp | 15 +++-- llvm/test/CodeGen/LoongArch/ucmp.ll | 8 +-- llvm/test/CodeGen/RISCV/ucmp.ll | 60 ++++++++++++++----- 3 files changed, 59 insertions(+), 24 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 33a53dfc81379a..b1ada66aa9aeb3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2294,12 +2294,15 @@ SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_CMP(SDNode *N) { - SDValue LHS = N->getOpcode() == ISD::UCMP - ? ZExtPromotedInteger(N->getOperand(0)) - : SExtPromotedInteger(N->getOperand(0)); - SDValue RHS = N->getOpcode() == ISD::UCMP - ? ZExtPromotedInteger(N->getOperand(1)) - : SExtPromotedInteger(N->getOperand(1)); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + + if (N->getOpcode() == ISD::SCMP) { + LHS = SExtPromotedInteger(LHS); + RHS = SExtPromotedInteger(RHS); + } else { + SExtOrZExtPromotedOperands(LHS, RHS); + } return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS), 0); } diff --git a/llvm/test/CodeGen/LoongArch/ucmp.ll b/llvm/test/CodeGen/LoongArch/ucmp.ll index 548c5bd0db72ba..b91d3bf15d812d 100644 --- a/llvm/test/CodeGen/LoongArch/ucmp.ll +++ b/llvm/test/CodeGen/LoongArch/ucmp.ll @@ -26,8 +26,8 @@ define i8 @ucmp.8.16(i16 zeroext %x, i16 zeroext %y) nounwind { define i8 @ucmp.8.32(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: ucmp.8.32: ; CHECK: # %bb.0: -; CHECK-NEXT: bstrpick.d $a1, $a1, 31, 0 -; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0 +; CHECK-NEXT: addi.w $a1, $a1, 0 +; CHECK-NEXT: addi.w $a0, $a0, 0 ; CHECK-NEXT: sltu $a2, $a0, $a1 ; CHECK-NEXT: sltu $a0, $a1, $a0 ; CHECK-NEXT: sub.d $a0, $a0, $a2 @@ -71,8 +71,8 @@ define i8 @ucmp.8.128(i128 %x, i128 %y) nounwind { define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: ucmp.32.32: ; CHECK: # %bb.0: -; CHECK-NEXT: bstrpick.d $a1, $a1, 31, 0 -; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0 +; CHECK-NEXT: addi.w $a1, $a1, 0 +; CHECK-NEXT: addi.w $a0, $a0, 0 ; CHECK-NEXT: sltu $a2, $a0, $a1 ; CHECK-NEXT: sltu $a0, $a1, $a0 ; CHECK-NEXT: sub.d $a0, $a0, $a2 diff --git a/llvm/test/CodeGen/RISCV/ucmp.ll b/llvm/test/CodeGen/RISCV/ucmp.ll index 026340ede1f908..c74bc6838ff7df 100644 --- a/llvm/test/CodeGen/RISCV/ucmp.ll +++ b/llvm/test/CodeGen/RISCV/ucmp.ll @@ -48,10 +48,8 @@ define i8 @ucmp.8.32(i32 %x, i32 %y) nounwind { ; ; RV64I-LABEL: ucmp.8.32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: srli a1, a1, 32 -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: sext.w a1, a1 +; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: sltu a2, a0, a1 ; RV64I-NEXT: sltu a0, a1, a0 ; RV64I-NEXT: sub a0, a0, a2 @@ -164,10 +162,44 @@ define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind { ; ; RV64I-LABEL: ucmp.32.32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: srli a1, a1, 32 -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: sext.w a1, a1 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: sltu a2, a0, a1 +; RV64I-NEXT: sltu a0, a1, a0 +; RV64I-NEXT: sub a0, a0, a2 +; RV64I-NEXT: ret + %1 = call i32 @llvm.ucmp(i32 %x, i32 %y) + ret i32 %1 +} + +define i32 @ucmp.32.32_sext(i32 signext %x, i32 signext %y) nounwind { +; RV32I-LABEL: ucmp.32.32_sext: +; RV32I: # %bb.0: +; RV32I-NEXT: sltu a2, a0, a1 +; RV32I-NEXT: sltu a0, a1, a0 +; RV32I-NEXT: sub a0, a0, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: 
ucmp.32.32_sext: +; RV64I: # %bb.0: +; RV64I-NEXT: sltu a2, a0, a1 +; RV64I-NEXT: sltu a0, a1, a0 +; RV64I-NEXT: sub a0, a0, a2 +; RV64I-NEXT: ret + %1 = call i32 @llvm.ucmp(i32 %x, i32 %y) + ret i32 %1 +} + +define i32 @ucmp.32.32_zext(i32 zeroext %x, i32 zeroext %y) nounwind { +; RV32I-LABEL: ucmp.32.32_zext: +; RV32I: # %bb.0: +; RV32I-NEXT: sltu a2, a0, a1 +; RV32I-NEXT: sltu a0, a1, a0 +; RV32I-NEXT: sub a0, a0, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: ucmp.32.32_zext: +; RV64I: # %bb.0: ; RV64I-NEXT: sltu a2, a0, a1 ; RV64I-NEXT: sltu a0, a1, a0 ; RV64I-NEXT: sub a0, a0, a2 @@ -179,13 +211,13 @@ define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind { define i32 @ucmp.32.64(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: ucmp.32.64: ; RV32I: # %bb.0: -; RV32I-NEXT: beq a1, a3, .LBB6_2 +; RV32I-NEXT: beq a1, a3, .LBB8_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sltu a4, a1, a3 ; RV32I-NEXT: sltu a0, a3, a1 ; RV32I-NEXT: sub a0, a0, a4 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB6_2: +; RV32I-NEXT: .LBB8_2: ; RV32I-NEXT: sltu a4, a0, a2 ; RV32I-NEXT: sltu a0, a2, a0 ; RV32I-NEXT: sub a0, a0, a4 @@ -204,15 +236,15 @@ define i32 @ucmp.32.64(i64 %x, i64 %y) nounwind { define i64 @ucmp.64.64(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: ucmp.64.64: ; RV32I: # %bb.0: -; RV32I-NEXT: beq a1, a3, .LBB7_2 +; RV32I-NEXT: beq a1, a3, .LBB9_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sltu a4, a1, a3 ; RV32I-NEXT: sltu a0, a3, a1 -; RV32I-NEXT: j .LBB7_3 -; RV32I-NEXT: .LBB7_2: +; RV32I-NEXT: j .LBB9_3 +; RV32I-NEXT: .LBB9_2: ; RV32I-NEXT: sltu a4, a0, a2 ; RV32I-NEXT: sltu a0, a2, a0 -; RV32I-NEXT: .LBB7_3: +; RV32I-NEXT: .LBB9_3: ; RV32I-NEXT: sub a0, a0, a4 ; RV32I-NEXT: srai a1, a0, 31 ; RV32I-NEXT: ret From 1c5f6cfc352c3bd2a4faa0e3aebb4028b557a5e7 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Wed, 31 Jul 2024 17:29:15 -0700 Subject: [PATCH 046/114] [DirectX] Rename backend DXIL resource analysis passes to DXILResourceMD*. NFC These passes will be replaced soon as we move to the target extension based resource handling in the DirectX backend, but removing them now before the replacement stuff is all up and running would be very disruptive. However, we do need to move these passes out of the way to avoid symbol conflicts with the new DXILResourceAnalysis in the Analysis library. Note: I tried an even simpler hack in #100698 but it doesn't really work. A rename is the most expedient path forward here. Pull Request: https://github.com/llvm/llvm-project/pull/101393 --- llvm/lib/Target/DirectX/DXILPrepare.cpp | 2 +- llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp | 6 ++--- .../Target/DirectX/DXILResourceAnalysis.cpp | 24 +++++++++---------- .../lib/Target/DirectX/DXILResourceAnalysis.h | 16 +++++++------ .../Target/DirectX/DXILTranslateMetadata.cpp | 6 ++--- llvm/lib/Target/DirectX/DirectX.h | 2 +- .../Target/DirectX/DirectXPassRegistry.def | 4 ++-- .../Target/DirectX/DirectXTargetMachine.cpp | 2 +- llvm/test/CodeGen/DirectX/UAVMetadata.ll | 2 +- llvm/test/CodeGen/DirectX/cbuf.ll | 2 +- 10 files changed, 34 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp b/llvm/lib/Target/DirectX/DXILPrepare.cpp index 889de3a81536d1..56098864e987fb 100644 --- a/llvm/lib/Target/DirectX/DXILPrepare.cpp +++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp @@ -246,7 +246,7 @@ class DXILPrepareModule : public ModulePass { DXILPrepareModule() : ModulePass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addPreserved(); - AU.addPreserved(); + AU.addPreserved(); } static char ID; // Pass identification. 
}; diff --git a/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp b/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp index 7ae568c5ae53a3..99cc4067b1d62d 100644 --- a/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp +++ b/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp @@ -41,7 +41,7 @@ class DXILPrettyPrinter : public llvm::ModulePass { bool runOnModule(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); - AU.addRequired(); + AU.addRequired(); } }; } // namespace @@ -49,12 +49,12 @@ class DXILPrettyPrinter : public llvm::ModulePass { char DXILPrettyPrinter::ID = 0; INITIALIZE_PASS_BEGIN(DXILPrettyPrinter, "dxil-pretty-printer", "DXIL Metadata Pretty Printer", true, true) -INITIALIZE_PASS_DEPENDENCY(DXILResourceWrapper) +INITIALIZE_PASS_DEPENDENCY(DXILResourceMDWrapper) INITIALIZE_PASS_END(DXILPrettyPrinter, "dxil-pretty-printer", "DXIL Metadata Pretty Printer", true, true) bool DXILPrettyPrinter::runOnModule(Module &M) { - dxil::Resources &Res = getAnalysis().getDXILResource(); + dxil::Resources &Res = getAnalysis().getDXILResource(); Res.print(OS); return false; } diff --git a/llvm/lib/Target/DirectX/DXILResourceAnalysis.cpp b/llvm/lib/Target/DirectX/DXILResourceAnalysis.cpp index 0b2f0d827ebbc9..33e0119807bb8e 100644 --- a/llvm/lib/Target/DirectX/DXILResourceAnalysis.cpp +++ b/llvm/lib/Target/DirectX/DXILResourceAnalysis.cpp @@ -18,35 +18,35 @@ using namespace llvm; #define DEBUG_TYPE "dxil-resource-analysis" -dxil::Resources DXILResourceAnalysis::run(Module &M, - ModuleAnalysisManager &AM) { +dxil::Resources DXILResourceMDAnalysis::run(Module &M, + ModuleAnalysisManager &AM) { dxil::Resources R; R.collect(M); return R; } -AnalysisKey DXILResourceAnalysis::Key; +AnalysisKey DXILResourceMDAnalysis::Key; -PreservedAnalyses DXILResourcePrinterPass::run(Module &M, - ModuleAnalysisManager &AM) { - dxil::Resources Res = AM.getResult(M); +PreservedAnalyses DXILResourceMDPrinterPass::run(Module &M, + ModuleAnalysisManager &AM) { + dxil::Resources Res = AM.getResult(M); Res.print(OS); return PreservedAnalyses::all(); } -char DXILResourceWrapper::ID = 0; -INITIALIZE_PASS_BEGIN(DXILResourceWrapper, DEBUG_TYPE, +char DXILResourceMDWrapper::ID = 0; +INITIALIZE_PASS_BEGIN(DXILResourceMDWrapper, DEBUG_TYPE, "DXIL resource Information", true, true) -INITIALIZE_PASS_END(DXILResourceWrapper, DEBUG_TYPE, +INITIALIZE_PASS_END(DXILResourceMDWrapper, DEBUG_TYPE, "DXIL resource Information", true, true) -bool DXILResourceWrapper::runOnModule(Module &M) { +bool DXILResourceMDWrapper::runOnModule(Module &M) { Resources.collect(M); return false; } -DXILResourceWrapper::DXILResourceWrapper() : ModulePass(ID) {} +DXILResourceMDWrapper::DXILResourceMDWrapper() : ModulePass(ID) {} -void DXILResourceWrapper::print(raw_ostream &OS, const Module *) const { +void DXILResourceMDWrapper::print(raw_ostream &OS, const Module *) const { Resources.print(OS); } diff --git a/llvm/lib/Target/DirectX/DXILResourceAnalysis.h b/llvm/lib/Target/DirectX/DXILResourceAnalysis.h index bce41160b95ec9..3a2b8a9fd39d59 100644 --- a/llvm/lib/Target/DirectX/DXILResourceAnalysis.h +++ b/llvm/lib/Target/DirectX/DXILResourceAnalysis.h @@ -20,8 +20,9 @@ namespace llvm { /// Analysis pass that exposes the \c DXILResource for a module. 
-class DXILResourceAnalysis : public AnalysisInfoMixin { - friend AnalysisInfoMixin; +class DXILResourceMDAnalysis + : public AnalysisInfoMixin { + friend AnalysisInfoMixin; static AnalysisKey Key; public: @@ -29,25 +30,26 @@ class DXILResourceAnalysis : public AnalysisInfoMixin { dxil::Resources run(Module &M, ModuleAnalysisManager &AM); }; -/// Printer pass for the \c DXILResourceAnalysis results. -class DXILResourcePrinterPass : public PassInfoMixin { +/// Printer pass for the \c DXILResourceMDAnalysis results. +class DXILResourceMDPrinterPass + : public PassInfoMixin { raw_ostream &OS; public: - explicit DXILResourcePrinterPass(raw_ostream &OS) : OS(OS) {} + explicit DXILResourceMDPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); static bool isRequired() { return true; } }; /// The legacy pass manager's analysis pass to compute DXIL resource /// information. -class DXILResourceWrapper : public ModulePass { +class DXILResourceMDWrapper : public ModulePass { dxil::Resources Resources; public: static char ID; // Pass identification, replacement for typeid - DXILResourceWrapper(); + DXILResourceMDWrapper(); dxil::Resources &getDXILResource() { return Resources; } const dxil::Resources &getDXILResource() const { return Resources; } diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp index ae6d6f96904c86..583bce0f50e700 100644 --- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp +++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp @@ -33,7 +33,7 @@ class DXILTranslateMetadata : public ModulePass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); } @@ -51,7 +51,7 @@ bool DXILTranslateMetadata::runOnModule(Module &M) { dxil::createDXILVersionMD(M); const dxil::Resources &Res = - getAnalysis().getDXILResource(); + getAnalysis().getDXILResource(); Res.write(M); const uint64_t Flags = static_cast( @@ -69,7 +69,7 @@ ModulePass *llvm::createDXILTranslateMetadataPass() { INITIALIZE_PASS_BEGIN(DXILTranslateMetadata, "dxil-metadata-emit", "DXIL Metadata Emit", false, false) -INITIALIZE_PASS_DEPENDENCY(DXILResourceWrapper) +INITIALIZE_PASS_DEPENDENCY(DXILResourceMDWrapper) INITIALIZE_PASS_DEPENDENCY(ShaderFlagsAnalysisWrapper) INITIALIZE_PASS_END(DXILTranslateMetadata, "dxil-metadata-emit", "DXIL Metadata Emit", false, false) diff --git a/llvm/lib/Target/DirectX/DirectX.h b/llvm/lib/Target/DirectX/DirectX.h index 11b5412c21d783..d056ae2bc488e7 100644 --- a/llvm/lib/Target/DirectX/DirectX.h +++ b/llvm/lib/Target/DirectX/DirectX.h @@ -47,7 +47,7 @@ void initializeDXILTranslateMetadataPass(PassRegistry &); ModulePass *createDXILTranslateMetadataPass(); /// Initializer for DXILTranslateMetadata. -void initializeDXILResourceWrapperPass(PassRegistry &); +void initializeDXILResourceMDWrapperPass(PassRegistry &); /// Pass to pretty print DXIL metadata. 
ModulePass *createDXILPrettyPrinterPass(raw_ostream &OS); diff --git a/llvm/lib/Target/DirectX/DirectXPassRegistry.def b/llvm/lib/Target/DirectX/DirectXPassRegistry.def index 1b326d020d5114..7544172ab94e43 100644 --- a/llvm/lib/Target/DirectX/DirectXPassRegistry.def +++ b/llvm/lib/Target/DirectX/DirectXPassRegistry.def @@ -17,7 +17,7 @@ #define MODULE_ANALYSIS(NAME, CREATE_PASS) #endif MODULE_ANALYSIS("dx-shader-flags", dxil::ShaderFlagsAnalysis()) -MODULE_ANALYSIS("dxil-resource", DXILResourceAnalysis()) +MODULE_ANALYSIS("dxil-resource-md", DXILResourceMDAnalysis()) #undef MODULE_ANALYSIS #ifndef MODULE_PASS @@ -25,5 +25,5 @@ MODULE_ANALYSIS("dxil-resource", DXILResourceAnalysis()) #endif // TODO: rename to print after NPM switch MODULE_PASS("print-dx-shader-flags", dxil::ShaderFlagsAnalysisPrinter(dbgs())) -MODULE_PASS("print-dxil-resource", DXILResourcePrinterPass(dbgs())) +MODULE_PASS("print-dxil-resource-md", DXILResourceMDPrinterPass(dbgs())) #undef MODULE_PASS diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp index e6dbb25b710ec6..92bd69b69684f0 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp @@ -46,7 +46,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() { initializeDXContainerGlobalsPass(*PR); initializeDXILOpLoweringLegacyPass(*PR); initializeDXILTranslateMetadataPass(*PR); - initializeDXILResourceWrapperPass(*PR); + initializeDXILResourceMDWrapperPass(*PR); initializeShaderFlagsAnalysisWrapperPass(*PR); } diff --git a/llvm/test/CodeGen/DirectX/UAVMetadata.ll b/llvm/test/CodeGen/DirectX/UAVMetadata.ll index 0bc8a8cfcd713b..bdad9fd40c9bd3 100644 --- a/llvm/test/CodeGen/DirectX/UAVMetadata.ll +++ b/llvm/test/CodeGen/DirectX/UAVMetadata.ll @@ -1,5 +1,5 @@ ; RUN: opt -S -dxil-metadata-emit < %s | FileCheck %s -; RUN: opt -S --passes="print-dxil-resource" < %s 2>&1 | FileCheck %s --check-prefix=PRINT +; RUN: opt -S --passes="print-dxil-resource-md" < %s 2>&1 | FileCheck %s --check-prefix=PRINT ; RUN: llc %s --filetype=asm -o - < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,PRINT target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" diff --git a/llvm/test/CodeGen/DirectX/cbuf.ll b/llvm/test/CodeGen/DirectX/cbuf.ll index d07cc1e880b1a8..38f08fad995d1f 100644 --- a/llvm/test/CodeGen/DirectX/cbuf.ll +++ b/llvm/test/CodeGen/DirectX/cbuf.ll @@ -1,5 +1,5 @@ ; RUN: opt -S -dxil-metadata-emit < %s | FileCheck %s --check-prefix=DXILMD -; RUN: opt -S --passes="print-dxil-resource" < %s 2>&1 | FileCheck %s --check-prefix=PRINT +; RUN: opt -S --passes="print-dxil-resource-md" < %s 2>&1 | FileCheck %s --check-prefix=PRINT target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" target triple = "dxil-unknown-shadermodel6.7-library" From 5dbbc3b14bb04ef4bf2cbf4c23008f94f4253704 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Wed, 31 Jul 2024 11:02:18 -0700 Subject: [PATCH 047/114] [lldb] Use Target references instead of pointers in CommandObject (NFC) The GetTarget helper returns a Target reference, so there's no reason to convert it to a pointer and check its validity.
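As a rough illustration of the pattern this change applies throughout the affected command objects (a sketch assembled from the diff below, not a verbatim excerpt of any single function):

  // Before: take the address of the returned reference and go through a pointer.
  Target *target = &GetTarget();
  const BreakpointList &breakpoints = target->GetBreakpointList();

  // After: keep the reference and use it directly; no pointer, no validity check.
  Target &target = GetTarget();
  const BreakpointList &breakpoints = target.GetBreakpointList();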
--- .../CommandObjectBreakpointCommand.cpp | 8 +- .../Commands/CommandObjectDisassemble.cpp | 6 +- lldb/source/Commands/CommandObjectTarget.cpp | 185 ++++++++---------- .../Commands/CommandObjectWatchpoint.cpp | 96 ++++----- .../source/Commands/CommandObjectWatchpoint.h | 2 +- .../CommandObjectWatchpointCommand.cpp | 18 +- 6 files changed, 148 insertions(+), 167 deletions(-) diff --git a/lldb/source/Commands/CommandObjectBreakpointCommand.cpp b/lldb/source/Commands/CommandObjectBreakpointCommand.cpp index 5d95c2a30957df..8c1fb513e016ec 100644 --- a/lldb/source/Commands/CommandObjectBreakpointCommand.cpp +++ b/lldb/source/Commands/CommandObjectBreakpointCommand.cpp @@ -548,9 +548,9 @@ class CommandObjectBreakpointCommandList : public CommandObjectParsed { protected: void DoExecute(Args &command, CommandReturnObject &result) override { - Target *target = &GetTarget(); + Target &target = GetTarget(); - const BreakpointList &breakpoints = target->GetBreakpointList(); + const BreakpointList &breakpoints = target.GetBreakpointList(); size_t num_breakpoints = breakpoints.GetSize(); if (num_breakpoints == 0) { @@ -566,7 +566,7 @@ class CommandObjectBreakpointCommandList : public CommandObjectParsed { BreakpointIDList valid_bp_ids; CommandObjectMultiwordBreakpoint::VerifyBreakpointOrLocationIDs( - command, target, result, &valid_bp_ids, + command, &target, result, &valid_bp_ids, BreakpointName::Permissions::PermissionKinds::listPerm); if (result.Succeeded()) { @@ -575,7 +575,7 @@ class CommandObjectBreakpointCommandList : public CommandObjectParsed { BreakpointID cur_bp_id = valid_bp_ids.GetBreakpointIDAtIndex(i); if (cur_bp_id.GetBreakpointID() != LLDB_INVALID_BREAK_ID) { Breakpoint *bp = - target->GetBreakpointByID(cur_bp_id.GetBreakpointID()).get(); + target.GetBreakpointByID(cur_bp_id.GetBreakpointID()).get(); if (bp) { BreakpointLocationSP bp_loc_sp; diff --git a/lldb/source/Commands/CommandObjectDisassemble.cpp b/lldb/source/Commands/CommandObjectDisassemble.cpp index 8ec55cc1207251..652a300706b11f 100644 --- a/lldb/source/Commands/CommandObjectDisassemble.cpp +++ b/lldb/source/Commands/CommandObjectDisassemble.cpp @@ -439,10 +439,10 @@ CommandObjectDisassemble::GetRangesForSelectedMode( void CommandObjectDisassemble::DoExecute(Args &command, CommandReturnObject &result) { - Target *target = &GetTarget(); + Target &target = GetTarget(); if (!m_options.arch.IsValid()) - m_options.arch = target->GetArchitecture(); + m_options.arch = target.GetArchitecture(); if (!m_options.arch.IsValid()) { result.AppendError( @@ -535,7 +535,7 @@ void CommandObjectDisassemble::DoExecute(Args &command, } else { result.AppendErrorWithFormat( "Failed to disassemble memory at 0x%8.8" PRIx64 ".\n", - cur_range.GetBaseAddress().GetLoadAddress(target)); + cur_range.GetBaseAddress().GetLoadAddress(&target)); } } if (print_sc_header) diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp index 60a84820f69de6..b77bd8b0bc71a7 100644 --- a/lldb/source/Commands/CommandObjectTarget.cpp +++ b/lldb/source/Commands/CommandObjectTarget.cpp @@ -1027,7 +1027,7 @@ class CommandObjectTargetModulesSearchPathsAdd : public CommandObjectParsed { protected: void DoExecute(Args &command, CommandReturnObject &result) override { - Target *target = &GetTarget(); + Target &target = GetTarget(); const size_t argc = command.GetArgumentCount(); if (argc & 1) { result.AppendError("add requires an even number of arguments\n"); @@ -1045,7 +1045,7 @@ class CommandObjectTargetModulesSearchPathsAdd : 
public CommandObjectParsed { from, to); } bool last_pair = ((argc - i) == 2); - target->GetImageSearchPathList().Append( + target.GetImageSearchPathList().Append( from, to, last_pair); // Notify if this is the last pair result.SetStatus(eReturnStatusSuccessFinishNoResult); } else { @@ -1074,9 +1074,9 @@ class CommandObjectTargetModulesSearchPathsClear : public CommandObjectParsed { protected: void DoExecute(Args &command, CommandReturnObject &result) override { - Target *target = &GetTarget(); + Target &target = GetTarget(); bool notify = true; - target->GetImageSearchPathList().Clear(notify); + target.GetImageSearchPathList().Clear(notify); result.SetStatus(eReturnStatusSuccessFinishNoResult); } }; @@ -1148,7 +1148,7 @@ class CommandObjectTargetModulesSearchPathsInsert : public CommandObjectParsed { protected: void DoExecute(Args &command, CommandReturnObject &result) override { - Target *target = &GetTarget(); + Target &target = GetTarget(); size_t argc = command.GetArgumentCount(); // check for at least 3 arguments and an odd number of parameters if (argc >= 3 && argc & 1) { @@ -1171,8 +1171,8 @@ class CommandObjectTargetModulesSearchPathsInsert : public CommandObjectParsed { if (from[0] && to[0]) { bool last_pair = ((argc - i) == 2); - target->GetImageSearchPathList().Insert(from, to, insert_idx, - last_pair); + target.GetImageSearchPathList().Insert(from, to, insert_idx, + last_pair); result.SetStatus(eReturnStatusSuccessFinishNoResult); } else { if (from[0]) @@ -1203,9 +1203,8 @@ class CommandObjectTargetModulesSearchPathsList : public CommandObjectParsed { protected: void DoExecute(Args &command, CommandReturnObject &result) override { - Target *target = &GetTarget(); - - target->GetImageSearchPathList().Dump(&result.GetOutputStream()); + Target &target = GetTarget(); + target.GetImageSearchPathList().Dump(&result.GetOutputStream()); result.SetStatus(eReturnStatusSuccessFinishResult); } }; @@ -1226,7 +1225,7 @@ class CommandObjectTargetModulesSearchPathsQuery : public CommandObjectParsed { protected: void DoExecute(Args &command, CommandReturnObject &result) override { - Target *target = &GetTarget(); + Target &target = GetTarget(); if (command.GetArgumentCount() != 1) { result.AppendError("query requires one argument\n"); return; @@ -1234,7 +1233,7 @@ class CommandObjectTargetModulesSearchPathsQuery : public CommandObjectParsed { ConstString orig(command.GetArgumentAtIndex(0)); ConstString transformed; - if (target->GetImageSearchPathList().RemapPath(orig, transformed)) + if (target.GetImageSearchPathList().RemapPath(orig, transformed)) result.GetOutputStream().Printf("%s\n", transformed.GetCString()); else result.GetOutputStream().Printf("%s\n", orig.GetCString()); @@ -1898,9 +1897,9 @@ class CommandObjectTargetModulesDumpObjfile protected: void DoExecute(Args &command, CommandReturnObject &result) override { - Target *target = &GetTarget(); + Target &target = GetTarget(); - uint32_t addr_byte_size = target->GetArchitecture().GetAddressByteSize(); + uint32_t addr_byte_size = target.GetArchitecture().GetAddressByteSize(); result.GetOutputStream().SetAddressByteSize(addr_byte_size); result.GetErrorStream().SetAddressByteSize(addr_byte_size); @@ -1908,7 +1907,7 @@ class CommandObjectTargetModulesDumpObjfile if (command.GetArgumentCount() == 0) { // Dump all headers for all modules images num_dumped = DumpModuleObjfileHeaders(result.GetOutputStream(), - target->GetImages()); + target.GetImages()); if (num_dumped == 0) { result.AppendError("the target has no associated executable 
images"); } @@ -1920,7 +1919,7 @@ class CommandObjectTargetModulesDumpObjfile (arg_cstr = command.GetArgumentAtIndex(arg_idx)) != nullptr; ++arg_idx) { size_t num_matched = - FindModulesByName(target, arg_cstr, module_list, true); + FindModulesByName(&target, arg_cstr, module_list, true); if (num_matched == 0) { result.AppendWarningWithFormat( "Unable to find an image that matches '%s'.\n", arg_cstr); @@ -1999,19 +1998,19 @@ class CommandObjectTargetModulesDumpSymtab protected: void DoExecute(Args &command, CommandReturnObject &result) override { - Target *target = &GetTarget(); + Target &target = GetTarget(); uint32_t num_dumped = 0; Mangled::NamePreference name_preference = (m_options.m_prefer_mangled ? Mangled::ePreferMangled : Mangled::ePreferDemangled); - uint32_t addr_byte_size = target->GetArchitecture().GetAddressByteSize(); + uint32_t addr_byte_size = target.GetArchitecture().GetAddressByteSize(); result.GetOutputStream().SetAddressByteSize(addr_byte_size); result.GetErrorStream().SetAddressByteSize(addr_byte_size); if (command.GetArgumentCount() == 0) { // Dump all sections for all modules images - const ModuleList &module_list = target->GetImages(); + const ModuleList &module_list = target.GetImages(); std::lock_guard guard(module_list.GetMutex()); const size_t num_modules = module_list.GetSize(); if (num_modules > 0) { @@ -2044,7 +2043,7 @@ class CommandObjectTargetModulesDumpSymtab ++arg_idx) { ModuleList module_list; const size_t num_matches = - FindModulesByName(target, arg_cstr, module_list, true); + FindModulesByName(&target, arg_cstr, module_list, true); if (num_matches > 0) { for (ModuleSP module_sp : module_list.Modules()) { if (module_sp) { @@ -2097,16 +2096,16 @@ class CommandObjectTargetModulesDumpSections protected: void DoExecute(Args &command, CommandReturnObject &result) override { - Target *target = &GetTarget(); + Target &target = GetTarget(); uint32_t num_dumped = 0; - uint32_t addr_byte_size = target->GetArchitecture().GetAddressByteSize(); + uint32_t addr_byte_size = target.GetArchitecture().GetAddressByteSize(); result.GetOutputStream().SetAddressByteSize(addr_byte_size); result.GetErrorStream().SetAddressByteSize(addr_byte_size); if (command.GetArgumentCount() == 0) { // Dump all sections for all modules images - const size_t num_modules = target->GetImages().GetSize(); + const size_t num_modules = target.GetImages().GetSize(); if (num_modules == 0) { result.AppendError("the target has no associated executable images"); return; @@ -2123,7 +2122,7 @@ class CommandObjectTargetModulesDumpSections num_dumped++; DumpModuleSections( m_interpreter, result.GetOutputStream(), - target->GetImages().GetModulePointerAtIndex(image_idx)); + target.GetImages().GetModulePointerAtIndex(image_idx)); } } else { // Dump specified images (by basename or fullpath) @@ -2133,7 +2132,7 @@ class CommandObjectTargetModulesDumpSections ++arg_idx) { ModuleList module_list; const size_t num_matches = - FindModulesByName(target, arg_cstr, module_list, true); + FindModulesByName(&target, arg_cstr, module_list, true); if (num_matches > 0) { for (size_t i = 0; i < num_matches; ++i) { if (INTERRUPT_REQUESTED(GetDebugger(), @@ -2238,9 +2237,9 @@ class CommandObjectTargetModulesDumpClangAST protected: void DoExecute(Args &command, CommandReturnObject &result) override { - Target *target = &GetTarget(); + Target &target = GetTarget(); - const ModuleList &module_list = target->GetImages(); + const ModuleList &module_list = target.GetImages(); const size_t num_modules = module_list.GetSize(); if 
(num_modules == 0) { result.AppendError("the target has no associated executable images"); @@ -2265,7 +2264,7 @@ class CommandObjectTargetModulesDumpClangAST for (const Args::ArgEntry &arg : command.entries()) { ModuleList module_list; const size_t num_matches = - FindModulesByName(target, arg.c_str(), module_list, true); + FindModulesByName(&target, arg.c_str(), module_list, true); if (num_matches == 0) { // Check the global list std::lock_guard guard( @@ -2309,16 +2308,16 @@ class CommandObjectTargetModulesDumpSymfile protected: void DoExecute(Args &command, CommandReturnObject &result) override { - Target *target = &GetTarget(); + Target &target = GetTarget(); uint32_t num_dumped = 0; - uint32_t addr_byte_size = target->GetArchitecture().GetAddressByteSize(); + uint32_t addr_byte_size = target.GetArchitecture().GetAddressByteSize(); result.GetOutputStream().SetAddressByteSize(addr_byte_size); result.GetErrorStream().SetAddressByteSize(addr_byte_size); if (command.GetArgumentCount() == 0) { // Dump all sections for all modules images - const ModuleList &target_modules = target->GetImages(); + const ModuleList &target_modules = target.GetImages(); std::lock_guard guard(target_modules.GetMutex()); const size_t num_modules = target_modules.GetSize(); if (num_modules == 0) { @@ -2344,7 +2343,7 @@ class CommandObjectTargetModulesDumpSymfile ++arg_idx) { ModuleList module_list; const size_t num_matches = - FindModulesByName(target, arg_cstr, module_list, true); + FindModulesByName(&target, arg_cstr, module_list, true); if (num_matches > 0) { for (size_t i = 0; i < num_matches; ++i) { if (INTERRUPT_REQUESTED(GetDebugger(), "Interrupted dumping {0} " @@ -2726,7 +2725,7 @@ class CommandObjectTargetModulesAdd : public CommandObjectParsed { OptionGroupFile m_symbol_file; void DoExecute(Args &args, CommandReturnObject &result) override { - Target *target = &GetTarget(); + Target &target = GetTarget(); bool flush = false; const size_t argc = args.GetArgumentCount(); @@ -2742,7 +2741,7 @@ class CommandObjectTargetModulesAdd : public CommandObjectParsed { Status error; if (PluginManager::DownloadObjectAndSymbolFile(module_spec, error)) { ModuleSP module_sp( - target->GetOrCreateModule(module_spec, true /* notify */)); + target.GetOrCreateModule(module_spec, true /* notify */)); if (module_sp) { result.SetStatus(eReturnStatusSuccessFinishResult); return; @@ -2799,10 +2798,10 @@ class CommandObjectTargetModulesAdd : public CommandObjectParsed { module_spec.GetSymbolFileSpec() = m_symbol_file.GetOptionValue().GetCurrentValue(); if (!module_spec.GetArchitecture().IsValid()) - module_spec.GetArchitecture() = target->GetArchitecture(); + module_spec.GetArchitecture() = target.GetArchitecture(); Status error; - ModuleSP module_sp(target->GetOrCreateModule( - module_spec, true /* notify */, &error)); + ModuleSP module_sp( + target.GetOrCreateModule(module_spec, true /* notify */, &error)); if (!module_sp) { const char *error_cstr = error.AsCString(); if (error_cstr) @@ -2831,7 +2830,7 @@ class CommandObjectTargetModulesAdd : public CommandObjectParsed { } if (flush) { - ProcessSP process = target->GetProcessSP(); + ProcessSP process = target.GetProcessSP(); if (process) process->Flush(); } @@ -2876,7 +2875,7 @@ class CommandObjectTargetModulesLoad protected: void DoExecute(Args &args, CommandReturnObject &result) override { - Target *target = &GetTarget(); + Target &target = GetTarget(); const bool load = m_load_option.GetOptionValue().GetCurrentValue(); const bool set_pc = 
m_pc_option.GetOptionValue().GetCurrentValue(); @@ -2888,7 +2887,7 @@ class CommandObjectTargetModulesLoad if (load) { if (!m_file_option.GetOptionValue().OptionWasSet() && !m_uuid_option_group.GetOptionValue().OptionWasSet()) { - ModuleList &module_list = target->GetImages(); + ModuleList &module_list = target.GetImages(); if (module_list.GetSize() == 1) { search_using_module_spec = true; module_spec.GetFileSpec() = @@ -2903,7 +2902,7 @@ class CommandObjectTargetModulesLoad const bool use_global_module_list = true; ModuleList module_list; const size_t num_matches = FindModulesByName( - target, arg_cstr, module_list, use_global_module_list); + &target, arg_cstr, module_list, use_global_module_list); if (num_matches == 1) { module_spec.GetFileSpec() = module_list.GetModuleAtIndex(0)->GetFileSpec(); @@ -2926,7 +2925,7 @@ class CommandObjectTargetModulesLoad if (search_using_module_spec) { ModuleList matching_modules; - target->GetImages().FindModules(module_spec, matching_modules); + target.GetImages().FindModules(module_spec, matching_modules); const size_t num_matches = matching_modules.GetSize(); char path[PATH_MAX]; @@ -2943,7 +2942,7 @@ class CommandObjectTargetModulesLoad const addr_t slide = m_slide_option.GetOptionValue().GetCurrentValue(); const bool slide_is_offset = true; - module->SetLoadAddress(*target, slide, slide_is_offset, + module->SetLoadAddress(target, slide, slide_is_offset, changed); } else { result.AppendError("one or more section name + load " @@ -2975,8 +2974,8 @@ class CommandObjectTargetModulesLoad sect_name); break; } else { - if (target->GetSectionLoadList() - .SetSectionLoadAddress(section_sp, load_addr)) + if (target.GetSectionLoadList().SetSectionLoadAddress( + section_sp, load_addr)) changed = true; result.AppendMessageWithFormat( "section '%s' loaded at 0x%" PRIx64 "\n", @@ -3007,13 +3006,13 @@ class CommandObjectTargetModulesLoad } if (changed) { - target->ModulesDidLoad(matching_modules); + target.ModulesDidLoad(matching_modules); Process *process = m_exe_ctx.GetProcessPtr(); if (process) process->Flush(); } if (load) { - ProcessSP process = target->CalculateProcess(); + ProcessSP process = target.CalculateProcess(); Address file_entry = objfile->GetEntryPointAddress(); if (!process) { result.AppendError("No process"); @@ -3024,7 +3023,7 @@ class CommandObjectTargetModulesLoad return; } std::vector loadables( - objfile->GetLoadableData(*target)); + objfile->GetLoadableData(target)); if (loadables.size() == 0) { result.AppendError("No loadable sections"); return; @@ -3038,7 +3037,7 @@ class CommandObjectTargetModulesLoad ThreadList &thread_list = process->GetThreadList(); RegisterContextSP reg_context( thread_list.GetSelectedThread()->GetRegisterContext()); - addr_t file_entry_addr = file_entry.GetLoadAddress(target); + addr_t file_entry_addr = file_entry.GetLoadAddress(&target); if (!reg_context->SetPC(file_entry_addr)) { result.AppendErrorWithFormat("failed to set PC value to " "0x%" PRIx64 "\n", @@ -3166,50 +3165,37 @@ class CommandObjectTargetModulesList : public CommandObjectParsed { protected: void DoExecute(Args &command, CommandReturnObject &result) override { - Target *target = &GetTarget(); + Target &target = GetTarget(); const bool use_global_module_list = m_options.m_use_global_module_list; // Define a local module list here to ensure it lives longer than any // "locker" object which might lock its contents below (through the // "module_list_ptr" variable). 
ModuleList module_list; - if (target == nullptr && !use_global_module_list) { - result.AppendError("invalid target, create a debug target using the " - "'target create' command"); - return; - } else { - if (target) { - uint32_t addr_byte_size = - target->GetArchitecture().GetAddressByteSize(); - result.GetOutputStream().SetAddressByteSize(addr_byte_size); - result.GetErrorStream().SetAddressByteSize(addr_byte_size); - } - // Dump all sections for all modules images - Stream &strm = result.GetOutputStream(); + uint32_t addr_byte_size = target.GetArchitecture().GetAddressByteSize(); + result.GetOutputStream().SetAddressByteSize(addr_byte_size); + result.GetErrorStream().SetAddressByteSize(addr_byte_size); + // Dump all sections for all modules images + Stream &strm = result.GetOutputStream(); - if (m_options.m_module_addr != LLDB_INVALID_ADDRESS) { - if (target) { - Address module_address; - if (module_address.SetLoadAddress(m_options.m_module_addr, target)) { - ModuleSP module_sp(module_address.GetModule()); - if (module_sp) { - PrintModule(target, module_sp.get(), 0, strm); - result.SetStatus(eReturnStatusSuccessFinishResult); - } else { - result.AppendErrorWithFormat( - "Couldn't find module matching address: 0x%" PRIx64 ".", - m_options.m_module_addr); - } - } else { - result.AppendErrorWithFormat( - "Couldn't find module containing address: 0x%" PRIx64 ".", - m_options.m_module_addr); - } + if (m_options.m_module_addr != LLDB_INVALID_ADDRESS) { + Address module_address; + if (module_address.SetLoadAddress(m_options.m_module_addr, &target)) { + ModuleSP module_sp(module_address.GetModule()); + if (module_sp) { + PrintModule(target, module_sp.get(), 0, strm); + result.SetStatus(eReturnStatusSuccessFinishResult); } else { - result.AppendError( - "Can only look up modules by address with a valid target."); + result.AppendErrorWithFormat( + "Couldn't find module matching address: 0x%" PRIx64 ".", + m_options.m_module_addr); } - return; + } else { + result.AppendErrorWithFormat( + "Couldn't find module containing address: 0x%" PRIx64 ".", + m_options.m_module_addr); } + return; + } size_t num_modules = 0; @@ -3227,13 +3213,13 @@ class CommandObjectTargetModulesList : public CommandObjectParsed { guard.lock(); num_modules = Module::GetNumberAllocatedModules(); } else { - module_list_ptr = &target->GetImages(); + module_list_ptr = &target.GetImages(); } } else { for (const Args::ArgEntry &arg : command) { // Dump specified images (by basename or fullpath) const size_t num_matches = FindModulesByName( - target, arg.c_str(), module_list, use_global_module_list); + &target, arg.c_str(), module_list, use_global_module_list); if (num_matches == 0) { if (argc == 1) { result.AppendErrorWithFormat("no modules found that match '%s'", @@ -3286,10 +3272,9 @@ class CommandObjectTargetModulesList : public CommandObjectParsed { } return; } - } } - void PrintModule(Target *target, Module *module, int indent, Stream &strm) { + void PrintModule(Target &target, Module *module, int indent, Stream &strm) { if (module == nullptr) { strm.PutCString("Null module"); return; @@ -3338,17 +3323,16 @@ class CommandObjectTargetModulesList : public CommandObjectParsed { // Image header address { uint32_t addr_nibble_width = - target ? 
(target->GetArchitecture().GetAddressByteSize() * 2) - : 16; + target.GetArchitecture().GetAddressByteSize() * 2; ObjectFile *objfile = module->GetObjectFile(); if (objfile) { Address base_addr(objfile->GetBaseAddress()); if (base_addr.IsValid()) { - if (target && !target->GetSectionLoadList().IsEmpty()) { - lldb::addr_t load_addr = base_addr.GetLoadAddress(target); + if (!target.GetSectionLoadList().IsEmpty()) { + lldb::addr_t load_addr = base_addr.GetLoadAddress(&target); if (load_addr == LLDB_INVALID_ADDRESS) { - base_addr.Dump(&strm, target, + base_addr.Dump(&strm, &target, Address::DumpStyleModuleWithFileAddress, Address::DumpStyleFileAddress); } else { @@ -3367,7 +3351,7 @@ class CommandObjectTargetModulesList : public CommandObjectParsed { } // The address was valid, but the image isn't loaded, output the // address in an appropriate format - base_addr.Dump(&strm, target, Address::DumpStyleFileAddress); + base_addr.Dump(&strm, &target, Address::DumpStyleFileAddress); break; } } @@ -4070,11 +4054,11 @@ class CommandObjectTargetModulesLookup : public CommandObjectParsed { protected: void DoExecute(Args &command, CommandReturnObject &result) override { - Target *target = &GetTarget(); + Target &target = GetTarget(); bool syntax_error = false; uint32_t i; uint32_t num_successful_lookups = 0; - uint32_t addr_byte_size = target->GetArchitecture().GetAddressByteSize(); + uint32_t addr_byte_size = target.GetArchitecture().GetAddressByteSize(); result.GetOutputStream().SetAddressByteSize(addr_byte_size); result.GetErrorStream().SetAddressByteSize(addr_byte_size); // Dump all sections for all modules images @@ -4096,7 +4080,7 @@ class CommandObjectTargetModulesLookup : public CommandObjectParsed { // Dump all sections for all other modules - const ModuleList &target_modules = target->GetImages(); + const ModuleList &target_modules = target.GetImages(); std::lock_guard guard(target_modules.GetMutex()); if (target_modules.GetSize() == 0) { result.AppendError("the target has no associated executable images"); @@ -4119,7 +4103,7 @@ class CommandObjectTargetModulesLookup : public CommandObjectParsed { ++i) { ModuleList module_list; const size_t num_matches = - FindModulesByName(target, arg_cstr, module_list, false); + FindModulesByName(&target, arg_cstr, module_list, false); if (num_matches > 0) { for (size_t j = 0; j < num_matches; ++j) { Module *module = module_list.GetModulePointerAtIndex(j); @@ -4937,10 +4921,7 @@ Filter Options: m_stop_hook_sp->GetID()); error_sp->Flush(); } - Target *target = &GetTarget(); - if (target) { - target->UndoCreateStopHook(m_stop_hook_sp->GetID()); - } + GetTarget().UndoCreateStopHook(m_stop_hook_sp->GetID()); } else { // The IOHandler editor is only for command lines stop hooks: Target::StopHookCommandLine *hook_ptr = diff --git a/lldb/source/Commands/CommandObjectWatchpoint.cpp b/lldb/source/Commands/CommandObjectWatchpoint.cpp index 314c75111a028f..126982dfddf88e 100644 --- a/lldb/source/Commands/CommandObjectWatchpoint.cpp +++ b/lldb/source/Commands/CommandObjectWatchpoint.cpp @@ -39,10 +39,10 @@ static void AddWatchpointDescription(Stream &s, Watchpoint &wp, s.EOL(); } -static bool CheckTargetForWatchpointOperations(Target *target, +static bool CheckTargetForWatchpointOperations(Target &target, CommandReturnObject &result) { bool process_is_valid = - target->GetProcessSP() && target->GetProcessSP()->IsAlive(); + target.GetProcessSP() && target.GetProcessSP()->IsAlive(); if (!process_is_valid) { result.AppendError("There's no process or it is not alive."); 
return false; @@ -67,12 +67,10 @@ static int32_t WithRSAIndex(llvm::StringRef Arg) { // Return true if wp_ids is successfully populated with the watch ids. False // otherwise. bool CommandObjectMultiwordWatchpoint::VerifyWatchpointIDs( - Target *target, Args &args, std::vector &wp_ids) { + Target &target, Args &args, std::vector &wp_ids) { // Pre-condition: args.GetArgumentCount() > 0. if (args.GetArgumentCount() == 0) { - if (target == nullptr) - return false; - WatchpointSP watch_sp = target->GetLastCreatedWatchpoint(); + WatchpointSP watch_sp = target.GetLastCreatedWatchpoint(); if (watch_sp) { wp_ids.push_back(watch_sp->GetID()); return true; @@ -203,22 +201,24 @@ class CommandObjectWatchpointList : public CommandObjectParsed { protected: void DoExecute(Args &command, CommandReturnObject &result) override { - Target *target = &GetTarget(); + Target &target = GetTarget(); - if (target->GetProcessSP() && target->GetProcessSP()->IsAlive()) { - std::optional num_supported_hardware_watchpoints = - target->GetProcessSP()->GetWatchpointSlotCount(); + if (ProcessSP process_sp = target.GetProcessSP()) { + if (process_sp->IsAlive()) { + std::optional num_supported_hardware_watchpoints = + process_sp->GetWatchpointSlotCount(); - if (num_supported_hardware_watchpoints) - result.AppendMessageWithFormat( - "Number of supported hardware watchpoints: %u\n", - *num_supported_hardware_watchpoints); + if (num_supported_hardware_watchpoints) + result.AppendMessageWithFormat( + "Number of supported hardware watchpoints: %u\n", + *num_supported_hardware_watchpoints); + } } - const WatchpointList &watchpoints = target->GetWatchpointList(); + const WatchpointList &watchpoints = target.GetWatchpointList(); std::unique_lock lock; - target->GetWatchpointList().GetListMutex(lock); + target.GetWatchpointList().GetListMutex(lock); size_t num_watchpoints = watchpoints.GetSize(); @@ -286,14 +286,14 @@ class CommandObjectWatchpointEnable : public CommandObjectParsed { protected: void DoExecute(Args &command, CommandReturnObject &result) override { - Target *target = &GetTarget(); + Target &target = GetTarget(); if (!CheckTargetForWatchpointOperations(target, result)) return; std::unique_lock lock; - target->GetWatchpointList().GetListMutex(lock); + target.GetWatchpointList().GetListMutex(lock); - const WatchpointList &watchpoints = target->GetWatchpointList(); + const WatchpointList &watchpoints = target.GetWatchpointList(); size_t num_watchpoints = watchpoints.GetSize(); @@ -304,7 +304,7 @@ class CommandObjectWatchpointEnable : public CommandObjectParsed { if (command.GetArgumentCount() == 0) { // No watchpoint selected; enable all currently set watchpoints. - target->EnableAllWatchpoints(); + target.EnableAllWatchpoints(); result.AppendMessageWithFormat("All watchpoints enabled. 
(%" PRIu64 " watchpoints)\n", (uint64_t)num_watchpoints); @@ -321,7 +321,7 @@ class CommandObjectWatchpointEnable : public CommandObjectParsed { int count = 0; const size_t size = wp_ids.size(); for (size_t i = 0; i < size; ++i) - if (target->EnableWatchpointByID(wp_ids[i])) + if (target.EnableWatchpointByID(wp_ids[i])) ++count; result.AppendMessageWithFormat("%d watchpoints enabled.\n", count); result.SetStatus(eReturnStatusSuccessFinishNoResult); @@ -355,14 +355,14 @@ class CommandObjectWatchpointDisable : public CommandObjectParsed { protected: void DoExecute(Args &command, CommandReturnObject &result) override { - Target *target = &GetTarget(); + Target &target = GetTarget(); if (!CheckTargetForWatchpointOperations(target, result)) return; std::unique_lock lock; - target->GetWatchpointList().GetListMutex(lock); + target.GetWatchpointList().GetListMutex(lock); - const WatchpointList &watchpoints = target->GetWatchpointList(); + const WatchpointList &watchpoints = target.GetWatchpointList(); size_t num_watchpoints = watchpoints.GetSize(); if (num_watchpoints == 0) { @@ -372,7 +372,7 @@ class CommandObjectWatchpointDisable : public CommandObjectParsed { if (command.GetArgumentCount() == 0) { // No watchpoint selected; disable all currently set watchpoints. - if (target->DisableAllWatchpoints()) { + if (target.DisableAllWatchpoints()) { result.AppendMessageWithFormat("All watchpoints disabled. (%" PRIu64 " watchpoints)\n", (uint64_t)num_watchpoints); @@ -392,7 +392,7 @@ class CommandObjectWatchpointDisable : public CommandObjectParsed { int count = 0; const size_t size = wp_ids.size(); for (size_t i = 0; i < size; ++i) - if (target->DisableWatchpointByID(wp_ids[i])) + if (target.DisableWatchpointByID(wp_ids[i])) ++count; result.AppendMessageWithFormat("%d watchpoints disabled.\n", count); result.SetStatus(eReturnStatusSuccessFinishNoResult); @@ -464,14 +464,14 @@ class CommandObjectWatchpointDelete : public CommandObjectParsed { protected: void DoExecute(Args &command, CommandReturnObject &result) override { - Target *target = &GetTarget(); + Target &target = GetTarget(); if (!CheckTargetForWatchpointOperations(target, result)) return; std::unique_lock lock; - target->GetWatchpointList().GetListMutex(lock); + target.GetWatchpointList().GetListMutex(lock); - const WatchpointList &watchpoints = target->GetWatchpointList(); + const WatchpointList &watchpoints = target.GetWatchpointList(); size_t num_watchpoints = watchpoints.GetSize(); @@ -487,7 +487,7 @@ class CommandObjectWatchpointDelete : public CommandObjectParsed { true)) { result.AppendMessage("Operation cancelled..."); } else { - target->RemoveAllWatchpoints(); + target.RemoveAllWatchpoints(); result.AppendMessageWithFormat("All watchpoints removed. 
(%" PRIu64 " watchpoints)\n", (uint64_t)num_watchpoints); @@ -507,7 +507,7 @@ class CommandObjectWatchpointDelete : public CommandObjectParsed { int count = 0; const size_t size = wp_ids.size(); for (size_t i = 0; i < size; ++i) - if (target->RemoveWatchpointByID(wp_ids[i])) + if (target.RemoveWatchpointByID(wp_ids[i])) ++count; result.AppendMessageWithFormat("%d watchpoints deleted.\n", count); result.SetStatus(eReturnStatusSuccessFinishNoResult); @@ -584,14 +584,14 @@ class CommandObjectWatchpointIgnore : public CommandObjectParsed { protected: void DoExecute(Args &command, CommandReturnObject &result) override { - Target *target = &GetTarget(); + Target &target = GetTarget(); if (!CheckTargetForWatchpointOperations(target, result)) return; std::unique_lock lock; - target->GetWatchpointList().GetListMutex(lock); + target.GetWatchpointList().GetListMutex(lock); - const WatchpointList &watchpoints = target->GetWatchpointList(); + const WatchpointList &watchpoints = target.GetWatchpointList(); size_t num_watchpoints = watchpoints.GetSize(); @@ -601,7 +601,7 @@ class CommandObjectWatchpointIgnore : public CommandObjectParsed { } if (command.GetArgumentCount() == 0) { - target->IgnoreAllWatchpoints(m_options.m_ignore_count); + target.IgnoreAllWatchpoints(m_options.m_ignore_count); result.AppendMessageWithFormat("All watchpoints ignored. (%" PRIu64 " watchpoints)\n", (uint64_t)num_watchpoints); @@ -618,7 +618,7 @@ class CommandObjectWatchpointIgnore : public CommandObjectParsed { int count = 0; const size_t size = wp_ids.size(); for (size_t i = 0; i < size; ++i) - if (target->IgnoreWatchpointByID(wp_ids[i], m_options.m_ignore_count)) + if (target.IgnoreWatchpointByID(wp_ids[i], m_options.m_ignore_count)) ++count; result.AppendMessageWithFormat("%d watchpoints ignored.\n", count); result.SetStatus(eReturnStatusSuccessFinishNoResult); @@ -703,14 +703,14 @@ class CommandObjectWatchpointModify : public CommandObjectParsed { protected: void DoExecute(Args &command, CommandReturnObject &result) override { - Target *target = &GetTarget(); + Target &target = GetTarget(); if (!CheckTargetForWatchpointOperations(target, result)) return; std::unique_lock lock; - target->GetWatchpointList().GetListMutex(lock); + target.GetWatchpointList().GetListMutex(lock); - const WatchpointList &watchpoints = target->GetWatchpointList(); + const WatchpointList &watchpoints = target.GetWatchpointList(); size_t num_watchpoints = watchpoints.GetSize(); @@ -720,7 +720,7 @@ class CommandObjectWatchpointModify : public CommandObjectParsed { } if (command.GetArgumentCount() == 0) { - WatchpointSP watch_sp = target->GetLastCreatedWatchpoint(); + WatchpointSP watch_sp = target.GetLastCreatedWatchpoint(); watch_sp->SetCondition(m_options.m_condition.c_str()); result.SetStatus(eReturnStatusSuccessFinishNoResult); } else { @@ -804,7 +804,7 @@ corresponding to the byte size of the data type."); } void DoExecute(Args &command, CommandReturnObject &result) override { - Target *target = &GetTarget(); + Target &target = GetTarget(); StackFrame *frame = m_exe_ctx.GetFramePtr(); // If no argument is present, issue an error message. 
There's no way to @@ -852,8 +852,8 @@ corresponding to the byte size of the data type."); Status error(Variable::GetValuesForVariableExpressionPath( command.GetArgumentAtIndex(0), - m_exe_ctx.GetBestExecutionContextScope(), GetVariableCallback, target, - variable_list, valobj_list)); + m_exe_ctx.GetBestExecutionContextScope(), GetVariableCallback, + &target, variable_list, valobj_list)); if (valobj_list.GetSize()) valobj_sp = valobj_list.GetValueObjectAtIndex(0); @@ -904,7 +904,7 @@ corresponding to the byte size of the data type."); error.Clear(); WatchpointSP watch_sp = - target->CreateWatchpoint(addr, size, &compiler_type, watch_type, error); + target.CreateWatchpoint(addr, size, &compiler_type, watch_type, error); if (!watch_sp) { result.AppendErrorWithFormat( "Watchpoint creation failed (addr=0x%" PRIx64 ", size=%" PRIu64 @@ -991,7 +991,7 @@ class CommandObjectWatchpointSetExpression : public CommandObjectRaw { m_option_group.NotifyOptionParsingStarting( &exe_ctx); // This is a raw command, so notify the option group - Target *target = &GetTarget(); + Target &target = GetTarget(); StackFrame *frame = m_exe_ctx.GetFramePtr(); OptionsWithRaw args(raw_command); @@ -1034,7 +1034,7 @@ class CommandObjectWatchpointSetExpression : public CommandObjectRaw { options.SetLanguage(m_option_watchpoint.language_type); ExpressionResults expr_result = - target->EvaluateExpression(expr, frame, valobj_sp, options); + target.EvaluateExpression(expr, frame, valobj_sp, options); if (expr_result != eExpressionCompleted) { result.AppendError("expression evaluation of address to watch failed"); result.AppendErrorWithFormat("expression evaluated: \n%s", expr.data()); @@ -1054,7 +1054,7 @@ class CommandObjectWatchpointSetExpression : public CommandObjectRaw { if (m_option_watchpoint.watch_size.GetCurrentValue() != 0) size = m_option_watchpoint.watch_size.GetCurrentValue(); else - size = target->GetArchitecture().GetAddressByteSize(); + size = target.GetArchitecture().GetAddressByteSize(); // Now it's time to create the watchpoint. uint32_t watch_type; @@ -1095,7 +1095,7 @@ class CommandObjectWatchpointSetExpression : public CommandObjectRaw { Status error; WatchpointSP watch_sp = - target->CreateWatchpoint(addr, size, &compiler_type, watch_type, error); + target.CreateWatchpoint(addr, size, &compiler_type, watch_type, error); if (watch_sp) { watch_sp->SetWatchSpec(std::string(expr)); Stream &output_stream = result.GetOutputStream(); diff --git a/lldb/source/Commands/CommandObjectWatchpoint.h b/lldb/source/Commands/CommandObjectWatchpoint.h index 87f9f4383bd270..a68491103ef518 100644 --- a/lldb/source/Commands/CommandObjectWatchpoint.h +++ b/lldb/source/Commands/CommandObjectWatchpoint.h @@ -22,7 +22,7 @@ class CommandObjectMultiwordWatchpoint : public CommandObjectMultiword { ~CommandObjectMultiwordWatchpoint() override; - static bool VerifyWatchpointIDs(Target *target, Args &args, + static bool VerifyWatchpointIDs(Target &target, Args &args, std::vector &wp_ids); }; diff --git a/lldb/source/Commands/CommandObjectWatchpointCommand.cpp b/lldb/source/Commands/CommandObjectWatchpointCommand.cpp index b4743eb3d87574..cc4cb767648668 100644 --- a/lldb/source/Commands/CommandObjectWatchpointCommand.cpp +++ b/lldb/source/Commands/CommandObjectWatchpointCommand.cpp @@ -355,9 +355,9 @@ are no syntax errors may indicate that a function was declared but never called. 
protected: void DoExecute(Args &command, CommandReturnObject &result) override { - Target *target = &GetTarget(); + Target &target = GetTarget(); - const WatchpointList &watchpoints = target->GetWatchpointList(); + const WatchpointList &watchpoints = target.GetWatchpointList(); size_t num_watchpoints = watchpoints.GetSize(); if (num_watchpoints == 0) { @@ -384,7 +384,7 @@ are no syntax errors may indicate that a function was declared but never called. for (size_t i = 0; i < count; ++i) { uint32_t cur_wp_id = valid_wp_ids.at(i); if (cur_wp_id != LLDB_INVALID_WATCH_ID) { - Watchpoint *wp = target->GetWatchpointList().FindByID(cur_wp_id).get(); + Watchpoint *wp = target.GetWatchpointList().FindByID(cur_wp_id).get(); // Sanity check wp first. if (wp == nullptr) continue; @@ -450,9 +450,9 @@ class CommandObjectWatchpointCommandDelete : public CommandObjectParsed { protected: void DoExecute(Args &command, CommandReturnObject &result) override { - Target *target = &GetTarget(); + Target &target = GetTarget(); - const WatchpointList &watchpoints = target->GetWatchpointList(); + const WatchpointList &watchpoints = target.GetWatchpointList(); size_t num_watchpoints = watchpoints.GetSize(); if (num_watchpoints == 0) { @@ -478,7 +478,7 @@ class CommandObjectWatchpointCommandDelete : public CommandObjectParsed { for (size_t i = 0; i < count; ++i) { uint32_t cur_wp_id = valid_wp_ids.at(i); if (cur_wp_id != LLDB_INVALID_WATCH_ID) { - Watchpoint *wp = target->GetWatchpointList().FindByID(cur_wp_id).get(); + Watchpoint *wp = target.GetWatchpointList().FindByID(cur_wp_id).get(); if (wp) wp->ClearCallback(); } else { @@ -505,9 +505,9 @@ class CommandObjectWatchpointCommandList : public CommandObjectParsed { protected: void DoExecute(Args &command, CommandReturnObject &result) override { - Target *target = &GetTarget(); + Target &target = GetTarget(); - const WatchpointList &watchpoints = target->GetWatchpointList(); + const WatchpointList &watchpoints = target.GetWatchpointList(); size_t num_watchpoints = watchpoints.GetSize(); if (num_watchpoints == 0) { @@ -533,7 +533,7 @@ class CommandObjectWatchpointCommandList : public CommandObjectParsed { for (size_t i = 0; i < count; ++i) { uint32_t cur_wp_id = valid_wp_ids.at(i); if (cur_wp_id != LLDB_INVALID_WATCH_ID) { - Watchpoint *wp = target->GetWatchpointList().FindByID(cur_wp_id).get(); + Watchpoint *wp = target.GetWatchpointList().FindByID(cur_wp_id).get(); if (wp) { const WatchpointOptions *wp_options = wp->GetOptions(); From 87af9ee870ad7ca93abced0b09459c3760dec891 Mon Sep 17 00:00:00 2001 From: Yeting Kuo <46629943+yetingk@users.noreply.github.com> Date: Thu, 1 Aug 2024 09:37:42 +0800 Subject: [PATCH 048/114] [RISCV] Use experimental.vp.splat to splat specific vector length elements. (#101329) Previously, it was hard to create a scalable vector splat with a specific vector length in LLVM IR, so we used riscv.vmv.v.x and riscv.vmv.v.f to do this work. However, these two RVV intrinsics impose strict type constraints and cannot support fixed vector types or illegal vector types. Using vp.splat preserves the old functionality and also generates more optimized code for such vector types and illegal vectors. This patch also fixes a crash caused by getEVT not handling ptr types.
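Conceptually, the rewrite in RISCVCodeGenPrepare now expands a zero-strided VP load along these lines (a simplified sketch of the new code path; see the diff below for the full context):

  // Load the scalar once, then splat it to the requested vector length using
  // the target-independent VP splat intrinsic. Unlike riscv.vmv.v.x/vmv.v.f,
  // this also works for fixed and illegal vector types.
  Value *Val = Builder.CreateLoad(STy, BasePtr);
  Value *Res = Builder.CreateIntrinsic(Intrinsic::experimental_vp_splat, {VTy},
                                       {Val, II.getOperand(2), VL});
  II.replaceAllUsesWith(Res);
  II.eraseFromParent();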
--- llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp | 19 +---- .../RISCV/rvv/fixed-vectors-strided-vpload.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll | 80 +++++++++++++++++-- 3 files changed, 79 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp index 0a66a38f6d5abc..be2e880ecd3a98 100644 --- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp +++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp @@ -187,25 +187,10 @@ bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) { auto *VTy = cast(II.getType()); IRBuilder<> Builder(&II); - - // Extend VL from i32 to XLen if needed. - if (ST->is64Bit()) - VL = Builder.CreateZExt(VL, Builder.getInt64Ty()); - Type *STy = VTy->getElementType(); Value *Val = Builder.CreateLoad(STy, BasePtr); - const auto &TLI = *ST->getTargetLowering(); - Value *Res; - - // TODO: Also support fixed/illegal vector types to splat with evl = vl. - if (isa(VTy) && TLI.isTypeLegal(EVT::getEVT(VTy))) { - unsigned VMVOp = STy->isFloatingPointTy() ? Intrinsic::riscv_vfmv_v_f - : Intrinsic::riscv_vmv_v_x; - Res = Builder.CreateIntrinsic(VMVOp, {VTy, VL->getType()}, - {PoisonValue::get(VTy), Val, VL}); - } else { - Res = Builder.CreateVectorSplat(VTy->getElementCount(), Val); - } + Value *Res = Builder.CreateIntrinsic(Intrinsic::experimental_vp_splat, {VTy}, + {Val, II.getOperand(2), VL}); II.replaceAllUsesWith(Res); II.eraseFromParent(); diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll index b8c7037580c46b..849f98c26f4593 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll @@ -638,14 +638,14 @@ declare <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr, i64, define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) { ; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8: ; CHECK-OPT: # %bb.0: -; CHECK-OPT-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-OPT-NEXT: vsetivli zero, 3, e8, mf4, ta, ma ; CHECK-OPT-NEXT: vlse8.v v8, (a0), zero ; CHECK-OPT-NEXT: ret ; ; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8: ; CHECK-NO-OPT: # %bb.0: ; CHECK-NO-OPT-NEXT: lbu a0, 0(a0) -; CHECK-NO-OPT-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NO-OPT-NEXT: vsetivli zero, 3, e8, mf4, ta, ma ; CHECK-NO-OPT-NEXT: vmv.v.x v8, a0 ; CHECK-NO-OPT-NEXT: ret %load = call <4 x i8> @llvm.experimental.vp.strided.load.4i8.p0.i8(ptr %ptr, i8 0, <4 x i1> splat (i1 true), i32 3) @@ -657,14 +657,14 @@ define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) { define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) { ; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4f16: ; CHECK-OPT: # %bb.0: -; CHECK-OPT-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-OPT-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; CHECK-OPT-NEXT: vlse16.v v8, (a0), zero ; CHECK-OPT-NEXT: ret ; ; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4f16: ; CHECK-NO-OPT: # %bb.0: ; CHECK-NO-OPT-NEXT: flh fa5, 0(a0) -; CHECK-NO-OPT-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NO-OPT-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; CHECK-NO-OPT-NEXT: vfmv.v.f v8, fa5 ; CHECK-NO-OPT-NEXT: ret %load = call <4 x half> @llvm.experimental.vp.strided.load.4f16.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 3) diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll index 
0010f64a93fd62..14976f21b7dbba 100644 --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+optimized-zero-stride-load \ ; RUN: -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT,CHECK-OPT-RV32 ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+optimized-zero-stride-load \ ; RUN: -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT,CHECK-OPT-RV64 ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh \ ; RUN: -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-RV32 ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh \ ; RUN: -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-RV64 declare @llvm.experimental.vp.strided.load.nxv1i8.p0.i8(ptr, i8, , i32) @@ -823,15 +823,15 @@ define @zero_strided_unmasked_vpload_nxv1f16(ptr %ptr) { ret %load } -define @zero_strided_vadd.vx( %v, ptr %ptr) { -; CHECK-RV32-LABEL: zero_strided_vadd.vx: +define @zero_strided_vadd_nxv1i64( %v, ptr %ptr) { +; CHECK-RV32-LABEL: zero_strided_vadd_nxv1i64: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-RV32-NEXT: vlse64.v v9, (a0), zero ; CHECK-RV32-NEXT: vadd.vv v8, v8, v9 ; CHECK-RV32-NEXT: ret ; -; CHECK-RV64-LABEL: zero_strided_vadd.vx: +; CHECK-RV64-LABEL: zero_strided_vadd_nxv1i64: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: ld a0, 0(a0) ; CHECK-RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma @@ -842,3 +842,69 @@ define @zero_strided_vadd.vx( %v, ptr %ptr) %w = add %v, %load ret %w } + +define @zero_strided_vadd_nxv16i64( %v, ptr %ptr) { +; CHECK-RV32-LABEL: zero_strided_vadd_nxv16i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: csrr a1, vlenb +; CHECK-RV32-NEXT: srli a2, a1, 3 +; CHECK-RV32-NEXT: sub a3, a2, a1 +; CHECK-RV32-NEXT: sltu a4, a2, a3 +; CHECK-RV32-NEXT: addi a4, a4, -1 +; CHECK-RV32-NEXT: and a3, a4, a3 +; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-RV32-NEXT: vlse64.v v24, (a0), zero +; CHECK-RV32-NEXT: bltu a2, a1, .LBB55_2 +; CHECK-RV32-NEXT: # %bb.1: +; CHECK-RV32-NEXT: mv a2, a1 +; CHECK-RV32-NEXT: .LBB55_2: +; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-RV32-NEXT: vlse64.v v0, (a0), zero +; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-RV32-NEXT: vadd.vv v16, v16, v24 +; CHECK-RV32-NEXT: vadd.vv v8, v8, v0 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: zero_strided_vadd_nxv16i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: ld a0, 0(a0) +; CHECK-RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; CHECK-RV64-NEXT: vadd.vx v8, v8, a0 +; CHECK-RV64-NEXT: vadd.vx v16, v16, a0 +; CHECK-RV64-NEXT: ret + %vscale = call i32 @llvm.vscale() + %load = call @llvm.experimental.vp.strided.load.nxv16i64.p0.i32(ptr %ptr, i32 0, splat (i1 true), i32 %vscale) + %w = add %v, %load + ret %w +} + +define @zero_strided_vadd_nxv1p0( %v, ptr %ptr) { +; CHECK-OPT-RV32-LABEL: zero_strided_vadd_nxv1p0: +; CHECK-OPT-RV32: # %bb.0: +; CHECK-OPT-RV32-NEXT: vsetvli a1, 
zero, e32, mf2, ta, ma +; CHECK-OPT-RV32-NEXT: vlse32.v v8, (a0), zero +; CHECK-OPT-RV32-NEXT: ret +; +; CHECK-OPT-RV64-LABEL: zero_strided_vadd_nxv1p0: +; CHECK-OPT-RV64: # %bb.0: +; CHECK-OPT-RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; CHECK-OPT-RV64-NEXT: vlse64.v v8, (a0), zero +; CHECK-OPT-RV64-NEXT: ret +; +; CHECK-NO-OPT-RV32-LABEL: zero_strided_vadd_nxv1p0: +; CHECK-NO-OPT-RV32: # %bb.0: +; CHECK-NO-OPT-RV32-NEXT: lw a0, 0(a0) +; CHECK-NO-OPT-RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; CHECK-NO-OPT-RV32-NEXT: vmv.v.x v8, a0 +; CHECK-NO-OPT-RV32-NEXT: ret +; +; CHECK-NO-OPT-RV64-LABEL: zero_strided_vadd_nxv1p0: +; CHECK-NO-OPT-RV64: # %bb.0: +; CHECK-NO-OPT-RV64-NEXT: ld a0, 0(a0) +; CHECK-NO-OPT-RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; CHECK-NO-OPT-RV64-NEXT: vmv.v.x v8, a0 +; CHECK-NO-OPT-RV64-NEXT: ret + %vscale = call i32 @llvm.vscale() + %load = call @llvm.experimental.vp.strided.load.nxv1p0.p0.i32(ptr %ptr, i32 0, splat (i1 true), i32 %vscale) + ret %load +} From e2c74aa535752cd6cf098731608d26275d1e40ac Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 31 Jul 2024 18:52:28 -0700 Subject: [PATCH 049/114] [TableGen][MVT] Lower the maximum 16-bit MVT from 16384 to 511. (#101401) MachineValueTypeSet in tablegen allocates an array with a bit per MVT. This used to be 256 bits, with the introduction of 16-bit MVT it ballooned to 65536 bits. I suspect this is increasing the memory usage of many of the data structures used by CodeGenDAGPatterns. Since we don't need the full 16-bit range yet, this patch proposes lowering the maximum MVT to 511 and using only 512 bits for MachineValueTypeSet's storage. --- llvm/include/llvm/CodeGen/ValueTypes.td | 16 ++++++++-------- llvm/utils/TableGen/Common/CodeGenDAGPatterns.h | 8 ++++---- llvm/utils/TableGen/VTEmitter.cpp | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td index 4636a2543b56aa..0883f597dabc12 100644 --- a/llvm/include/llvm/CodeGen/ValueTypes.td +++ b/llvm/include/llvm/CodeGen/ValueTypes.td @@ -289,34 +289,34 @@ def aarch64svcount def spirvbuiltin : ValueType<0, 200>; // SPIR-V's builtin type let isNormalValueType = false in { -def token : ValueType<0, 16376>; // TokenTy -def MetadataVT : ValueType<0, 16377> { // Metadata +def token : ValueType<0, 504>; // TokenTy +def MetadataVT : ValueType<0, 505> { // Metadata let LLVMName = "Metadata"; } // Pseudo valuetype mapped to the current pointer size to any address space. // Should only be used in TableGen. -def iPTRAny : VTAny<16378>; +def iPTRAny : VTAny<506>; // Pseudo valuetype to represent "vector of any size" // Should only be used in TableGen. -def vAny : VTAny<16379>; +def vAny : VTAny<507>; // Pseudo valuetype to represent "float of any format" // Should only be used in TableGen. -def fAny : VTAny<16380>; +def fAny : VTAny<508>; // Pseudo valuetype to represent "integer of any bit width" // Should only be used in TableGen. -def iAny : VTAny<16381>; +def iAny : VTAny<509>; // Pseudo valuetype mapped to the current pointer size. // Should only be used in TableGen. -def iPTR : ValueType<0, 16382>; +def iPTR : ValueType<0, 510>; // Pseudo valuetype to represent "any type of any size". // Should only be used in TableGen. 
-def Any : VTAny<16383>; +def Any : VTAny<511>; } // isNormalValueType = false diff --git a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.h b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.h index b4de20bb13184c..d8df7427836c9a 100644 --- a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.h +++ b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.h @@ -53,10 +53,7 @@ using TreePatternNodePtr = IntrusiveRefCntPtr; /// To reduce the allocations even further, make MachineValueTypeSet own /// the storage and use std::array as the bit container. struct MachineValueTypeSet { - static_assert(std::is_same, - uint16_t>::value, - "Change uint16_t here to the SimpleValueType's type"); - static unsigned constexpr Capacity = std::numeric_limits::max() + 1; + static unsigned constexpr Capacity = 512; using WordType = uint64_t; static unsigned constexpr WordWidth = CHAR_BIT * sizeof(WordType); static unsigned constexpr NumWords = Capacity / WordWidth; @@ -84,9 +81,11 @@ struct MachineValueTypeSet { } LLVM_ATTRIBUTE_ALWAYS_INLINE unsigned count(MVT T) const { + assert(T.SimpleTy < Capacity && "Capacity needs to be enlarged"); return (Words[T.SimpleTy / WordWidth] >> (T.SimpleTy % WordWidth)) & 1; } std::pair insert(MVT T) { + assert(T.SimpleTy < Capacity && "Capacity needs to be enlarged"); bool V = count(T.SimpleTy); Words[T.SimpleTy / WordWidth] |= WordType(1) << (T.SimpleTy % WordWidth); return {*this, V}; @@ -98,6 +97,7 @@ struct MachineValueTypeSet { } LLVM_ATTRIBUTE_ALWAYS_INLINE void erase(MVT T) { + assert(T.SimpleTy < Capacity && "Capacity needs to be enlarged"); Words[T.SimpleTy / WordWidth] &= ~(WordType(1) << (T.SimpleTy % WordWidth)); } diff --git a/llvm/utils/TableGen/VTEmitter.cpp b/llvm/utils/TableGen/VTEmitter.cpp index 79dbc37bb0b120..eb58148e4e364a 100644 --- a/llvm/utils/TableGen/VTEmitter.cpp +++ b/llvm/utils/TableGen/VTEmitter.cpp @@ -79,7 +79,7 @@ static void VTtoGetLLVMTyString(raw_ostream &OS, const Record *VT) { void VTEmitter::run(raw_ostream &OS) { emitSourceFileHeader("ValueTypes Source Fragment", OS, Records); - std::vector VTsByNumber{16384}; + std::vector VTsByNumber{512}; auto ValueTypes = Records.getAllDerivedDefinitions("ValueType"); for (auto *VT : ValueTypes) { auto Number = VT->getValueAsInt("Value"); From a1ba4fb4516a33bd61b8219e2fc46ba3c1787460 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 31 Jul 2024 19:01:45 -0700 Subject: [PATCH 050/114] [RISCV][GISel] Slightly simplify the regbank selection for G_LOAD/STORE. NFC (#101431) Merge the isVector early out with the previous check for isVector. --- .../RISCV/GISel/RISCVRegisterBankInfo.cpp | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp index 2b1df0cd4670a6..5369be24f0e7cb 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp @@ -311,15 +311,16 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case TargetOpcode::G_LOAD: { LLT Ty = MRI.getType(MI.getOperand(0).getReg()); TypeSize Size = Ty.getSizeInBits(); - if (Ty.isVector()) - OpdsMapping[0] = getVRBValueMapping(Size.getKnownMinValue()); - else - OpdsMapping[0] = GPRValueMapping; OpdsMapping[1] = GPRValueMapping; - if (Ty.isVector()) + if (Ty.isVector()) { + OpdsMapping[0] = getVRBValueMapping(Size.getKnownMinValue()); break; + } + + OpdsMapping[0] = GPRValueMapping; + // Use FPR64 for s64 loads on rv32. 
if (GPRSize == 32 && Size.getFixedValue() == 64) { assert(MF.getSubtarget().hasStdExtD()); @@ -342,15 +343,15 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case TargetOpcode::G_STORE: { LLT Ty = MRI.getType(MI.getOperand(0).getReg()); TypeSize Size = Ty.getSizeInBits(); - if (Ty.isVector()) - OpdsMapping[0] = getVRBValueMapping(Size.getKnownMinValue()); - else - OpdsMapping[0] = GPRValueMapping; OpdsMapping[1] = GPRValueMapping; - if (Ty.isVector()) + if (Ty.isVector()) { + OpdsMapping[0] = getVRBValueMapping(Size.getKnownMinValue()); break; + } + + OpdsMapping[0] = GPRValueMapping; // Use FPR64 for s64 stores on rv32. if (GPRSize == 32 && Size.getFixedValue() == 64) { From bc6834f5c70daca7ec321398a16891800b1c2bd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrea=20=F0=9F=A6=88?= Date: Thu, 1 Aug 2024 04:07:54 +0200 Subject: [PATCH 051/114] [mlir][spirv] Fix tablegen generator script's stripping of prefixes (#101378) This script looks for existing definitions with the `SPIRV_` prefix, so that it can preserve them when updating the file. When the commit 2d628330482e49d36744cb8f3fb5047cfeae6c56 changed the prefix from `SPV_`, the number of characters to strip from matched names was not updated, which broke this feature. This commit fixes remaining cases that weren't fixed by 339c87a8a086347bd8b5aae8b5bc43fc1c155cc1. The relationship of this script to the files it is meant to maintain is still bitrotten in other ways. --- mlir/utils/spirv/gen_spirv_dialect.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/mlir/utils/spirv/gen_spirv_dialect.py b/mlir/utils/spirv/gen_spirv_dialect.py index 426bfca1b4f88f..78c1022428d8a1 100755 --- a/mlir/utils/spirv/gen_spirv_dialect.py +++ b/mlir/utils/spirv/gen_spirv_dialect.py @@ -536,7 +536,10 @@ def gen_instr_coverage_report(path, instructions): content = content.split(AUTOGEN_OPCODE_SECTION_MARKER) - existing_opcodes = [k[11:] for k in re.findall("def SPIRV_OC_\w+", content[1])] + prefix = "def SPIRV_OC_" + existing_opcodes = [ + k[len(prefix) :] for k in re.findall(prefix + "\w+", content[1]) + ] existing_instructions = list( filter(lambda inst: (inst["opname"] in existing_opcodes), instructions) ) @@ -637,7 +640,12 @@ def update_td_enum_attrs(path, operand_kinds, filter_list): assert len(content) == 3 # Extend filter list with existing enum definitions - existing_kinds = [k[8:-4] for k in re.findall("def SPIRV_\w+Attr", content[1])] + prefix = "def SPIRV_" + suffix = "Attr" + existing_kinds = [ + k[len(prefix) : -len(suffix)] + for k in re.findall(prefix + "\w+" + suffix, content[1]) + ] filter_list.extend(existing_kinds) capability_mapping = get_capability_mapping(operand_kinds) @@ -959,12 +967,20 @@ def extract_td_op_info(op_def): - A dict containing potential manually specified sections """ # Get opname - opname = [o[8:-2] for o in re.findall("def SPIRV_\w+Op", op_def)] + prefix = "def SPIRV_" + suffix = "Op" + opname = [ + o[len(prefix) : -len(suffix)] + for o in re.findall(prefix + "\w+" + suffix, op_def) + ] assert len(opname) == 1, "more than one ops in the same section!" opname = opname[0] # Get instruction category - inst_category = [o[4:] for o in re.findall("SPIRV_\w+Op", op_def.split(":", 1)[1])] + prefix = "SPIRV_" + inst_category = [ + o[len(prefix) :] for o in re.findall(prefix + "\w+Op", op_def.split(":", 1)[1]) + ] assert len(inst_category) <= 1, "more than one ops in the same section!" 
inst_category = inst_category[0] if len(inst_category) == 1 else "Op" From e6aeb3f4daea0ffd43930c63f9b9a7668b915fd9 Mon Sep 17 00:00:00 2001 From: lifengxiang1025 Date: Thu, 1 Aug 2024 10:16:53 +0800 Subject: [PATCH 052/114] [MemProf] Fix when function has indirect call (#101170) When function has indirect call in LTO mode, it causes `assert(Alias)` in `findProfiledCalleeThroughTailCalls` --- .../Transforms/IPO/MemProfContextDisambiguation.cpp | 2 +- .../MemProfContextDisambiguation/tailcall.ll | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 64da3dfdd97c44..c9de9c964bba0a 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -2109,7 +2109,7 @@ bool ModuleCallsiteContextGraph::calleeMatchesFunc( Instruction *Call, const Function *Func, const Function *CallerFunc, std::vector> &FoundCalleeChain) { auto *CB = dyn_cast(Call); - if (!CB->getCalledOperand()) + if (!CB->getCalledOperand() || CB->isIndirectCall()) return false; auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts(); auto *CalleeFunc = dyn_cast(CalleeVal); diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/tailcall.ll b/llvm/test/Transforms/MemProfContextDisambiguation/tailcall.ll index 9a771e04999b95..abba3dd7f7cbf1 100644 --- a/llvm/test/Transforms/MemProfContextDisambiguation/tailcall.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/tailcall.ll @@ -16,6 +16,12 @@ source_filename = "memprof-tailcall.cc" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" +@a = dso_local global [2 x ptr] [ptr @_Z2a1v, ptr @_Z2a2v], align 16 + +declare void @_Z2a1v() #0 + +declare void @_Z2a2v() #0 + ; Function Attrs: noinline ; IR-LABEL: @_Z3barv() define ptr @_Z3barv() local_unnamed_addr #0 { @@ -58,6 +64,8 @@ define i32 @main() #0 { ;; cloned functions. ; IR: call ptr @_Z3foov.memprof.1() %call1 = tail call ptr @_Z3foov(), !callsite !7 + %2 = load ptr, ptr @a, align 16 + call void %2(), !callsite !10 ret i32 0 } @@ -79,7 +87,7 @@ attributes #0 = { noinline } attributes #1 = { nobuiltin allocsize(0) } attributes #2 = { builtin allocsize(0) } -!0 = !{!1, !3} +!0 = !{!1, !3, !8} !1 = !{!2, !"notcold"} !2 = !{i64 3186456655321080972, i64 8632435727821051414} !3 = !{!4, !"cold"} @@ -87,3 +95,6 @@ attributes #2 = { builtin allocsize(0) } !5 = !{i64 3186456655321080972} !6 = !{i64 8632435727821051414} !7 = !{i64 -3421689549917153178} +!8 = !{!9, !"notcold"} +!9 = !{i64 3186456655321080972, i64 1} +!10 = !{i64 1} From 9227fd74e49b045baba910cb07ee93b2c660d267 Mon Sep 17 00:00:00 2001 From: vporpo Date: Wed, 31 Jul 2024 19:20:43 -0700 Subject: [PATCH 053/114] [SandboxIR][NFC] Factor out common test for CastInst subclasses (#101410) The tests for most CastInst sub-classes, except AddrSpaceCastInst, are very similar. This patch creates a common template function for all of them. 
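As a rough standalone sketch of the pattern described above (not part of this patch, and independent of the SandboxIR classes in the diff that follows): a single helper templated on the expected opcode can replace a family of near-identical per-subclass test bodies. The `Opcode` enum, `Cast` struct, and `checkCast` helper below are invented stand-ins, shown only to illustrate the refactoring idea.

#include <cassert>
#include <string>

enum class Opcode { SIToFP, FPToUI, BitCast };

// Made-up stand-in for a cast instruction under test.
struct Cast {
  Opcode Op;
  std::string Src, Dst;
};

// One templated helper instead of a copy-pasted body per subclass; only the
// opcode and the source/destination types vary between the original tests.
template <Opcode ExpectedOp>
void checkCast(const std::string &SrcTy, const std::string &DstTy) {
  Cast C{ExpectedOp, SrcTy, DstTy};
  assert(C.Op == ExpectedOp);
  assert(C.Src == SrcTy && C.Dst == DstTy);
}

int main() {
  checkCast<Opcode::SIToFP>("i32", "float");
  checkCast<Opcode::FPToUI>("float", "i32");
  checkCast<Opcode::BitCast>("i32", "float");
  return 0;
}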
--- llvm/unittests/SandboxIR/SandboxIRTest.cpp | 444 +++------------------ 1 file changed, 61 insertions(+), 383 deletions(-) diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp index 31feb56a5272f8..cbf39528508808 100644 --- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp +++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp @@ -1620,75 +1620,85 @@ define void @foo(i32 %arg, float %farg, double %darg, ptr %ptr) { } } -TEST_F(SandboxIRTest, SIToFPInst) { - parseIR(C, R"IR( -define void @foo(i32 %arg) { - %sitofp = sitofp i32 %arg to float - ret void -} -)IR"); - Function &LLVMF = *M->getFunction("foo"); - sandboxir::Context Ctx(C); +/// CastInst's subclasses are very similar so we can use a common test function +/// for them. +template +void testCastInst(llvm::Module &M, Type *SrcTy, Type *DstTy) { + Function &LLVMF = *M.getFunction("foo"); + sandboxir::Context Ctx(M.getContext()); sandboxir::Function *F = Ctx.createFunction(&LLVMF); unsigned ArgIdx = 0; auto *Arg = F->getArg(ArgIdx++); auto *BB = &*F->begin(); auto It = BB->begin(); - Type *Ti32 = Type::getInt32Ty(C); - Type *Tfloat = Type::getFloatTy(C); - auto *SIToFP = cast(&*It++); - EXPECT_EQ(SIToFP->getOpcode(), sandboxir::Instruction::Opcode::SIToFP); - EXPECT_EQ(SIToFP->getSrcTy(), Ti32); - EXPECT_EQ(SIToFP->getDestTy(), Tfloat); + auto *CI = cast(&*It++); + EXPECT_EQ(CI->getOpcode(), OpcodeT); + EXPECT_EQ(CI->getSrcTy(), SrcTy); + EXPECT_EQ(CI->getDestTy(), DstTy); auto *Ret = cast(&*It++); { // Check create() WhereIt, WhereBB - auto *NewI = cast( - sandboxir::SIToFPInst::create(Arg, Tfloat, /*WhereIt=*/BB->end(), - /*WhereBB=*/BB, Ctx, "SIToFP")); + auto *NewI = + cast(SubclassT::create(Arg, DstTy, /*WhereIt=*/BB->end(), + /*WhereBB=*/BB, Ctx, "NewCI")); // Check getOpcode(). - EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::SIToFP); + EXPECT_EQ(NewI->getOpcode(), OpcodeT); // Check getSrcTy(). EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); // Check getDestTy(). - EXPECT_EQ(NewI->getDestTy(), Tfloat); + EXPECT_EQ(NewI->getDestTy(), DstTy); // Check instr position. EXPECT_EQ(NewI->getNextNode(), nullptr); EXPECT_EQ(NewI->getPrevNode(), Ret); + // Check instr name. + EXPECT_EQ(NewI->getName(), "NewCI"); } { // Check create() InsertBefore. - auto *NewI = cast( - sandboxir::SIToFPInst::create(Arg, Tfloat, - /*InsertBefore=*/Ret, Ctx, "SIToFP")); + auto *NewI = + cast(SubclassT::create(Arg, DstTy, + /*InsertBefore=*/Ret, Ctx, "NewCI")); // Check getOpcode(). - EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::SIToFP); + EXPECT_EQ(NewI->getOpcode(), OpcodeT); // Check getSrcTy(). EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); // Check getDestTy(). - EXPECT_EQ(NewI->getDestTy(), Tfloat); + EXPECT_EQ(NewI->getDestTy(), DstTy); // Check instr position. EXPECT_EQ(NewI->getNextNode(), Ret); } { // Check create() InsertAtEnd. - auto *NewI = cast( - sandboxir::SIToFPInst::create(Arg, Tfloat, - /*InsertAtEnd=*/BB, Ctx, "SIToFP")); + auto *NewI = + cast(SubclassT::create(Arg, DstTy, + /*InsertAtEnd=*/BB, Ctx, "NewCI")); // Check getOpcode(). - EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::SIToFP); + EXPECT_EQ(NewI->getOpcode(), OpcodeT); // Check getSrcTy(). EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); // Check getDestTy(). - EXPECT_EQ(NewI->getDestTy(), Tfloat); + EXPECT_EQ(NewI->getDestTy(), DstTy); // Check instr position. 
EXPECT_EQ(NewI->getNextNode(), nullptr); EXPECT_EQ(NewI->getParent(), BB); } } +TEST_F(SandboxIRTest, SIToFPInst) { + parseIR(C, R"IR( +define void @foo(i32 %arg) { + %sitofp = sitofp i32 %arg to float + ret void +} +)IR"); + testCastInst( + *M, + /*SrcTy=*/Type::getInt32Ty(C), + /*DstTy=*/Type::getFloatTy(C)); +} + TEST_F(SandboxIRTest, FPToUIInst) { parseIR(C, R"IR( define void @foo(float %arg) { @@ -1696,66 +1706,9 @@ define void @foo(float %arg) { ret void } )IR"); - Function &LLVMF = *M->getFunction("foo"); - sandboxir::Context Ctx(C); - sandboxir::Function *F = Ctx.createFunction(&LLVMF); - unsigned ArgIdx = 0; - auto *Arg = F->getArg(ArgIdx++); - auto *BB = &*F->begin(); - auto It = BB->begin(); - Type *Ti32 = Type::getInt32Ty(C); - Type *Tfloat = Type::getFloatTy(C); + testCastInst( - auto *FPToUI = cast(&*It++); - EXPECT_EQ(FPToUI->getOpcode(), sandboxir::Instruction::Opcode::FPToUI); - EXPECT_EQ(FPToUI->getSrcTy(), Tfloat); - EXPECT_EQ(FPToUI->getDestTy(), Ti32); - auto *Ret = cast(&*It++); - - { - // Check create() WhereIt, WhereBB - auto *NewI = cast( - sandboxir::FPToUIInst::create(Arg, Ti32, /*WhereIt=*/BB->end(), - /*WhereBB=*/BB, Ctx, "FPToUI")); - // Check getOpcode(). - EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::FPToUI); - // Check getSrcTy(). - EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); - // Check getDestTy(). - EXPECT_EQ(NewI->getDestTy(), Ti32); - // Check instr position. - EXPECT_EQ(NewI->getNextNode(), nullptr); - EXPECT_EQ(NewI->getPrevNode(), Ret); - } - { - // Check create() InsertBefore. - auto *NewI = cast( - sandboxir::FPToUIInst::create(Arg, Ti32, - /*InsertBefore=*/Ret, Ctx, "FPToUI")); - // Check getOpcode(). - EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::FPToUI); - // Check getSrcTy(). - EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); - // Check getDestTy(). - EXPECT_EQ(NewI->getDestTy(), Ti32); - // Check instr position. - EXPECT_EQ(NewI->getNextNode(), Ret); - } - { - // Check create() InsertAtEnd. - auto *NewI = cast( - sandboxir::FPToUIInst::create(Arg, Ti32, - /*InsertAtEnd=*/BB, Ctx, "FPToUI")); - // Check getOpcode(). - EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::FPToUI); - // Check getSrcTy(). - EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); - // Check getDestTy(). - EXPECT_EQ(NewI->getDestTy(), Ti32); - // Check instr position. - EXPECT_EQ(NewI->getNextNode(), nullptr); - EXPECT_EQ(NewI->getParent(), BB); - } + *M, /*SrcTy=*/Type::getFloatTy(C), /*DstTy=*/Type::getInt32Ty(C)); } TEST_F(SandboxIRTest, FPToSIInst) { @@ -1765,66 +1718,8 @@ define void @foo(float %arg) { ret void } )IR"); - Function &LLVMF = *M->getFunction("foo"); - sandboxir::Context Ctx(C); - sandboxir::Function *F = Ctx.createFunction(&LLVMF); - unsigned ArgIdx = 0; - auto *Arg = F->getArg(ArgIdx++); - auto *BB = &*F->begin(); - auto It = BB->begin(); - Type *Ti32 = Type::getInt32Ty(C); - Type *Tfloat = Type::getFloatTy(C); - - auto *FPToSI = cast(&*It++); - EXPECT_EQ(FPToSI->getOpcode(), sandboxir::Instruction::Opcode::FPToSI); - EXPECT_EQ(FPToSI->getSrcTy(), Tfloat); - EXPECT_EQ(FPToSI->getDestTy(), Ti32); - auto *Ret = cast(&*It++); - - { - // Check create() WhereIt, WhereBB - auto *NewI = cast( - sandboxir::FPToSIInst::create(Arg, Ti32, /*WhereIt=*/BB->end(), - /*WhereBB=*/BB, Ctx, "FPToSI")); - // Check getOpcode(). - EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::FPToSI); - // Check getSrcTy(). - EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); - // Check getDestTy(). 
- EXPECT_EQ(NewI->getDestTy(), Ti32); - // Check instr position. - EXPECT_EQ(NewI->getNextNode(), nullptr); - EXPECT_EQ(NewI->getPrevNode(), Ret); - } - { - // Check create() InsertBefore. - auto *NewI = cast( - sandboxir::FPToSIInst::create(Arg, Ti32, - /*InsertBefore=*/Ret, Ctx, "FPToSI")); - // Check getOpcode(). - EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::FPToSI); - // Check getSrcTy(). - EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); - // Check getDestTy(). - EXPECT_EQ(NewI->getDestTy(), Ti32); - // Check instr position. - EXPECT_EQ(NewI->getNextNode(), Ret); - } - { - // Check create() InsertAtEnd. - auto *NewI = cast( - sandboxir::FPToSIInst::create(Arg, Ti32, - /*InsertAtEnd=*/BB, Ctx, "FPToSI")); - // Check getOpcode(). - EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::FPToSI); - // Check getSrcTy(). - EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); - // Check getDestTy(). - EXPECT_EQ(NewI->getDestTy(), Ti32); - // Check instr position. - EXPECT_EQ(NewI->getNextNode(), nullptr); - EXPECT_EQ(NewI->getParent(), BB); - } + testCastInst( + *M, /*SrcTy=*/Type::getFloatTy(C), /*DstTy=*/Type::getInt32Ty(C)); } TEST_F(SandboxIRTest, IntToPtrInst) { @@ -1834,66 +1729,10 @@ define void @foo(i32 %arg) { ret void } )IR"); - Function &LLVMF = *M->getFunction("foo"); - sandboxir::Context Ctx(C); - sandboxir::Function *F = Ctx.createFunction(&LLVMF); - unsigned ArgIdx = 0; - auto *Arg = F->getArg(ArgIdx++); - auto *BB = &*F->begin(); - auto It = BB->begin(); - Type *Ti32 = Type::getInt32Ty(C); - Type *Tptr = Ti32->getPointerTo(); - - auto *IntToPtr = cast(&*It++); - EXPECT_EQ(IntToPtr->getOpcode(), sandboxir::Instruction::Opcode::IntToPtr); - EXPECT_EQ(IntToPtr->getSrcTy(), Ti32); - EXPECT_EQ(IntToPtr->getDestTy(), Tptr); - auto *Ret = cast(&*It++); - - { - // Check create() WhereIt, WhereBB - auto *NewI = cast( - sandboxir::IntToPtrInst::create(Arg, Tptr, /*WhereIt=*/BB->end(), - /*WhereBB=*/BB, Ctx, "IntToPtr")); - // Check getOpcode(). - EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::IntToPtr); - // Check getSrcTy(). - EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); - // Check getDestTy(). - EXPECT_EQ(NewI->getDestTy(), Tptr); - // Check instr position. - EXPECT_EQ(NewI->getNextNode(), nullptr); - EXPECT_EQ(NewI->getPrevNode(), Ret); - } - { - // Check create() InsertBefore. - auto *NewI = cast( - sandboxir::IntToPtrInst::create(Arg, Tptr, - /*InsertBefore=*/Ret, Ctx, "IntToPtr")); - // Check getOpcode(). - EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::IntToPtr); - // Check getSrcTy(). - EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); - // Check getDestTy(). - EXPECT_EQ(NewI->getDestTy(), Tptr); - // Check instr position. - EXPECT_EQ(NewI->getNextNode(), Ret); - } - { - // Check create() InsertAtEnd. - auto *NewI = cast( - sandboxir::IntToPtrInst::create(Arg, Tptr, - /*InsertAtEnd=*/BB, Ctx, "IntToPtr")); - // Check getOpcode(). - EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::IntToPtr); - // Check getSrcTy(). - EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); - // Check getDestTy(). - EXPECT_EQ(NewI->getDestTy(), Tptr); - // Check instr position. 
- EXPECT_EQ(NewI->getNextNode(), nullptr); - EXPECT_EQ(NewI->getParent(), BB); - } + testCastInst( + *M, + /*SrcTy=*/Type::getInt32Ty(C), /*DstTy=*/PointerType::get(C, 0)); } TEST_F(SandboxIRTest, PtrToIntInst) { @@ -1903,66 +1742,9 @@ define void @foo(ptr %ptr) { ret void } )IR"); - Function &LLVMF = *M->getFunction("foo"); - sandboxir::Context Ctx(C); - sandboxir::Function *F = Ctx.createFunction(&LLVMF); - unsigned ArgIdx = 0; - auto *Arg = F->getArg(ArgIdx++); - auto *BB = &*F->begin(); - auto It = BB->begin(); - Type *Ti32 = Type::getInt32Ty(C); - Type *Tptr = Ti32->getPointerTo(); - - auto *PtrToInt = cast(&*It++); - EXPECT_EQ(PtrToInt->getOpcode(), sandboxir::Instruction::Opcode::PtrToInt); - EXPECT_EQ(PtrToInt->getSrcTy(), Tptr); - EXPECT_EQ(PtrToInt->getDestTy(), Ti32); - auto *Ret = cast(&*It++); - - { - // Check create() WhereIt, WhereBB - auto *NewI = cast( - sandboxir::PtrToIntInst::create(Arg, Ti32, /*WhereIt=*/BB->end(), - /*WhereBB=*/BB, Ctx, "PtrToInt")); - // Check getOpcode(). - EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::PtrToInt); - // Check getSrcTy(). - EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); - // Check getDestTy(). - EXPECT_EQ(NewI->getDestTy(), Ti32); - // Check instr position. - EXPECT_EQ(NewI->getNextNode(), nullptr); - EXPECT_EQ(NewI->getPrevNode(), Ret); - } - { - // Check create() InsertBefore. - auto *NewI = cast( - sandboxir::PtrToIntInst::create(Arg, Ti32, - /*InsertBefore=*/Ret, Ctx, "PtrToInt")); - // Check getOpcode(). - EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::PtrToInt); - // Check getSrcTy(). - EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); - // Check getDestTy(). - EXPECT_EQ(NewI->getDestTy(), Ti32); - // Check instr position. - EXPECT_EQ(NewI->getNextNode(), Ret); - } - { - // Check create() InsertAtEnd. - auto *NewI = cast( - sandboxir::PtrToIntInst::create(Arg, Ti32, - /*InsertAtEnd=*/BB, Ctx, "PtrToInt")); - // Check getOpcode(). - EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::PtrToInt); - // Check getSrcTy(). - EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); - // Check getDestTy(). - EXPECT_EQ(NewI->getDestTy(), Ti32); - // Check instr position. - EXPECT_EQ(NewI->getNextNode(), nullptr); - EXPECT_EQ(NewI->getParent(), BB); - } + testCastInst( + *M, /*SrcTy=*/PointerType::get(C, 0), /*DstTy=*/Type::getInt32Ty(C)); } TEST_F(SandboxIRTest, BitCastInst) { @@ -1972,66 +1754,9 @@ define void @foo(i32 %arg) { ret void } )IR"); - Function &LLVMF = *M->getFunction("foo"); - sandboxir::Context Ctx(C); - sandboxir::Function *F = Ctx.createFunction(&LLVMF); - unsigned ArgIdx = 0; - auto *Arg = F->getArg(ArgIdx++); - auto *BB = &*F->begin(); - auto It = BB->begin(); - Type *Ti32 = Type::getInt32Ty(C); - Type *Tfloat = Type::getFloatTy(C); - - auto *BitCast = cast(&*It++); - EXPECT_EQ(BitCast->getOpcode(), sandboxir::Instruction::Opcode::BitCast); - EXPECT_EQ(BitCast->getSrcTy(), Ti32); - EXPECT_EQ(BitCast->getDestTy(), Tfloat); - auto *Ret = cast(&*It++); - - { - // Check create() WhereIt, WhereBB - auto *NewI = cast( - sandboxir::BitCastInst::create(Arg, Tfloat, /*WhereIt=*/BB->end(), - /*WhereBB=*/BB, Ctx, "BitCast")); - // Check getOpcode(). - EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::BitCast); - // Check getSrcTy(). - EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); - // Check getDestTy(). - EXPECT_EQ(NewI->getDestTy(), Tfloat); - // Check instr position. - EXPECT_EQ(NewI->getNextNode(), nullptr); - EXPECT_EQ(NewI->getPrevNode(), Ret); - } - { - // Check create() InsertBefore. 
- auto *NewI = cast( - sandboxir::BitCastInst::create(Arg, Tfloat, - /*InsertBefore=*/Ret, Ctx, "BitCast")); - // Check getOpcode(). - EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::BitCast); - // Check getSrcTy(). - EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); - // Check getDestTy(). - EXPECT_EQ(NewI->getDestTy(), Tfloat); - // Check instr position. - EXPECT_EQ(NewI->getNextNode(), Ret); - } - { - // Check create() InsertAtEnd. - auto *NewI = cast( - sandboxir::BitCastInst::create(Arg, Tfloat, - /*InsertAtEnd=*/BB, Ctx, "BitCast")); - // Check getOpcode(). - EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::BitCast); - // Check getSrcTy(). - EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); - // Check getDestTy(). - EXPECT_EQ(NewI->getDestTy(), Tfloat); - // Check instr position. - EXPECT_EQ(NewI->getNextNode(), nullptr); - EXPECT_EQ(NewI->getParent(), BB); - } + testCastInst( + *M, + /*SrcTy=*/Type::getInt32Ty(C), /*DstTy=*/Type::getFloatTy(C)); } TEST_F(SandboxIRTest, AddrSpaceCastInst) { @@ -2041,6 +1766,12 @@ define void @foo(ptr %ptr) { ret void } )IR"); + Type *Tptr0 = PointerType::get(C, 0); + Type *Tptr1 = PointerType::get(C, 1); + testCastInst(*M, + /*SrcTy=*/Tptr0, + /*DstTy=*/Tptr1); Function &LLVMF = *M->getFunction("foo"); sandboxir::Context Ctx(C); sandboxir::Function *F = Ctx.createFunction(&LLVMF); @@ -2048,69 +1779,16 @@ define void @foo(ptr %ptr) { auto *Arg = F->getArg(ArgIdx++); auto *BB = &*F->begin(); auto It = BB->begin(); - Type *Tptr = Type::getInt32Ty(C)->getPointerTo(); - Type *Tptr1 = Tptr->getPointerTo(1); auto *AddrSpaceCast = cast(&*It++); EXPECT_EQ(AddrSpaceCast->getOpcode(), sandboxir::Instruction::Opcode::AddrSpaceCast); - EXPECT_EQ(AddrSpaceCast->getSrcTy(), Tptr); - EXPECT_EQ(AddrSpaceCast->getDestTy(), Tptr1); EXPECT_EQ(AddrSpaceCast->getPointerOperand(), Arg); EXPECT_EQ(sandboxir::AddrSpaceCastInst::getPointerOperandIndex(), 0u); EXPECT_EQ(AddrSpaceCast->getSrcAddressSpace(), - cast(Tptr)->getPointerAddressSpace()); + cast(Tptr0)->getPointerAddressSpace()); EXPECT_EQ(AddrSpaceCast->getDestAddressSpace(), cast(Tptr1)->getPointerAddressSpace()); - auto *Ret = cast(&*It++); - - { - // Check create() WhereIt, WhereBB - auto *NewI = - cast(sandboxir::AddrSpaceCastInst::create( - Arg, Tptr1, /*WhereIt=*/BB->end(), - /*WhereBB=*/BB, Ctx, "AddrSpaceCast")); - // Check getOpcode(). - EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::AddrSpaceCast); - // Check getSrcTy(). - EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); - // Check getDestTy(). - EXPECT_EQ(NewI->getDestTy(), Tptr1); - // Check instr position. - EXPECT_EQ(NewI->getNextNode(), nullptr); - EXPECT_EQ(NewI->getPrevNode(), Ret); - } - { - // Check create() InsertBefore. - auto *NewI = - cast(sandboxir::AddrSpaceCastInst::create( - Arg, Tptr1, - /*InsertBefore=*/Ret, Ctx, "AddrSpaceCast")); - // Check getOpcode(). - EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::AddrSpaceCast); - // Check getSrcTy(). - EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); - // Check getDestTy(). - EXPECT_EQ(NewI->getDestTy(), Tptr1); - // Check instr position. - EXPECT_EQ(NewI->getNextNode(), Ret); - } - { - // Check create() InsertAtEnd. - auto *NewI = - cast(sandboxir::AddrSpaceCastInst::create( - Arg, Tptr1, - /*InsertAtEnd=*/BB, Ctx, "AddrSpaceCast")); - // Check getOpcode(). - EXPECT_EQ(NewI->getOpcode(), sandboxir::Instruction::Opcode::AddrSpaceCast); - // Check getSrcTy(). - EXPECT_EQ(NewI->getSrcTy(), Arg->getType()); - // Check getDestTy(). 
-    EXPECT_EQ(NewI->getDestTy(), Tptr1);
-    // Check instr position.
-    EXPECT_EQ(NewI->getNextNode(), nullptr);
-    EXPECT_EQ(NewI->getParent(), BB);
-  }
 }
 
 TEST_F(SandboxIRTest, PHINode) {

From 42c413b48917491efc5372752c6ad245530939f5 Mon Sep 17 00:00:00 2001
From: Hideto Ueno
Date: Thu, 1 Aug 2024 11:51:28 +0900
Subject: [PATCH 054/114] [mlir][Transforms] Preserve all analysis in print passes (#101315)

PrintIRPass, PrintOpStatsPass and PrintOpGraphPass do not mutate the IR, so
they can mark all analyses as preserved and save some recomputation.

---
 mlir/lib/Transforms/OpStats.cpp     | 1 +
 mlir/lib/Transforms/PrintIR.cpp     | 1 +
 mlir/lib/Transforms/ViewOpGraph.cpp | 1 +
 3 files changed, 3 insertions(+)

diff --git a/mlir/lib/Transforms/OpStats.cpp b/mlir/lib/Transforms/OpStats.cpp
index 6a71e1f02edc98..6746ed52396af8 100644
--- a/mlir/lib/Transforms/OpStats.cpp
+++ b/mlir/lib/Transforms/OpStats.cpp
@@ -55,6 +55,7 @@ void PrintOpStatsPass::runOnOperation() {
     printSummaryInJSON();
   } else
     printSummary();
+  markAllAnalysesPreserved();
 }
 
 void PrintOpStatsPass::printSummary() {
diff --git a/mlir/lib/Transforms/PrintIR.cpp b/mlir/lib/Transforms/PrintIR.cpp
index cc42c7e2f1db07..3c55f920dfd616 100644
--- a/mlir/lib/Transforms/PrintIR.cpp
+++ b/mlir/lib/Transforms/PrintIR.cpp
@@ -25,6 +25,7 @@ struct PrintIRPass : public impl::PrintIRPassBase<PrintIRPass> {
       llvm::dbgs() << " " << this->label;
     llvm::dbgs() << " //----- //\n";
     getOperation()->dump();
+    markAllAnalysesPreserved();
   }
 };
 
diff --git a/mlir/lib/Transforms/ViewOpGraph.cpp b/mlir/lib/Transforms/ViewOpGraph.cpp
index b3c0a06c96fead..82e9863ab40bf3 100644
--- a/mlir/lib/Transforms/ViewOpGraph.cpp
+++ b/mlir/lib/Transforms/ViewOpGraph.cpp
@@ -93,6 +93,7 @@ class PrintOpPass : public impl::ViewOpGraphBase<PrintOpPass> {
       processOperation(getOperation());
       emitAllEdgeStmts();
     });
+    markAllAnalysesPreserved();
   }
 
   /// Create a CFG graph for a region. Used in `Region::viewGraph`.
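For context on the idiom the message above relies on, here is a minimal, hedged sketch (not taken from the patch): an MLIR pass that only reads the IR can tell the pass manager its cached analyses are still valid by calling markAllAnalysesPreserved() at the end of runOnOperation(). The CountOpsPass name and the op-counting body are invented for illustration; only the PassWrapper/OperationPass scaffolding and the preserve call reflect the real API.

#include "mlir/Pass/Pass.h"
#include "llvm/Support/raw_ostream.h"

namespace {
// Invented example pass: it walks the IR without modifying it, so every
// analysis the pass manager has cached can be kept instead of being
// invalidated and recomputed by later passes.
struct CountOpsPass
    : public mlir::PassWrapper<CountOpsPass, mlir::OperationPass<>> {
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(CountOpsPass)

  llvm::StringRef getArgument() const final { return "count-ops"; }

  void runOnOperation() override {
    unsigned numOps = 0;
    getOperation()->walk([&](mlir::Operation *) { ++numOps; });
    llvm::errs() << "visited " << numOps << " ops\n";
    // Read-only pass: keep all cached analyses alive.
    markAllAnalysesPreserved();
  }
};
} // namespace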
From ed12f80ff0a8d304d10245c7bfb9f6af4a5c968c Mon Sep 17 00:00:00 2001 From: Job Henandez Lara Date: Wed, 31 Jul 2024 20:16:42 -0700 Subject: [PATCH 055/114] [libc][math][c23] add entrypoints and tests for getpayload{,f,f128} (#101285) --- libc/config/gpu/entrypoints.txt | 2 + libc/config/linux/aarch64/entrypoints.txt | 3 ++ libc/config/linux/arm/entrypoints.txt | 2 + libc/config/linux/riscv/entrypoints.txt | 3 ++ libc/config/linux/x86_64/entrypoints.txt | 3 ++ libc/docs/math/index.rst | 2 +- libc/spec/stdc.td | 3 ++ libc/src/math/CMakeLists.txt | 3 ++ libc/src/math/generic/CMakeLists.txt | 37 +++++++++++++++++++ libc/src/math/generic/getpayload.cpp | 20 ++++++++++ libc/src/math/generic/getpayloadf.cpp | 20 ++++++++++ libc/src/math/generic/getpayloadf128.cpp | 20 ++++++++++ libc/src/math/getpayload.h | 20 ++++++++++ libc/src/math/getpayloadf.h | 20 ++++++++++ libc/src/math/getpayloadf128.h | 21 +++++++++++ libc/test/src/math/smoke/CMakeLists.txt | 36 ++++++++++++++++++ libc/test/src/math/smoke/getpayload_test.cpp | 13 +++++++ .../src/math/smoke/getpayloadf128_test.cpp | 13 +++++++ libc/test/src/math/smoke/getpayloadf_test.cpp | 13 +++++++ 19 files changed, 253 insertions(+), 1 deletion(-) create mode 100644 libc/src/math/generic/getpayload.cpp create mode 100644 libc/src/math/generic/getpayloadf.cpp create mode 100644 libc/src/math/generic/getpayloadf128.cpp create mode 100644 libc/src/math/getpayload.h create mode 100644 libc/src/math/getpayloadf.h create mode 100644 libc/src/math/getpayloadf128.h create mode 100644 libc/test/src/math/smoke/getpayload_test.cpp create mode 100644 libc/test/src/math/smoke/getpayloadf128_test.cpp create mode 100644 libc/test/src/math/smoke/getpayloadf_test.cpp diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt index 6035af5c0ebb08..bddb1c387e8d6e 100644 --- a/libc/config/gpu/entrypoints.txt +++ b/libc/config/gpu/entrypoints.txt @@ -283,6 +283,8 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.fmodf libc.src.math.frexp libc.src.math.frexpf + libc.src.math.getpayload + libc.src.math.getpayloadf libc.src.math.hypot libc.src.math.hypotf libc.src.math.ilogb diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 2003f6929d5cb2..1cb357fa5ea59f 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -433,6 +433,8 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.fromfpxl libc.src.math.fsqrt libc.src.math.fsqrtl + libc.src.math.getpayload + libc.src.math.getpayloadf libc.src.math.hypot libc.src.math.hypotf libc.src.math.ilogb @@ -634,6 +636,7 @@ if(LIBC_TYPES_HAS_FLOAT128) libc.src.math.frexpf128 libc.src.math.fromfpf128 libc.src.math.fromfpxf128 + libc.src.math.getpayloadf128 libc.src.math.ilogbf128 libc.src.math.ldexpf128 libc.src.math.llogbf128 diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt index b102c3c219831d..90aae962080cd4 100644 --- a/libc/config/linux/arm/entrypoints.txt +++ b/libc/config/linux/arm/entrypoints.txt @@ -293,6 +293,8 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.fromfpx libc.src.math.fromfpxf libc.src.math.fromfpxl + libc.src.math.getpayload + libc.src.math.getpayloadf libc.src.math.hypot libc.src.math.hypotf libc.src.math.ilogb diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index afa7f8bdd48b06..60b5654c597a60 100644 --- a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -456,6 +456,8 @@ 
set(TARGET_LIBM_ENTRYPOINTS libc.src.math.fromfpxl libc.src.math.fsqrt libc.src.math.fsqrtl + libc.src.math.getpayload + libc.src.math.getpayloadf libc.src.math.hypot libc.src.math.hypotf libc.src.math.ilogb @@ -590,6 +592,7 @@ if(LIBC_TYPES_HAS_FLOAT128) libc.src.math.fromfpf128 libc.src.math.fromfpxf128 libc.src.math.fsqrtf128 + libc.src.math.getpayloadf128 libc.src.math.ilogbf128 libc.src.math.ldexpf128 libc.src.math.llogbf128 diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 84c52ef12f3dfc..a577bfa635b9cd 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -456,6 +456,8 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.fromfpxl libc.src.math.fsqrt libc.src.math.fsqrtl + libc.src.math.getpayload + libc.src.math.getpayloadf libc.src.math.hypot libc.src.math.hypotf libc.src.math.ilogb @@ -679,6 +681,7 @@ if(LIBC_TYPES_HAS_FLOAT128) libc.src.math.fromfpf128 libc.src.math.fromfpxf128 libc.src.math.fsqrtf128 + libc.src.math.getpayloadf128 libc.src.math.ilogbf128 libc.src.math.ldexpf128 libc.src.math.llogbf128 diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index 7ba2c4cb812dcf..bbe5b19a9da950 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -178,7 +178,7 @@ Basic Operations +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | fsub | N/A | | | N/A | | 7.12.14.2 | F.10.11 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| getpayload | | | | |check| | | F.10.13.1 | N/A | +| getpayload | |check| | |check| | | |check| | |check| | F.10.13.1 | N/A | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | ilogb | |check| | |check| | |check| | |check| | |check| | 7.12.6.8 | F.10.3.8 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index 6bb249f6bf7d73..fa536b220ed43a 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -736,7 +736,10 @@ def StdC : StandardSpec<"stdc"> { GuardedFunctionSpec<"totalordermagf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, GuardedFunctionSpec<"totalordermagf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, + FunctionSpec<"getpayload", RetValSpec, [ArgSpec]>, + FunctionSpec<"getpayloadf", RetValSpec, [ArgSpec]>, GuardedFunctionSpec<"getpayloadf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, + GuardedFunctionSpec<"getpayloadf128", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, FunctionSpec<"setpayload", RetValSpec, [ArgSpec, ArgSpec]>, FunctionSpec<"setpayloadf", RetValSpec, [ArgSpec, ArgSpec]>, diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index 42126e01898fee..bd022ad88d8845 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -260,7 +260,10 @@ add_math_entrypoint_object(fromfpxl) add_math_entrypoint_object(fromfpxf16) add_math_entrypoint_object(fromfpxf128) +add_math_entrypoint_object(getpayload) +add_math_entrypoint_object(getpayloadf) 
add_math_entrypoint_object(getpayloadf16) +add_math_entrypoint_object(getpayloadf128) add_math_entrypoint_object(hypot) add_math_entrypoint_object(hypotf) diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 96c8e3520d5e2f..927d97578316e3 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -4155,6 +4155,30 @@ add_entrypoint_object( -O3 ) +add_entrypoint_object( + getpayload + SRCS + getpayload.cpp + HDRS + ../getpayload.h + DEPENDS + libc.src.__support.FPUtil.basic_operations + COMPILE_OPTIONS + -O3 +) + +add_entrypoint_object( + getpayloadf + SRCS + getpayloadf.cpp + HDRS + ../getpayloadf.h + DEPENDS + libc.src.__support.FPUtil.basic_operations + COMPILE_OPTIONS + -O3 +) + add_entrypoint_object( getpayloadf16 SRCS @@ -4168,6 +4192,19 @@ add_entrypoint_object( -O3 ) +add_entrypoint_object( + getpayloadf128 + SRCS + getpayloadf128.cpp + HDRS + ../getpayloadf128.h + DEPENDS + libc.src.__support.macros.properties.types + libc.src.__support.FPUtil.basic_operations + COMPILE_OPTIONS + -O3 +) + add_entrypoint_object( setpayload SRCS diff --git a/libc/src/math/generic/getpayload.cpp b/libc/src/math/generic/getpayload.cpp new file mode 100644 index 00000000000000..14d95516c42c35 --- /dev/null +++ b/libc/src/math/generic/getpayload.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of getpayload function -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/getpayload.h" +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(double, getpayload, (const double *x)) { + return fputil::getpayload(*x); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/getpayloadf.cpp b/libc/src/math/generic/getpayloadf.cpp new file mode 100644 index 00000000000000..22db186a8c354d --- /dev/null +++ b/libc/src/math/generic/getpayloadf.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of getpayloadf function ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/getpayloadf.h" +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(float, getpayloadf, (const float *x)) { + return fputil::getpayload(*x); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/getpayloadf128.cpp b/libc/src/math/generic/getpayloadf128.cpp new file mode 100644 index 00000000000000..b57469eb7de911 --- /dev/null +++ b/libc/src/math/generic/getpayloadf128.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of getpayloadf128 function -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/getpayloadf128.h" +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(float128, getpayloadf128, (const float128 *x)) { + return fputil::getpayload(*x); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/getpayload.h b/libc/src/math/getpayload.h new file mode 100644 index 00000000000000..b00d313c4da500 --- /dev/null +++ b/libc/src/math/getpayload.h @@ -0,0 +1,20 @@ +//===-- Implementation header for getpayload --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_GETPAYLOAD_H +#define LLVM_LIBC_SRC_MATH_GETPAYLOAD_H + +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +double getpayload(const double *x); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_GETPAYLOAD_H diff --git a/libc/src/math/getpayloadf.h b/libc/src/math/getpayloadf.h new file mode 100644 index 00000000000000..20901cd8bd98a3 --- /dev/null +++ b/libc/src/math/getpayloadf.h @@ -0,0 +1,20 @@ +//===-- Implementation header for getpayloadf -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_GETPAYLOADF_H +#define LLVM_LIBC_SRC_MATH_GETPAYLOADF_H + +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +float getpayloadf(const float *x); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_GETPAYLOADF_H diff --git a/libc/src/math/getpayloadf128.h b/libc/src/math/getpayloadf128.h new file mode 100644 index 00000000000000..7ebb4290d4f8a3 --- /dev/null +++ b/libc/src/math/getpayloadf128.h @@ -0,0 +1,21 @@ +//===-- Implementation header for getpayloadf128 ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_GETPAYLOADF128_H +#define LLVM_LIBC_SRC_MATH_GETPAYLOADF128_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +float128 getpayloadf128(const float128 *x); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_GETPAYLOADF128_H diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index c8266042258488..faca71b8b5bc41 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -3814,6 +3814,30 @@ add_fp_unittest( libc.src.math.totalordermagf128 ) +add_fp_unittest( + getpayload_test + SUITE + libc-math-smoke-tests + SRCS + getpayload_test.cpp + HDRS + GetPayloadTest.h + DEPENDS + libc.src.math.getpayload +) + +add_fp_unittest( + getpayloadf_test + SUITE + libc-math-smoke-tests + SRCS + getpayloadf_test.cpp + HDRS + GetPayloadTest.h + DEPENDS + libc.src.math.getpayloadf +) + add_fp_unittest( getpayloadf16_test SUITE @@ -3826,6 +3850,18 @@ add_fp_unittest( libc.src.math.getpayloadf16 ) +add_fp_unittest( + getpayloadf128_test + SUITE + libc-math-smoke-tests + SRCS + getpayloadf128_test.cpp + HDRS + GetPayloadTest.h + DEPENDS + libc.src.math.getpayloadf128 +) + add_fp_unittest( setpayload_test SUITE diff --git a/libc/test/src/math/smoke/getpayload_test.cpp b/libc/test/src/math/smoke/getpayload_test.cpp new file mode 100644 index 00000000000000..f157d4572d09aa --- /dev/null +++ b/libc/test/src/math/smoke/getpayload_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for getpayload ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "GetPayloadTest.h" + +#include "src/math/getpayload.h" + +LIST_GETPAYLOAD_TESTS(double, LIBC_NAMESPACE::getpayload) diff --git a/libc/test/src/math/smoke/getpayloadf128_test.cpp b/libc/test/src/math/smoke/getpayloadf128_test.cpp new file mode 100644 index 00000000000000..37bb506a3ed164 --- /dev/null +++ b/libc/test/src/math/smoke/getpayloadf128_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for getpayloadf128 --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "GetPayloadTest.h" + +#include "src/math/getpayloadf128.h" + +LIST_GETPAYLOAD_TESTS(float128, LIBC_NAMESPACE::getpayloadf128) diff --git a/libc/test/src/math/smoke/getpayloadf_test.cpp b/libc/test/src/math/smoke/getpayloadf_test.cpp new file mode 100644 index 00000000000000..89ed02487df043 --- /dev/null +++ b/libc/test/src/math/smoke/getpayloadf_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for getpayloadf -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "GetPayloadTest.h" + +#include "src/math/getpayloadf.h" + +LIST_GETPAYLOAD_TESTS(float, LIBC_NAMESPACE::getpayloadf) From 430b90f04533b099d788db2668176038be38c53b Mon Sep 17 00:00:00 2001 From: Wu Yingcong Date: Thu, 1 Aug 2024 11:23:03 +0800 Subject: [PATCH 056/114] [nsan][NFC] Use cast when dyn_cast is not needed. (#101147) Use `cast` instead to replace `dyn_cast` when `dyn_cast` is not needed/not checked. --- .../Instrumentation/NumericalStabilitySanitizer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp index 832506f639a748..5872396669435a 100644 --- a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp @@ -1715,7 +1715,7 @@ Value *NumericalStabilitySanitizer::createShadowValueWithOperandsAvailable( Map.getShadow(BinOp->getOperand(1))); if (isa(&Inst) || isa(&Inst)) { - auto *Cast = dyn_cast(&Inst); + auto *Cast = cast(&Inst); return Builder.CreateCast(Cast->getOpcode(), Cast->getOperand(0), ExtendedVT); } @@ -2168,7 +2168,7 @@ bool NumericalStabilitySanitizer::sanitizeFunction( // The last pass populates shadow phis with shadow values. for (PHINode *Phi : OriginalPhis) { - PHINode *ShadowPhi = dyn_cast(ValueToShadow.getShadow(Phi)); + PHINode *ShadowPhi = cast(ValueToShadow.getShadow(Phi)); for (unsigned I : seq(Phi->getNumOperands())) { Value *V = Phi->getOperand(I); Value *Shadow = ValueToShadow.getShadow(V); From 27b608055f8e86e2decea519e6dc1ab6aff4824e Mon Sep 17 00:00:00 2001 From: Pengcheng Wang Date: Thu, 1 Aug 2024 12:24:25 +0800 Subject: [PATCH 057/114] [RISCV] Increase default tail duplication threshold to 6 at -O3 (#98873) This is just like AArch64. Changing the threshold to 6 will increase the code size, but will also decrease unconditional branches. CPUs with wide fetch/issue units can benefit from it. The value 6 may be debatable, we can set it to `SchedModel.IssueWidth`. --- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 6 ++ llvm/lib/Target/RISCV/RISCVInstrInfo.h | 2 + llvm/lib/Target/RISCV/RISCVProcessors.td | 5 +- llvm/lib/Target/RISCV/RISCVSubtarget.h | 7 ++ .../test/CodeGen/RISCV/riscv-tail-dup-size.ll | 79 +++++++++++++++++++ 5 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 0620c3fc12adc7..9dd79027d7a162 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -3763,6 +3763,12 @@ RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const { return ArrayRef(TargetFlags); } +unsigned RISCVInstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const { + return OptLevel >= CodeGenOptLevel::Aggressive + ? STI.getTailDupAggressiveThreshold() + : 2; +} + // Returns true if this is the sext.w pattern, addiw rd, rs1, 0. 
bool RISCV::isSEXT_W(const MachineInstr &MI) { return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() && diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 025e12d81e60d7..1612f56a8b5067 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -288,6 +288,8 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { ArrayRef> getSerializableMachineMemOperandTargetFlags() const override; + unsigned getTailDuplicateSize(CodeGenOptLevel OptLevel) const override; + unsigned getUndefInitOpcode(unsigned RegClassID) const override { switch (RegClassID) { case RISCV::VRRegClassID: diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index 1729bc0282f51c..25b24980e0bf65 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -21,6 +21,9 @@ class RISCVTuneInfo { bits<32> MaxPrefetchIterationsAhead = -1; bits<32> MinimumJumpTableEntries = 5; + + // Tail duplication threshold at -O3. + bits<32> TailDupAggressiveThreshold = 6; } def RISCVTuneInfoTable : GenericTable { @@ -29,7 +32,7 @@ def RISCVTuneInfoTable : GenericTable { let Fields = ["Name", "PrefFunctionAlignment", "PrefLoopAlignment", "CacheLineSize", "PrefetchDistance", "MinPrefetchStride", "MaxPrefetchIterationsAhead", - "MinimumJumpTableEntries"]; + "MinimumJumpTableEntries", "TailDupAggressiveThreshold"]; } def getRISCVTuneInfo : SearchIndex { diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index d38952e5196f07..ea54ff1df0b7cb 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -50,6 +50,9 @@ struct RISCVTuneInfo { unsigned MaxPrefetchIterationsAhead; unsigned MinimumJumpTableEntries; + + // Tail duplication threshold at -O3. 
+ unsigned TailDupAggressiveThreshold; }; #define GET_RISCVTuneInfoTable_DECL @@ -300,6 +303,10 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { unsigned getMinimumJumpTableEntries() const; + unsigned getTailDupAggressiveThreshold() const { + return TuneInfo->TailDupAggressiveThreshold; + } + bool supportsInitUndef() const override { return hasVInstructions(); } }; } // End llvm namespace diff --git a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll new file mode 100644 index 00000000000000..0508016736004e --- /dev/null +++ b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+m -O2 < %s | FileCheck %s --check-prefix=CHECK-O2 +; RUN: llc -mtriple=riscv64 -mattr=+m -O3 < %s | FileCheck %s --check-prefix=CHECK-O3 + +; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-size=2 < %s | FileCheck %s --check-prefix=CHECK-O2 +; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=2 < %s | FileCheck %s --check-prefix=CHECK-O2 +; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=6 < %s | FileCheck %s --check-prefix=CHECK-O3 + +@a = external global i32 +@b = external global i32 +@c = external global i32 + +declare i32 @foo(i32) + +define i32 @test(i32 %n) { +; CHECK-O2-LABEL: test: +; CHECK-O2: # %bb.0: # %entry +; CHECK-O2-NEXT: sext.w a1, a0 +; CHECK-O2-NEXT: blez a1, .LBB0_2 +; CHECK-O2-NEXT: # %bb.1: # %if.then +; CHECK-O2-NEXT: lui a1, %hi(a) +; CHECK-O2-NEXT: lw a1, %lo(a)(a1) +; CHECK-O2-NEXT: mul a0, a1, a0 +; CHECK-O2-NEXT: j .LBB0_3 +; CHECK-O2-NEXT: .LBB0_2: # %if.else +; CHECK-O2-NEXT: lui a1, %hi(b) +; CHECK-O2-NEXT: lw a1, %lo(b)(a1) +; CHECK-O2-NEXT: divw a0, a1, a0 +; CHECK-O2-NEXT: .LBB0_3: # %if.end +; CHECK-O2-NEXT: lui a1, %hi(c) +; CHECK-O2-NEXT: lw a1, %lo(c)(a1) +; CHECK-O2-NEXT: addi a0, a0, -1 +; CHECK-O2-NEXT: mulw a0, a0, a1 +; CHECK-O2-NEXT: tail foo +; +; CHECK-O3-LABEL: test: +; CHECK-O3: # %bb.0: # %entry +; CHECK-O3-NEXT: sext.w a1, a0 +; CHECK-O3-NEXT: blez a1, .LBB0_2 +; CHECK-O3-NEXT: # %bb.1: # %if.then +; CHECK-O3-NEXT: lui a1, %hi(a) +; CHECK-O3-NEXT: lw a1, %lo(a)(a1) +; CHECK-O3-NEXT: mul a0, a1, a0 +; CHECK-O3-NEXT: lui a1, %hi(c) +; CHECK-O3-NEXT: lw a1, %lo(c)(a1) +; CHECK-O3-NEXT: addi a0, a0, -1 +; CHECK-O3-NEXT: mulw a0, a0, a1 +; CHECK-O3-NEXT: tail foo +; CHECK-O3-NEXT: .LBB0_2: # %if.else +; CHECK-O3-NEXT: lui a1, %hi(b) +; CHECK-O3-NEXT: lw a1, %lo(b)(a1) +; CHECK-O3-NEXT: divw a0, a1, a0 +; CHECK-O3-NEXT: lui a1, %hi(c) +; CHECK-O3-NEXT: lw a1, %lo(c)(a1) +; CHECK-O3-NEXT: addi a0, a0, -1 +; CHECK-O3-NEXT: mulw a0, a0, a1 +; CHECK-O3-NEXT: tail foo +entry: + %cmp = icmp sgt i32 %n, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %va = load i32, ptr @a + %mul = mul nsw i32 %va, %n + br label %if.end + +if.else: + %vb = load i32, ptr @b + %div = sdiv i32 %vb, %n + br label %if.end + +if.end: + %phi = phi i32 [ %mul, %if.then ], [ %div, %if.else ] + %vc = load i32, ptr @c + %add = add nsw i32 %phi, -1 + %arg = mul i32 %add, %vc + %ret = tail call i32 @foo(i32 %arg) + ret i32 %ret +} From 991a6215a9ccd99eb91d2b2d46b58c2fd648c263 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 31 Jul 2024 21:53:31 -0700 Subject: [PATCH 058/114] [TargetLowering] Remove weird use of MVT::isVoid in an assert. (#101436) At the time this was written there were no vector types in MVT. 
The order was:
-scalar integer types
-scalar FP types
-isVoid

I believe this isVoid check was there to catch walking off the end of the
scalar FP types, while the isInteger() == isInteger() check caught walking off
the end of the scalar integer types.

These days we have:
-scalar integer types
-scalar FP types
-fixed vector integer types
-fixed vector FP types
-scalable vector integer types
-scalable vector FP types
-Glue
-isVoid

So checking isVoid doesn't detect what it used to. I've changed it to check
isFloatingPoint() == isFloatingPoint() instead.
---
 llvm/include/llvm/CodeGen/TargetLowering.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 9d9886f4920a29..9ccdbab008aec8 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1640,7 +1640,8 @@ class TargetLoweringBase {
     MVT NVT = VT;
     do {
       NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1);
-      assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid &&
+      assert(NVT.isInteger() == VT.isInteger() &&
+             NVT.isFloatingPoint() == VT.isFloatingPoint() &&
             "Didn't find type to promote to!");
     } while (VTBits >= NVT.getScalarSizeInBits() || !isTypeLegal(NVT) ||
              getOperationAction(Op, NVT) == Promote);

From fb97b4f96217442c684a940558135ffbfe45b756 Mon Sep 17 00:00:00 2001
From: Amir Ayupov
Date: Wed, 31 Jul 2024 22:12:34 -0700
Subject: [PATCH 059/114] [BOLT][NFC] Add timers for MetadataManager invocations

Test Plan: added bolt/test/timers.c
Reviewers: ayermolo, maksfb, rafaelauler, dcci
Reviewed By: dcci
Pull Request: https://github.com/llvm/llvm-project/pull/101267
---
 bolt/lib/Rewrite/RewriteInstance.cpp | 10 ++++++++++
 bolt/test/timers.c                   | 15 +++++++++++++++
 2 files changed, 25 insertions(+)
 create mode 100644 bolt/test/timers.c

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 33ebae3b6e6de2..b7e361c35088a2 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -3131,18 +3131,24 @@ void RewriteInstance::initializeMetadataManager() {
 }
 
 void RewriteInstance::processSectionMetadata() {
+  NamedRegionTimer T("processmetadata-section", "process section metadata",
+                     TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
   initializeMetadataManager();
 
   MetadataManager.runSectionInitializers();
 }
 
 void RewriteInstance::processMetadataPreCFG() {
+  NamedRegionTimer T("processmetadata-precfg", "process metadata pre-CFG",
+                     TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
   MetadataManager.runInitializersPreCFG();
 
   processProfileDataPreCFG();
 }
 
 void RewriteInstance::processMetadataPostCFG() {
+  NamedRegionTimer T("processmetadata-postcfg", "process metadata post-CFG",
+                     TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
   MetadataManager.runInitializersPostCFG();
 }
 
@@ -3536,10 +3542,14 @@ void RewriteInstance::emitAndLink() {
 }
 
 void RewriteInstance::finalizeMetadataPreEmit() {
+  NamedRegionTimer T("finalizemetadata-preemit", "finalize metadata pre-emit",
+                     TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
   MetadataManager.runFinalizersPreEmit();
 }
 
 void RewriteInstance::updateMetadata() {
+  NamedRegionTimer T("updatemetadata-postemit", "update metadata post-emit",
+                     TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
   MetadataManager.runFinalizersAfterEmit();
 
   if (opts::UpdateDebugSections) {
diff --git a/bolt/test/timers.c b/bolt/test/timers.c
new file mode 100644
index 00000000000000..b16218dd7ea76d
--- /dev/null
+++ b/bolt/test/timers.c
@@ -0,0 +1,15 @@ +/* This test checks timers for metadata manager phases. +# RUN: %clang %cflags %s -o %t.exe +# RUN: link_fdata %s %t.exe %t.fdata +# RUN: llvm-bolt %t.exe -o %t.null --data %t.fdata -w %t.yaml --time-rewrite \ +# RUN: 2>&1 | FileCheck %s + +# CHECK-DAG: update metadata post-emit +# CHECK-DAG: process section metadata +# CHECK-DAG: process metadata pre-CFG +# CHECK-DAG: process metadata post-CFG +# CHECK-DAG: finalize metadata pre-emit + +# FDATA: 0 [unknown] 0 1 main 0 1 0 +*/ +int main() { return 0; } From 3f51bec466c4b67814a7877859ba3eeb5f80da7a Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Wed, 31 Jul 2024 22:14:52 -0700 Subject: [PATCH 060/114] [BOLT][NFC] Print timers in perf2bolt invocation When BOLT is run in AggregateOnly mode (perf2bolt), it exits with code zero so destructors are not run thus TimerGroup never prints the timers. Add explicit printing just before the exit to honor options requesting timers (`--time-rewrite`, `--time-aggr`). Test Plan: updated bolt/test/timers.c Reviewers: ayermolo, maksfb, rafaelauler, dcci Reviewed By: dcci Pull Request: https://github.com/llvm/llvm-project/pull/101270 --- bolt/lib/Rewrite/RewriteInstance.cpp | 1 + bolt/test/timers.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index b7e361c35088a2..9077869fe4955b 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -3200,6 +3200,7 @@ void RewriteInstance::processProfileData() { if (opts::AggregateOnly) { PrintProgramStats PPS(&*BAT); BC->logBOLTErrorsAndQuitOnFatal(PPS.runOnFunctions(*BC)); + TimerGroup::printAll(outs()); exit(0); } } diff --git a/bolt/test/timers.c b/bolt/test/timers.c index b16218dd7ea76d..a34958a2a15e96 100644 --- a/bolt/test/timers.c +++ b/bolt/test/timers.c @@ -3,6 +3,9 @@ # RUN: link_fdata %s %t.exe %t.fdata # RUN: llvm-bolt %t.exe -o %t.null --data %t.fdata -w %t.yaml --time-rewrite \ # RUN: 2>&1 | FileCheck %s +# RUN: link_fdata %s %t.exe %t.preagg PREAGG +# RUN: perf2bolt %t.exe -o %t.null -p %t.preagg --pa --time-rewrite \ +# RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-P2B # CHECK-DAG: update metadata post-emit # CHECK-DAG: process section metadata @@ -10,6 +13,10 @@ # CHECK-DAG: process metadata post-CFG # CHECK-DAG: finalize metadata pre-emit +# CHECK-P2B-DAG: process section metadata +# CHECK-P2B-DAG: process metadata pre-CFG + # FDATA: 0 [unknown] 0 1 main 0 1 0 +# PREAGG: B X:0 #main# 1 0 */ int main() { return 0; } From 9d068f7137a2ad732d008df46eb71aadb0cd8a8e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 31 Jul 2024 22:25:19 -0700 Subject: [PATCH 061/114] [RISCV] Remove Zfbfmin from some vector test RUN lines. 
NFC --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp-bf16.ll | 4 ++-- llvm/test/CodeGen/RISCV/rvv/vle.ll | 4 ++-- llvm/test/CodeGen/RISCV/rvv/vse.ll | 4 ++-- llvm/test/CodeGen/RISCV/rvv/vselect-vp-bf16.ll | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp-bf16.ll index a0301bbf4f7468..31ab6699d7c511 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp-bf16.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp-bf16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+v,+m,+zfbfmin,+zvfbfmin -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+v,+m,+zvfbfmin -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+v,+m,+zfbfmin,+zvfbfmin -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+v,+m,+zvfbfmin -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare <2 x bfloat> @llvm.vp.select.v2bf16(<2 x i1>, <2 x bfloat>, <2 x bfloat>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vle.ll b/llvm/test/CodeGen/RISCV/rvv/vle.ll index 4f2e490f36ec89..a16792235f1ba4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vle.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vle.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,,+zfbfmin,+zvfbfmin \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,,+zfbfmin,+zvfbfmin \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vle.nxv1i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vse.ll b/llvm/test/CodeGen/RISCV/rvv/vse.ll index 6b404db77e3a30..556b7702649db4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vse.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare void @llvm.riscv.vse.nxv1i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp-bf16.ll index bf1f52050ebf95..76fd1e1d8293f6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp-bf16.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp-bf16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+v,+zfbfmin,+zvfbfmin -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+v,+zvfbfmin -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+v,+zfbfmin,+zvfbfmin -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+v,+zvfbfmin 
-target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.vp.select.nxv1bf16(, , , i32) From 86815a1842d308521f46048bb9ed08e47c0d8357 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 1 Aug 2024 09:42:04 +0400 Subject: [PATCH 062/114] AMDGPU/GlobalISel: Permit mapping G_FRAME_INDEX to sgprs (#101325) eliminateFrameIndex should now properly handle materializing frame indices in SGPRs, so treat this like the other constant operand types. On average this will produce worse code; we need to detect VGPR uses, and improve SGPR->VGPR frame index folds. --- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 8 +- .../GlobalISel/crash-stack-address-O0.ll | 6 +- .../GlobalISel/flat-scratch-init.gfx.ll | 14 +- .../CodeGen/AMDGPU/GlobalISel/flat-scratch.ll | 131 ++++++----- .../GlobalISel/insertelement-stack-lower.ll | 14 +- .../GlobalISel/regbankselect-frame-index.mir | 43 +++- .../codegen-prepare-addrspacecast-non-null.ll | 36 ++- llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll | 215 ++++++++++-------- 8 files changed, 266 insertions(+), 201 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 8da8c94b4d665c..9a6ba5ac680846 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4060,6 +4060,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_FCONSTANT: case AMDGPU::G_CONSTANT: case AMDGPU::G_GLOBAL_VALUE: + case AMDGPU::G_FRAME_INDEX: case AMDGPU::G_BLOCK_ADDR: case AMDGPU::G_READSTEADYCOUNTER: case AMDGPU::G_READCYCLECOUNTER: { @@ -4067,13 +4068,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); break; } - case AMDGPU::G_FRAME_INDEX: { - // TODO: This should be the same as other constants, but eliminateFrameIndex - // currently assumes VALU uses. - unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); - break; - } case AMDGPU::G_DYN_STACKALLOC: { // Result is always uniform, and a wave reduction is needed for the source. 
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll index 48916d8d9b2c5a..84378bcb706846 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll @@ -10,9 +10,11 @@ define amdgpu_kernel void @stack_write_fi() { ; CHECK-NEXT: s_add_u32 s0, s0, s15 ; CHECK-NEXT: s_addc_u32 s1, s1, 0 ; CHECK-NEXT: s_mov_b32 s5, 0 +; CHECK-NEXT: s_mov_b32 s6, 0 ; CHECK-NEXT: s_mov_b32 s4, 0 -; CHECK-NEXT: v_mov_b32_e32 v0, s5 -; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; CHECK-NEXT: v_mov_b32_e32 v0, s6 +; CHECK-NEXT: v_mov_b32_e32 v1, s5 +; CHECK-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_mov_b32_e32 v0, s4 ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll index b4b95fdab4ab25..4fdb4082346af6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll @@ -10,11 +10,13 @@ define amdgpu_ps void @amdgpu_ps() { ; MESA-LABEL: amdgpu_ps: ; MESA: ; %bb.0: ; MESA-NEXT: s_add_u32 flat_scratch_lo, s2, s4 -; MESA-NEXT: s_mov_b64 s[0:1], src_private_base ; MESA-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; MESA-NEXT: v_mov_b32_e32 v0, 0 -; MESA-NEXT: v_mov_b32_e32 v1, s1 +; MESA-NEXT: s_mov_b32 s0, 0 +; MESA-NEXT: s_mov_b64 s[2:3], src_private_base +; MESA-NEXT: s_mov_b32 s1, s3 +; MESA-NEXT: v_mov_b32_e32 v0, s0 ; MESA-NEXT: v_mov_b32_e32 v2, 0 +; MESA-NEXT: v_mov_b32_e32 v1, s1 ; MESA-NEXT: flat_store_dword v[0:1], v2 ; MESA-NEXT: s_waitcnt vmcnt(0) ; MESA-NEXT: s_endpgm @@ -24,13 +26,15 @@ define amdgpu_ps void @amdgpu_ps() { ; PAL-NEXT: s_getpc_b64 s[2:3] ; PAL-NEXT: s_mov_b32 s2, s0 ; PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 -; PAL-NEXT: v_mov_b32_e32 v0, 0 ; PAL-NEXT: v_mov_b32_e32 v2, 0 ; PAL-NEXT: s_waitcnt lgkmcnt(0) ; PAL-NEXT: s_and_b32 s3, s3, 0xffff ; PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s0 -; PAL-NEXT: s_mov_b64 s[0:1], src_private_base ; PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 +; PAL-NEXT: s_mov_b32 s0, 0 +; PAL-NEXT: s_mov_b64 s[2:3], src_private_base +; PAL-NEXT: s_mov_b32 s1, s3 +; PAL-NEXT: v_mov_b32_e32 v0, s0 ; PAL-NEXT: v_mov_b32_e32 v1, s1 ; PAL-NEXT: flat_store_dword v[0:1], v2 ; PAL-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll index a5e4151bf36958..f4fd803c8dda89 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll @@ -55,41 +55,40 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { ; GFX940-NEXT: s_lshl_b32 s0, s0, 2 ; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NEXT: scratch_load_dword v0, v0, off sc0 sc1 +; GFX940-NEXT: s_add_i32 s0, s0, 0 +; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: s_endpgm ; ; GFX11-LABEL: store_load_sindex_kernel: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_load_b32 s0, s[2:3], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v0, 15 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_and_b32 s1, s0, 15 ; GFX11-NEXT: s_lshl_b32 s0, 
s0, 2 ; GFX11-NEXT: s_lshl_b32 s1, s1, 2 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: v_dual_mov_b32 v0, 15 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: s_add_i32 s0, s0, 0 +; GFX11-NEXT: s_add_i32 s1, s1, 0 ; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: scratch_load_b32 v0, v1, off glc dlc +; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: store_load_sindex_kernel: ; GFX12: ; %bb.0: ; %bb ; GFX12-NEXT: s_load_b32 s0, s[2:3], 0x24 -; GFX12-NEXT: v_mov_b32_e32 v1, 15 +; GFX12-NEXT: v_mov_b32_e32 v0, 15 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_lshl_b32 s1, s0, 2 -; GFX12-NEXT: s_and_b32 s0, s0, 15 -; GFX12-NEXT: v_mov_b32_e32 v0, s1 +; GFX12-NEXT: s_and_b32 s1, s0, 15 ; GFX12-NEXT: s_lshl_b32 s0, s0, 2 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-NEXT: scratch_store_b32 v0, v1, off scope:SCOPE_SYS +; GFX12-NEXT: s_lshl_b32 s1, s1, 2 +; GFX12-NEXT: s_add_co_i32 s0, s0, 0 +; GFX12-NEXT: s_add_co_i32 s1, s1, 0 +; GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS ; GFX12-NEXT: s_wait_storecnt 0x0 -; GFX12-NEXT: scratch_load_b32 v0, v2, off scope:SCOPE_SYS +; GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_endpgm bb: @@ -378,44 +377,44 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) { ; GFX940-NEXT: s_lshl_b32 s0, s0, 2 ; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NEXT: scratch_load_dword v0, v0, off offset:256 sc0 sc1 +; GFX940-NEXT: s_addk_i32 s0, 0x100 +; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: s_endpgm ; ; GFX11-LABEL: store_load_sindex_small_offset_kernel: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_load_b32 s0, s[2:3], 0x24 -; GFX11-NEXT: scratch_load_b32 v2, off, off glc dlc -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: scratch_load_b32 v0, off, off glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_mov_b32_e32 v0, 15 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_and_b32 s1, s0, 15 ; GFX11-NEXT: s_lshl_b32 s0, s0, 2 ; GFX11-NEXT: s_lshl_b32 s1, s1, 2 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: v_dual_mov_b32 v0, 15 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: s_addk_i32 s0, 0x100 +; GFX11-NEXT: s_addk_i32 s1, 0x100 ; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: scratch_load_b32 v0, v1, off offset:256 glc dlc +; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: store_load_sindex_small_offset_kernel: ; GFX12: ; %bb.0: ; %bb ; GFX12-NEXT: s_load_b32 s0, s[2:3], 0x24 -; GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS +; GFX12-NEXT: scratch_load_b32 v0, off, off scope:SCOPE_SYS ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_mov_b32_e32 v1, 15 +; GFX12-NEXT: v_mov_b32_e32 v0, 15 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_lshl_b32 s1, s0, 2 -; GFX12-NEXT: s_and_b32 s0, s0, 15 -; GFX12-NEXT: v_mov_b32_e32 v0, s1 +; GFX12-NEXT: s_and_b32 s1, s0, 15 ; GFX12-NEXT: s_lshl_b32 s0, s0, 2 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:256 scope:SCOPE_SYS +; GFX12-NEXT: s_lshl_b32 s1, s1, 2 +; 
GFX12-NEXT: s_addk_co_i32 s0, 0x100 +; GFX12-NEXT: s_addk_co_i32 s1, 0x100 +; GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS ; GFX12-NEXT: s_wait_storecnt 0x0 -; GFX12-NEXT: scratch_load_b32 v0, v2, off offset:256 scope:SCOPE_SYS +; GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_endpgm bb: @@ -692,46 +691,44 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) { ; GFX940-NEXT: s_lshl_b32 s0, s0, 2 ; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NEXT: s_movk_i32 s0, 0x4004 -; GFX940-NEXT: scratch_load_dword v0, v0, s0 sc0 sc1 +; GFX940-NEXT: s_addk_i32 s0, 0x4004 +; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: s_endpgm ; ; GFX11-LABEL: store_load_sindex_large_offset_kernel: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_load_b32 s0, s[2:3], 0x24 -; GFX11-NEXT: scratch_load_b32 v2, off, off offset:4 glc dlc -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: scratch_load_b32 v0, off, off offset:4 glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_mov_b32_e32 v0, 15 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_and_b32 s1, s0, 15 ; GFX11-NEXT: s_lshl_b32 s0, s0, 2 ; GFX11-NEXT: s_lshl_b32 s1, s1, 2 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: v_dual_mov_b32 v0, 15 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: s_addk_i32 s0, 0x4004 +; GFX11-NEXT: s_addk_i32 s1, 0x4004 ; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_movk_i32 s0, 0x4004 -; GFX11-NEXT: scratch_load_b32 v0, v1, s0 glc dlc +; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: store_load_sindex_large_offset_kernel: ; GFX12: ; %bb.0: ; %bb ; GFX12-NEXT: s_load_b32 s0, s[2:3], 0x24 -; GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS +; GFX12-NEXT: scratch_load_b32 v0, off, off scope:SCOPE_SYS ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_mov_b32_e32 v1, 15 +; GFX12-NEXT: v_mov_b32_e32 v0, 15 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_lshl_b32 s1, s0, 2 -; GFX12-NEXT: s_and_b32 s0, s0, 15 -; GFX12-NEXT: v_mov_b32_e32 v0, s1 +; GFX12-NEXT: s_and_b32 s1, s0, 15 ; GFX12-NEXT: s_lshl_b32 s0, s0, 2 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: v_mov_b32_e32 v2, s0 -; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:16384 scope:SCOPE_SYS +; GFX12-NEXT: s_lshl_b32 s1, s1, 2 +; GFX12-NEXT: s_addk_co_i32 s0, 0x4000 +; GFX12-NEXT: s_addk_co_i32 s1, 0x4000 +; GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS ; GFX12-NEXT: s_wait_storecnt 0x0 -; GFX12-NEXT: scratch_load_b32 v0, v2, off offset:16384 scope:SCOPE_SYS +; GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_endpgm bb: @@ -995,25 +992,28 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; GFX940-LABEL: store_load_large_imm_offset_kernel: ; GFX940: ; %bb.0: ; %bb ; GFX940-NEXT: v_mov_b32_e32 v0, 13 +; GFX940-NEXT: s_movk_i32 s0, 0x3e80 ; GFX940-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0x3e80 -; GFX940-NEXT: v_mov_b32_e32 v1, 15 -; GFX940-NEXT: scratch_store_dword v0, v1, off offset:4 sc0 sc1 +; GFX940-NEXT: v_mov_b32_e32 v0, 15 +; GFX940-NEXT: s_add_i32 s0, s0, 4 +; GFX940-NEXT: scratch_store_dword off, 
v0, s0 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: scratch_load_dword v0, v0, off offset:4 sc0 sc1 +; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: s_endpgm ; ; GFX11-LABEL: store_load_large_imm_offset_kernel: ; GFX11: ; %bb.0: ; %bb -; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 0x3e80 -; GFX11-NEXT: v_mov_b32_e32 v2, 15 +; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15 +; GFX11-NEXT: s_movk_i32 s0, 0x3e80 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_add_i32 s0, s0, 4 ; GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: scratch_store_b32 v1, v2, off offset:4 dlc +; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: scratch_load_b32 v0, v1, off offset:4 glc dlc +; GFX11-NEXT: scratch_load_b32 v0, off, s0 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_endpgm ; @@ -1075,26 +1075,31 @@ define void @store_load_large_imm_offset_foo() { ; GFX940: ; %bb.0: ; %bb ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_mov_b32_e32 v0, 13 +; GFX940-NEXT: s_movk_i32 s0, 0x3e80 +; GFX940-NEXT: s_add_i32 s1, s32, 4 ; GFX940-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0x3e80 -; GFX940-NEXT: v_mov_b32_e32 v1, 15 -; GFX940-NEXT: scratch_store_dword v0, v1, s32 offset:4 sc0 sc1 +; GFX940-NEXT: v_mov_b32_e32 v0, 15 +; GFX940-NEXT: s_add_i32 s0, s0, s1 +; GFX940-NEXT: scratch_store_dword off, v0, s0 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: scratch_load_dword v0, v0, s32 offset:4 sc0 sc1 +; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: store_load_large_imm_offset_foo: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 0x3e80 -; GFX11-NEXT: v_mov_b32_e32 v2, 15 +; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15 +; GFX11-NEXT: s_movk_i32 s0, 0x3e80 +; GFX11-NEXT: s_add_i32 s1, s32, 4 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_add_i32 s0, s0, s1 ; GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: scratch_store_b32 v1, v2, s32 offset:4 dlc +; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: scratch_load_b32 v0, v1, s32 offset:4 glc dlc +; GFX11-NEXT: scratch_load_b32 v0, off, s0 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll index db944b98a30135..4fcde0f2fc7cf1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll @@ -11,12 +11,11 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(ptr addrspace(1) %out.ptr, ptr ; GCN-NEXT: s_load_dwordx2 s[24:25], s[6:7], 0x10 ; GCN-NEXT: s_add_u32 s0, s0, s13 ; GCN-NEXT: s_addc_u32 s1, s1, 0 -; GCN-NEXT: v_mov_b32_e32 v16, 0 +; GCN-NEXT: v_mov_b32_e32 v64, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_load_dwordx16 s[36:51], s[22:23], 0x0 ; GCN-NEXT: s_load_dwordx16 s[52:67], s[22:23], 0x40 ; GCN-NEXT: s_load_dwordx16 s[4:19], s[22:23], 0x80 -; 
GCN-NEXT: v_mov_b32_e32 v64, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v0, s36 ; GCN-NEXT: v_mov_b32_e32 v1, s37 @@ -143,16 +142,17 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(ptr addrspace(1) %out.ptr, ptr ; GCN-NEXT: v_mov_b32_e32 v0, s48 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:240 ; GCN-NEXT: v_mov_b32_e32 v0, s49 +; GCN-NEXT: s_and_b32 s4, s25, 63 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:244 ; GCN-NEXT: v_mov_b32_e32 v0, s50 -; GCN-NEXT: s_and_b32 s4, s25, 63 +; GCN-NEXT: s_lshl_b32 s4, s4, 2 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:248 ; GCN-NEXT: v_mov_b32_e32 v0, s51 -; GCN-NEXT: s_lshl_b32 s4, s4, 2 +; GCN-NEXT: s_add_u32 s4, 0, s4 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:252 -; GCN-NEXT: v_add_u32_e32 v0, s4, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s24 -; GCN-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v0, s24 +; GCN-NEXT: v_mov_b32_e32 v1, s4 +; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 ; GCN-NEXT: s_nop 0 ; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frame-index.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frame-index.mir index 14f69c301ec387..76994c5cccf5fb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frame-index.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frame-index.mir @@ -2,22 +2,45 @@ # RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s # RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s ---- | - target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" - define void @test_frame_index_p5() { - %ptr0 = alloca i32, addrspace(5) - ret void - } -... --- name: test_frame_index_p5 legalized: true stack: - - { id: 0, name: ptr0, offset: 0, size: 4, alignment: 4 } + - { id: 0, offset: 0, size: 4, alignment: 4 } body: | bb.0: ; CHECK-LABEL: name: test_frame_index_p5 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:vgpr(p5) = G_FRAME_INDEX %stack.0.ptr0 - %0:_(p5) = G_FRAME_INDEX %stack.0.ptr0 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:sgpr(p5) = G_FRAME_INDEX %stack.0 + %0:_(p5) = G_FRAME_INDEX %stack.0 + +... + +--- +name: test_frame_index_p5_sgpr_use +legalized: true +stack: + - { id: 0, offset: 0, size: 4, alignment: 4 } +body: | + bb.0: + ; CHECK-LABEL: name: test_frame_index_p5_sgpr_use + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:sgpr(p5) = G_FRAME_INDEX %stack.0 + ; CHECK-NEXT: $sgpr0 = COPY [[FRAME_INDEX]](p5) + %0:_(p5) = G_FRAME_INDEX %stack.0 + $sgpr0 = COPY %0 + +... + +--- +name: test_frame_index_p5_vgpr_use +legalized: true +stack: + - { id: 0, offset: 0, size: 4, alignment: 4 } +body: | + bb.0: + ; CHECK-LABEL: name: test_frame_index_p5_vgpr_use + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:sgpr(p5) = G_FRAME_INDEX %stack.0 + ; CHECK-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5) + %0:_(p5) = G_FRAME_INDEX %stack.0 + $vgpr0 = COPY %0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll index e2b4865410db81..3216e71e6221ae 100644 --- a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll +++ b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll @@ -96,16 +96,29 @@ define void @private_alloca_to_flat(ptr %ptr) { ; OPT-NEXT: store volatile i32 7, ptr [[TMP1]], align 4 ; OPT-NEXT: ret void ; -; ASM-LABEL: private_alloca_to_flat: -; ASM: ; %bb.0: -; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; ASM-NEXT: s_mov_b64 s[4:5], src_private_base -; ASM-NEXT: v_lshrrev_b32_e64 v0, 6, s32 -; ASM-NEXT: v_mov_b32_e32 v1, s5 -; ASM-NEXT: v_mov_b32_e32 v2, 7 -; ASM-NEXT: flat_store_dword v[0:1], v2 -; ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; ASM-NEXT: s_setpc_b64 s[30:31] +; DAGISEL-ASM-LABEL: private_alloca_to_flat: +; DAGISEL-ASM: ; %bb.0: +; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-ASM-NEXT: s_mov_b64 s[4:5], src_private_base +; DAGISEL-ASM-NEXT: v_lshrrev_b32_e64 v0, 6, s32 +; DAGISEL-ASM-NEXT: v_mov_b32_e32 v1, s5 +; DAGISEL-ASM-NEXT: v_mov_b32_e32 v2, 7 +; DAGISEL-ASM-NEXT: flat_store_dword v[0:1], v2 +; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; DAGISEL-ASM-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-ASM-LABEL: private_alloca_to_flat: +; GISEL-ASM: ; %bb.0: +; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-ASM-NEXT: s_lshr_b32 s4, s32, 6 +; GISEL-ASM-NEXT: s_mov_b64 s[6:7], src_private_base +; GISEL-ASM-NEXT: s_mov_b32 s5, s7 +; GISEL-ASM-NEXT: v_mov_b32_e32 v0, s4 +; GISEL-ASM-NEXT: v_mov_b32_e32 v2, 7 +; GISEL-ASM-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-ASM-NEXT: flat_store_dword v[0:1], v2 +; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GISEL-ASM-NEXT: s_setpc_b64 s[30:31] %alloca = alloca i8, addrspace(5) %x = addrspacecast ptr addrspace(5) %alloca to ptr store volatile i32 7, ptr %x @@ -224,8 +237,9 @@ define void @recursive_phis(i1 %cond, ptr addrspace(5) %ptr) { ; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-ASM-NEXT: v_and_b32_e32 v0, 1, v0 ; GISEL-ASM-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GISEL-ASM-NEXT: s_lshr_b32 s6, s32, 6 ; GISEL-ASM-NEXT: s_xor_b64 s[4:5], vcc, -1 -; GISEL-ASM-NEXT: v_lshrrev_b32_e64 v0, 6, s32 +; GISEL-ASM-NEXT: v_mov_b32_e32 v0, s6 ; GISEL-ASM-NEXT: s_and_saveexec_b64 s[6:7], vcc ; GISEL-ASM-NEXT: ; %bb.1: ; %then ; GISEL-ASM-NEXT: v_and_b32_e32 v0, 0xffff, v1 diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll index 087d38ce7b0046..89da9b8e75bc9c 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll @@ -37,17 +37,17 @@ define amdgpu_kernel void @soff1_voff1(i32 %soff) { ; GFX940-GISEL-LABEL: soff1_voff1: ; GFX940-GISEL: ; %bb.0: ; %bb ; GFX940-GISEL-NEXT: s_load_dword s0, s[2:3], 0x24 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 +; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1 ; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 1, v0 -; GFX940-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 +; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX940-GISEL-NEXT: v_add_u32_e32 v3, 2, v0 +; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 ; 
GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 2, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 2 -; GFX940-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 +; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2 +; GFX940-GISEL-NEXT: scratch_store_byte v3, v1, off sc0 sc1 ; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX940-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 ; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 @@ -78,12 +78,14 @@ define amdgpu_kernel void @soff1_voff1(i32 %soff) { ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_load_b32 s0, s[2:3], 0x24 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add3_u32 v0, 0, s0, v0 +; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0 ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v0 -; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v5, 2, v0 ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 4, v0 ; GFX11-GISEL-NEXT: scratch_store_b8 v4, v1, off dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 @@ -114,8 +116,9 @@ define amdgpu_kernel void @soff1_voff1(i32 %soff) { ; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX12-GISEL-NEXT: v_add3_u32 v0, 0, s0, v0 +; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) +; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS ; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS @@ -166,13 +169,13 @@ define amdgpu_kernel void @soff1_voff2(i32 %soff) { ; GFX940-GISEL: ; %bb.0: ; %bb ; GFX940-GISEL-NEXT: s_load_dword s0, s[2:3], 0x24 ; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 +; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1 ; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 1, v0 -; GFX940-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 +; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 ; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 2, v0 ; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 2 @@ -208,10 +211,12 @@ define amdgpu_kernel void @soff1_voff2(i32 %soff) { ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_load_b32 s0, s[2:3], 0x24 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 1, v0 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; 
GFX11-GISEL-NEXT: v_add3_u32 v0, 0, s0, v0 +; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0 ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v0 @@ -246,10 +251,12 @@ define amdgpu_kernel void @soff1_voff2(i32 %soff) { ; GFX12-GISEL-NEXT: s_load_b32 s0, s[2:3], 0x24 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 -; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12-GISEL-NEXT: v_add3_u32 v0, 0, s0, v0 +; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS ; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS @@ -300,13 +307,13 @@ define amdgpu_kernel void @soff1_voff4(i32 %soff) { ; GFX940-GISEL: ; %bb.0: ; %bb ; GFX940-GISEL-NEXT: s_load_dword s0, s[2:3], 0x24 ; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 +; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1 ; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 1, v0 -; GFX940-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 +; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 ; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 2, v0 ; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 2 @@ -342,10 +349,12 @@ define amdgpu_kernel void @soff1_voff4(i32 %soff) { ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_load_b32 s0, s[2:3], 0x24 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 2, v0 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add3_u32 v0, 0, s0, v0 +; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0 ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v0 @@ -380,10 +389,12 @@ define amdgpu_kernel void @soff1_voff4(i32 %soff) { ; GFX12-GISEL-NEXT: s_load_b32 s0, s[2:3], 0x24 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 -; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 -; 
GFX12-GISEL-NEXT: v_add3_u32 v0, 0, s0, v0 +; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS ; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS @@ -434,18 +445,18 @@ define amdgpu_kernel void @soff2_voff1(i32 %soff) { ; GFX940-GISEL-LABEL: soff2_voff1: ; GFX940-GISEL: ; %bb.0: ; %bb ; GFX940-GISEL-NEXT: s_load_dword s0, s[2:3], 0x24 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 +; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1 ; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1 -; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 1, v0 -; GFX940-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 +; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX940-GISEL-NEXT: v_add_u32_e32 v3, 2, v0 +; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 ; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 2, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 2 -; GFX940-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 +; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2 +; GFX940-GISEL-NEXT: scratch_store_byte v3, v1, off sc0 sc1 ; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX940-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 ; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 @@ -478,14 +489,15 @@ define amdgpu_kernel void @soff2_voff1(i32 %soff) { ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_load_b32 s0, s[2:3], 0x24 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) -; GFX11-GISEL-NEXT: v_add3_u32 v0, 0, s0, v0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0 ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v0 -; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v5, 2, v0 ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 4, v0 ; GFX11-GISEL-NEXT: scratch_store_b8 v4, v1, off dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 @@ -519,8 +531,9 @@ define amdgpu_kernel void @soff2_voff1(i32 %soff) { ; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 ; GFX12-GISEL-NEXT: s_lshl_b32 s0, s0, 1 -; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) -; GFX12-GISEL-NEXT: v_add3_u32 v0, 0, s0, v0 +; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0 +; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS ; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS @@ -571,14 +584,14 @@ define amdgpu_kernel void @soff2_voff2(i32 %soff) { ; 
GFX940-GISEL: ; %bb.0: ; %bb ; GFX940-GISEL-NEXT: s_load_dword s0, s[2:3], 0x24 ; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 +; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1 ; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1 -; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 1, v0 -; GFX940-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 +; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 ; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 2, v0 ; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 2 @@ -615,12 +628,13 @@ define amdgpu_kernel void @soff2_voff2(i32 %soff) { ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_load_b32 s0, s[2:3], 0x24 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 1, v0 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1 +; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX11-GISEL-NEXT: v_add3_u32 v0, 0, s0, v0 +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0 ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v0 @@ -656,12 +670,13 @@ define amdgpu_kernel void @soff2_voff2(i32 %soff) { ; GFX12-GISEL-NEXT: s_load_b32 s0, s[2:3], 0x24 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 -; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 ; GFX12-GISEL-NEXT: s_lshl_b32 s0, s0, 1 +; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX12-GISEL-NEXT: v_add3_u32 v0, 0, s0, v0 +; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS ; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS @@ -712,14 +727,14 @@ define amdgpu_kernel void @soff2_voff4(i32 %soff) { ; GFX940-GISEL: ; %bb.0: ; %bb ; GFX940-GISEL-NEXT: s_load_dword s0, s[2:3], 0x24 ; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 +; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1 ; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1 -; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 1, v0 -; GFX940-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 +; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 ; GFX940-GISEL-NEXT: 
s_waitcnt vmcnt(0) ; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 2, v0 ; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 2 @@ -756,12 +771,13 @@ define amdgpu_kernel void @soff2_voff4(i32 %soff) { ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_load_b32 s0, s[2:3], 0x24 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 2, v0 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1 +; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX11-GISEL-NEXT: v_add3_u32 v0, 0, s0, v0 +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0 ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v0 @@ -797,12 +813,13 @@ define amdgpu_kernel void @soff2_voff4(i32 %soff) { ; GFX12-GISEL-NEXT: s_load_b32 s0, s[2:3], 0x24 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 -; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 ; GFX12-GISEL-NEXT: s_lshl_b32 s0, s0, 1 +; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX12-GISEL-NEXT: v_add3_u32 v0, 0, s0, v0 +; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS ; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS @@ -853,18 +870,18 @@ define amdgpu_kernel void @soff4_voff1(i32 %soff) { ; GFX940-GISEL-LABEL: soff4_voff1: ; GFX940-GISEL: ; %bb.0: ; %bb ; GFX940-GISEL-NEXT: s_load_dword s0, s[2:3], 0x24 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 +; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1 ; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 1, v0 -; GFX940-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 +; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX940-GISEL-NEXT: v_add_u32_e32 v3, 2, v0 +; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 ; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 2, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 2 -; GFX940-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 +; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2 +; GFX940-GISEL-NEXT: scratch_store_byte v3, v1, off sc0 sc1 ; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX940-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 ; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 @@ -897,14 +914,15 @@ define amdgpu_kernel void @soff4_voff1(i32 %soff) { ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_load_b32 s0, s[2:3], 0x24 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4 ; 
GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) -; GFX11-GISEL-NEXT: v_add3_u32 v0, 0, s0, v0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0 ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v0 -; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v5, 2, v0 ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 4, v0 ; GFX11-GISEL-NEXT: scratch_store_b8 v4, v1, off dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 @@ -938,8 +956,9 @@ define amdgpu_kernel void @soff4_voff1(i32 %soff) { ; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 ; GFX12-GISEL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) -; GFX12-GISEL-NEXT: v_add3_u32 v0, 0, s0, v0 +; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0 +; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS ; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS @@ -990,14 +1009,14 @@ define amdgpu_kernel void @soff4_voff2(i32 %soff) { ; GFX940-GISEL: ; %bb.0: ; %bb ; GFX940-GISEL-NEXT: s_load_dword s0, s[2:3], 0x24 ; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 +; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1 ; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 1, v0 -; GFX940-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 +; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 ; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 2, v0 ; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 2 @@ -1034,12 +1053,13 @@ define amdgpu_kernel void @soff4_voff2(i32 %soff) { ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_load_b32 s0, s[2:3], 0x24 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 1, v0 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2 +; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX11-GISEL-NEXT: v_add3_u32 v0, 0, s0, v0 +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0 ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v0 @@ -1075,12 +1095,13 @@ define amdgpu_kernel void @soff4_voff2(i32 %soff) { ; GFX12-GISEL-NEXT: s_load_b32 s0, s[2:3], 0x24 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX12-GISEL-NEXT: 
v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 -; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 ; GFX12-GISEL-NEXT: s_lshl_b32 s0, s0, 2 +; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX12-GISEL-NEXT: v_add3_u32 v0, 0, s0, v0 +; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS ; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS @@ -1130,14 +1151,14 @@ define amdgpu_kernel void @soff4_voff4(i32 %soff) { ; GFX940-GISEL: ; %bb.0: ; %bb ; GFX940-GISEL-NEXT: s_load_dword s0, s[2:3], 0x24 ; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 +; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1 ; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 1, v0 -; GFX940-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 +; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 ; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 2, v0 ; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 2 @@ -1174,12 +1195,13 @@ define amdgpu_kernel void @soff4_voff4(i32 %soff) { ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_load_b32 s0, s[2:3], 0x24 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 2, v0 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2 +; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX11-GISEL-NEXT: v_add3_u32 v0, 0, s0, v0 +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0 ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v0 @@ -1215,12 +1237,13 @@ define amdgpu_kernel void @soff4_voff4(i32 %soff) { ; GFX12-GISEL-NEXT: s_load_b32 s0, s[2:3], 0x24 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 -; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 ; GFX12-GISEL-NEXT: s_lshl_b32 s0, s0, 2 +; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX12-GISEL-NEXT: v_add3_u32 v0, 0, s0, v0 +; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS ; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS From 
72ed80866fcfe1366ab49995d23782e8566cec43 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 31 Jul 2024 22:57:45 -0700 Subject: [PATCH 063/114] [MIR] Remove separate Size variable from parseMachineMemoryOperand. NFC (#101453) Size is updated in sync with MemoryType. Instead of maintaining a separate Size, use the size from MemoryType where needed. --- llvm/lib/CodeGen/MIRParser/MIParser.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index bf10794a100eb1..0e2b71729fbf51 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -3362,15 +3362,15 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { if (parseOptionalAtomicOrdering(FailureOrder)) return true; - LLT MemoryType; if (Token.isNot(MIToken::IntegerLiteral) && Token.isNot(MIToken::kw_unknown_size) && Token.isNot(MIToken::lparen)) return error("expected memory LLT, the size integer literal or 'unknown-size' after " "memory operation"); - uint64_t Size = MemoryLocation::UnknownSize; + LLT MemoryType; if (Token.is(MIToken::IntegerLiteral)) { + uint64_t Size; if (getUint64(Size)) return true; @@ -3378,7 +3378,6 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { MemoryType = LLT::scalar(8 * Size); lex(); } else if (Token.is(MIToken::kw_unknown_size)) { - Size = MemoryLocation::UnknownSize; lex(); } else { if (expectAndConsume(MIToken::lparen)) @@ -3387,8 +3386,6 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { return true; if (expectAndConsume(MIToken::rparen)) return true; - - Size = MemoryType.getSizeInBytes().getKnownMinValue(); } MachinePointerInfo Ptr = MachinePointerInfo(); @@ -3406,7 +3403,9 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { return true; } uint64_t BaseAlignment = - (Size != MemoryLocation::UnknownSize ? PowerOf2Ceil(Size) : 1); + MemoryType.isValid() + ? PowerOf2Ceil(MemoryType.getSizeInBytes().getKnownMinValue()) + : 1; AAMDNodes AAInfo; MDNode *Range = nullptr; while (consumeIfPresent(MIToken::comma)) { From 129a8e1b756aa4e5932169ed2f1f7dbad692f44d Mon Sep 17 00:00:00 2001 From: Marina <173714676+citymarina@users.noreply.github.com> Date: Thu, 1 Aug 2024 07:08:07 +0100 Subject: [PATCH 064/114] [AArch64] Add tests for redundant csel instructions. 
NFC (#101014) --- llvm/test/CodeGen/AArch64/peephole-csel.ll | 32 ++++++ llvm/test/CodeGen/AArch64/peephole-csel.mir | 112 ++++++++++++++++++++ 2 files changed, 144 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/peephole-csel.ll create mode 100644 llvm/test/CodeGen/AArch64/peephole-csel.mir diff --git a/llvm/test/CodeGen/AArch64/peephole-csel.ll b/llvm/test/CodeGen/AArch64/peephole-csel.ll new file mode 100644 index 00000000000000..3f92943b11eb1d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/peephole-csel.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64 < %s | FileCheck %s + +define void @peephole_csel(ptr %dst, i1 %0, i1 %cmp) { +; CHECK-LABEL: peephole_csel: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: tst w2, #0x1 +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: csel x9, xzr, xzr, eq +; CHECK-NEXT: tst w1, #0x1 +; CHECK-NEXT: csel x8, x8, x9, eq +; CHECK-NEXT: str x8, [x0] +; CHECK-NEXT: ret +entry: + br i1 %0, label %then, label %exit + +then: ; preds = %entry + ; The donothing() is needed to make make this block less interesting to + ; SimplifyCFG. Otherwise we may not get the csel that we want to test. + call void @llvm.donothing() + br i1 %cmp, label %true, label %exit + +true: ; preds = %then + ; Same as above + call void @llvm.donothing() + br label %exit + +exit: ; preds = %true, %then, %entry + %x = phi i64 [ 0, %true ], [ 0, %then ], [ 1, %entry ] + store i64 %x, ptr %dst, align 8 + ret void +} diff --git a/llvm/test/CodeGen/AArch64/peephole-csel.mir b/llvm/test/CodeGen/AArch64/peephole-csel.mir new file mode 100644 index 00000000000000..5077441a33788a --- /dev/null +++ b/llvm/test/CodeGen/AArch64/peephole-csel.mir @@ -0,0 +1,112 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc %s -o - -mtriple=aarch64-unknown-linux -run-pass=aarch64-mi-peephole-opt -verify-machineinstrs | FileCheck %s + +--- +name: peephole_cselxr_same +registers: + - { id: 1, class: gpr64, preferred-register: '' } + - { id: 2, class: gpr64, preferred-register: '' } +liveins: + - { reg: '$x0', virtual-reg: '%1' } + - { reg: '$x1', virtual-reg: '%2' } +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: peephole_cselxr_same + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: $xzr = ANDSXri [[COPY]], 0, implicit-def $nzcv + ; CHECK-NEXT: [[CSELXr:%[0-9]+]]:gpr64 = CSELXr [[COPY1]], [[COPY1]], 0, implicit $nzcv + ; CHECK-NEXT: RET_ReallyLR + %3:gpr64 = COPY $x1 + %4:gpr64 = COPY $x0 + $xzr = ANDSXri %3, 0, implicit-def $nzcv + %5:gpr64 = CSELXr %4, %4, 0, implicit $nzcv + RET_ReallyLR + +... 
+--- +name: peephole_cselwr_same +registers: + - { id: 1, class: gpr32, preferred-register: '' } + - { id: 2, class: gpr32, preferred-register: '' } +liveins: + - { reg: '$w0', virtual-reg: '%1' } + - { reg: '$w1', virtual-reg: '%2' } +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: peephole_cselwr_same + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK-NEXT: $wzr = ANDSWri [[COPY]], 0, implicit-def $nzcv + ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[COPY1]], [[COPY1]], 0, implicit $nzcv + ; CHECK-NEXT: RET_ReallyLR + %3:gpr32 = COPY $w1 + %4:gpr32 = COPY $w0 + $wzr = ANDSWri %3, 0, implicit-def $nzcv + %5:gpr32 = CSELWr %4, %4, 0, implicit $nzcv + RET_ReallyLR + +... +--- +name: peephole_cselxr_different +registers: + - { id: 1, class: gpr64, preferred-register: '' } + - { id: 2, class: gpr64, preferred-register: '' } +liveins: + - { reg: '$x0', virtual-reg: '%1' } + - { reg: '$x1', virtual-reg: '%2' } +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: peephole_cselxr_different + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: $xzr = ANDSXri [[COPY]], 0, implicit-def $nzcv + ; CHECK-NEXT: [[CSELXr:%[0-9]+]]:gpr64 = CSELXr [[COPY]], [[COPY1]], 0, implicit $nzcv + ; CHECK-NEXT: RET_ReallyLR + %3:gpr64 = COPY $x1 + %4:gpr64 = COPY $x0 + $xzr = ANDSXri %3, 0, implicit-def $nzcv + %5:gpr64 = CSELXr %3, %4, 0, implicit $nzcv + RET_ReallyLR + +... +--- +name: peephole_cselwr_different +registers: + - { id: 1, class: gpr32, preferred-register: '' } + - { id: 2, class: gpr32, preferred-register: '' } +liveins: + - { reg: '$w0', virtual-reg: '%1' } + - { reg: '$w1', virtual-reg: '%2' } +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: peephole_cselwr_different + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK-NEXT: $wzr = ANDSWri [[COPY]], 0, implicit-def $nzcv + ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[COPY]], [[COPY1]], 0, implicit $nzcv + ; CHECK-NEXT: RET_ReallyLR + %3:gpr32 = COPY $w1 + %4:gpr32 = COPY $w0 + $wzr = ANDSWri %3, 0, implicit-def $nzcv + %5:gpr32 = CSELWr %3, %4, 0, implicit $nzcv + RET_ReallyLR + +... + From 972c02929ba61bd34417700d77605d8fd3f36de7 Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Thu, 1 Aug 2024 08:30:20 +0200 Subject: [PATCH 065/114] [GlobalISel][TableGen] MIR Pattern Variadics (#100563) Allow for matching & rewriting a variable number of arguments in an instructions. 
Solves #87459 --- llvm/docs/GlobalISel/MIRPatterns.rst | 57 ++++- .../CodeGen/GlobalISel/GIMatchTableExecutor.h | 24 +++ .../GlobalISel/GIMatchTableExecutorImpl.h | 33 +++ .../include/llvm/Target/GlobalISel/Combine.td | 10 + .../match-table-variadics.td | 71 ++++++- .../operand-types.td | 26 ++- .../typeof-errors.td | 14 +- .../variadic-errors.td | 89 ++++++++ .../TableGen/Common/GlobalISel/CodeExpander.h | 2 +- .../GlobalISel/GlobalISelMatchTable.cpp | 64 +++--- .../Common/GlobalISel/GlobalISelMatchTable.h | 70 +++++-- .../TableGen/Common/GlobalISel/Patterns.cpp | 39 ++++ .../TableGen/Common/GlobalISel/Patterns.h | 28 ++- .../TableGen/GlobalISelCombinerEmitter.cpp | 195 +++++++++++++----- llvm/utils/TableGen/GlobalISelEmitter.cpp | 6 +- 15 files changed, 625 insertions(+), 103 deletions(-) create mode 100644 llvm/test/TableGen/GlobalISelCombinerEmitter/variadic-errors.td diff --git a/llvm/docs/GlobalISel/MIRPatterns.rst b/llvm/docs/GlobalISel/MIRPatterns.rst index 7e6d88683d491e..654a60730bb9c5 100644 --- a/llvm/docs/GlobalISel/MIRPatterns.rst +++ b/llvm/docs/GlobalISel/MIRPatterns.rst @@ -115,7 +115,7 @@ GITypeOf ``GITypeOf<"$x">`` is a ``GISpecialType`` that allows for the creation of a register or immediate with the same type as another (register) operand. -Operand: +Type Parameters: * An operand name as a string, prefixed by ``$``. @@ -143,6 +143,59 @@ Semantics: (apply (G_FSUB $dst, $src, $tmp), (G_FNEG GITypeOf<"$dst">:$tmp, $src))>; +GIVariadic +~~~~~~~~~~ + +``GIVariadic<>`` is a ``GISpecialType`` that allows for matching 1 or +more operands remaining on an instruction. + +Type Parameters: + +* The minimum number of additional operands to match. Must be greater than zero. + + * Default is 1. + +* The maximum number of additional operands to match. Must be strictly greater + than the minimum. + + * 0 can be used to indicate there is no upper limit. + * Default is 0. + +Semantics: + +* ``GIVariadic<>`` operands can only appear on variadic instructions. +* ``GIVariadic<>`` operands cannot be defs. +* ``GIVariadic<>`` operands can only appear as the last operand in a 'match' pattern. +* Each instance within a 'match' pattern must be uniquely named. +* Re-using a ``GIVariadic<>`` operand in an 'apply' pattern will result in all + the matched operands being copied from the original instruction. +* The min/max operands will result in the matcher checking that the number of operands + falls within that range. +* ``GIVariadic<>`` operands can be used in C++ code within a rule, which will + result in the operand name being expanded to a value of type ``ArrayRef``. + +.. code-block:: text + + // bool checkBuildVectorToUnmerge(ArrayRef); + + def build_vector_to_unmerge: GICombineRule < + (defs root:$root), + (match (G_BUILD_VECTOR $root, GIVariadic<>:$args), + [{ return checkBuildVectorToUnmerge(${args}); }]), + (apply (G_UNMERGE_VALUES $root, $args)) + >; + +.. code-block:: text + + // Will additionally check the number of operands is >= 3 and <= 5. + // ($root is one operand, then 2 to 4 variadic operands). + def build_vector_to_unmerge: GICombineRule < + (defs root:$root), + (match (G_BUILD_VECTOR $root, GIVariadic<2, 4>:$two_to_four), + [{ return checkBuildVectorToUnmerge(${two_to_four}); }]), + (apply (G_UNMERGE_VALUES $root, $two_to_four)) + >; + Builtin Operations ------------------ @@ -240,6 +293,8 @@ This a non-exhaustive list of known issues with MIR patterns at this time. match. e.g. 
if a pattern needs to work on both i32 and i64, you either need to leave it untyped and check the type in C++, or duplicate the pattern. +* ``GISpecialType`` operands are not allowed within a ``GICombinePatFrag``. +* ``GIVariadic<>`` matched operands must each have a unique name. GICombineRule ------------- diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h index cc2dd2f4e489c7..7b42722ca8d4f1 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h @@ -15,11 +15,13 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_GIMATCHTABLEEXECUTOR_H #define LLVM_CODEGEN_GLOBALISEL_GIMATCHTABLEEXECUTOR_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Bitset.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGenTypes/LowLevelType.h" #include "llvm/IR/Function.h" #include @@ -133,6 +135,12 @@ enum { /// - Ops(ULEB128) - Expected number of operands GIM_CheckNumOperands, + /// Check the instruction has a number of operands <= or >= than given number. + /// - InsnID(ULEB128) - Instruction ID + /// - Ops(ULEB128) - Number of operands + GIM_CheckNumOperandsLE, + GIM_CheckNumOperandsGE, + /// Check an immediate predicate on the specified instruction /// - InsnID(ULEB128) - Instruction ID /// - Pred(2) - The predicate to test @@ -294,12 +302,15 @@ enum { /// Check the specified operands are identical. /// The IgnoreCopies variant looks through COPY instructions before /// comparing the operands. + /// The "All" variants check all operands starting from the index. /// - InsnID(ULEB128) - Instruction ID /// - OpIdx(ULEB128) - Operand index /// - OtherInsnID(ULEB128) - Other instruction ID /// - OtherOpIdx(ULEB128) - Other operand index GIM_CheckIsSameOperand, GIM_CheckIsSameOperandIgnoreCopies, + GIM_CheckAllSameOperand, + GIM_CheckAllSameOperandIgnoreCopies, /// Check we can replace all uses of a register with another. /// - OldInsnID(ULEB128) @@ -362,6 +373,13 @@ enum { /// GIR_Copy but with both New/OldInsnIDs omitted and defaulting to zero. GIR_RootToRootCopy, + /// Copies all operand starting from OpIdx in OldInsnID into the new + /// instruction NewInsnID. + /// - NewInsnID(ULEB128) - Instruction ID to modify + /// - OldInsnID(ULEB128) - Instruction ID to copy from + /// - OpIdx(ULEB128) - The first operand to copy + GIR_CopyRemaining, + /// Copy an operand to the specified instruction or add a zero register if the /// operand is a zero immediate. /// - NewInsnID(ULEB128) - Instruction ID to modify @@ -713,6 +731,12 @@ class GIMatchTableExecutor { return Ret; } + static ArrayRef getRemainingOperands(const MachineInstr &MI, + unsigned FirstVarOp) { + auto Operands = drop_begin(MI.operands(), FirstVarOp); + return {Operands.begin(), Operands.end()}; + } + public: // Faster ULEB128 decoder tailored for the Match Table Executor. 
// diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h index 90b4fe5518c87f..5a5a750ac6b4aa 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h @@ -309,6 +309,23 @@ bool GIMatchTableExecutor::executeMatchTable( break; } + case GIM_CheckNumOperandsGE: + case GIM_CheckNumOperandsLE: { + uint64_t InsnID = readULEB(); + uint64_t Expected = readULEB(); + const bool IsLE = (MatcherOpcode == GIM_CheckNumOperandsLE); + DEBUG_WITH_TYPE(TgtExecutor::getName(), + dbgs() << CurrentIdx << ": GIM_CheckNumOperands" + << (IsLE ? "LE" : "GE") << "(MIs[" << InsnID + << "], Expected=" << Expected << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + const unsigned NumOps = State.MIs[InsnID]->getNumOperands(); + if (IsLE ? (NumOps <= Expected) : (NumOps >= Expected)) { + if (handleReject() == RejectAndGiveUp) + return false; + } + break; + } case GIM_CheckNumOperands: { uint64_t InsnID = readULEB(); uint64_t Expected = readULEB(); @@ -1081,6 +1098,22 @@ bool GIMatchTableExecutor::executeMatchTable( break; } + case GIR_CopyRemaining: { + uint64_t NewInsnID = readULEB(); + uint64_t OldInsnID = readULEB(); + uint64_t OpIdx = readULEB(); + assert(OutMIs[NewInsnID] && "Attempted to add to undefined instruction"); + MachineInstr &OldMI = *State.MIs[OldInsnID]; + MachineInstrBuilder &NewMI = OutMIs[NewInsnID]; + for (const auto &Op : drop_begin(OldMI.operands(), OpIdx)) + NewMI.add(Op); + DEBUG_WITH_TYPE(TgtExecutor::getName(), + dbgs() << CurrentIdx << ": GIR_CopyRemaining(OutMIs[" + << NewInsnID << "], MIs[" << OldInsnID + << "], /*start=*/" << OpIdx << ")\n"); + break; + } + case GIR_CopyOrAddZeroReg: { uint64_t NewInsnID = readULEB(); uint64_t OldInsnID = readULEB(); diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 2246e20ecc1dc8..1f26132561cca0 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -128,6 +128,16 @@ class GITypeOf : GISpecialType { string OpName = opName; } +// The type of an operand that can match a variable amount of operands. +// This type contains a minimum and maximum number of operands to match. +// The minimum must be 1 or more, as we cannot have an operand representing +// zero operands, and the max can be zero (which means "unlimited") or a value +// greater than the minimum. 
+class GIVariadic : GISpecialType { + int MinArgs = min; + int MaxArgs = max; +} + //===----------------------------------------------------------------------===// // Pattern Builtins //===----------------------------------------------------------------------===// diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-variadics.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-variadics.td index 86ae031caecb53..55429f98564afb 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-variadics.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-variadics.td @@ -28,19 +28,48 @@ def InstTest3 : GICombineRule< (match (G_UNMERGE_VALUES $a, $b, $c, $d)), (apply [{ APPLY }])>; +def VariadicTypeTestCxx : GICombineRule< + (defs root:$a), + (match (G_BUILD_VECTOR $a, GIVariadic<2, 4>:$b)), + (apply [{ ${b} }])>; + +def VariadicTypeTestReuse : GICombineRule< + (defs root:$a), + (match (G_BUILD_VECTOR $a, $c, GIVariadic<2, 4>:$b)), + (apply (G_MERGE_VALUES $a, $b, $c))>; + def MyCombiner: GICombiner<"GenMyCombiner", [ InstTest0, InstTest1, InstTest2, - InstTest3 + InstTest3, + VariadicTypeTestCxx, + VariadicTypeTestReuse ]>; +// CHECK: bool GenMyCombiner::runCustomAction(unsigned ApplyID, const MatcherState &State, NewMIVector &OutMIs) const { +// CHECK-NEXT: Helper.getBuilder().setInstrAndDebugLoc(*State.MIs[0]); +// CHECK-NEXT: switch(ApplyID) { +// CHECK-NEXT: case GICXXCustomAction_GICombiner0:{ +// CHECK-NEXT: // Apply Patterns +// CHECK-NEXT: APPLY +// CHECK-NEXT: return true; +// CHECK-NEXT: } +// CHECK-NEXT: case GICXXCustomAction_GICombiner1:{ +// CHECK-NEXT: // Apply Patterns +// CHECK-NEXT: getRemainingOperands(*State.MIs[0], 1) +// CHECK-NEXT: return true; +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: llvm_unreachable("Unknown Apply Action"); +// CHECK-NEXT: } + // CHECK: const uint8_t *GenMyCombiner::getMatchTable() const { // CHECK-NEXT: constexpr static uint8_t MatchTable0[] = { -// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2([[#LOWER:]]), GIMT_Encode2([[#UPPER:]]), /*)*//*default:*//*Label 2*/ GIMT_Encode4([[#DEFAULT:]]), +// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(70), GIMT_Encode2(74), /*)*//*default:*//*Label 2*/ GIMT_Encode4(127), // CHECK-NEXT: /*TargetOpcode::G_UNMERGE_VALUES*//*Label 0*/ GIMT_Encode4(26), GIMT_Encode4(0), GIMT_Encode4(0), // CHECK-NEXT: /*TargetOpcode::G_BUILD_VECTOR*//*Label 1*/ GIMT_Encode4(55), -// CHECK-NEXT: // Label 0: @[[#%u, mul(UPPER-LOWER, 4) + 10]] +// CHECK-NEXT: // Label 0: @26 // CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 3*/ GIMT_Encode4(40), // Rule ID 2 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule2Enabled), // CHECK-NEXT: GIM_CheckNumOperands, /*MI*/0, /*Expected*/2, @@ -77,7 +106,35 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: // Combiner Rule #1: InstTest1 // CHECK-NEXT: GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner0), // CHECK-NEXT: // Label 5: @69 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 6*/ GIMT_Encode4(83), // Rule ID 0 // +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 6*/ GIMT_Encode4(86), // Rule ID 4 // +// CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule4Enabled), +// CHECK-NEXT: GIM_CheckNumOperandsGE, /*MI*/0, /*Expected*/3, +// CHECK-NEXT: GIM_CheckNumOperandsLE, /*MI*/0, /*Expected*/5, +// CHECK-NEXT: // MIs[0] a +// CHECK-NEXT: // No operand predicates +// CHECK-NEXT: // MIs[0] b +// CHECK-NEXT: // No operand predicates +// 
CHECK-NEXT: // Combiner Rule #4: VariadicTypeTestCxx +// CHECK-NEXT: GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner1), +// CHECK-NEXT: // Label 6: @86 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 7*/ GIMT_Encode4(112), // Rule ID 5 // +// CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule5Enabled), +// CHECK-NEXT: GIM_CheckNumOperandsGE, /*MI*/0, /*Expected*/4, +// CHECK-NEXT: GIM_CheckNumOperandsLE, /*MI*/0, /*Expected*/6, +// CHECK-NEXT: // MIs[0] a +// CHECK-NEXT: // No operand predicates +// CHECK-NEXT: // MIs[0] c +// CHECK-NEXT: // No operand predicates +// CHECK-NEXT: // MIs[0] b +// CHECK-NEXT: // No operand predicates +// CHECK-NEXT: // Combiner Rule #5: VariadicTypeTestReuse +// CHECK-NEXT: GIR_BuildRootMI, /*Opcode*/GIMT_Encode2(TargetOpcode::G_MERGE_VALUES), +// CHECK-NEXT: GIR_RootToRootCopy, /*OpIdx*/0, // a +// CHECK-NEXT: GIR_CopyRemaining, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/2, // b +// CHECK-NEXT: GIR_RootToRootCopy, /*OpIdx*/1, // c +// CHECK-NEXT: GIR_EraseRootFromParent_Done, +// CHECK-NEXT: // Label 7: @112 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 8*/ GIMT_Encode4(126), // Rule ID 0 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule0Enabled), // CHECK-NEXT: GIM_CheckNumOperands, /*MI*/0, /*Expected*/4, // CHECK-NEXT: // MIs[0] a @@ -90,10 +147,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: // No operand predicates // CHECK-NEXT: // Combiner Rule #0: InstTest0 // CHECK-NEXT: GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner0), -// CHECK-NEXT: // Label 6: @83 +// CHECK-NEXT: // Label 8: @126 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 2: @[[#%u, DEFAULT]] +// CHECK-NEXT: // Label 2: @127 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: }; // Size: [[#%u, DEFAULT + 1]] bytes +// CHECK-NEXT: }; // Size: 128 bytes // CHECK-NEXT: return MatchTable0; // CHECK-NEXT: } diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/operand-types.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/operand-types.td index 4769bed9724012..9b5598661c8de6 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/operand-types.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/operand-types.td @@ -104,8 +104,32 @@ def TypeOfProp : GICombineRule< (apply (G_ANYEXT $x, GITypeOf<"$y">:$tmp), (G_ANYEXT $tmp, $y))>; +// CHECK: (CombineRule name:VariadicTypeTest id:3 root:a +// CHECK-NEXT: (MatchPats +// CHECK-NEXT: __VariadicTypeTest_match_0:(CodeGenInstructionPattern G_UNMERGE_VALUES operands:[$a, $b, GIVariadic<1,0>:$z]) +// CHECK-NEXT: ) +// CHECK-NEXT: (ApplyPats +// CHECK-NEXT: __VariadicTypeTest_apply_0:(CodeGenInstructionPattern G_UNMERGE_VALUES operands:[$a, $b, GIVariadic<1,0>:$z]) +// CHECK-NEXT: ) +// CHECK-NEXT: (OperandTable MatchPats +// CHECK-NEXT: a -> __VariadicTypeTest_match_0 +// CHECK-NEXT: b -> __VariadicTypeTest_match_0 +// CHECK-NEXT: z -> +// CHECK-NEXT: ) +// CHECK-NEXT: (OperandTable ApplyPats +// CHECK-NEXT: a -> __VariadicTypeTest_apply_0 +// CHECK-NEXT: b -> __VariadicTypeTest_apply_0 +// CHECK-NEXT: z -> +// CHECK-NEXT: ) +// CHECK-NEXT: ) +def VariadicTypeTest: GICombineRule< + (defs root:$a), + (match (G_UNMERGE_VALUES $a, $b, GIVariadic<>:$z)), + (apply (G_UNMERGE_VALUES $a, $b, $z))>; + def MyCombiner: GICombiner<"GenMyCombiner", [ InstTest0, PatFragTest0, - TypeOfProp + TypeOfProp, + VariadicTypeTest, ]>; diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/typeof-errors.td 
b/llvm/test/TableGen/GlobalISelCombinerEmitter/typeof-errors.td index ee7b8f5f3a39cb..076fdb78ae8aa0 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/typeof-errors.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/typeof-errors.td @@ -21,7 +21,7 @@ def UnknownOperand : GICombineRule< (match (G_ZEXT $dst, $src)), (apply (G_ANYEXT $dst, (GITypeOf<"$unknown"> 0)))>; -// CHECK: :[[@LINE+2]]:{{[0-9]+}}: error: GISpecialType is not supported in 'match' patterns +// CHECK: :[[@LINE+2]]:{{[0-9]+}}: error: GITypeOf is not supported in 'match' patterns // CHECK: :[[@LINE+1]]:{{[0-9]+}}: note: operand 1 of '__UseInMatch_match_0' has type 'GITypeOf<$dst>' def UseInMatch : GICombineRule< (defs root:$dst), @@ -41,7 +41,7 @@ def UseInPF: GICombineRule< (match (PFWithTypeOF $dst)), (apply (G_ANYEXT $dst, (i32 0)))>; -// CHECK: :[[@LINE+2]]:{{[0-9]+}}: error: GISpecialType is not supported in 'match' patterns +// CHECK: :[[@LINE+2]]:{{[0-9]+}}: error: GITypeOf is not supported in 'match' patterns // CHECK: :[[@LINE+1]]:{{[0-9]+}}: note: operand 1 of '__InferredUseInMatch_match_0' has type 'GITypeOf<$dst>' def InferredUseInMatch : GICombineRule< (defs root:$dst), @@ -63,6 +63,13 @@ def TypeOfApplyTmp : GICombineRule< (apply (G_ANYEXT $dst, i32:$tmp), (G_ANYEXT $tmp, (GITypeOf<"$tmp"> 0)))>; +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: type 'GITypeOf<$src>' is ill-formed: 'src' is a variadic pack operand +def TypeOfVariadic : GICombineRule< + (defs root:$dst), + (match (G_BUILD_VECTOR $dst, $x, GIVariadic<>:$src)), + (apply (G_ANYEXT GITypeOf<"$src">:$tmp, $x), + (G_ANYEXT $dst, $tmp))>; + // CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: Failed to parse one or more rules def MyCombiner: GICombiner<"GenMyCombiner", [ NoDollarSign, @@ -71,5 +78,6 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ UseInPF, InferredUseInMatch, InferenceConflict, - TypeOfApplyTmp + TypeOfApplyTmp, + TypeOfVariadic ]>; diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/variadic-errors.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/variadic-errors.td new file mode 100644 index 00000000000000..2b44e184c98e20 --- /dev/null +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/variadic-errors.td @@ -0,0 +1,89 @@ +// RUN: not llvm-tblgen -I %p/../../../include -gen-global-isel-combiner \ +// RUN: -combiners=MyCombiner %s 2>&1| \ +// RUN: FileCheck %s -implicit-check-not=error: + +include "llvm/Target/Target.td" +include "llvm/Target/GlobalISel/Combine.td" + +def MyTargetISA : InstrInfo; +def MyTarget : Target { let InstructionSet = MyTargetISA; } + +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: 'G_BUILD_VECTOR': GIVariadic can only be used on the last operand +def VariadicNotLastInList : GICombineRule< + (defs root:$dst), + (match (G_BUILD_VECTOR $dst, $a, GIVariadic<>:$b, $c)), + (apply (G_ANYEXT $dst, $a))>; + +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: 'G_IMPLICIT_DEF': GIVariadic cannot be used on defs +def VariadicAsDef : GICombineRule< + (defs root:$dst), + (match (G_IMPLICIT_DEF GIVariadic<1>:$dst)), + (apply [{ APPLY }])>; + +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: conflicting types for operand 'args': 'GIVariadic<2,4>' vs 'GIVariadic<3,6>' +def ConflictingInference : GICombineRule< + (defs root:$dst), + (match (G_BUILD_VECTOR $dst, GIVariadic<2, 4>:$args)), + (apply (G_MERGE_VALUES $dst, GIVariadic<3, 6>:$args))>; + +// CHECK: :[[@LINE+2]]:{{[0-9]+}}: error: cannot parse operand type: minimum number of arguments must be greater than zero in GIVariadic +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: Failed to parse pattern: 
'(G_BUILD_VECTOR ?:$dst, anonymous_8021:$a)' +def InvalidBounds0 : GICombineRule< + (defs root:$dst), + (match (G_BUILD_VECTOR $dst, GIVariadic<0>:$a)), + (apply [{ APPLY }])>; + +// CHECK: :[[@LINE+2]]:{{[0-9]+}}: error: cannot parse operand type: maximum number of arguments (1) must be zero, or greater than the minimum number of arguments (1) in GIVariadic +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: Failed to parse pattern: '(G_BUILD_VECTOR ?:$dst, anonymous_8022:$a)' +def InvalidBounds1 : GICombineRule< + (defs root:$dst), + (match (G_BUILD_VECTOR $dst, GIVariadic<1,1>:$a)), + (apply [{ APPLY }])>; + +// CHECK: :[[@LINE+2]]:{{[0-9]+}}: error: each instance of a GIVariadic operand must have a unique name within the match patterns +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: note: 'c' is used multiple times +def VariadicTypeTestEqOp : GICombineRule< + (defs root:$a), + (match (G_MERGE_VALUES $b, $c), + (G_BUILD_VECTOR $a, $b, GIVariadic<2, 4>:$c)), + (apply (G_MERGE_VALUES $a, $c))>; + +// TODO: We could support this if needed + +// CHECK: :[[@LINE+3]]:{{[0-9]+}}: error: GISpecialType is not supported in GICombinePatFrag +// CHECK: :[[@LINE+2]]:{{[0-9]+}}: note: operand 1 of '__PFWithVariadic_alt0_pattern_0' has type 'GIVariadic<1,0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: Could not parse GICombinePatFrag 'PFWithVariadic' +def PFWithVariadic: GICombinePatFrag< + (outs $dst), (ins), + [(pattern (G_ANYEXT $dst, GIVariadic<>:$b))]>; + +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: Failed to parse pattern: '(PFWithVariadic ?:$dst)' +def UseInPF: GICombineRule< + (defs root:$dst), + (match (PFWithVariadic $dst)), + (apply (G_ANYEXT $dst, (i32 0)))>; + +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: cannot use a GIVariadic operand on non-variadic instruction 'G_MUL' +def NotVariadicInstMatch : GICombineRule< + (defs root:$a), + (match (G_MUL $a, $c, GIVariadic<2, 4>:$b)), + (apply (G_MERGE_VALUES $a, $b, $c))>; + +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: cannot use a GIVariadic operand on non-variadic instruction 'G_MUL' +def NotVariadicInstApply : GICombineRule< + (defs root:$a), + (match (G_BUILD_VECTOR $a, GIVariadic<2, 4>:$b)), + (apply (G_MUL $a, $b, $c))>; + +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: Failed to parse one or more rules +def MyCombiner: GICombiner<"GenMyCombiner", [ + VariadicNotLastInList, + VariadicAsDef, + ConflictingInference, + InvalidBounds0, + InvalidBounds1, + VariadicTypeTestEqOp, + UseInPF, + NotVariadicInstMatch, + NotVariadicInstApply +]>; diff --git a/llvm/utils/TableGen/Common/GlobalISel/CodeExpander.h b/llvm/utils/TableGen/Common/GlobalISel/CodeExpander.h index 0b1e6ceab52c29..345da613f84352 100644 --- a/llvm/utils/TableGen/Common/GlobalISel/CodeExpander.h +++ b/llvm/utils/TableGen/Common/GlobalISel/CodeExpander.h @@ -32,7 +32,7 @@ class raw_ostream; class CodeExpander { StringRef Code; const CodeExpansions &Expansions; - const ArrayRef &Loc; + ArrayRef Loc; bool ShowExpansions; StringRef Indent; diff --git a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp index e011e78c67b153..bcba295442ad58 100644 --- a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp +++ b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp @@ -687,10 +687,6 @@ StringRef RuleMatcher::getOpcode() const { return Matchers.front()->getOpcode(); } -unsigned RuleMatcher::getNumOperands() const { - return Matchers.front()->getNumOperands(); -} - LLTCodeGen RuleMatcher::getFirstConditionAsRootType() { 
InstructionMatcher &InsnMatcher = *Matchers.front(); if (!InsnMatcher.predicates_empty()) @@ -1344,6 +1340,7 @@ std::string OperandMatcher::getOperandExpr(unsigned InsnVarID) const { unsigned OperandMatcher::getInsnVarID() const { return Insn.getInsnVarID(); } TempTypeIdx OperandMatcher::getTempTypeIdx(RuleMatcher &Rule) { + assert(!IsVariadic && "Cannot use this on variadic operands!"); if (TTIdx >= 0) { // Temp type index not assigned yet, so assign one and add the necessary // predicate. @@ -1506,8 +1503,20 @@ StringRef InstructionOpcodeMatcher::getOperandType(unsigned OpIdx) const { void InstructionNumOperandsMatcher::emitPredicateOpcodes( MatchTable &Table, RuleMatcher &Rule) const { - Table << MatchTable::Opcode("GIM_CheckNumOperands") - << MatchTable::Comment("MI") << MatchTable::ULEB128Value(InsnVarID) + StringRef Opc; + switch (CK) { + case CheckKind::Eq: + Opc = "GIM_CheckNumOperands"; + break; + case CheckKind::GE: + Opc = "GIM_CheckNumOperandsGE"; + break; + case CheckKind::LE: + Opc = "GIM_CheckNumOperandsLE"; + break; + } + Table << MatchTable::Opcode(Opc) << MatchTable::Comment("MI") + << MatchTable::ULEB128Value(InsnVarID) << MatchTable::Comment("Expected") << MatchTable::ULEB128Value(NumOperands) << MatchTable::LineBreak; } @@ -1695,12 +1704,15 @@ void MIFlagsInstructionPredicateMatcher::emitPredicateOpcodes( OperandMatcher & InstructionMatcher::addOperand(unsigned OpIdx, const std::string &SymbolicName, - unsigned AllocatedTemporariesBaseID) { - Operands.emplace_back(new OperandMatcher(*this, OpIdx, SymbolicName, - AllocatedTemporariesBaseID)); + unsigned AllocatedTemporariesBaseID, + bool IsVariadic) { + assert(Operands.empty() || + !Operands.back()->isVariadic() && + "Cannot add more operands after a variadic operand"); + Operands.emplace_back(new OperandMatcher( + *this, OpIdx, SymbolicName, AllocatedTemporariesBaseID, IsVariadic)); if (!SymbolicName.empty()) Rule.defineOperand(SymbolicName, *Operands.back()); - return *Operands.back(); } @@ -1726,9 +1738,10 @@ OperandMatcher &InstructionMatcher::addPhysRegInput(Record *Reg, unsigned OpIdx, void InstructionMatcher::emitPredicateOpcodes(MatchTable &Table, RuleMatcher &Rule) { - if (NumOperandsCheck) - InstructionNumOperandsMatcher(InsnVarID, getNumOperands()) + if (canAddNumOperandsCheck()) { + InstructionNumOperandsMatcher(InsnVarID, getNumOperandMatchers()) .emitPredicateOpcodes(Table, Rule); + } // First emit all instruction level predicates need to be verified before we // can verify operands. @@ -1793,11 +1806,13 @@ void InstructionMatcher::optimize() { Stash.push_back(predicates_pop_front()); if (Stash.back().get() == &OpcMatcher) { - if (NumOperandsCheck && OpcMatcher.isVariadicNumOperands() && - getNumOperands() != 0) - Stash.emplace_back( - new InstructionNumOperandsMatcher(InsnVarID, getNumOperands())); - NumOperandsCheck = false; + // FIXME: Is this even needed still? Why the isVariadicNumOperands check? 
+ if (canAddNumOperandsCheck() && OpcMatcher.isVariadicNumOperands() && + getNumOperandMatchers() != 0) { + Stash.emplace_back(new InstructionNumOperandsMatcher( + InsnVarID, getNumOperandMatchers())); + } + AllowNumOpsCheck = false; for (auto &OM : Operands) for (auto &OP : OM->predicates()) @@ -1862,11 +1877,13 @@ OperandRenderer::~OperandRenderer() {} void CopyRenderer::emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule, unsigned NewInsnID, unsigned OldInsnID, - unsigned OpIdx, StringRef Name) { - if (NewInsnID == 0 && OldInsnID == 0) { + unsigned OpIdx, StringRef Name, + bool ForVariadic) { + if (!ForVariadic && NewInsnID == 0 && OldInsnID == 0) { Table << MatchTable::Opcode("GIR_RootToRootCopy"); } else { - Table << MatchTable::Opcode("GIR_Copy") << MatchTable::Comment("NewInsnID") + Table << MatchTable::Opcode(ForVariadic ? "GIR_CopyRemaining" : "GIR_Copy") + << MatchTable::Comment("NewInsnID") << MatchTable::ULEB128Value(NewInsnID) << MatchTable::Comment("OldInsnID") << MatchTable::ULEB128Value(OldInsnID); @@ -1880,8 +1897,9 @@ void CopyRenderer::emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const { const OperandMatcher &Operand = Rule.getOperandMatcher(SymbolicName); unsigned OldInsnVarID = Rule.getInsnVarID(Operand.getInstructionMatcher()); + emitRenderOpcodes(Table, Rule, NewInsnID, OldInsnVarID, Operand.getOpIdx(), - SymbolicName); + SymbolicName, Operand.isVariadic()); } //===- CopyPhysRegRenderer ------------------------------------------------===// @@ -2127,10 +2145,10 @@ void CustomOperandRenderer::emitRenderOpcodes(MatchTable &Table, bool BuildMIAction::canMutate(RuleMatcher &Rule, const InstructionMatcher *Insn) const { - if (!Insn) + if (!Insn || Insn->hasVariadicMatcher()) return false; - if (OperandRenderers.size() != Insn->getNumOperands()) + if (OperandRenderers.size() != Insn->getNumOperandMatchers()) return false; for (const auto &Renderer : enumerate(OperandRenderers)) { diff --git a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h index 9e345dceddf528..e3eb1633a0b293 100644 --- a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h +++ b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h @@ -641,6 +641,10 @@ class RuleMatcher : public Matcher { return make_range(actions_begin(), actions_end()); } + bool hasOperand(StringRef SymbolicName) const { + return DefinedOperands.contains(SymbolicName); + } + void defineOperand(StringRef SymbolicName, OperandMatcher &OM); void definePhysRegOperand(Record *Reg, OperandMatcher &OM); @@ -678,7 +682,6 @@ class RuleMatcher : public Matcher { const PredicateMatcher &getFirstCondition() const override; LLTCodeGen getFirstConditionAsRootType(); bool hasFirstCondition() const override; - unsigned getNumOperands() const; StringRef getOpcode() const; // FIXME: Remove this as soon as possible @@ -1255,12 +1258,16 @@ class OperandMatcher : public PredicateListMatcher { TempTypeIdx TTIdx = 0; + // TODO: has many implications, figure them all out + bool IsVariadic = false; + public: OperandMatcher(InstructionMatcher &Insn, unsigned OpIdx, const std::string &SymbolicName, - unsigned AllocatedTemporariesBaseID) + unsigned AllocatedTemporariesBaseID, bool IsVariadic = false) : Insn(Insn), OpIdx(OpIdx), SymbolicName(SymbolicName), - AllocatedTemporariesBaseID(AllocatedTemporariesBaseID) {} + AllocatedTemporariesBaseID(AllocatedTemporariesBaseID), + IsVariadic(IsVariadic) {} bool hasSymbolicName() const { return !SymbolicName.empty(); } 
StringRef getSymbolicName() const { return SymbolicName; } @@ -1272,7 +1279,8 @@ class OperandMatcher : public PredicateListMatcher { /// Construct a new operand predicate and add it to the matcher. template std::optional addPredicate(Args &&...args) { - if (isSameAsAnotherOperand()) + // TODO: Should variadic ops support predicates? + if (isSameAsAnotherOperand() || IsVariadic) return std::nullopt; Predicates.emplace_back(std::make_unique( getInsnVarID(), getOpIdx(), std::forward(args)...)); @@ -1282,6 +1290,8 @@ class OperandMatcher : public PredicateListMatcher { unsigned getOpIdx() const { return OpIdx; } unsigned getInsnVarID() const; + bool isVariadic() const { return IsVariadic; } + /// If this OperandMatcher has not been assigned a TempTypeIdx yet, assigns it /// one and adds a `RecordRegisterType` predicate to this matcher. If one has /// already been assigned, simply returns it. @@ -1405,20 +1415,28 @@ class InstructionOpcodeMatcher : public InstructionPredicateMatcher { }; class InstructionNumOperandsMatcher final : public InstructionPredicateMatcher { +public: + enum class CheckKind { Eq, LE, GE }; + +private: unsigned NumOperands = 0; + CheckKind CK; public: - InstructionNumOperandsMatcher(unsigned InsnVarID, unsigned NumOperands) + InstructionNumOperandsMatcher(unsigned InsnVarID, unsigned NumOperands, + CheckKind CK = CheckKind::Eq) : InstructionPredicateMatcher(IPM_NumOperands, InsnVarID), - NumOperands(NumOperands) {} + NumOperands(NumOperands), CK(CK) {} static bool classof(const PredicateMatcher *P) { return P->getKind() == IPM_NumOperands; } bool isIdentical(const PredicateMatcher &B) const override { - return InstructionPredicateMatcher::isIdentical(B) && - NumOperands == cast(&B)->NumOperands; + if (!InstructionPredicateMatcher::isIdentical(B)) + return false; + const auto &Other = *cast(&B); + return NumOperands == Other.NumOperands && CK == Other.CK; } void emitPredicateOpcodes(MatchTable &Table, @@ -1729,20 +1747,32 @@ class InstructionMatcher final : public PredicateListMatcher { /// The operands to match. All rendered operands must be present even if the /// condition is always true. OperandVec Operands; - bool NumOperandsCheck = true; std::string SymbolicName; unsigned InsnVarID; + bool AllowNumOpsCheck; /// PhysRegInputs - List list has an entry for each explicitly specified /// physreg input to the pattern. The first elt is the Register node, the /// second is the recorded slot number the input pattern match saved it in. SmallVector, 2> PhysRegInputs; + bool canAddNumOperandsCheck() const { + // Add if it's allowed, and: + // - We don't have a variadic operand + // - We don't already have such a check. + return AllowNumOpsCheck && !hasVariadicMatcher() && + none_of(Predicates, [&](const auto &P) { + return P->getKind() == + InstructionPredicateMatcher::IPM_NumOperands; + }); + } + public: InstructionMatcher(RuleMatcher &Rule, StringRef SymbolicName, - bool NumOpsCheck = true) - : Rule(Rule), NumOperandsCheck(NumOpsCheck), SymbolicName(SymbolicName) { + bool AllowNumOpsCheck = true) + : Rule(Rule), SymbolicName(SymbolicName), + AllowNumOpsCheck(AllowNumOpsCheck) { // We create a new instruction matcher. // Get a new ID for that instruction. InsnVarID = Rule.implicitlyDefineInsnVar(*this); @@ -1762,7 +1792,8 @@ class InstructionMatcher final : public PredicateListMatcher { /// Add an operand to the matcher. 
OperandMatcher &addOperand(unsigned OpIdx, const std::string &SymbolicName, - unsigned AllocatedTemporariesBaseID); + unsigned AllocatedTemporariesBaseID, + bool IsVariadic = false); OperandMatcher &getOperand(unsigned OpIdx); OperandMatcher &addPhysRegInput(Record *Reg, unsigned OpIdx, unsigned TempOpIdx); @@ -1772,7 +1803,12 @@ class InstructionMatcher final : public PredicateListMatcher { } StringRef getSymbolicName() const { return SymbolicName; } - unsigned getNumOperands() const { return Operands.size(); } + + unsigned getNumOperandMatchers() const { return Operands.size(); } + bool hasVariadicMatcher() const { + return !Operands.empty() && Operands.back()->isVariadic(); + } + OperandVec::iterator operands_begin() { return Operands.begin(); } OperandVec::iterator operands_end() { return Operands.end(); } iterator_range operands() { @@ -1834,9 +1870,10 @@ class InstructionOperandMatcher : public OperandPredicateMatcher { public: InstructionOperandMatcher(unsigned InsnVarID, unsigned OpIdx, RuleMatcher &Rule, StringRef SymbolicName, - bool NumOpsCheck = true) + bool AllowNumOpsCheck = true) : OperandPredicateMatcher(OPM_Instruction, InsnVarID, OpIdx), - InsnMatcher(new InstructionMatcher(Rule, SymbolicName, NumOpsCheck)), + InsnMatcher( + new InstructionMatcher(Rule, SymbolicName, AllowNumOpsCheck)), Flags(Rule.getGISelFlags()) {} static bool classof(const PredicateMatcher *P) { @@ -1917,7 +1954,8 @@ class CopyRenderer : public OperandRenderer { static void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule, unsigned NewInsnID, unsigned OldInsnID, - unsigned OpIdx, StringRef Name); + unsigned OpIdx, StringRef Name, + bool ForVariadic = false); void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override; }; diff --git a/llvm/utils/TableGen/Common/GlobalISel/Patterns.cpp b/llvm/utils/TableGen/Common/GlobalISel/Patterns.cpp index 28a9bcc2568c34..52f7b0fcbd624e 100644 --- a/llvm/utils/TableGen/Common/GlobalISel/Patterns.cpp +++ b/llvm/utils/TableGen/Common/GlobalISel/Patterns.cpp @@ -46,6 +46,33 @@ std::optional PatternType::get(ArrayRef DiagLoc, return PT; } + if (R->isSubClassOf(VariadicClassName)) { + const int64_t Min = R->getValueAsInt("MinArgs"); + const int64_t Max = R->getValueAsInt("MaxArgs"); + + if (Min == 0) { + PrintError( + DiagLoc, + DiagCtx + + ": minimum number of arguments must be greater than zero in " + + VariadicClassName); + return std::nullopt; + } + + if (Max <= Min && Max != 0) { + PrintError(DiagLoc, DiagCtx + ": maximum number of arguments (" + + Twine(Max) + + ") must be zero, or greater " + "than the minimum number of arguments (" + + Twine(Min) + ") in " + VariadicClassName); + return std::nullopt; + } + + PatternType PT(PT_VariadicPack); + PT.Data.VPTI = {unsigned(Min), unsigned(Max)}; + return PT; + } + PrintError(DiagLoc, DiagCtx + ": unknown type '" + R->getName() + "'"); return std::nullopt; } @@ -66,6 +93,11 @@ const Record *PatternType::getLLTRecord() const { return Data.Def; } +VariadicPackTypeInfo PatternType::getVariadicPackTypeInfo() const { + assert(isVariadicPack()); + return Data.VPTI; +} + bool PatternType::operator==(const PatternType &Other) const { if (Kind != Other.Kind) return false; @@ -77,6 +109,8 @@ bool PatternType::operator==(const PatternType &Other) const { return Data.Def == Other.Data.Def; case PT_TypeOf: return Data.Str == Other.Data.Str; + case PT_VariadicPack: + return Data.VPTI == Other.Data.VPTI; } llvm_unreachable("Unknown Type Kind"); @@ -90,6 +124,10 @@ std::string PatternType::str() const { return 
Data.Def->getName().str(); case PT_TypeOf: return (TypeOfClassName + "<$" + getTypeOfOpName() + ">").str(); + case PT_VariadicPack: + return (VariadicClassName + "<" + Twine(Data.VPTI.Min) + "," + + Twine(Data.VPTI.Max) + ">") + .str(); } llvm_unreachable("Unknown type!"); @@ -525,6 +563,7 @@ bool PatFrag::checkSemantics() { case Pattern::K_CXX: continue; case Pattern::K_CodeGenInstruction: + // TODO: Allow VarArgs? if (cast(Pat.get())->diagnoseAllSpecialTypes( Def.getLoc(), PatternType::SpecialTyClassName + " is not supported in " + ClassName)) diff --git a/llvm/utils/TableGen/Common/GlobalISel/Patterns.h b/llvm/utils/TableGen/Common/GlobalISel/Patterns.h index 76d018bdbd71c4..2d25ce37ed76c4 100644 --- a/llvm/utils/TableGen/Common/GlobalISel/Patterns.h +++ b/llvm/utils/TableGen/Common/GlobalISel/Patterns.h @@ -45,21 +45,36 @@ class RuleMatcher; //===- PatternType --------------------------------------------------------===// +struct VariadicPackTypeInfo { + VariadicPackTypeInfo(unsigned Min, unsigned Max) : Min(Min), Max(Max) { + assert(Min >= 1 && (Max >= Min || Max == 0)); + } + + bool operator==(const VariadicPackTypeInfo &Other) const { + return Min == Other.Min && Max == Other.Max; + } + + unsigned Min; + unsigned Max; +}; + /// Represent the type of a Pattern Operand. /// /// Types have two form: /// - LLTs, which are straightforward. -/// - Special types, e.g. GITypeOf +/// - Special types, e.g. GITypeOf, Variadic arguments list. class PatternType { public: static constexpr StringLiteral SpecialTyClassName = "GISpecialType"; static constexpr StringLiteral TypeOfClassName = "GITypeOf"; + static constexpr StringLiteral VariadicClassName = "GIVariadic"; enum PTKind : uint8_t { PT_None, PT_ValueType, PT_TypeOf, + PT_VariadicPack, }; PatternType() : Kind(PT_None), Data() {} @@ -70,11 +85,15 @@ class PatternType { bool isNone() const { return Kind == PT_None; } bool isLLT() const { return Kind == PT_ValueType; } - bool isSpecial() const { return isTypeOf(); } + bool isSpecial() const { return isTypeOf() || isVariadicPack(); } bool isTypeOf() const { return Kind == PT_TypeOf; } + bool isVariadicPack() const { return Kind == PT_VariadicPack; } + + PTKind getKind() const { return Kind; } StringRef getTypeOfOpName() const; const Record *getLLTRecord() const; + VariadicPackTypeInfo getVariadicPackTypeInfo() const; explicit operator bool() const { return !isNone(); } @@ -95,6 +114,9 @@ class PatternType { /// PT_TypeOf -> Operand name (without the '$') StringRef Str; + + /// PT_VariadicPack -> min-max number of operands allowed. + VariadicPackTypeInfo VPTI; } Data; }; @@ -313,6 +335,8 @@ class InstructionPattern : public Pattern { InstructionOperand &getOperand(unsigned K) { return Operands[K]; } const InstructionOperand &getOperand(unsigned K) const { return Operands[K]; } + const InstructionOperand &operands_back() const { return Operands.back(); } + /// When this InstructionPattern is used as the match root, returns the /// operands that must be redefined in the 'apply' pattern for the rule to be /// valid. 
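
To make the ${args} expansion described in the MIRPatterns.rst hunk above concrete, here is a minimal sketch, not part of this patch, of the kind of C++ predicate the build_vector_to_unmerge documentation example assumes. It relies only on what the patch itself documents: a GIVariadic<> operand referenced from C++ code expands to the ArrayRef<MachineOperand> returned by getRemainingOperands() for the matched instruction. The helper name checkBuildVectorToUnmerge is taken from the docs example; its body and the "every source is a virtual register" condition are purely illustrative assumptions.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/MachineOperand.h"

using namespace llvm;

// Illustrative predicate for the build_vector_to_unmerge rule sketched in the
// MIRPatterns.rst example. ${args} expands to an ArrayRef<MachineOperand>
// covering the trailing (variadic) operands of the matched G_BUILD_VECTOR.
static bool checkBuildVectorToUnmerge(ArrayRef<MachineOperand> Args) {
  if (Args.empty())
    return false;
  for (const MachineOperand &Op : Args) {
    // Assumed example condition: only combine when every matched source is a
    // virtual register, since the rewrite would reuse the operands directly.
    if (!Op.isReg() || !Op.getReg().isVirtual())
      return false;
  }
  return true;
}

The raw form of the expansion can be seen in the match-table-variadics.td CHECK lines earlier in this patch, where the apply C++ body is emitted as getRemainingOperands(*State.MIs[0], 1); a real rule would pass that value to a predicate like the sketch above.
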
diff --git a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp index e8fbaed0f50e84..0f8f1cce817002 100644 --- a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp @@ -99,8 +99,14 @@ void declareInstExpansion(CodeExpansions &CE, const BuildMIAction &A, void declareOperandExpansion(CodeExpansions &CE, const OperandMatcher &OM, StringRef Name) { - CE.declare(Name, "State.MIs[" + to_string(OM.getInsnVarID()) + - "]->getOperand(" + to_string(OM.getOpIdx()) + ")"); + if (OM.isVariadic()) { + CE.declare(Name, "getRemainingOperands(*State.MIs[" + + to_string(OM.getInsnVarID()) + "], " + + to_string(OM.getOpIdx()) + ")"); + } else { + CE.declare(Name, "State.MIs[" + to_string(OM.getInsnVarID()) + + "]->getOperand(" + to_string(OM.getOpIdx()) + ")"); + } } void declareTempRegExpansion(CodeExpansions &CE, unsigned TempRegID, @@ -128,18 +134,6 @@ LLTCodeGen getLLTCodeGen(const PatternType &PT) { return *MVTToLLT(getValueType(PT.getLLTRecord())); } -LLTCodeGenOrTempType getLLTCodeGenOrTempType(const PatternType &PT, - RuleMatcher &RM) { - assert(!PT.isNone()); - - if (PT.isLLT()) - return getLLTCodeGen(PT); - - assert(PT.isTypeOf()); - auto &OM = RM.getOperandMatcher(PT.getTypeOfOpName()); - return OM.getTempTypeIdx(RM); -} - //===- PrettyStackTrace Helpers ------------------------------------------===// class PrettyStackTraceParse : public PrettyStackTraceEntry { @@ -668,6 +662,9 @@ class CombineRuleBuilder { return CGT.getInstruction(RuleDef.getRecords().getDef("G_CONSTANT")); } + std::optional + getLLTCodeGenOrTempType(const PatternType &PT, RuleMatcher &RM); + void PrintError(Twine Msg) const { ::PrintError(&RuleDef, Msg); } void PrintWarning(Twine Msg) const { ::PrintWarning(RuleDef.getLoc(), Msg); } void PrintNote(Twine Msg) const { ::PrintNote(RuleDef.getLoc(), Msg); } @@ -960,6 +957,24 @@ void CombineRuleBuilder::verify() const { } #endif +std::optional +CombineRuleBuilder::getLLTCodeGenOrTempType(const PatternType &PT, + RuleMatcher &RM) { + assert(!PT.isNone()); + + if (PT.isLLT()) + return getLLTCodeGen(PT); + + assert(PT.isTypeOf()); + auto &OM = RM.getOperandMatcher(PT.getTypeOfOpName()); + if (OM.isVariadic()) { + PrintError("type '" + PT.str() + "' is ill-formed: '" + + OM.getSymbolicName() + "' is a variadic pack operand"); + return std::nullopt; + } + return OM.getTempTypeIdx(RM); +} + void CombineRuleBuilder::print(raw_ostream &OS, const PatternAlternatives &Alts) const { SmallVector Strings( @@ -1079,11 +1094,18 @@ bool CombineRuleBuilder::typecheckPatterns() { // match patterns. 
for (auto &Pat : values(MatchPats)) { if (auto *IP = dyn_cast(Pat.get())) { - if (IP->diagnoseAllSpecialTypes( - RuleDef.getLoc(), PatternType::SpecialTyClassName + - " is not supported in 'match' patterns")) { - return false; + bool HasDiag = false; + for (const auto &[Idx, Op] : enumerate(IP->operands())) { + if (Op.getType().isTypeOf()) { + PrintError(PatternType::TypeOfClassName + + " is not supported in 'match' patterns"); + PrintNote("operand " + Twine(Idx) + " of '" + IP->getName() + + "' has type '" + Op.getType().str() + "'"); + HasDiag = true; + } } + if (HasDiag) + return false; } } return true; @@ -1145,6 +1167,39 @@ bool CombineRuleBuilder::buildPermutationsToEmit() { bool CombineRuleBuilder::checkSemantics() { assert(MatchRoot && "Cannot call this before findRoots()"); + const auto CheckVariadicOperands = [&](const InstructionPattern &IP, + bool IsMatch) { + bool HasVariadic = false; + for (auto &Op : IP.operands()) { + if (!Op.getType().isVariadicPack()) + continue; + + HasVariadic = true; + + if (IsMatch && &Op != &IP.operands_back()) { + PrintError("'" + IP.getInstName() + + "': " + PatternType::VariadicClassName + + " can only be used on the last operand"); + return false; + } + + if (Op.isDef()) { + PrintError("'" + IP.getInstName() + "': " + + PatternType::VariadicClassName + " cannot be used on defs"); + return false; + } + } + + if (HasVariadic && !IP.isVariadic()) { + PrintError("cannot use a " + PatternType::VariadicClassName + + " operand on non-variadic instruction '" + IP.getInstName() + + "'"); + return false; + } + + return true; + }; + bool UsesWipMatchOpcode = false; for (const auto &Match : MatchPats) { const auto *Pat = Match.second.get(); @@ -1155,17 +1210,23 @@ bool CombineRuleBuilder::checkSemantics() { continue; } - // MIFlags in match cannot use the following syntax: (MIFlags $mi) - if (const auto *CGP = dyn_cast(Pat)) { - if (auto *FI = CGP->getMIFlagsInfo()) { - if (!FI->copy_flags().empty()) { - PrintError( - "'match' patterns cannot refer to flags from other instructions"); - PrintNote("MIFlags in '" + CGP->getName() + - "' refer to: " + join(FI->copy_flags(), ", ")); - return false; + if (const auto IP = dyn_cast(Pat)) { + if (!CheckVariadicOperands(*IP, /*IsMatch=*/true)) + return false; + + // MIFlags in match cannot use the following syntax: (MIFlags $mi) + if (const auto *CGP = dyn_cast(Pat)) { + if (auto *FI = CGP->getMIFlagsInfo()) { + if (!FI->copy_flags().empty()) { + PrintError("'match' patterns cannot refer to flags from other " + "instructions"); + PrintNote("MIFlags in '" + CGP->getName() + + "' refer to: " + join(FI->copy_flags(), ", ")); + return false; + } } } + continue; } const auto *AOP = dyn_cast(Pat); @@ -1197,6 +1258,9 @@ bool CombineRuleBuilder::checkSemantics() { if (!IP) continue; + if (!CheckVariadicOperands(*IP, /*IsMatch=*/false)) + return false; + if (UsesWipMatchOpcode) { PrintError("cannot use wip_match_opcode in combination with apply " "instruction patterns!"); @@ -1839,7 +1903,7 @@ bool CombineRuleBuilder::emitCXXMatchApply(CodeExpansions &CE, RuleMatcher &M, for (auto &Pat : ApplyPats) { auto *CXXPat = cast(Pat.second.get()); CodeExpander Expander(CXXPat->getRawCode(), CE, RuleDef.getLoc(), - /*ShowExpansions=*/ false); + /*ShowExpansions=*/false); OS << LS; Expander.emit(OS); } @@ -1939,8 +2003,8 @@ bool CombineRuleBuilder::emitInstructionApplyPattern( continue; } - // Determine what we're dealing with. Are we replace a matched instruction? - // Creating a new one? + // Determine what we're dealing with. 
Are we replacing a matched + // instruction? Creating a new one? auto OpLookupRes = MatchOpTable.lookup(OpName); if (OpLookupRes.Found) { if (OpLookupRes.isLiveIn()) { @@ -1986,8 +2050,11 @@ bool CombineRuleBuilder::emitInstructionApplyPattern( declareTempRegExpansion(CE, TempRegID, OpName); // Always insert the action at the beginning, otherwise we may end up // using the temp reg before it's available. - M.insertAction( - M.actions_begin(), getLLTCodeGenOrTempType(Ty, M), TempRegID); + auto Result = getLLTCodeGenOrTempType(Ty, M); + if (!Result) + return false; + M.insertAction(M.actions_begin(), *Result, + TempRegID); } DstMI.addRenderer(TempRegID, /*IsDef=*/true); @@ -2044,16 +2111,18 @@ bool CombineRuleBuilder::emitCodeGenInstructionApplyImmOperand( } auto ImmTy = getLLTCodeGenOrTempType(Ty, M); + if (!ImmTy) + return false; if (isGConstant) { - DstMI.addRenderer(O.getImmValue(), ImmTy); + DstMI.addRenderer(O.getImmValue(), *ImmTy); return true; } unsigned TempRegID = M.allocateTempRegID(); // Ensure MakeTempReg & the BuildConstantAction occur at the beginning. auto InsertIt = M.insertAction(M.actions_begin(), - ImmTy, TempRegID); + *ImmTy, TempRegID); M.insertAction(++InsertIt, TempRegID, O.getImmValue()); DstMI.addRenderer(TempRegID); return true; @@ -2159,6 +2228,8 @@ bool CombineRuleBuilder::emitCodeGenInstructionMatchPattern( assert(RemappedO.isNamedOperand() == OriginalO.isNamedOperand() && "Cannot remap an unnamed operand to a named one!"); + const auto Ty = RemappedO.getType(); + const auto OpName = RemappedO.isNamedOperand() ? RemappedO.getOperandName().str() : ""; @@ -2170,11 +2241,41 @@ bool CombineRuleBuilder::emitCodeGenInstructionMatchPattern( // RealIdx = expected index in the MachineInstr. const unsigned RealIdx = (P.isIntrinsic() && !OriginalO.isDef()) ? (Idx + 1) : Idx; + + if (Ty.isVariadicPack() && M.hasOperand(OpName)) { + // TODO: We could add some CheckIsSameOperand opcode variant that checks + // all operands. We could also just emit a C++ code snippet lazily to do + // the check since it's probably fairly rare that we need to do it. + // + // I'm just not sure it's worth the effort at this stage. + PrintError("each instance of a " + PatternType::VariadicClassName + + " operand must have a unique name within the match patterns"); + PrintNote("'" + OpName + "' is used multiple times"); + return false; + } + OperandMatcher &OM = - IM.addOperand(RealIdx, OpName, AllocatedTemporariesBaseID++); + IM.addOperand(RealIdx, OpName, AllocatedTemporariesBaseID++, + /*IsVariadic=*/Ty.isVariadicPack()); if (!OpName.empty()) declareOperandExpansion(CE, OM, OriginalO.getOperandName()); + if (Ty.isVariadicPack()) { + // In the presence of variadics, the InstructionMatcher won't insert a + // InstructionNumOperandsMatcher implicitly, so we have to emit our own. + assert((Idx + 1) == P.operands_size() && + "VariadicPack isn't last operand!"); + auto VPTI = Ty.getVariadicPackTypeInfo(); + assert(VPTI.Min > 0 && (VPTI.Max == 0 || VPTI.Max > VPTI.Min)); + IM.addPredicate( + RealIdx + VPTI.Min, InstructionNumOperandsMatcher::CheckKind::GE); + if (VPTI.Max) { + IM.addPredicate( + RealIdx + VPTI.Max, InstructionNumOperandsMatcher::CheckKind::LE); + } + break; + } + // Handle immediates. if (RemappedO.hasImmValue()) { if (isLiteralImm(P, Idx)) @@ -2190,16 +2291,14 @@ bool CombineRuleBuilder::emitCodeGenInstructionMatchPattern( // for that Operand. "OM" here is always a new OperandMatcher. // // Always emit a check for unnamed operands. 
- if (OpName.empty() || - !M.getOperandMatcher(OpName).contains()) { - if (const auto Ty = RemappedO.getType()) { - // TODO: We could support GITypeOf here on the condition that the - // OperandMatcher exists already. Though it's clunky to make this work - // and isn't all that useful so it's just rejected in typecheckPatterns - // at this time. - assert(Ty.isLLT() && "Only LLTs are supported in match patterns!"); - OM.addPredicate(getLLTCodeGen(Ty)); - } + if (Ty && (OpName.empty() || + !M.getOperandMatcher(OpName).contains())) { + // TODO: We could support GITypeOf here on the condition that the + // OperandMatcher exists already. Though it's clunky to make this work + // and isn't all that useful so it's just rejected in typecheckPatterns + // at this time. + assert(Ty.isLLT()); + OM.addPredicate(getLLTCodeGen(Ty)); } // Stop here if the operand is a def, or if it had no name. @@ -2558,8 +2657,10 @@ GICombinerEmitter::buildMatchTable(MutableArrayRef Rules) { const Matcher *B) { auto *L = static_cast(A); auto *R = static_cast(B); - return std::make_tuple(OpcodeOrder[L->getOpcode()], L->getNumOperands()) < - std::make_tuple(OpcodeOrder[R->getOpcode()], R->getNumOperands()); + return std::make_tuple(OpcodeOrder[L->getOpcode()], + L->insnmatchers_front().getNumOperandMatchers()) < + std::make_tuple(OpcodeOrder[R->getOpcode()], + R->insnmatchers_front().getNumOperandMatchers()); }); for (Matcher *Rule : InputRules) diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp index 2ebe8f75cd6fe3..a491a049e7c812 100644 --- a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -2248,8 +2248,10 @@ GlobalISelEmitter::buildMatchTable(MutableArrayRef Rules, const Matcher *B) { auto *L = static_cast(A); auto *R = static_cast(B); - return std::tuple(OpcodeOrder[L->getOpcode()], L->getNumOperands()) < - std::tuple(OpcodeOrder[R->getOpcode()], R->getNumOperands()); + return std::tuple(OpcodeOrder[L->getOpcode()], + L->insnmatchers_front().getNumOperandMatchers()) < + std::tuple(OpcodeOrder[R->getOpcode()], + R->insnmatchers_front().getNumOperandMatchers()); }); for (Matcher *Rule : InputRules) From 04e8433165de66fa8514ef2db53d9f6dd7c244c0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 31 Jul 2024 23:25:25 -0700 Subject: [PATCH 066/114] [RISCV] Add vector bf16 load/store intrinsic tests. NFC This adds bf16 to the unit stride, strided, and index load and store intrinsics. clang already assumes these work with Zvfbfmin. 
--- llvm/test/CodeGen/RISCV/rvv/vle.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vleff.ll | 532 ++++++++++++++++++--- llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll | 194 +++++++- llvm/test/CodeGen/RISCV/rvv/vloxei.ll | 244 +++++++++- llvm/test/CodeGen/RISCV/rvv/vlse.ll | 286 ++++++++++- llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll | 194 +++++++- llvm/test/CodeGen/RISCV/rvv/vluxei.ll | 244 +++++++++- llvm/test/CodeGen/RISCV/rvv/vse.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vsoxei-rv64.ll | 186 ++++++- llvm/test/CodeGen/RISCV/rvv/vsoxei.ll | 234 ++++++++- llvm/test/CodeGen/RISCV/rvv/vsse.ll | 280 ++++++++++- llvm/test/CodeGen/RISCV/rvv/vsuxei-rv64.ll | 186 ++++++- llvm/test/CodeGen/RISCV/rvv/vsuxei.ll | 234 ++++++++- 13 files changed, 2740 insertions(+), 82 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/vle.ll b/llvm/test/CodeGen/RISCV/rvv/vle.ll index a16792235f1ba4..7591bb7358e56e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vle.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vle.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+zvfbfmin \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zvfbfmin \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vle.nxv1i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vleff.ll b/llvm/test/CodeGen/RISCV/rvv/vleff.ll index 7a8ed4153c352a..6cbe858e44ea91 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vleff.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vleff.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s --check-prefixes=CHECK,RV64 declare { , iXLen } @llvm.riscv.vleff.nxv1i64( @@ -1636,13 +1636,13 @@ entry: ret %b } -declare { , iXLen } @llvm.riscv.vleff.nxv1f16( +declare { , iXLen } @llvm.riscv.vleff.nxv1bf16( , ptr, iXLen); -define @intrinsic_vleff_v_nxv1half_nxv1f16(ptr %0, iXLen %1, iXLen* %2) nounwind { -; RV32-LABEL: intrinsic_vleff_v_nxv1half_nxv1f16: +define @intrinsic_vleff_v_nxv1half_nxv1bf16(ptr %0, iXLen %1, iXLen* %2) nounwind { +; RV32-LABEL: intrinsic_vleff_v_nxv1half_nxv1bf16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; RV32-NEXT: vle16ff.v v8, (a0) @@ -1650,7 +1650,7 @@ define @intrinsic_vleff_v_nxv1half_nxv1f16(ptr %0, iXLen %1, ; RV32-NEXT: sw a0, 0(a2) ; RV32-NEXT: ret ; -; RV64-LABEL: intrinsic_vleff_v_nxv1half_nxv1f16: +; RV64-LABEL: intrinsic_vleff_v_nxv1half_nxv1bf16: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; RV64-NEXT: vle16ff.v v8, (a0) @@ -1658,7 +1658,7 @@ define @intrinsic_vleff_v_nxv1half_nxv1f16(ptr %0, iXLen %1, ; RV64-NEXT: sd a0, 0(a2) ; RV64-NEXT: ret entry: - %a = call { , iXLen } @llvm.riscv.vleff.nxv1f16( + %a = call { , iXLen } @llvm.riscv.vleff.nxv1bf16( undef, ptr %0, 
iXLen %1) @@ -1668,15 +1668,15 @@ entry: ret %b } -declare { , iXLen } @llvm.riscv.vleff.mask.nxv1f16( +declare { , iXLen } @llvm.riscv.vleff.mask.nxv1bf16( , ptr, , iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv1half_nxv1f16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { -; RV32-LABEL: intrinsic_vleff_mask_v_nxv1half_nxv1f16: +define @intrinsic_vleff_mask_v_nxv1half_nxv1bf16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +; RV32-LABEL: intrinsic_vleff_mask_v_nxv1half_nxv1bf16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; RV32-NEXT: vle16ff.v v8, (a0), v0.t @@ -1684,7 +1684,7 @@ define @intrinsic_vleff_mask_v_nxv1half_nxv1f16( @intrinsic_vleff_mask_v_nxv1half_nxv1f16(, iXLen } @llvm.riscv.vleff.mask.nxv1f16( + %a = call { , iXLen } @llvm.riscv.vleff.mask.nxv1bf16( %0, ptr %1, %2, @@ -1704,13 +1704,13 @@ entry: ret %b } -declare { , iXLen } @llvm.riscv.vleff.nxv2f16( +declare { , iXLen } @llvm.riscv.vleff.nxv2bf16( , ptr, iXLen); -define @intrinsic_vleff_v_nxv2half_nxv2f16(ptr %0, iXLen %1, iXLen* %2) nounwind { -; RV32-LABEL: intrinsic_vleff_v_nxv2half_nxv2f16: +define @intrinsic_vleff_v_nxv2half_nxv2bf16(ptr %0, iXLen %1, iXLen* %2) nounwind { +; RV32-LABEL: intrinsic_vleff_v_nxv2half_nxv2bf16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; RV32-NEXT: vle16ff.v v8, (a0) @@ -1718,7 +1718,7 @@ define @intrinsic_vleff_v_nxv2half_nxv2f16(ptr %0, iXLen %1, ; RV32-NEXT: sw a0, 0(a2) ; RV32-NEXT: ret ; -; RV64-LABEL: intrinsic_vleff_v_nxv2half_nxv2f16: +; RV64-LABEL: intrinsic_vleff_v_nxv2half_nxv2bf16: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; RV64-NEXT: vle16ff.v v8, (a0) @@ -1726,7 +1726,7 @@ define @intrinsic_vleff_v_nxv2half_nxv2f16(ptr %0, iXLen %1, ; RV64-NEXT: sd a0, 0(a2) ; RV64-NEXT: ret entry: - %a = call { , iXLen } @llvm.riscv.vleff.nxv2f16( + %a = call { , iXLen } @llvm.riscv.vleff.nxv2bf16( undef, ptr %0, iXLen %1) @@ -1736,15 +1736,15 @@ entry: ret %b } -declare { , iXLen } @llvm.riscv.vleff.mask.nxv2f16( +declare { , iXLen } @llvm.riscv.vleff.mask.nxv2bf16( , ptr, , iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv2half_nxv2f16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { -; RV32-LABEL: intrinsic_vleff_mask_v_nxv2half_nxv2f16: +define @intrinsic_vleff_mask_v_nxv2half_nxv2bf16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +; RV32-LABEL: intrinsic_vleff_mask_v_nxv2half_nxv2bf16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; RV32-NEXT: vle16ff.v v8, (a0), v0.t @@ -1752,7 +1752,7 @@ define @intrinsic_vleff_mask_v_nxv2half_nxv2f16( @intrinsic_vleff_mask_v_nxv2half_nxv2f16(, iXLen } @llvm.riscv.vleff.mask.nxv2f16( + %a = call { , iXLen } @llvm.riscv.vleff.mask.nxv2bf16( %0, ptr %1, %2, @@ -1772,13 +1772,13 @@ entry: ret %b } -declare { , iXLen } @llvm.riscv.vleff.nxv4f16( +declare { , iXLen } @llvm.riscv.vleff.nxv4bf16( , ptr, iXLen); -define @intrinsic_vleff_v_nxv4half_nxv4f16(ptr %0, iXLen %1, iXLen* %2) nounwind { -; RV32-LABEL: intrinsic_vleff_v_nxv4half_nxv4f16: +define @intrinsic_vleff_v_nxv4half_nxv4bf16(ptr %0, iXLen %1, iXLen* %2) nounwind { +; RV32-LABEL: intrinsic_vleff_v_nxv4half_nxv4bf16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; RV32-NEXT: vle16ff.v v8, (a0) @@ -1786,7 +1786,7 @@ define @intrinsic_vleff_v_nxv4half_nxv4f16(ptr %0, iXLen %1, ; RV32-NEXT: sw a0, 0(a2) ; RV32-NEXT: ret ; -; RV64-LABEL: intrinsic_vleff_v_nxv4half_nxv4f16: +; RV64-LABEL: intrinsic_vleff_v_nxv4half_nxv4bf16: ; RV64: # %bb.0: 
# %entry ; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; RV64-NEXT: vle16ff.v v8, (a0) @@ -1794,7 +1794,7 @@ define @intrinsic_vleff_v_nxv4half_nxv4f16(ptr %0, iXLen %1, ; RV64-NEXT: sd a0, 0(a2) ; RV64-NEXT: ret entry: - %a = call { , iXLen } @llvm.riscv.vleff.nxv4f16( + %a = call { , iXLen } @llvm.riscv.vleff.nxv4bf16( undef, ptr %0, iXLen %1) @@ -1804,15 +1804,15 @@ entry: ret %b } -declare { , iXLen } @llvm.riscv.vleff.mask.nxv4f16( +declare { , iXLen } @llvm.riscv.vleff.mask.nxv4bf16( , ptr, , iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv4half_nxv4f16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { -; RV32-LABEL: intrinsic_vleff_mask_v_nxv4half_nxv4f16: +define @intrinsic_vleff_mask_v_nxv4half_nxv4bf16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +; RV32-LABEL: intrinsic_vleff_mask_v_nxv4half_nxv4bf16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; RV32-NEXT: vle16ff.v v8, (a0), v0.t @@ -1820,7 +1820,7 @@ define @intrinsic_vleff_mask_v_nxv4half_nxv4f16( @intrinsic_vleff_mask_v_nxv4half_nxv4f16(, iXLen } @llvm.riscv.vleff.mask.nxv4f16( + %a = call { , iXLen } @llvm.riscv.vleff.mask.nxv4bf16( %0, ptr %1, %2, @@ -1840,13 +1840,13 @@ entry: ret %b } -declare { , iXLen } @llvm.riscv.vleff.nxv8f16( +declare { , iXLen } @llvm.riscv.vleff.nxv8bf16( , ptr, iXLen); -define @intrinsic_vleff_v_nxv8half_nxv8f16(ptr %0, iXLen %1, iXLen* %2) nounwind { -; RV32-LABEL: intrinsic_vleff_v_nxv8half_nxv8f16: +define @intrinsic_vleff_v_nxv8half_nxv8bf16(ptr %0, iXLen %1, iXLen* %2) nounwind { +; RV32-LABEL: intrinsic_vleff_v_nxv8half_nxv8bf16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; RV32-NEXT: vle16ff.v v8, (a0) @@ -1854,7 +1854,7 @@ define @intrinsic_vleff_v_nxv8half_nxv8f16(ptr %0, iXLen %1, ; RV32-NEXT: sw a0, 0(a2) ; RV32-NEXT: ret ; -; RV64-LABEL: intrinsic_vleff_v_nxv8half_nxv8f16: +; RV64-LABEL: intrinsic_vleff_v_nxv8half_nxv8bf16: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; RV64-NEXT: vle16ff.v v8, (a0) @@ -1862,7 +1862,7 @@ define @intrinsic_vleff_v_nxv8half_nxv8f16(ptr %0, iXLen %1, ; RV64-NEXT: sd a0, 0(a2) ; RV64-NEXT: ret entry: - %a = call { , iXLen } @llvm.riscv.vleff.nxv8f16( + %a = call { , iXLen } @llvm.riscv.vleff.nxv8bf16( undef, ptr %0, iXLen %1) @@ -1872,15 +1872,15 @@ entry: ret %b } -declare { , iXLen } @llvm.riscv.vleff.mask.nxv8f16( +declare { , iXLen } @llvm.riscv.vleff.mask.nxv8bf16( , ptr, , iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv8half_nxv8f16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { -; RV32-LABEL: intrinsic_vleff_mask_v_nxv8half_nxv8f16: +define @intrinsic_vleff_mask_v_nxv8half_nxv8bf16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +; RV32-LABEL: intrinsic_vleff_mask_v_nxv8half_nxv8bf16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; RV32-NEXT: vle16ff.v v8, (a0), v0.t @@ -1888,7 +1888,7 @@ define @intrinsic_vleff_mask_v_nxv8half_nxv8f16( @intrinsic_vleff_mask_v_nxv8half_nxv8f16(, iXLen } @llvm.riscv.vleff.mask.nxv8f16( + %a = call { , iXLen } @llvm.riscv.vleff.mask.nxv8bf16( %0, ptr %1, %2, @@ -1908,13 +1908,13 @@ entry: ret %b } -declare { , iXLen } @llvm.riscv.vleff.nxv16f16( +declare { , iXLen } @llvm.riscv.vleff.nxv16bf16( , ptr, iXLen); -define @intrinsic_vleff_v_nxv16half_nxv16f16(ptr %0, iXLen %1, iXLen* %2) nounwind { -; RV32-LABEL: intrinsic_vleff_v_nxv16half_nxv16f16: +define @intrinsic_vleff_v_nxv16half_nxv16bf16(ptr %0, iXLen %1, iXLen* %2) nounwind { +; RV32-LABEL: intrinsic_vleff_v_nxv16half_nxv16bf16: ; RV32: 
# %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; RV32-NEXT: vle16ff.v v8, (a0) @@ -1922,7 +1922,7 @@ define @intrinsic_vleff_v_nxv16half_nxv16f16(ptr %0, iXLen ; RV32-NEXT: sw a0, 0(a2) ; RV32-NEXT: ret ; -; RV64-LABEL: intrinsic_vleff_v_nxv16half_nxv16f16: +; RV64-LABEL: intrinsic_vleff_v_nxv16half_nxv16bf16: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; RV64-NEXT: vle16ff.v v8, (a0) @@ -1930,7 +1930,7 @@ define @intrinsic_vleff_v_nxv16half_nxv16f16(ptr %0, iXLen ; RV64-NEXT: sd a0, 0(a2) ; RV64-NEXT: ret entry: - %a = call { , iXLen } @llvm.riscv.vleff.nxv16f16( + %a = call { , iXLen } @llvm.riscv.vleff.nxv16bf16( undef, ptr %0, iXLen %1) @@ -1940,15 +1940,15 @@ entry: ret %b } -declare { , iXLen } @llvm.riscv.vleff.mask.nxv16f16( +declare { , iXLen } @llvm.riscv.vleff.mask.nxv16bf16( , ptr, , iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv16half_nxv16f16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { -; RV32-LABEL: intrinsic_vleff_mask_v_nxv16half_nxv16f16: +define @intrinsic_vleff_mask_v_nxv16half_nxv16bf16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +; RV32-LABEL: intrinsic_vleff_mask_v_nxv16half_nxv16bf16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; RV32-NEXT: vle16ff.v v8, (a0), v0.t @@ -1956,7 +1956,7 @@ define @intrinsic_vleff_mask_v_nxv16half_nxv16f16( @intrinsic_vleff_mask_v_nxv16half_nxv16f16(, iXLen } @llvm.riscv.vleff.mask.nxv16f16( + %a = call { , iXLen } @llvm.riscv.vleff.mask.nxv16bf16( %0, ptr %1, %2, @@ -1976,13 +1976,13 @@ entry: ret %b } -declare { , iXLen } @llvm.riscv.vleff.nxv32f16( +declare { , iXLen } @llvm.riscv.vleff.nxv32bf16( , ptr, iXLen); -define @intrinsic_vleff_v_nxv32half_nxv32f16(ptr %0, iXLen %1, iXLen* %2) nounwind { -; RV32-LABEL: intrinsic_vleff_v_nxv32half_nxv32f16: +define @intrinsic_vleff_v_nxv32half_nxv32bf16(ptr %0, iXLen %1, iXLen* %2) nounwind { +; RV32-LABEL: intrinsic_vleff_v_nxv32half_nxv32bf16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; RV32-NEXT: vle16ff.v v8, (a0) @@ -1990,7 +1990,7 @@ define @intrinsic_vleff_v_nxv32half_nxv32f16(ptr %0, iXLen ; RV32-NEXT: sw a0, 0(a2) ; RV32-NEXT: ret ; -; RV64-LABEL: intrinsic_vleff_v_nxv32half_nxv32f16: +; RV64-LABEL: intrinsic_vleff_v_nxv32half_nxv32bf16: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; RV64-NEXT: vle16ff.v v8, (a0) @@ -1998,7 +1998,7 @@ define @intrinsic_vleff_v_nxv32half_nxv32f16(ptr %0, iXLen ; RV64-NEXT: sd a0, 0(a2) ; RV64-NEXT: ret entry: - %a = call { , iXLen } @llvm.riscv.vleff.nxv32f16( + %a = call { , iXLen } @llvm.riscv.vleff.nxv32bf16( undef, ptr %0, iXLen %1) @@ -2008,15 +2008,15 @@ entry: ret %b } -declare { , iXLen } @llvm.riscv.vleff.mask.nxv32f16( +declare { , iXLen } @llvm.riscv.vleff.mask.nxv32bf16( , ptr, , iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv32half_nxv32f16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { -; RV32-LABEL: intrinsic_vleff_mask_v_nxv32half_nxv32f16: +define @intrinsic_vleff_mask_v_nxv32half_nxv32bf16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +; RV32-LABEL: intrinsic_vleff_mask_v_nxv32half_nxv32bf16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; RV32-NEXT: vle16ff.v v8, (a0), v0.t @@ -2024,7 +2024,7 @@ define @intrinsic_vleff_mask_v_nxv32half_nxv32f16( @intrinsic_vleff_mask_v_nxv32half_nxv32f16(, iXLen } @llvm.riscv.vleff.mask.nxv32f16( + %a = call { , iXLen } @llvm.riscv.vleff.mask.nxv32bf16( %0, ptr %1, %2, @@ -2044,6 +2044,414 @@ entry: ret %b } +declare { 
, iXLen } @llvm.riscv.vleff.nxv1f16( + , + ptr, + iXLen); + +define @intrinsic_vleff_v_nxv1bfloat_nxv1f16(ptr %0, iXLen %1, iXLen* %2) nounwind { +; RV32-LABEL: intrinsic_vleff_v_nxv1bfloat_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; RV32-NEXT: vle16ff.v v8, (a0) +; RV32-NEXT: csrr a0, vl +; RV32-NEXT: sw a0, 0(a2) +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vleff_v_nxv1bfloat_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; RV64-NEXT: vle16ff.v v8, (a0) +; RV64-NEXT: csrr a0, vl +; RV64-NEXT: sd a0, 0(a2) +; RV64-NEXT: ret +entry: + %a = call { , iXLen } @llvm.riscv.vleff.nxv1f16( + undef, + ptr %0, + iXLen %1) + %b = extractvalue { , iXLen } %a, 0 + %c = extractvalue { , iXLen } %a, 1 + store iXLen %c, iXLen* %2 + ret %b +} + +declare { , iXLen } @llvm.riscv.vleff.mask.nxv1f16( + , + ptr, + , + iXLen, + iXLen); + +define @intrinsic_vleff_mask_v_nxv1bfloat_nxv1f16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +; RV32-LABEL: intrinsic_vleff_mask_v_nxv1bfloat_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; RV32-NEXT: vle16ff.v v8, (a0), v0.t +; RV32-NEXT: csrr a0, vl +; RV32-NEXT: sw a0, 0(a2) +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vleff_mask_v_nxv1bfloat_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; RV64-NEXT: vle16ff.v v8, (a0), v0.t +; RV64-NEXT: csrr a0, vl +; RV64-NEXT: sd a0, 0(a2) +; RV64-NEXT: ret +entry: + %a = call { , iXLen } @llvm.riscv.vleff.mask.nxv1f16( + %0, + ptr %1, + %2, + iXLen %3, iXLen 1) + %b = extractvalue { , iXLen } %a, 0 + %c = extractvalue { , iXLen } %a, 1 + store iXLen %c, iXLen* %4 + + ret %b +} + +declare { , iXLen } @llvm.riscv.vleff.nxv2f16( + , + ptr, + iXLen); + +define @intrinsic_vleff_v_nxv2bfloat_nxv2f16(ptr %0, iXLen %1, iXLen* %2) nounwind { +; RV32-LABEL: intrinsic_vleff_v_nxv2bfloat_nxv2f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; RV32-NEXT: vle16ff.v v8, (a0) +; RV32-NEXT: csrr a0, vl +; RV32-NEXT: sw a0, 0(a2) +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vleff_v_nxv2bfloat_nxv2f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; RV64-NEXT: vle16ff.v v8, (a0) +; RV64-NEXT: csrr a0, vl +; RV64-NEXT: sd a0, 0(a2) +; RV64-NEXT: ret +entry: + %a = call { , iXLen } @llvm.riscv.vleff.nxv2f16( + undef, + ptr %0, + iXLen %1) + %b = extractvalue { , iXLen } %a, 0 + %c = extractvalue { , iXLen } %a, 1 + store iXLen %c, iXLen* %2 + ret %b +} + +declare { , iXLen } @llvm.riscv.vleff.mask.nxv2f16( + , + ptr, + , + iXLen, + iXLen); + +define @intrinsic_vleff_mask_v_nxv2bfloat_nxv2f16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +; RV32-LABEL: intrinsic_vleff_mask_v_nxv2bfloat_nxv2f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; RV32-NEXT: vle16ff.v v8, (a0), v0.t +; RV32-NEXT: csrr a0, vl +; RV32-NEXT: sw a0, 0(a2) +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vleff_mask_v_nxv2bfloat_nxv2f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; RV64-NEXT: vle16ff.v v8, (a0), v0.t +; RV64-NEXT: csrr a0, vl +; RV64-NEXT: sd a0, 0(a2) +; RV64-NEXT: ret +entry: + %a = call { , iXLen } @llvm.riscv.vleff.mask.nxv2f16( + %0, + ptr %1, + %2, + iXLen %3, iXLen 1) + %b = extractvalue { , iXLen } %a, 0 + %c = extractvalue { , iXLen } %a, 1 + store iXLen %c, iXLen* %4 + + ret %b +} + +declare { , iXLen } @llvm.riscv.vleff.nxv4f16( + , + ptr, + iXLen); + +define 
@intrinsic_vleff_v_nxv4bfloat_nxv4f16(ptr %0, iXLen %1, iXLen* %2) nounwind { +; RV32-LABEL: intrinsic_vleff_v_nxv4bfloat_nxv4f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; RV32-NEXT: vle16ff.v v8, (a0) +; RV32-NEXT: csrr a0, vl +; RV32-NEXT: sw a0, 0(a2) +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vleff_v_nxv4bfloat_nxv4f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; RV64-NEXT: vle16ff.v v8, (a0) +; RV64-NEXT: csrr a0, vl +; RV64-NEXT: sd a0, 0(a2) +; RV64-NEXT: ret +entry: + %a = call { , iXLen } @llvm.riscv.vleff.nxv4f16( + undef, + ptr %0, + iXLen %1) + %b = extractvalue { , iXLen } %a, 0 + %c = extractvalue { , iXLen } %a, 1 + store iXLen %c, iXLen* %2 + ret %b +} + +declare { , iXLen } @llvm.riscv.vleff.mask.nxv4f16( + , + ptr, + , + iXLen, + iXLen); + +define @intrinsic_vleff_mask_v_nxv4bfloat_nxv4f16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +; RV32-LABEL: intrinsic_vleff_mask_v_nxv4bfloat_nxv4f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; RV32-NEXT: vle16ff.v v8, (a0), v0.t +; RV32-NEXT: csrr a0, vl +; RV32-NEXT: sw a0, 0(a2) +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vleff_mask_v_nxv4bfloat_nxv4f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; RV64-NEXT: vle16ff.v v8, (a0), v0.t +; RV64-NEXT: csrr a0, vl +; RV64-NEXT: sd a0, 0(a2) +; RV64-NEXT: ret +entry: + %a = call { , iXLen } @llvm.riscv.vleff.mask.nxv4f16( + %0, + ptr %1, + %2, + iXLen %3, iXLen 1) + %b = extractvalue { , iXLen } %a, 0 + %c = extractvalue { , iXLen } %a, 1 + store iXLen %c, iXLen* %4 + + ret %b +} + +declare { , iXLen } @llvm.riscv.vleff.nxv8f16( + , + ptr, + iXLen); + +define @intrinsic_vleff_v_nxv8bfloat_nxv8f16(ptr %0, iXLen %1, iXLen* %2) nounwind { +; RV32-LABEL: intrinsic_vleff_v_nxv8bfloat_nxv8f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vle16ff.v v8, (a0) +; RV32-NEXT: csrr a0, vl +; RV32-NEXT: sw a0, 0(a2) +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vleff_v_nxv8bfloat_nxv8f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vle16ff.v v8, (a0) +; RV64-NEXT: csrr a0, vl +; RV64-NEXT: sd a0, 0(a2) +; RV64-NEXT: ret +entry: + %a = call { , iXLen } @llvm.riscv.vleff.nxv8f16( + undef, + ptr %0, + iXLen %1) + %b = extractvalue { , iXLen } %a, 0 + %c = extractvalue { , iXLen } %a, 1 + store iXLen %c, iXLen* %2 + ret %b +} + +declare { , iXLen } @llvm.riscv.vleff.mask.nxv8f16( + , + ptr, + , + iXLen, + iXLen); + +define @intrinsic_vleff_mask_v_nxv8bfloat_nxv8f16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +; RV32-LABEL: intrinsic_vleff_mask_v_nxv8bfloat_nxv8f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, mu +; RV32-NEXT: vle16ff.v v8, (a0), v0.t +; RV32-NEXT: csrr a0, vl +; RV32-NEXT: sw a0, 0(a2) +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vleff_mask_v_nxv8bfloat_nxv8f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, mu +; RV64-NEXT: vle16ff.v v8, (a0), v0.t +; RV64-NEXT: csrr a0, vl +; RV64-NEXT: sd a0, 0(a2) +; RV64-NEXT: ret +entry: + %a = call { , iXLen } @llvm.riscv.vleff.mask.nxv8f16( + %0, + ptr %1, + %2, + iXLen %3, iXLen 1) + %b = extractvalue { , iXLen } %a, 0 + %c = extractvalue { , iXLen } %a, 1 + store iXLen %c, iXLen* %4 + + ret %b +} + +declare { , iXLen } @llvm.riscv.vleff.nxv16f16( + , + ptr, + iXLen); + +define @intrinsic_vleff_v_nxv16bfloat_nxv16f16(ptr %0, iXLen %1, iXLen* %2) nounwind { 
+; RV32-LABEL: intrinsic_vleff_v_nxv16bfloat_nxv16f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; RV32-NEXT: vle16ff.v v8, (a0) +; RV32-NEXT: csrr a0, vl +; RV32-NEXT: sw a0, 0(a2) +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vleff_v_nxv16bfloat_nxv16f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; RV64-NEXT: vle16ff.v v8, (a0) +; RV64-NEXT: csrr a0, vl +; RV64-NEXT: sd a0, 0(a2) +; RV64-NEXT: ret +entry: + %a = call { , iXLen } @llvm.riscv.vleff.nxv16f16( + undef, + ptr %0, + iXLen %1) + %b = extractvalue { , iXLen } %a, 0 + %c = extractvalue { , iXLen } %a, 1 + store iXLen %c, iXLen* %2 + ret %b +} + +declare { , iXLen } @llvm.riscv.vleff.mask.nxv16f16( + , + ptr, + , + iXLen, + iXLen); + +define @intrinsic_vleff_mask_v_nxv16bfloat_nxv16f16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +; RV32-LABEL: intrinsic_vleff_mask_v_nxv16bfloat_nxv16f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; RV32-NEXT: vle16ff.v v8, (a0), v0.t +; RV32-NEXT: csrr a0, vl +; RV32-NEXT: sw a0, 0(a2) +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vleff_mask_v_nxv16bfloat_nxv16f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; RV64-NEXT: vle16ff.v v8, (a0), v0.t +; RV64-NEXT: csrr a0, vl +; RV64-NEXT: sd a0, 0(a2) +; RV64-NEXT: ret +entry: + %a = call { , iXLen } @llvm.riscv.vleff.mask.nxv16f16( + %0, + ptr %1, + %2, + iXLen %3, iXLen 1) + %b = extractvalue { , iXLen } %a, 0 + %c = extractvalue { , iXLen } %a, 1 + store iXLen %c, iXLen* %4 + + ret %b +} + +declare { , iXLen } @llvm.riscv.vleff.nxv32f16( + , + ptr, + iXLen); + +define @intrinsic_vleff_v_nxv32bfloat_nxv32f16(ptr %0, iXLen %1, iXLen* %2) nounwind { +; RV32-LABEL: intrinsic_vleff_v_nxv32bfloat_nxv32f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; RV32-NEXT: vle16ff.v v8, (a0) +; RV32-NEXT: csrr a0, vl +; RV32-NEXT: sw a0, 0(a2) +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vleff_v_nxv32bfloat_nxv32f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; RV64-NEXT: vle16ff.v v8, (a0) +; RV64-NEXT: csrr a0, vl +; RV64-NEXT: sd a0, 0(a2) +; RV64-NEXT: ret +entry: + %a = call { , iXLen } @llvm.riscv.vleff.nxv32f16( + undef, + ptr %0, + iXLen %1) + %b = extractvalue { , iXLen } %a, 0 + %c = extractvalue { , iXLen } %a, 1 + store iXLen %c, iXLen* %2 + ret %b +} + +declare { , iXLen } @llvm.riscv.vleff.mask.nxv32f16( + , + ptr, + , + iXLen, + iXLen); + +define @intrinsic_vleff_mask_v_nxv32bfloat_nxv32f16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +; RV32-LABEL: intrinsic_vleff_mask_v_nxv32bfloat_nxv32f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; RV32-NEXT: vle16ff.v v8, (a0), v0.t +; RV32-NEXT: csrr a0, vl +; RV32-NEXT: sw a0, 0(a2) +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vleff_mask_v_nxv32bfloat_nxv32f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; RV64-NEXT: vle16ff.v v8, (a0), v0.t +; RV64-NEXT: csrr a0, vl +; RV64-NEXT: sd a0, 0(a2) +; RV64-NEXT: ret +entry: + %a = call { , iXLen } @llvm.riscv.vleff.mask.nxv32f16( + %0, + ptr %1, + %2, + iXLen %3, iXLen 1) + %b = extractvalue { , iXLen } %a, 0 + %c = extractvalue { , iXLen } %a, 1 + store iXLen %c, iXLen* %4 + + ret %b +} + declare { , iXLen } @llvm.riscv.vleff.nxv1i8( , ptr, diff --git a/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll index c56ee04fb6f605..0578248c6f72dd 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+zvfh,+f,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zvfh,+zvfbfmin -verify-machineinstrs \ ; RUN: < %s | FileCheck %s ; The intrinsics are not supported with RV32. @@ -960,6 +960,198 @@ entry: ret %a } +declare @llvm.riscv.vloxei.nxv1bf16.nxv1i64( + , + ptr, + , + i64); + +define @intrinsic_vloxei_v_nxv1bf16_nxv1bf16_nxv1i64(ptr %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1bf16_nxv1bf16_nxv1i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vloxei64.v v9, (a0), v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vloxei.nxv1bf16.nxv1i64( + undef, + ptr %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vloxei.mask.nxv1bf16.nxv1i64( + , + ptr, + , + , + i64, + i64); + +define @intrinsic_vloxei_mask_v_nxv1bf16_nxv1bf16_nxv1i64( %0, ptr %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1bf16_nxv1bf16_nxv1i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vloxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vloxei.mask.nxv1bf16.nxv1i64( + %0, + ptr %1, + %2, + %3, + i64 %4, i64 1) + + ret %a +} + +declare @llvm.riscv.vloxei.nxv2bf16.nxv2i64( + , + ptr, + , + i64); + +define @intrinsic_vloxei_v_nxv2bf16_nxv2bf16_nxv2i64(ptr %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2bf16_nxv2bf16_nxv2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vloxei64.v v10, (a0), v8 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vloxei.nxv2bf16.nxv2i64( + undef, + ptr %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vloxei.mask.nxv2bf16.nxv2i64( + , + ptr, + , + , + i64, + i64); + +define @intrinsic_vloxei_mask_v_nxv2bf16_nxv2bf16_nxv2i64( %0, ptr %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2bf16_nxv2bf16_nxv2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vloxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vloxei.mask.nxv2bf16.nxv2i64( + %0, + ptr %1, + %2, + %3, + i64 %4, i64 1) + + ret %a +} + +declare @llvm.riscv.vloxei.nxv4bf16.nxv4i64( + , + ptr, + , + i64); + +define @intrinsic_vloxei_v_nxv4bf16_nxv4bf16_nxv4i64(ptr %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4bf16_nxv4bf16_nxv4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vloxei64.v v12, (a0), v8 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vloxei.nxv4bf16.nxv4i64( + undef, + ptr %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vloxei.mask.nxv4bf16.nxv4i64( + , + ptr, + , + , + i64, + i64); + +define @intrinsic_vloxei_mask_v_nxv4bf16_nxv4bf16_nxv4i64( %0, ptr %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4bf16_nxv4bf16_nxv4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vloxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vloxei.mask.nxv4bf16.nxv4i64( + %0, + ptr %1, + %2, + %3, + i64 %4, i64 1) + + ret %a +} + +declare @llvm.riscv.vloxei.nxv8bf16.nxv8i64( + , + ptr, 
+ , + i64); + +define @intrinsic_vloxei_v_nxv8bf16_nxv8bf16_nxv8i64(ptr %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8bf16_nxv8bf16_nxv8i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vloxei64.v v16, (a0), v8 +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vloxei.nxv8bf16.nxv8i64( + undef, + ptr %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vloxei.mask.nxv8bf16.nxv8i64( + , + ptr, + , + , + i64, + i64); + +define @intrinsic_vloxei_mask_v_nxv8bf16_nxv8bf16_nxv8i64( %0, ptr %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8bf16_nxv8bf16_nxv8i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT: vloxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vloxei.mask.nxv8bf16.nxv8i64( + %0, + ptr %1, + %2, + %3, + i64 %4, i64 1) + + ret %a +} + declare @llvm.riscv.vloxei.nxv1f32.nxv1i64( , ptr, diff --git a/llvm/test/CodeGen/RISCV/rvv/vloxei.ll b/llvm/test/CodeGen/RISCV/rvv/vloxei.ll index 8f0141526a62be..9126b44caf99fe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vloxei.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vloxei.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vloxei.nxv1i8.nxv1i32( @@ -4631,6 +4631,246 @@ entry: ret %a } +declare @llvm.riscv.vloxei.nxv1bf16.nxv1i32( + , + ptr, + , + iXLen); + +define @intrinsic_vloxei_v_nxv1bf16_nxv1bf16_nxv1i32(ptr %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1bf16_nxv1bf16_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vloxei32.v v9, (a0), v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vloxei.nxv1bf16.nxv1i32( + undef, + ptr %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.vloxei.mask.nxv1bf16.nxv1i32( + , + ptr, + , + , + iXLen, + iXLen); + +define @intrinsic_vloxei_mask_v_nxv1bf16_nxv1bf16_nxv1i32( %0, ptr %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1bf16_nxv1bf16_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vloxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vloxei.mask.nxv1bf16.nxv1i32( + %0, + ptr %1, + %2, + %3, + iXLen %4, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vloxei.nxv2bf16.nxv2i32( + , + ptr, + , + iXLen); + +define @intrinsic_vloxei_v_nxv2bf16_nxv2bf16_nxv2i32(ptr %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2bf16_nxv2bf16_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vloxei32.v v9, (a0), v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vloxei.nxv2bf16.nxv2i32( + undef, + ptr %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.vloxei.mask.nxv2bf16.nxv2i32( + , + ptr, + , + , + iXLen, + iXLen); + +define @intrinsic_vloxei_mask_v_nxv2bf16_nxv2bf16_nxv2i32( %0, ptr %1, %2, %3, iXLen %4) 
nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2bf16_nxv2bf16_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vloxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vloxei.mask.nxv2bf16.nxv2i32( + %0, + ptr %1, + %2, + %3, + iXLen %4, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vloxei.nxv4bf16.nxv4i32( + , + ptr, + , + iXLen); + +define @intrinsic_vloxei_v_nxv4bf16_nxv4bf16_nxv4i32(ptr %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4bf16_nxv4bf16_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vloxei32.v v10, (a0), v8 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vloxei.nxv4bf16.nxv4i32( + undef, + ptr %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.vloxei.mask.nxv4bf16.nxv4i32( + , + ptr, + , + , + iXLen, + iXLen); + +define @intrinsic_vloxei_mask_v_nxv4bf16_nxv4bf16_nxv4i32( %0, ptr %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4bf16_nxv4bf16_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vloxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vloxei.mask.nxv4bf16.nxv4i32( + %0, + ptr %1, + %2, + %3, + iXLen %4, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vloxei.nxv8bf16.nxv8i32( + , + ptr, + , + iXLen); + +define @intrinsic_vloxei_v_nxv8bf16_nxv8bf16_nxv8i32(ptr %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8bf16_nxv8bf16_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vloxei32.v v12, (a0), v8 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vloxei.nxv8bf16.nxv8i32( + undef, + ptr %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.vloxei.mask.nxv8bf16.nxv8i32( + , + ptr, + , + , + iXLen, + iXLen); + +define @intrinsic_vloxei_mask_v_nxv8bf16_nxv8bf16_nxv8i32( %0, ptr %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8bf16_nxv8bf16_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT: vloxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vloxei.mask.nxv8bf16.nxv8i32( + %0, + ptr %1, + %2, + %3, + iXLen %4, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vloxei.nxv16bf16.nxv16i32( + , + ptr, + , + iXLen); + +define @intrinsic_vloxei_v_nxv16bf16_nxv16bf16_nxv16i32(ptr %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv16bf16_nxv16bf16_nxv16i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vloxei32.v v16, (a0), v8 +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vloxei.nxv16bf16.nxv16i32( + undef, + ptr %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.vloxei.mask.nxv16bf16.nxv16i32( + , + ptr, + , + , + iXLen, + iXLen); + +define @intrinsic_vloxei_mask_v_nxv16bf16_nxv16bf16_nxv16i32( %0, ptr %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv16bf16_nxv16bf16_nxv16i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT: vloxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vloxei.mask.nxv16bf16.nxv16i32( + %0, + ptr %1, + %2, + %3, + iXLen %4, iXLen 1) + + ret %a +} + declare @llvm.riscv.vloxei.nxv1f32.nxv1i8( , ptr, diff --git a/llvm/test/CodeGen/RISCV/rvv/vlse.ll 
b/llvm/test/CodeGen/RISCV/rvv/vlse.ll index 3b191a7f8bb809..7f8f2d61e8b76b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vlse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vlse.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vlse.nxv1i64( @@ -1414,6 +1414,288 @@ entry: ret %a } +declare @llvm.riscv.vlse.nxv1bf16( + , + ptr, + iXLen, + iXLen); + +define @intrinsic_vlse_v_nxv1bf16_nxv1bf16(ptr %0, iXLen %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vlse_v_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vlse.nxv1bf16( + undef, + ptr %0, + iXLen %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.vlse.mask.nxv1bf16( + , + ptr, + iXLen, + , + iXLen, + iXLen); + +define @intrinsic_vlse_mask_v_nxv1bf16_nxv1bf16( %0, ptr %1, iXLen %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vlse_mask_v_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, mu +; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vlse.mask.nxv1bf16( + %0, + ptr %1, + iXLen %2, + %3, + iXLen %4, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vlse.nxv2bf16( + , + ptr, + iXLen, + iXLen); + +define @intrinsic_vlse_v_nxv2bf16_nxv2bf16(ptr %0, iXLen %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vlse_v_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vlse.nxv2bf16( + undef, + ptr %0, + iXLen %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.vlse.mask.nxv2bf16( + , + ptr, + iXLen, + , + iXLen, + iXLen); + +define @intrinsic_vlse_mask_v_nxv2bf16_nxv2bf16( %0, ptr %1, iXLen %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vlse_mask_v_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu +; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vlse.mask.nxv2bf16( + %0, + ptr %1, + iXLen %2, + %3, + iXLen %4, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vlse.nxv4bf16( + , + ptr, + iXLen, + iXLen); + +define @intrinsic_vlse_v_nxv4bf16_nxv4bf16(ptr %0, iXLen %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vlse_v_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vlse.nxv4bf16( + undef, + ptr %0, + iXLen %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.vlse.mask.nxv4bf16( + , + ptr, + iXLen, + , + iXLen, + iXLen); + +define @intrinsic_vlse_mask_v_nxv4bf16_nxv4bf16( %0, ptr %1, iXLen %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vlse_mask_v_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vlse.mask.nxv4bf16( + %0, + ptr 
%1, + iXLen %2, + %3, + iXLen %4, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vlse.nxv8bf16( + , + ptr, + iXLen, + iXLen); + +define @intrinsic_vlse_v_nxv8bf16_nxv8bf16(ptr %0, iXLen %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vlse_v_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vlse.nxv8bf16( + undef, + ptr %0, + iXLen %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.vlse.mask.nxv8bf16( + , + ptr, + iXLen, + , + iXLen, + iXLen); + +define @intrinsic_vlse_mask_v_nxv8bf16_nxv8bf16( %0, ptr %1, iXLen %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vlse_mask_v_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, mu +; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vlse.mask.nxv8bf16( + %0, + ptr %1, + iXLen %2, + %3, + iXLen %4, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vlse.nxv16bf16( + , + ptr, + iXLen, + iXLen); + +define @intrinsic_vlse_v_nxv16bf16_nxv16bf16(ptr %0, iXLen %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vlse_v_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vlse.nxv16bf16( + undef, + ptr %0, + iXLen %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.vlse.mask.nxv16bf16( + , + ptr, + iXLen, + , + iXLen, + iXLen); + +define @intrinsic_vlse_mask_v_nxv16bf16_nxv16bf16( %0, ptr %1, iXLen %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vlse_mask_v_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, mu +; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vlse.mask.nxv16bf16( + %0, + ptr %1, + iXLen %2, + %3, + iXLen %4, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vlse.nxv32bf16( + , + ptr, + iXLen, + iXLen); + +define @intrinsic_vlse_v_nxv32bf16_nxv32bf16(ptr %0, iXLen %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vlse_v_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vlse.nxv32bf16( + undef, + ptr %0, + iXLen %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.vlse.mask.nxv32bf16( + , + ptr, + iXLen, + , + iXLen, + iXLen); + +define @intrinsic_vlse_mask_v_nxv32bf16_nxv32bf16( %0, ptr %1, iXLen %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vlse_mask_v_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, mu +; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vlse.mask.nxv32bf16( + %0, + ptr %1, + iXLen %2, + %3, + iXLen %4, iXLen 1) + + ret %a +} + declare @llvm.riscv.vlse.nxv1i8( , ptr, diff --git a/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll index 93c821b5357c82..d908bd8b4d9504 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+zvfh,+f,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs \ ; RUN: < %s | FileCheck %s ; The intrinsics are not supported with RV32. 
@@ -960,6 +960,198 @@ entry: ret %a } +declare @llvm.riscv.vluxei.nxv1bf16.nxv1i64( + , + ptr, + , + i64); + +define @intrinsic_vluxei_v_nxv1bf16_nxv1bf16_nxv1i64(ptr %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1bf16_nxv1bf16_nxv1i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vluxei64.v v9, (a0), v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vluxei.nxv1bf16.nxv1i64( + undef, + ptr %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vluxei.mask.nxv1bf16.nxv1i64( + , + ptr, + , + , + i64, + i64); + +define @intrinsic_vluxei_mask_v_nxv1bf16_nxv1bf16_nxv1i64( %0, ptr %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1bf16_nxv1bf16_nxv1i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vluxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vluxei.mask.nxv1bf16.nxv1i64( + %0, + ptr %1, + %2, + %3, + i64 %4, i64 1) + + ret %a +} + +declare @llvm.riscv.vluxei.nxv2bf16.nxv2i64( + , + ptr, + , + i64); + +define @intrinsic_vluxei_v_nxv2bf16_nxv2bf16_nxv2i64(ptr %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2bf16_nxv2bf16_nxv2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vluxei64.v v10, (a0), v8 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vluxei.nxv2bf16.nxv2i64( + undef, + ptr %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vluxei.mask.nxv2bf16.nxv2i64( + , + ptr, + , + , + i64, + i64); + +define @intrinsic_vluxei_mask_v_nxv2bf16_nxv2bf16_nxv2i64( %0, ptr %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2bf16_nxv2bf16_nxv2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vluxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vluxei.mask.nxv2bf16.nxv2i64( + %0, + ptr %1, + %2, + %3, + i64 %4, i64 1) + + ret %a +} + +declare @llvm.riscv.vluxei.nxv4bf16.nxv4i64( + , + ptr, + , + i64); + +define @intrinsic_vluxei_v_nxv4bf16_nxv4bf16_nxv4i64(ptr %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4bf16_nxv4bf16_nxv4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vluxei64.v v12, (a0), v8 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vluxei.nxv4bf16.nxv4i64( + undef, + ptr %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vluxei.mask.nxv4bf16.nxv4i64( + , + ptr, + , + , + i64, + i64); + +define @intrinsic_vluxei_mask_v_nxv4bf16_nxv4bf16_nxv4i64( %0, ptr %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4bf16_nxv4bf16_nxv4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vluxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vluxei.mask.nxv4bf16.nxv4i64( + %0, + ptr %1, + %2, + %3, + i64 %4, i64 1) + + ret %a +} + +declare @llvm.riscv.vluxei.nxv8bf16.nxv8i64( + , + ptr, + , + i64); + +define @intrinsic_vluxei_v_nxv8bf16_nxv8bf16_nxv8i64(ptr %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8bf16_nxv8bf16_nxv8i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vluxei64.v v16, (a0), v8 +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vluxei.nxv8bf16.nxv8i64( + undef, + ptr %0, + %1, + i64 %2) + + 
ret %a +} + +declare @llvm.riscv.vluxei.mask.nxv8bf16.nxv8i64( + , + ptr, + , + , + i64, + i64); + +define @intrinsic_vluxei_mask_v_nxv8bf16_nxv8bf16_nxv8i64( %0, ptr %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8bf16_nxv8bf16_nxv8i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT: vluxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vluxei.mask.nxv8bf16.nxv8i64( + %0, + ptr %1, + %2, + %3, + i64 %4, i64 1) + + ret %a +} + declare @llvm.riscv.vluxei.nxv1f32.nxv1i64( , ptr, diff --git a/llvm/test/CodeGen/RISCV/rvv/vluxei.ll b/llvm/test/CodeGen/RISCV/rvv/vluxei.ll index 679726fe666952..565ede979d12c3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vluxei.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vluxei.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vluxei.nxv1i8.nxv1i32( @@ -1151,6 +1151,246 @@ entry: ret %a } +declare @llvm.riscv.vluxei.nxv1bf16.nxv1i32( + , + ptr, + , + iXLen); + +define @intrinsic_vluxei_v_nxv1bf16_nxv1bf16_nxv1i32(ptr %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1bf16_nxv1bf16_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vluxei32.v v9, (a0), v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vluxei.nxv1bf16.nxv1i32( + undef, + ptr %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.vluxei.mask.nxv1bf16.nxv1i32( + , + ptr, + , + , + iXLen, + iXLen); + +define @intrinsic_vluxei_mask_v_nxv1bf16_nxv1bf16_nxv1i32( %0, ptr %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1bf16_nxv1bf16_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vluxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vluxei.mask.nxv1bf16.nxv1i32( + %0, + ptr %1, + %2, + %3, + iXLen %4, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vluxei.nxv2bf16.nxv2i32( + , + ptr, + , + iXLen); + +define @intrinsic_vluxei_v_nxv2bf16_nxv2bf16_nxv2i32(ptr %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2bf16_nxv2bf16_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vluxei32.v v9, (a0), v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vluxei.nxv2bf16.nxv2i32( + undef, + ptr %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.vluxei.mask.nxv2bf16.nxv2i32( + , + ptr, + , + , + iXLen, + iXLen); + +define @intrinsic_vluxei_mask_v_nxv2bf16_nxv2bf16_nxv2i32( %0, ptr %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2bf16_nxv2bf16_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vluxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vluxei.mask.nxv2bf16.nxv2i32( + %0, + ptr %1, + %2, + %3, + iXLen %4, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vluxei.nxv4bf16.nxv4i32( + , + ptr, + , + iXLen); + 
+define @intrinsic_vluxei_v_nxv4bf16_nxv4bf16_nxv4i32(ptr %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4bf16_nxv4bf16_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vluxei32.v v10, (a0), v8 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vluxei.nxv4bf16.nxv4i32( + undef, + ptr %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.vluxei.mask.nxv4bf16.nxv4i32( + , + ptr, + , + , + iXLen, + iXLen); + +define @intrinsic_vluxei_mask_v_nxv4bf16_nxv4bf16_nxv4i32( %0, ptr %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4bf16_nxv4bf16_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vluxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vluxei.mask.nxv4bf16.nxv4i32( + %0, + ptr %1, + %2, + %3, + iXLen %4, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vluxei.nxv8bf16.nxv8i32( + , + ptr, + , + iXLen); + +define @intrinsic_vluxei_v_nxv8bf16_nxv8bf16_nxv8i32(ptr %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8bf16_nxv8bf16_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vluxei32.v v12, (a0), v8 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vluxei.nxv8bf16.nxv8i32( + undef, + ptr %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.vluxei.mask.nxv8bf16.nxv8i32( + , + ptr, + , + , + iXLen, + iXLen); + +define @intrinsic_vluxei_mask_v_nxv8bf16_nxv8bf16_nxv8i32( %0, ptr %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8bf16_nxv8bf16_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT: vluxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vluxei.mask.nxv8bf16.nxv8i32( + %0, + ptr %1, + %2, + %3, + iXLen %4, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vluxei.nxv16bf16.nxv16i32( + , + ptr, + , + iXLen); + +define @intrinsic_vluxei_v_nxv16bf16_nxv16bf16_nxv16i32(ptr %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv16bf16_nxv16bf16_nxv16i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vluxei32.v v16, (a0), v8 +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vluxei.nxv16bf16.nxv16i32( + undef, + ptr %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.vluxei.mask.nxv16bf16.nxv16i32( + , + ptr, + , + , + iXLen, + iXLen); + +define @intrinsic_vluxei_mask_v_nxv16bf16_nxv16bf16_nxv16i32( %0, ptr %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv16bf16_nxv16bf16_nxv16i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT: vluxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vluxei.mask.nxv16bf16.nxv16i32( + %0, + ptr %1, + %2, + %3, + iXLen %4, iXLen 1) + + ret %a +} + declare @llvm.riscv.vluxei.nxv1f32.nxv1i32( , ptr, diff --git a/llvm/test/CodeGen/RISCV/rvv/vse.ll b/llvm/test/CodeGen/RISCV/rvv/vse.ll index 556b7702649db4..e20d91aa03a810 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vse.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+zvfbfmin \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin \ ; 
RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zvfbfmin \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare void @llvm.riscv.vse.nxv1i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vsoxei-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsoxei-rv64.ll index 168d71dab92d93..5497913fc3723e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsoxei-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsoxei-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+zvfh,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs \ ; RUN: < %s | FileCheck %s ; The intrinsics are not supported with RV32. @@ -924,6 +924,190 @@ entry: ret void } +declare void @llvm.riscv.vsoxei.nxv1bf16.nxv1i64( + , + ptr, + , + i64); + +define void @intrinsic_vsoxei_v_nxv1bf16_nxv1bf16_nxv1i64( %0, ptr %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1bf16_nxv1bf16_nxv1i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vsoxei64.v v8, (a0), v9 +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsoxei.nxv1bf16.nxv1i64( + %0, + ptr %1, + %2, + i64 %3) + + ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1bf16.nxv1i64( + , + ptr, + , + , + i64); + +define void @intrinsic_vsoxei_mask_v_nxv1bf16_nxv1bf16_nxv1i64( %0, ptr %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1bf16_nxv1bf16_nxv1i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vsoxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsoxei.mask.nxv1bf16.nxv1i64( + %0, + ptr %1, + %2, + %3, + i64 %4) + + ret void +} + +declare void @llvm.riscv.vsoxei.nxv2bf16.nxv2i64( + , + ptr, + , + i64); + +define void @intrinsic_vsoxei_v_nxv2bf16_nxv2bf16_nxv2i64( %0, ptr %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2bf16_nxv2bf16_nxv2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vsoxei64.v v8, (a0), v10 +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsoxei.nxv2bf16.nxv2i64( + %0, + ptr %1, + %2, + i64 %3) + + ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2bf16.nxv2i64( + , + ptr, + , + , + i64); + +define void @intrinsic_vsoxei_mask_v_nxv2bf16_nxv2bf16_nxv2i64( %0, ptr %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2bf16_nxv2bf16_nxv2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vsoxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsoxei.mask.nxv2bf16.nxv2i64( + %0, + ptr %1, + %2, + %3, + i64 %4) + + ret void +} + +declare void @llvm.riscv.vsoxei.nxv4bf16.nxv4i64( + , + ptr, + , + i64); + +define void @intrinsic_vsoxei_v_nxv4bf16_nxv4bf16_nxv4i64( %0, ptr %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4bf16_nxv4bf16_nxv4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vsoxei64.v v8, (a0), v12 +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsoxei.nxv4bf16.nxv4i64( + %0, + ptr %1, + %2, + i64 %3) + + ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4bf16.nxv4i64( + , + ptr, + , + , + i64); + +define void @intrinsic_vsoxei_mask_v_nxv4bf16_nxv4bf16_nxv4i64( %0, 
ptr %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4bf16_nxv4bf16_nxv4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsoxei.mask.nxv4bf16.nxv4i64( + %0, + ptr %1, + %2, + %3, + i64 %4) + + ret void +} + +declare void @llvm.riscv.vsoxei.nxv8bf16.nxv8i64( + , + ptr, + , + i64); + +define void @intrinsic_vsoxei_v_nxv8bf16_nxv8bf16_nxv8i64( %0, ptr %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8bf16_nxv8bf16_nxv8i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vsoxei64.v v8, (a0), v16 +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsoxei.nxv8bf16.nxv8i64( + %0, + ptr %1, + %2, + i64 %3) + + ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8bf16.nxv8i64( + , + ptr, + , + , + i64); + +define void @intrinsic_vsoxei_mask_v_nxv8bf16_nxv8bf16_nxv8i64( %0, ptr %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8bf16_nxv8bf16_nxv8i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsoxei.mask.nxv8bf16.nxv8i64( + %0, + ptr %1, + %2, + %3, + i64 %4) + + ret void +} + declare void @llvm.riscv.vsoxei.nxv1f32.nxv1i64( , ptr, diff --git a/llvm/test/CodeGen/RISCV/rvv/vsoxei.ll b/llvm/test/CodeGen/RISCV/rvv/vsoxei.ll index bcb00242741cb1..568b34f4e32be6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsoxei.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsoxei.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare void @llvm.riscv.vsoxei.nxv1i8.nxv1i32( @@ -4466,6 +4466,236 @@ entry: ret void } +declare void @llvm.riscv.vsoxei.nxv1bf16.nxv1i32( + , + ptr, + , + iXLen); + +define void @intrinsic_vsoxei_v_nxv1bf16_nxv1bf16_nxv1i32( %0, ptr %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1bf16_nxv1bf16_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vsoxei32.v v8, (a0), v9 +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsoxei.nxv1bf16.nxv1i32( + %0, + ptr %1, + %2, + iXLen %3) + + ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1bf16.nxv1i32( + , + ptr, + , + , + iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv1bf16_nxv1bf16_nxv1i32( %0, ptr %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1bf16_nxv1bf16_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vsoxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsoxei.mask.nxv1bf16.nxv1i32( + %0, + ptr %1, + %2, + %3, + iXLen %4) + + ret void +} + +declare void @llvm.riscv.vsoxei.nxv2bf16.nxv2i32( + , + ptr, + , + iXLen); + +define void @intrinsic_vsoxei_v_nxv2bf16_nxv2bf16_nxv2i32( %0, ptr %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2bf16_nxv2bf16_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli 
zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vsoxei32.v v8, (a0), v9 +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsoxei.nxv2bf16.nxv2i32( + %0, + ptr %1, + %2, + iXLen %3) + + ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2bf16.nxv2i32( + , + ptr, + , + , + iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv2bf16_nxv2bf16_nxv2i32( %0, ptr %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2bf16_nxv2bf16_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vsoxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsoxei.mask.nxv2bf16.nxv2i32( + %0, + ptr %1, + %2, + %3, + iXLen %4) + + ret void +} + +declare void @llvm.riscv.vsoxei.nxv4bf16.nxv4i32( + , + ptr, + , + iXLen); + +define void @intrinsic_vsoxei_v_nxv4bf16_nxv4bf16_nxv4i32( %0, ptr %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4bf16_nxv4bf16_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vsoxei32.v v8, (a0), v10 +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsoxei.nxv4bf16.nxv4i32( + %0, + ptr %1, + %2, + iXLen %3) + + ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4bf16.nxv4i32( + , + ptr, + , + , + iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv4bf16_nxv4bf16_nxv4i32( %0, ptr %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4bf16_nxv4bf16_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsoxei.mask.nxv4bf16.nxv4i32( + %0, + ptr %1, + %2, + %3, + iXLen %4) + + ret void +} + +declare void @llvm.riscv.vsoxei.nxv8bf16.nxv8i32( + , + ptr, + , + iXLen); + +define void @intrinsic_vsoxei_v_nxv8bf16_nxv8bf16_nxv8i32( %0, ptr %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8bf16_nxv8bf16_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vsoxei32.v v8, (a0), v12 +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsoxei.nxv8bf16.nxv8i32( + %0, + ptr %1, + %2, + iXLen %3) + + ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8bf16.nxv8i32( + , + ptr, + , + , + iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv8bf16_nxv8bf16_nxv8i32( %0, ptr %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8bf16_nxv8bf16_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsoxei.mask.nxv8bf16.nxv8i32( + %0, + ptr %1, + %2, + %3, + iXLen %4) + + ret void +} + +declare void @llvm.riscv.vsoxei.nxv16bf16.nxv16i32( + , + ptr, + , + iXLen); + +define void @intrinsic_vsoxei_v_nxv16bf16_nxv16bf16_nxv16i32( %0, ptr %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv16bf16_nxv16bf16_nxv16i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vsoxei32.v v8, (a0), v16 +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsoxei.nxv16bf16.nxv16i32( + %0, + ptr %1, + %2, + iXLen %3) + + ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv16bf16.nxv16i32( + , + ptr, + , + , + iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv16bf16_nxv16bf16_nxv16i32( %0, ptr %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv16bf16_nxv16bf16_nxv16i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m4, 
ta, ma +; CHECK-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsoxei.mask.nxv16bf16.nxv16i32( + %0, + ptr %1, + %2, + %3, + iXLen %4) + + ret void +} + declare void @llvm.riscv.vsoxei.nxv1f32.nxv1i8( , ptr, diff --git a/llvm/test/CodeGen/RISCV/rvv/vsse.ll b/llvm/test/CodeGen/RISCV/rvv/vsse.ll index 9b627bcd664673..b2b8334e7a604e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsse.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare void @llvm.riscv.vsse.nxv1i64( @@ -1384,6 +1384,282 @@ entry: ret void } +declare void @llvm.riscv.vsse.nxv1bf16( + , + ptr, + iXLen, + iXLen); + +define void @intrinsic_vsse_v_nxv1bf16_nxv1bf16( %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma +; CHECK-NEXT: vsse16.v v8, (a0), a1 +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsse.nxv1bf16( + %0, + ptr %1, + iXLen %2, + iXLen %3) + + ret void +} + +declare void @llvm.riscv.vsse.mask.nxv1bf16( + , + ptr, + iXLen, + , + iXLen); + +define void @intrinsic_vsse_mask_v_nxv1bf16_nxv1bf16( %0, ptr %1, iXLen %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma +; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsse.mask.nxv1bf16( + %0, + ptr %1, + iXLen %2, + %3, + iXLen %4) + + ret void +} + +declare void @llvm.riscv.vsse.nxv2bf16( + , + ptr, + iXLen, + iXLen); + +define void @intrinsic_vsse_v_nxv2bf16_nxv2bf16( %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma +; CHECK-NEXT: vsse16.v v8, (a0), a1 +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsse.nxv2bf16( + %0, + ptr %1, + iXLen %2, + iXLen %3) + + ret void +} + +declare void @llvm.riscv.vsse.mask.nxv2bf16( + , + ptr, + iXLen, + , + iXLen); + +define void @intrinsic_vsse_mask_v_nxv2bf16_nxv2bf16( %0, ptr %1, iXLen %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma +; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsse.mask.nxv2bf16( + %0, + ptr %1, + iXLen %2, + %3, + iXLen %4) + + ret void +} + +declare void @llvm.riscv.vsse.nxv4bf16( + , + ptr, + iXLen, + iXLen); + +define void @intrinsic_vsse_v_nxv4bf16_nxv4bf16( %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vsse16.v v8, (a0), a1 +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsse.nxv4bf16( + %0, + ptr %1, + iXLen %2, + iXLen %3) + + ret void +} + +declare void @llvm.riscv.vsse.mask.nxv4bf16( + , + ptr, + iXLen, + , + iXLen); + +define void 
@intrinsic_vsse_mask_v_nxv4bf16_nxv4bf16( %0, ptr %1, iXLen %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsse.mask.nxv4bf16( + %0, + ptr %1, + iXLen %2, + %3, + iXLen %4) + + ret void +} + +declare void @llvm.riscv.vsse.nxv8bf16( + , + ptr, + iXLen, + iXLen); + +define void @intrinsic_vsse_v_nxv8bf16_nxv8bf16( %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma +; CHECK-NEXT: vsse16.v v8, (a0), a1 +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsse.nxv8bf16( + %0, + ptr %1, + iXLen %2, + iXLen %3) + + ret void +} + +declare void @llvm.riscv.vsse.mask.nxv8bf16( + , + ptr, + iXLen, + , + iXLen); + +define void @intrinsic_vsse_mask_v_nxv8bf16_nxv8bf16( %0, ptr %1, iXLen %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma +; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsse.mask.nxv8bf16( + %0, + ptr %1, + iXLen %2, + %3, + iXLen %4) + + ret void +} + +declare void @llvm.riscv.vsse.nxv16bf16( + , + ptr, + iXLen, + iXLen); + +define void @intrinsic_vsse_v_nxv16bf16_nxv16bf16( %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; CHECK-NEXT: vsse16.v v8, (a0), a1 +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsse.nxv16bf16( + %0, + ptr %1, + iXLen %2, + iXLen %3) + + ret void +} + +declare void @llvm.riscv.vsse.mask.nxv16bf16( + , + ptr, + iXLen, + , + iXLen); + +define void @intrinsic_vsse_mask_v_nxv16bf16_nxv16bf16( %0, ptr %1, iXLen %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsse.mask.nxv16bf16( + %0, + ptr %1, + iXLen %2, + %3, + iXLen %4) + + ret void +} + +declare void @llvm.riscv.vsse.nxv32bf16( + , + ptr, + iXLen, + iXLen); + +define void @intrinsic_vsse_v_nxv32bf16_nxv32bf16( %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma +; CHECK-NEXT: vsse16.v v8, (a0), a1 +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsse.nxv32bf16( + %0, + ptr %1, + iXLen %2, + iXLen %3) + + ret void +} + +declare void @llvm.riscv.vsse.mask.nxv32bf16( + , + ptr, + iXLen, + , + iXLen); + +define void @intrinsic_vsse_mask_v_nxv32bf16_nxv32bf16( %0, ptr %1, iXLen %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma +; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsse.mask.nxv32bf16( + %0, + ptr %1, + iXLen %2, + %3, + iXLen %4) + + ret void +} + declare void @llvm.riscv.vsse.nxv1i8( , ptr, diff --git a/llvm/test/CodeGen/RISCV/rvv/vsuxei-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsuxei-rv64.ll index dbc7e719f14a8c..3572f5909400a5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsuxei-rv64.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vsuxei-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+zvfh,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs \ ; RUN: < %s | FileCheck %s ; The intrinsics are not supported with RV32. @@ -924,6 +924,190 @@ entry: ret void } +declare void @llvm.riscv.vsuxei.nxv1bf16.nxv1i64( + , + ptr, + , + i64); + +define void @intrinsic_vsuxei_v_nxv1bf16_nxv1bf16_nxv1i64( %0, ptr %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1bf16_nxv1bf16_nxv1i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vsuxei64.v v8, (a0), v9 +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsuxei.nxv1bf16.nxv1i64( + %0, + ptr %1, + %2, + i64 %3) + + ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1bf16.nxv1i64( + , + ptr, + , + , + i64); + +define void @intrinsic_vsuxei_mask_v_nxv1bf16_nxv1bf16_nxv1i64( %0, ptr %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1bf16_nxv1bf16_nxv1i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vsuxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsuxei.mask.nxv1bf16.nxv1i64( + %0, + ptr %1, + %2, + %3, + i64 %4) + + ret void +} + +declare void @llvm.riscv.vsuxei.nxv2bf16.nxv2i64( + , + ptr, + , + i64); + +define void @intrinsic_vsuxei_v_nxv2bf16_nxv2bf16_nxv2i64( %0, ptr %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2bf16_nxv2bf16_nxv2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vsuxei64.v v8, (a0), v10 +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsuxei.nxv2bf16.nxv2i64( + %0, + ptr %1, + %2, + i64 %3) + + ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2bf16.nxv2i64( + , + ptr, + , + , + i64); + +define void @intrinsic_vsuxei_mask_v_nxv2bf16_nxv2bf16_nxv2i64( %0, ptr %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2bf16_nxv2bf16_nxv2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vsuxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsuxei.mask.nxv2bf16.nxv2i64( + %0, + ptr %1, + %2, + %3, + i64 %4) + + ret void +} + +declare void @llvm.riscv.vsuxei.nxv4bf16.nxv4i64( + , + ptr, + , + i64); + +define void @intrinsic_vsuxei_v_nxv4bf16_nxv4bf16_nxv4i64( %0, ptr %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4bf16_nxv4bf16_nxv4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vsuxei64.v v8, (a0), v12 +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsuxei.nxv4bf16.nxv4i64( + %0, + ptr %1, + %2, + i64 %3) + + ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4bf16.nxv4i64( + , + ptr, + , + , + i64); + +define void @intrinsic_vsuxei_mask_v_nxv4bf16_nxv4bf16_nxv4i64( %0, ptr %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4bf16_nxv4bf16_nxv4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vsuxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsuxei.mask.nxv4bf16.nxv4i64( + %0, + ptr %1, + %2, + %3, + i64 %4) + + ret void +} + +declare void @llvm.riscv.vsuxei.nxv8bf16.nxv8i64( + , + ptr, + , + i64); + +define void @intrinsic_vsuxei_v_nxv8bf16_nxv8bf16_nxv8i64( %0, ptr %1, %2, i64 %3) 
nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8bf16_nxv8bf16_nxv8i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vsuxei64.v v8, (a0), v16 +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsuxei.nxv8bf16.nxv8i64( + %0, + ptr %1, + %2, + i64 %3) + + ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8bf16.nxv8i64( + , + ptr, + , + , + i64); + +define void @intrinsic_vsuxei_mask_v_nxv8bf16_nxv8bf16_nxv8i64( %0, ptr %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8bf16_nxv8bf16_nxv8i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vsuxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsuxei.mask.nxv8bf16.nxv8i64( + %0, + ptr %1, + %2, + %3, + i64 %4) + + ret void +} + declare void @llvm.riscv.vsuxei.nxv1f32.nxv1i64( , ptr, diff --git a/llvm/test/CodeGen/RISCV/rvv/vsuxei.ll b/llvm/test/CodeGen/RISCV/rvv/vsuxei.ll index 7413177918e63e..dc6dbe768741a0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsuxei.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsuxei.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare void @llvm.riscv.vsuxei.nxv1i8.nxv1i32( @@ -4466,6 +4466,236 @@ entry: ret void } +declare void @llvm.riscv.vsuxei.nxv1bf16.nxv1i32( + , + ptr, + , + iXLen); + +define void @intrinsic_vsuxei_v_nxv1bf16_nxv1bf16_nxv1i32( %0, ptr %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1bf16_nxv1bf16_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vsuxei32.v v8, (a0), v9 +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsuxei.nxv1bf16.nxv1i32( + %0, + ptr %1, + %2, + iXLen %3) + + ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1bf16.nxv1i32( + , + ptr, + , + , + iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv1bf16_nxv1bf16_nxv1i32( %0, ptr %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1bf16_nxv1bf16_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vsuxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsuxei.mask.nxv1bf16.nxv1i32( + %0, + ptr %1, + %2, + %3, + iXLen %4) + + ret void +} + +declare void @llvm.riscv.vsuxei.nxv2bf16.nxv2i32( + , + ptr, + , + iXLen); + +define void @intrinsic_vsuxei_v_nxv2bf16_nxv2bf16_nxv2i32( %0, ptr %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2bf16_nxv2bf16_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vsuxei32.v v8, (a0), v9 +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsuxei.nxv2bf16.nxv2i32( + %0, + ptr %1, + %2, + iXLen %3) + + ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2bf16.nxv2i32( + , + ptr, + , + , + iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv2bf16_nxv2bf16_nxv2i32( %0, ptr %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2bf16_nxv2bf16_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, 
ta, ma +; CHECK-NEXT: vsuxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsuxei.mask.nxv2bf16.nxv2i32( + %0, + ptr %1, + %2, + %3, + iXLen %4) + + ret void +} + +declare void @llvm.riscv.vsuxei.nxv4bf16.nxv4i32( + , + ptr, + , + iXLen); + +define void @intrinsic_vsuxei_v_nxv4bf16_nxv4bf16_nxv4i32( %0, ptr %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4bf16_nxv4bf16_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vsuxei32.v v8, (a0), v10 +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsuxei.nxv4bf16.nxv4i32( + %0, + ptr %1, + %2, + iXLen %3) + + ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4bf16.nxv4i32( + , + ptr, + , + , + iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv4bf16_nxv4bf16_nxv4i32( %0, ptr %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4bf16_nxv4bf16_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vsuxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsuxei.mask.nxv4bf16.nxv4i32( + %0, + ptr %1, + %2, + %3, + iXLen %4) + + ret void +} + +declare void @llvm.riscv.vsuxei.nxv8bf16.nxv8i32( + , + ptr, + , + iXLen); + +define void @intrinsic_vsuxei_v_nxv8bf16_nxv8bf16_nxv8i32( %0, ptr %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8bf16_nxv8bf16_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vsuxei32.v v8, (a0), v12 +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsuxei.nxv8bf16.nxv8i32( + %0, + ptr %1, + %2, + iXLen %3) + + ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8bf16.nxv8i32( + , + ptr, + , + , + iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv8bf16_nxv8bf16_nxv8i32( %0, ptr %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8bf16_nxv8bf16_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vsuxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsuxei.mask.nxv8bf16.nxv8i32( + %0, + ptr %1, + %2, + %3, + iXLen %4) + + ret void +} + +declare void @llvm.riscv.vsuxei.nxv16bf16.nxv16i32( + , + ptr, + , + iXLen); + +define void @intrinsic_vsuxei_v_nxv16bf16_nxv16bf16_nxv16i32( %0, ptr %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv16bf16_nxv16bf16_nxv16i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vsuxei32.v v8, (a0), v16 +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsuxei.nxv16bf16.nxv16i32( + %0, + ptr %1, + %2, + iXLen %3) + + ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv16bf16.nxv16i32( + , + ptr, + , + , + iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv16bf16_nxv16bf16_nxv16i32( %0, ptr %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv16bf16_nxv16bf16_nxv16i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vsuxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vsuxei.mask.nxv16bf16.nxv16i32( + %0, + ptr %1, + %2, + %3, + iXLen %4) + + ret void +} + declare void @llvm.riscv.vsuxei.nxv1f32.nxv1i8( , ptr, From 84a3739ac072c95af9fa80e36d9e0f52d11e28eb Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 31 Jul 2024 23:25:30 -0700 Subject: [PATCH 067/114] [RISCV] Replace Zvfh with Zvfhmin on vector load/store intrinsic tests. 
NFC

clang uses these with Zvfhmin so we should test them.
---
 llvm/test/CodeGen/RISCV/rvv/vle.ll         | 4 ++--
 llvm/test/CodeGen/RISCV/rvv/vleff.ll       | 4 ++--
 llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll | 2 +-
 llvm/test/CodeGen/RISCV/rvv/vloxei.ll      | 4 ++--
 llvm/test/CodeGen/RISCV/rvv/vlse.ll        | 4 ++--
 llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll | 2 +-
 llvm/test/CodeGen/RISCV/rvv/vluxei.ll      | 4 ++--
 llvm/test/CodeGen/RISCV/rvv/vse.ll         | 4 ++--
 llvm/test/CodeGen/RISCV/rvv/vsoxei-rv64.ll | 2 +-
 llvm/test/CodeGen/RISCV/rvv/vsoxei.ll      | 4 ++--
 llvm/test/CodeGen/RISCV/rvv/vsse.ll        | 4 ++--
 llvm/test/CodeGen/RISCV/rvv/vsuxei.ll      | 4 ++--
 12 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/rvv/vle.ll b/llvm/test/CodeGen/RISCV/rvv/vle.ll
index 7591bb7358e56e..bc92dfd16c880e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vle.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vle.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin \
 ; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin \
 ; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 
 declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vleff.ll b/llvm/test/CodeGen/RISCV/rvv/vleff.ll
index 6cbe858e44ea91..39e0a0d02e88d2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vleff.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vleff.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+zvfbfmin \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin \
 ; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zvfbfmin \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin \
 ; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare { <vscale x 1 x i64>, iXLen } @llvm.riscv.vleff.nxv1i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll
index 0578248c6f72dd..5d28534972b3f8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zvfh,+zvfbfmin -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 
 ; The intrinsics are not supported with RV32.
diff --git a/llvm/test/CodeGen/RISCV/rvv/vloxei.ll b/llvm/test/CodeGen/RISCV/rvv/vloxei.ll
index 9126b44caf99fe..65eedbb0cc898d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vloxei.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vloxei.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin \
 ; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin \
 ; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 
 declare <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlse.ll b/llvm/test/CodeGen/RISCV/rvv/vlse.ll
index 7f8f2d61e8b76b..3dcd254d0c1959 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlse.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin \
 ; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin \
 ; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 
 declare <vscale x 1 x i64> @llvm.riscv.vlse.nxv1i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll
index d908bd8b4d9504..b102a12dea9a99 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 
 ; The intrinsics are not supported with RV32.
diff --git a/llvm/test/CodeGen/RISCV/rvv/vluxei.ll b/llvm/test/CodeGen/RISCV/rvv/vluxei.ll
index 565ede979d12c3..240f377be1ce3b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vluxei.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vluxei.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin \
 ; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin \
 ; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 
 declare <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vse.ll b/llvm/test/CodeGen/RISCV/rvv/vse.ll
index e20d91aa03a810..f2ae2136078c0f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vse.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin \
 ; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin \
 ; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 
 declare void @llvm.riscv.vse.nxv1i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsoxei-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsoxei-rv64.ll
index 5497913fc3723e..f3ad06529210a4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsoxei-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsoxei-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 
 ; The intrinsics are not supported with RV32.
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsoxei.ll b/llvm/test/CodeGen/RISCV/rvv/vsoxei.ll index 568b34f4e32be6..89222711d4d918 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsoxei.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsoxei.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare void @llvm.riscv.vsoxei.nxv1i8.nxv1i32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vsse.ll b/llvm/test/CodeGen/RISCV/rvv/vsse.ll index b2b8334e7a604e..6908a2000653b0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsse.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare void @llvm.riscv.vsse.nxv1i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vsuxei.ll b/llvm/test/CodeGen/RISCV/rvv/vsuxei.ll index dc6dbe768741a0..69b1173d9531c7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsuxei.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsuxei.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare void @llvm.riscv.vsuxei.nxv1i8.nxv1i32( From ab33c3dd65ab9b2101e42eaa05ed781a21753f65 Mon Sep 17 00:00:00 2001 From: pvanhout Date: Thu, 1 Aug 2024 08:42:17 +0200 Subject: [PATCH 068/114] [GlobalISel][TableGen] Make variadic-errors.td test more robust Use a regex instead of hardcoded numbers for anonymous pattern suffixes. 
---
 .../TableGen/GlobalISelCombinerEmitter/variadic-errors.td | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/variadic-errors.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/variadic-errors.td
index 2b44e184c98e20..3f8d4a4bf2e13f 100644
--- a/llvm/test/TableGen/GlobalISelCombinerEmitter/variadic-errors.td
+++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/variadic-errors.td
@@ -27,14 +27,14 @@ def ConflictingInference : GICombineRule<
   (apply (G_MERGE_VALUES $dst, GIVariadic<3, 6>:$args))>;
 
 // CHECK: :[[@LINE+2]]:{{[0-9]+}}: error: cannot parse operand type: minimum number of arguments must be greater than zero in GIVariadic
-// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: Failed to parse pattern: '(G_BUILD_VECTOR ?:$dst, anonymous_8021:$a)'
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: Failed to parse pattern: '(G_BUILD_VECTOR ?:$dst, anonymous_{{[0-9]+}}:$a)'
 def InvalidBounds0 : GICombineRule<
   (defs root:$dst),
   (match (G_BUILD_VECTOR $dst, GIVariadic<0>:$a)),
   (apply [{ APPLY }])>;
 
 // CHECK: :[[@LINE+2]]:{{[0-9]+}}: error: cannot parse operand type: maximum number of arguments (1) must be zero, or greater than the minimum number of arguments (1) in GIVariadic
-// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: Failed to parse pattern: '(G_BUILD_VECTOR ?:$dst, anonymous_8022:$a)'
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: Failed to parse pattern: '(G_BUILD_VECTOR ?:$dst, anonymous_{{[0-9]+}}:$a)'
 def InvalidBounds1 : GICombineRule<
   (defs root:$dst),
   (match (G_BUILD_VECTOR $dst, GIVariadic<1,1>:$a)),

From e167f753bd4679329ebcf0232f72be9c63d9b671 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu
Date: Thu, 1 Aug 2024 14:45:21 +0800
Subject: [PATCH 069/114] [C++20] [Modules] Always emit the inline builtins
 (#101278)

See the attached test for a motivating example. If we are too eager to
skip emitting the definition of an inline builtin, we can hit a
middle-end crash. It is safer to always emit inline builtins.
---
 clang/lib/CodeGen/CodeGenModule.cpp     | 10 +++----
 clang/test/Modules/inline-builtins.cppm | 36 +++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 5 deletions(-)
 create mode 100644 clang/test/Modules/inline-builtins.cppm

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 760185d4d51ff0..31f6632df9f27d 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -4022,6 +4022,11 @@ bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) {
     return true;
 
   const auto *F = cast<FunctionDecl>(GD.getDecl());
+  // Inline builtins declaration must be emitted. They often are fortified
+  // functions.
+  if (F->isInlineBuiltinDeclaration())
+    return true;
+
   if (CodeGenOpts.OptimizationLevel == 0 && !F->hasAttr<AlwaysInlineAttr>())
     return false;
 
@@ -4067,11 +4072,6 @@ bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) {
     }
   }
 
-  // Inline builtins declaration must be emitted. They often are fortified
-  // functions.
-  if (F->isInlineBuiltinDeclaration())
-    return true;
-
   // PR9614. Avoid cases where the source code is lying to us.
An available // externally function should have an equivalent function somewhere else, // but a function that calls itself through asm label/`__builtin_` trickery is diff --git a/clang/test/Modules/inline-builtins.cppm b/clang/test/Modules/inline-builtins.cppm new file mode 100644 index 00000000000000..8a0fffbfc25bca --- /dev/null +++ b/clang/test/Modules/inline-builtins.cppm @@ -0,0 +1,36 @@ +// REQUIRES: !system-windows +// +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: cd %t +// +// RUN: %clang_cc1 -std=c++20 -O3 %t/a.cppm -emit-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 -O3 %t/test.cc -fmodule-file=a=%t/a.pcm \ +// RUN: -emit-llvm -o - | FileCheck %t/test.cc + +//--- memmove.h +typedef long unsigned int size_t; +extern "C" void *memmove (void *__dest, const void *__src, size_t __n) + throw () __attribute__ ((__nonnull__ (1, 2))); +extern "C" __inline __attribute__ ((__always_inline__)) __attribute__ ((__gnu_inline__)) void * + memmove (void *__dest, const void *__src, size_t __len) throw () +{ + return __builtin_memmove(__dest, __src, __len); +} + +//--- a.cppm +module; +#include "memmove.h" +export module a; +export using ::memmove; + +//--- test.cc +import a; + +void test() { + int a, b; + unsigned c = 0; + memmove(&a, &b, c); +} + +// CHECK-NOT: memmove From 1d2b2d29d733200b704f38d220d22ecc07d6cf42 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 1 Aug 2024 10:55:28 +0400 Subject: [PATCH 070/114] AMDGPU: Cleanup extract_subvector actions (NFC) (#101454) The base AMDGPUISelLowering was setting custom action on 16-bit vector types, but also set in SIISelLowering. --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 16 ++++++---------- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 2ad91de5663237..862b5c7e3e3d75 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -444,19 +444,15 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, MVT::v11i32, MVT::v11f32, MVT::v12i32, MVT::v12f32}, Custom); - // FIXME: Why is v8f16/v8bf16 missing? 
setOperationAction(Op, VT, Custom);
       break;

From fdce0bfb7f84dc3a29acaefe04ee2f3d75d52c46 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Thu, 1 Aug 2024 15:25:36 +0800
Subject: [PATCH 071/114] [RISCV] Add back missing vmv_v_x_vl pattern
 predicates (#101455)

Looks like these got left behind in
17e2d07ad15e02c9c757fdd4a532c43747ed8bf3
---
 .../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 394da8040a13e9..699536b1869692 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2369,14 +2369,16 @@ foreach vti = AllVectors in {
 }
 
 foreach vti = AllIntegerVectors in {
-  def : Pat<(vti.Vector (riscv_vmv_v_x_vl vti.RegClass:$passthru, GPR:$rs2, VLOpFrag)),
-            (!cast<Instruction>("PseudoVMV_V_X_"#vti.LMul.MX)
-             vti.RegClass:$passthru, GPR:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
-  defvar ImmPat = !cast<ComplexPattern>("sew"#vti.SEW#"simm5");
-  def : Pat<(vti.Vector (riscv_vmv_v_x_vl vti.RegClass:$passthru, (ImmPat simm5:$imm5),
-                                          VLOpFrag)),
-            (!cast<Instruction>("PseudoVMV_V_I_"#vti.LMul.MX)
-             vti.RegClass:$passthru, simm5:$imm5, GPR:$vl, vti.Log2SEW, TU_MU)>;
+  let Predicates = GetVTypePredicates<vti>.Predicates in {
+    def : Pat<(vti.Vector (riscv_vmv_v_x_vl vti.RegClass:$passthru, GPR:$rs2, VLOpFrag)),
+              (!cast<Instruction>("PseudoVMV_V_X_"#vti.LMul.MX)
+               vti.RegClass:$passthru, GPR:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
+    defvar ImmPat = !cast<ComplexPattern>("sew"#vti.SEW#"simm5");
+    def : Pat<(vti.Vector (riscv_vmv_v_x_vl vti.RegClass:$passthru, (ImmPat simm5:$imm5),
+                                            VLOpFrag)),
+              (!cast<Instruction>("PseudoVMV_V_I_"#vti.LMul.MX)
+               vti.RegClass:$passthru, simm5:$imm5, GPR:$vl, vti.Log2SEW, TU_MU)>;
+  }
 }
 }

From 7088a5ed880f29129ec844c66068e8cb61ca98bf Mon Sep 17 00:00:00 2001
From: Dimitry Andric
Date:
Thu, 1 Aug 2024 09:28:29 +0200 Subject: [PATCH 072/114] [lldb][FreeBSD] Fix NativeRegisterContextFreeBSD_{arm,mips64,powerpc} declarations (#101403) Similar to #97796, fix the type of the `native_thread` parameter for the arm, mips64 and powerpc variants of `NativeRegisterContextFreeBSD_*`. Otherwise, this leads to compile errors similar to: ``` lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_powerpc.cpp:85:39: error: out-of-line definition of 'NativeRegisterContextFreeBSD_powerpc' does not match any declaration in 'lldb_private::process_freebsd::NativeRegisterContextFreeBSD_powerpc' 85 | NativeRegisterContextFreeBSD_powerpc::NativeRegisterContextFreeBSD_powerpc( | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``` --- .../Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_arm.h | 2 +- .../Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.h | 2 +- .../Process/FreeBSD/NativeRegisterContextFreeBSD_powerpc.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_arm.h b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_arm.h index 89ffa617294aa6..b9537e6952f6c4 100644 --- a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_arm.h +++ b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_arm.h @@ -30,7 +30,7 @@ class NativeProcessFreeBSD; class NativeRegisterContextFreeBSD_arm : public NativeRegisterContextFreeBSD { public: NativeRegisterContextFreeBSD_arm(const ArchSpec &target_arch, - NativeThreadProtocol &native_thread); + NativeThreadFreeBSD &native_thread); uint32_t GetRegisterSetCount() const override; diff --git a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.h b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.h index 0b4a508a7d5dd9..286b4fd8d8b996 100644 --- a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.h +++ b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.h @@ -31,7 +31,7 @@ class NativeRegisterContextFreeBSD_mips64 : public NativeRegisterContextFreeBSD { public: NativeRegisterContextFreeBSD_mips64(const ArchSpec &target_arch, - NativeThreadProtocol &native_thread); + NativeThreadFreeBSD &native_thread); uint32_t GetRegisterSetCount() const override; diff --git a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_powerpc.h b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_powerpc.h index 3df371036f9153..420db822acc0ff 100644 --- a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_powerpc.h +++ b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_powerpc.h @@ -31,7 +31,7 @@ class NativeRegisterContextFreeBSD_powerpc : public NativeRegisterContextFreeBSD { public: NativeRegisterContextFreeBSD_powerpc(const ArchSpec &target_arch, - NativeThreadProtocol &native_thread); + NativeThreadFreeBSD &native_thread); uint32_t GetRegisterSetCount() const override; From 17ba4f4053e303be3e5408d34eaf687a49cefb06 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Thu, 1 Aug 2024 08:43:59 +0000 Subject: [PATCH 073/114] Revert "[mlir][Transforms] Dialect conversion: Skip materializations when running without converter (#101318)" This reverts commit 2aa96fcf751ee948702e8447de62d6bea8235e3a. This was merged without a test. Also it seems it was only fixing an issue for users which used a particular workaround that is not actually needed anymore (skipping UnrealizedConversionCast operands). 
--- .../Transforms/Utils/DialectConversion.cpp | 38 ++++++++----------- .../test-legalize-type-conversion.mlir | 3 +- 2 files changed, 17 insertions(+), 24 deletions(-) diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index fdd0175ffae53f..f26aa0a1516a69 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -1316,43 +1316,37 @@ Block *ConversionPatternRewriterImpl::applySignatureConversion( continue; } - // This is a 1->1+ mapping. + // This is a 1->1+ mapping. 1->N mappings are not fully supported in the + // dialect conversion. Therefore, we need an argument materialization to + // turn the replacement block arguments into a single SSA value that can be + // used as a replacement. auto replArgs = newBlock->getArguments().slice(inputMap->inputNo, inputMap->size); - - // When there is no type converter, assume that the new block argument - // types are legal. This is reasonable to assume because they were - // specified by the user. - // FIXME: This won't work for 1->N conversions because multiple output - // types are not supported in parts of the dialect conversion. In such a - // case, we currently use the original block argument type (produced by - // the argument materialization). - if (!converter && replArgs.size() == 1) { - mapping.map(origArg, replArgs[0]); - appendRewrite(block, origArg); - continue; - } - - // 1->N mappings are not fully supported in the dialect conversion. - // Therefore, we need an argument materialization to turn the replacement - // block arguments into a single SSA value (of the original type) that can - // be used as a replacement. Value argMat = buildUnresolvedMaterialization( MaterializationKind::Argument, newBlock, newBlock->begin(), origArg.getLoc(), /*inputs=*/replArgs, origArgType, converter); mapping.map(origArg, argMat); appendRewrite(block, origArg); - // Now legalize the type by building a target materialization. Type legalOutputType; - if (converter) + if (converter) { legalOutputType = converter->convertType(origArgType); + } else if (replArgs.size() == 1) { + // When there is no type converter, assume that the new block argument + // types are legal. This is reasonable to assume because they were + // specified by the user. + // FIXME: This won't work for 1->N conversions because multiple output + // types are not supported in parts of the dialect conversion. In such a + // case, we currently use the original block argument type (produced by + // the argument materialization). + legalOutputType = replArgs[0].getType(); + } if (legalOutputType && legalOutputType != origArgType) { Value targetMat = buildUnresolvedTargetMaterialization( origArg.getLoc(), argMat, legalOutputType, converter); mapping.map(argMat, targetMat); - appendRewrite(block, origArg); } + appendRewrite(block, origArg); } appendRewrite(newBlock, block, converter); diff --git a/mlir/test/Transforms/test-legalize-type-conversion.mlir b/mlir/test/Transforms/test-legalize-type-conversion.mlir index 07dfb49473f5eb..d0563fed8e5d94 100644 --- a/mlir/test/Transforms/test-legalize-type-conversion.mlir +++ b/mlir/test/Transforms/test-legalize-type-conversion.mlir @@ -103,9 +103,8 @@ func.func @test_block_argument_not_converted() { // Make sure argument type changes aren't implicitly forwarded. 
func.func @test_signature_conversion_no_converter() { "test.signature_conversion_no_converter"() ({ - // expected-error@below {{failed to materialize conversion for block argument #0 that remained live after conversion, type was 'f32'}} + // expected-error@below {{failed to legalize unresolved materialization from ('f64') to 'f32' that remained live after conversion}} ^bb0(%arg0: f32): - // expected-note@below{{see existing live user here}} "test.type_consumer"(%arg0) : (f32) -> () "test.return"(%arg0) : (f32) -> () }) : () -> () From 5dfdac74cadd9483a66eb17e51dc632b554cccb1 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Thu, 1 Aug 2024 10:57:21 +0200 Subject: [PATCH 074/114] [libc++][NFC] Avoid opening namespace std in the tests (#94160) This also adds a few FIXMEs where we use UB in the tests. --- .../libcxx/utilities/any/allocator.pass.cpp | 100 ++--- .../format/enable_insertable.compile.pass.cpp | 24 +- .../common_with.compile.pass.cpp | 409 ++++++++---------- .../common_reference.compile.pass.cpp | 30 +- libcxx/test/std/containers/Emplaceable.h | 13 +- libcxx/test/std/containers/NotConstructible.h | 14 +- .../container.node/node_handle.pass.cpp | 15 +- .../unord/unord.map/compare.pass.cpp | 12 +- .../insert_or_assign.pass.cpp | 9 +- .../unord.map.modifiers/try.emplace.pass.cpp | 9 +- .../syserr/is_error_code_enum.pass.cpp | 8 +- .../syserr/is_error_condition_enum.pass.cpp | 8 +- .../ErrorCodeEnum.pass.cpp | 8 +- .../lwg3629.pass.cpp | 6 +- .../ErrorCodeEnum.pass.cpp | 8 +- .../syserr.errcode.modifiers/lwg3629.pass.cpp | 6 +- .../lwg3629.pass.cpp | 6 +- .../lwg3629.pass.cpp | 6 +- .../propagate_const.nonmembers/hash.pass.cpp | 12 +- .../indirectly_readable.compile.pass.cpp | 28 +- .../user_defined_char_type.pass.cpp | 13 +- .../range.subrange/ctor.range_size.pass.cpp | 6 +- .../string_append/push_back.pass.cpp | 4 +- libcxx/test/std/time/rep.h | 8 +- .../difference_type.pass.cpp | 14 +- .../allocator.traits.types/size_type.pass.cpp | 10 +- .../common_reference.compile.pass.cpp | 10 +- .../meta.trans.other/common_type.pass.cpp | 39 +- .../optional/optional.hash/hash.pass.cpp | 6 +- .../tuple_size_incomplete.pass.cpp | 5 +- .../pairs/pairs.pair/ctor.pair_like.pass.cpp | 8 +- .../variant/variant.hash/hash.pass.cpp | 11 +- .../variant.visit.member/visit.pass.cpp | 1 + .../variant/variant.visit/visit.pass.cpp | 1 + libcxx/test/support/Counter.h | 12 +- 35 files changed, 364 insertions(+), 515 deletions(-) diff --git a/libcxx/test/libcxx/utilities/any/allocator.pass.cpp b/libcxx/test/libcxx/utilities/any/allocator.pass.cpp index daaf74a2783fde..eab3ca88264938 100644 --- a/libcxx/test/libcxx/utilities/any/allocator.pass.cpp +++ b/libcxx/test/libcxx/utilities/any/allocator.pass.cpp @@ -39,62 +39,62 @@ bool Large_was_deallocated = false; bool Small_was_constructed = false; bool Small_was_destroyed = false; -namespace std { - template <> - struct allocator { - using value_type = Large; - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - using propagate_on_container_move_assignment = std::true_type; - using is_always_equal = std::true_type; - - Large* allocate(std::size_t n) { - Large_was_allocated = true; - return static_cast(::operator new(n * sizeof(Large))); - } +template <> +struct std::allocator { + using value_type = Large; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using propagate_on_container_move_assignment = std::true_type; + using is_always_equal = std::true_type; + + Large* allocate(std::size_t n) { + 
Large_was_allocated = true; + return static_cast(::operator new(n * sizeof(Large))); + } - template - void construct(Large* p, Args&& ...args) { - new (p) Large(std::forward(args)...); - Large_was_constructed = true; - } + template + void construct(Large* p, Args&&... args) { + new (p) Large(std::forward(args)...); + Large_was_constructed = true; + } - void destroy(Large* p) { - p->~Large(); - Large_was_destroyed = true; - } + void destroy(Large* p) { + p->~Large(); + Large_was_destroyed = true; + } - void deallocate(Large* p, std::size_t) { - Large_was_deallocated = true; - return ::operator delete(p); - } - }; - - template <> - struct allocator { - using value_type = Small; - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - using propagate_on_container_move_assignment = std::true_type; - using is_always_equal = std::true_type; - - Small* allocate(std::size_t) { assert(false); return nullptr; } - - template - void construct(Small* p, Args&& ...args) { - new (p) Small(std::forward(args)...); - Small_was_constructed = true; - } + void deallocate(Large* p, std::size_t) { + Large_was_deallocated = true; + return ::operator delete(p); + } +}; + +template <> +struct std::allocator { + using value_type = Small; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using propagate_on_container_move_assignment = std::true_type; + using is_always_equal = std::true_type; + + Small* allocate(std::size_t) { + assert(false); + return nullptr; + } - void destroy(Small* p) { - p->~Small(); - Small_was_destroyed = true; - } + template + void construct(Small* p, Args&&... args) { + new (p) Small(std::forward(args)...); + Small_was_constructed = true; + } - void deallocate(Small*, std::size_t) { assert(false); } - }; -} // end namespace std + void destroy(Small* p) { + p->~Small(); + Small_was_destroyed = true; + } + void deallocate(Small*, std::size_t) { assert(false); } +}; int main(int, char**) { // Test large types diff --git a/libcxx/test/libcxx/utilities/format/enable_insertable.compile.pass.cpp b/libcxx/test/libcxx/utilities/format/enable_insertable.compile.pass.cpp index 2249e2ebb4130e..064d7cf720d85c 100644 --- a/libcxx/test/libcxx/utilities/format/enable_insertable.compile.pass.cpp +++ b/libcxx/test/libcxx/utilities/format/enable_insertable.compile.pass.cpp @@ -62,26 +62,24 @@ struct valid { void insert(iterator, CharT*, CharT*); }; -namespace std::__format { template <> -inline constexpr bool __enable_insertable> = true; +inline constexpr bool std::__format::__enable_insertable> = true; template <> -inline constexpr bool __enable_insertable> = true; +inline constexpr bool std::__format::__enable_insertable> = true; template <> -inline constexpr bool __enable_insertable> = true; +inline constexpr bool std::__format::__enable_insertable> = true; template <> -inline constexpr bool __enable_insertable> = true; +inline constexpr bool std::__format::__enable_insertable> = true; #ifndef TEST_HAS_NO_WIDE_CHARACTERS template <> -inline constexpr bool __enable_insertable> = true; +inline constexpr bool std::__format::__enable_insertable> = true; template <> -inline constexpr bool __enable_insertable> = true; +inline constexpr bool std::__format::__enable_insertable> = true; template <> -inline constexpr bool __enable_insertable> = true; +inline constexpr bool std::__format::__enable_insertable> = true; template <> -inline constexpr bool __enable_insertable> = true; +inline constexpr bool std::__format::__enable_insertable> = true; #endif -} // namespace 
std::__format static_assert(!std::__format::__insertable>); static_assert(!std::__format::__insertable>); @@ -96,12 +94,10 @@ static_assert(!std::__format::__insertable>); static_assert(std::__format::__insertable>); #endif -namespace std::__format { template <> -inline constexpr bool __enable_insertable> = true; +inline constexpr bool std::__format::__enable_insertable> = true; template <> -inline constexpr bool __enable_insertable> = true; -} // namespace std::__format +inline constexpr bool std::__format::__enable_insertable> = true; static_assert(!std::__format::__insertable>); static_assert(!std::__format::__insertable>); diff --git a/libcxx/test/std/concepts/concepts.lang/concept.common/common_with.compile.pass.cpp b/libcxx/test/std/concepts/concepts.lang/concept.common/common_with.compile.pass.cpp index 5d80c1cba26559..d72df6dbb3ca9d 100644 --- a/libcxx/test/std/concepts/concepts.lang/concept.common/common_with.compile.pass.cpp +++ b/libcxx/test/std/concepts/concepts.lang/concept.common/common_with.compile.pass.cpp @@ -262,17 +262,16 @@ struct BadBasicCommonType { // should be placed so the test doesn't get deleted. }; -namespace std { template <> -struct common_type { +struct std::common_type { using type = BadBasicCommonType; }; template <> -struct common_type { +struct std::common_type { using type = int; }; -} // namespace std + static_assert(requires { typename std::common_type_t; }); @@ -289,17 +288,16 @@ static_assert(!std::convertible_to); struct T1 {}; static_assert(!std::convertible_to); -namespace std { template <> -struct common_type { +struct std::common_type { using type = DullCommonType; }; template <> -struct common_type { +struct std::common_type { using type = DullCommonType; }; -} // namespace std + static_assert(HasValidCommonType()); static_assert(!CheckCommonWith()); @@ -314,17 +312,16 @@ struct T2 {}; static_assert( !std::convertible_to); -namespace std { template <> -struct common_type { +struct std::common_type { using type = CommonTypeImplicitlyConstructibleFromInt; }; template <> -struct common_type { +struct std::common_type { using type = CommonTypeImplicitlyConstructibleFromInt; }; -} // namespace std + static_assert(HasValidCommonType()); static_assert(!CheckCommonWith()); @@ -339,17 +336,16 @@ struct T3 {}; static_assert( !std::convertible_to); -namespace std { template <> -struct common_type { +struct std::common_type { using type = CommonTypeExplicitlyConstructibleFromInt; }; template <> -struct common_type { +struct std::common_type { using type = CommonTypeExplicitlyConstructibleFromInt; }; -} // namespace std + static_assert(HasValidCommonType()); static_assert(!CheckCommonWith()); @@ -361,17 +357,16 @@ static_assert(requires(T4 t4) { static_cast(t4); }); -namespace std { template <> -struct common_type { +struct std::common_type { using type = CommonTypeImplicitlyConstructibleFromT4; }; template <> -struct common_type { +struct std::common_type { using type = CommonTypeImplicitlyConstructibleFromT4; }; -} // namespace std + static_assert(HasValidCommonType()); static_assert(!CheckCommonWith()); @@ -383,17 +378,16 @@ static_assert(requires(T5 t5) { static_cast(t5); }); -namespace std { template <> -struct common_type { +struct std::common_type { using type = CommonTypeExplicitlyConstructibleFromT5; }; template <> -struct common_type { +struct std::common_type { using type = CommonTypeExplicitlyConstructibleFromT5; }; -} // namespace std + static_assert(HasValidCommonType()); static_assert(!CheckCommonWith()); @@ -403,113 +397,111 @@ struct 
CommonTypeNoCommonReference {
   CommonTypeNoCommonReference(int);
 };
-namespace std {
 template <>
-struct common_type<...> {
+struct std::common_type<...> {
   using type = CommonTypeNoCommonReference;
 };
 template <>
-struct common_type<...> {
+struct std::common_type<...> {
   using type = CommonTypeNoCommonReference;
 };
 template <>
-struct common_type<...> {};
+struct std::common_type<...> {};
[... the remaining cv- and ref-qualified common_type specializations in this group, all empty, are rewritten the same way; the template arguments were lost when this patch was extracted ...]
 template <>
-struct common_type<...> {};
-} // namespace std
+struct std::common_type<...> {};
 template <...>
 constexpr bool HasCommonReference() noexcept {
@@ -526,177 +518,176 @@ struct CommonTypeNoMetaCommonReference {
   CommonTypeNoMetaCommonReference(int);
 };
-namespace std {
 template <>
-struct common_type<...> {
+struct std::common_type<...> {
   using type = CommonTypeNoMetaCommonReference;
 };
 template <>
-struct common_type<...> {
+struct std::common_type<...> {
   using type = CommonTypeNoMetaCommonReference;
 };
 template <>
-struct common_type<...> {
+struct std::common_type<...> {
   using type = void;
 };
[... the remaining specializations in this group, all defining `using type = void;`, are rewritten the same way ...]
 template <>
-struct common_type<...> {
+struct std::common_type<...> {
   using type = void;
 };
-} // namespace std
+
 static_assert(HasValidCommonType<...>());
 static_assert(HasValidCommonType<...>());
 static_assert(HasCommonReference<...>());
@@ -709,284 +700,242 @@ struct CommonWithInt {
   operator int() const volatile;
 };
-namespace std {
 template <>
-struct common_type<...> {
+struct std::common_type<...> {
   using type = int;
 };
 template <>
-struct common_type<...> : common_type<...> {};
+struct std::common_type<...> : std::common_type<...> {};
 template <>
-struct common_type<...>
-    : common_type<...> {};
+struct std::common_type<...> : std::common_type<...> {};
[... the remaining specializations in this group, each inheriting from the first one, are rewritten the same way ...]
 template <>
-struct common_type<...>
-    : common_type<...> {};
-} // namespace std
+struct std::common_type<...> : std::common_type<...> {};
+
 static_assert(CheckCommonWith<...>());
 struct CommonWithIntButRefLong {
   operator int() const volatile;
 };
-namespace std {
 template <>
-struct common_type<...> {
+struct std::common_type<...> {
   using type = int;
 };
 template <>
-struct common_type<...>
-    : common_type<...> {};
+struct std::common_type<...> : std::common_type<...> {};
 template <>
-struct common_type<...> {
+struct std::common_type<...> {
   using type = long;
 };
[... the remaining specializations in this group are rewritten the same way ...]
 template <>
-struct common_type<...>
-    : common_type<...> {};
-} // namespace std
+struct std::common_type<...>
+    : std::common_type<...> {};
+
 static_assert(CheckCommonWith<...>());
diff --git a/libcxx/test/std/concepts/concepts.lang/concept.commonref/common_reference.compile.pass.cpp b/libcxx/test/std/concepts/concepts.lang/concept.commonref/common_reference.compile.pass.cpp
index 7c37dafc2d6919..ab79130f812f02 100644
--- a/libcxx/test/std/concepts/concepts.lang/concept.commonref/common_reference.compile.pass.cpp
+++ b/libcxx/test/std/concepts/concepts.lang/concept.commonref/common_reference.compile.pass.cpp
@@ -273,17 +273,16 @@ struct BadBasicCommonReference {
 static_assert(std::convertible_to<...>);
 static_assert(std::convertible_to<...>);
-namespace std {
 template
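As an aside for readers unfamiliar with the pattern these test-file hunks apply: each hunk replaces specializations written inside a reopened namespace std block with qualified specializations declared at namespace scope. The sketch below is not part of the patch; the types A, B, and C are hypothetical stand-ins, since the test's actual specialization arguments are not reproduced above.

// Minimal sketch (not from the patch): A, B, C are hypothetical types.
#include <type_traits>

struct A {};
struct B {};
struct C {
  C() = default;
  C(A) {}  // explicit convertibility from A and B keeps the
  C(B) {}  // common_type specialization below conforming
};

// Before: the specialization reopens namespace std.
//
//   namespace std {
//   template <>
//   struct common_type<A, B> {
//     using type = C;
//   };
//   } // namespace std

// After: the specialization uses a qualified name at global scope,
// well-formed since C++17; this is the form the hunks above switch to.
template <>
struct std::common_type<A, B> {
  using type = C;
};

static_assert(std::is_same_v<std::common_type_t<A, B>, C>);

int main() {}

Both spellings declare the same specialization; the qualified form simply avoids reopening std and keeps each specialization a single top-level declaration, which is what makes the mechanical rewrite in these hunks possible.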