diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 80e635e4a57eca..564a58e4eb6709 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -146,6 +146,25 @@ let TargetGuard = "sme" in { [IsOverloadNone, IsStreamingCompatible, IsOutZA]>; } +let TargetGuard = "sme2p1" in { + def SVZERO_ZA64_VG1x2 : SInst<"svzero_za64_vg1x2", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg1x2", + [IsOverloadNone, IsStreaming, IsInOutZA]>; + def SVZERO_ZA64_VG1x4 : SInst<"svzero_za64_vg1x4", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg1x4", + [IsOverloadNone, IsStreaming, IsInOutZA]>; + def SVZERO_ZA64_VG2x1 : SInst<"svzero_za64_vg2x1", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg2x1", + [IsOverloadNone, IsStreaming, IsInOutZA]>; + def SVZERO_ZA64_VG2x2 : SInst<"svzero_za64_vg2x2", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg2x2", + [IsOverloadNone, IsStreaming, IsInOutZA]>; + def SVZERO_ZA64_VG2x4 : SInst<"svzero_za64_vg2x4", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg2x4", + [IsOverloadNone, IsStreaming, IsInOutZA]>; + def SVZERO_ZA64_VG4x1 : SInst<"svzero_za64_vg4x1", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg4x1", + [IsOverloadNone, IsStreaming, IsInOutZA]>; + def SVZERO_ZA64_VG4x2 : SInst<"svzero_za64_vg4x2", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg4x2", + [IsOverloadNone, IsStreaming, IsInOutZA]>; + def SVZERO_ZA64_VG4x4 : SInst<"svzero_za64_vg4x4", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg4x4", + [IsOverloadNone, IsStreaming, IsInOutZA]>; +} + //////////////////////////////////////////////////////////////////////////////// // SME - Counting elements in a streaming vector diff --git a/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c new file mode 100644 index 00000000000000..bdd75798554148 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c @@ -0,0 +1,139 @@ +// 
NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include <arm_sme.h> + +#define SVE_ACLE_FUNC(A1,A2) A1##A2 + +// CHECK-LABEL: define dso_local void @test_svzero_za64_vg1x2( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg1x2(i32 [[SLICE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg1x2j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg1x2(i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret void +// +void test_svzero_za64_vg1x2(uint32_t slice) __arm_streaming __arm_inout("za") +{ + SVE_ACLE_FUNC(svzero_za64,_vg1x2)(slice); +} + +// CHECK-LABEL: define dso_local void @test_svzero_za64_vg1x4( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg1x4(i32 [[SLICE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg1x4j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg1x4(i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret void +// +void test_svzero_za64_vg1x4(uint32_t slice) __arm_streaming 
__arm_inout("za"){ + SVE_ACLE_FUNC(svzero_za64,_vg1x4)(slice); +} + +// CHECK-LABEL: define dso_local void @test_svzero_za64_vg2x1( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg2x1(i32 [[SLICE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg2x1j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg2x1(i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret void +// +void test_svzero_za64_vg2x1(uint32_t slice) __arm_streaming __arm_inout("za"){ + SVE_ACLE_FUNC(svzero_za64,_vg2x1)(slice); +} + +// CHECK-LABEL: define dso_local void @test_svzero_za64_vg2x2( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg2x2(i32 [[SLICE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg2x2j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg2x2(i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret void +// +void test_svzero_za64_vg2x2(uint32_t slice) __arm_streaming __arm_inout("za"){ + SVE_ACLE_FUNC(svzero_za64,_vg2x2)(slice); +} + +// CHECK-LABEL: define dso_local void @test_svzero_za64_vg2x4( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg2x4(i32 [[SLICE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg2x4j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg2x4(i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret void +// +void test_svzero_za64_vg2x4(uint32_t slice) __arm_streaming __arm_inout("za"){ + 
SVE_ACLE_FUNC(svzero_za64,_vg2x4)(slice); +} + +// CHECK-LABEL: define dso_local void @test_svzero_za64_vg4x1( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg4x1(i32 [[SLICE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg4x1j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg4x1(i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret void +// +void test_svzero_za64_vg4x1(uint32_t slice) __arm_streaming __arm_inout("za"){ + SVE_ACLE_FUNC(svzero_za64,_vg4x1)(slice); +} + +// CHECK-LABEL: define dso_local void @test_svzero_za64_vg4x2( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg4x2(i32 [[SLICE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg4x2j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg4x2(i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret void +// +void test_svzero_za64_vg4x2(uint32_t slice) __arm_streaming __arm_inout("za"){ + SVE_ACLE_FUNC(svzero_za64,_vg4x2)(slice); +} + +// CHECK-LABEL: define dso_local void @test_svzero_za64_vg4x4( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg4x4(i32 [[SLICE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg4x4j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg4x4(i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret void +// +void test_svzero_za64_vg4x4(uint32_t slice) __arm_streaming __arm_inout("za"){ + 
SVE_ACLE_FUNC(svzero_za64,_vg4x4)(slice); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index aad83823881f8e..3da9afc8df6dc3 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3361,6 +3361,12 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sve_bfmlslt : SME2_BFMLS_Intrinsic; def int_aarch64_sve_bfmlslt_lane : SME2_BFMLS_Lane_Intrinsic; + // Multi-vector zeroing + + foreach vg = ["vg1x2", "vg1x4", "vg2x1", "vg2x2", "vg2x4", "vg4x1", "vg4x2", "vg4x4"] in { + def int_aarch64_sme_zero_za64_ # vg : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects]>; + } + // Multi-vector signed saturating doubling multiply high def int_aarch64_sve_sqdmulh_single_vgx2 : SME2_VG2_Multi_Single_Intrinsic; diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 50ee37b0dfebc8..b21b1faf5c9622 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -104,6 +104,13 @@ class sme2_move_to_tile_pseudo + : SMEPseudo2Instr, + Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rs, index_ty:$imm), []> { + let SMEMatrixType = za_flag; + let usesCustomInserter = 1; +} + //===----------------------------------------------------------------------===// // SME pattern match helpers. //===----------------------------------------------------------------------===// @@ -189,6 +196,9 @@ class SME2_Tile_VG4_Multi_Pat(name # _PSEUDO) $tile, $base, $offset, (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3))>; +class SME2_Zero_Matrix_Pat + : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset))), + (!cast(name) $base, $offset)>; //===----------------------------------------------------------------------===// // SME pattern match helpers. 
//===----------------------------------------------------------------------===// @@ -4815,39 +4825,57 @@ class sme2p1_zero_matrix opc, Operand index_ty, string mnemonic, } multiclass sme2p1_zero_matrix { - def _VG2_Z : sme2p1_zero_matrix<{0b000,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx2"> { + def _VG2_Z : sme2p1_zero_matrix<{0b000,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx2">, SMEPseudo2Instr { bits<3> imm; let Inst{2-0} = imm; } - def _2Z : sme2p1_zero_matrix<{0b001,?,?,?}, uimm3s2range, mnemonic> { + def _2Z : sme2p1_zero_matrix<{0b001,?,?,?}, uimm3s2range, mnemonic>, SMEPseudo2Instr { bits<3> imm; let Inst{2-0} = imm; } - def _VG2_2Z : sme2p1_zero_matrix<{0b0100,?,?}, uimm2s2range, mnemonic, "vgx2"> { + def _VG2_2Z : sme2p1_zero_matrix<{0b0100,?,?}, uimm2s2range, mnemonic, "vgx2">, SMEPseudo2Instr { bits<2> imm; let Inst{1-0} = imm; } - def _VG4_2Z : sme2p1_zero_matrix<{0b0110,?,?}, uimm2s2range, mnemonic, "vgx4"> { + def _VG4_2Z : sme2p1_zero_matrix<{0b0110,?,?}, uimm2s2range, mnemonic, "vgx4">, SMEPseudo2Instr { bits<2> imm; let Inst{1-0} = imm; } - def _VG4_Z : sme2p1_zero_matrix<{0b100,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx4"> { + def _VG4_Z : sme2p1_zero_matrix<{0b100,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx4">, SMEPseudo2Instr { bits<3> imm; let Inst{2-0} = imm; } - def _4Z : sme2p1_zero_matrix<{0b1010,?,?}, uimm2s4range, mnemonic> { + def _4Z : sme2p1_zero_matrix<{0b1010,?,?}, uimm2s4range, mnemonic>, SMEPseudo2Instr { bits<2> imm; let Inst{1-0} = imm; } - def _VG2_4Z :sme2p1_zero_matrix<{0b11000,?}, uimm1s4range, mnemonic, "vgx2"> { + def _VG2_4Z : sme2p1_zero_matrix<{0b11000,?}, uimm1s4range, mnemonic, "vgx2">, SMEPseudo2Instr { bits<1> imm; let Inst{0} = imm; } - def _VG4_4Z :sme2p1_zero_matrix<{0b11100,?}, uimm1s4range, mnemonic, "vgx4"> { + def _VG4_4Z : sme2p1_zero_matrix<{0b11100,?}, uimm1s4range, mnemonic, "vgx4">, SMEPseudo2Instr { bits<1> imm; let Inst{0} = imm; } -} + + def NAME # _VG2_Z_PSEUDO : sem2p1_zero_matrix_pseudo; + def NAME # _VG4_Z_PSEUDO : 
sem2p1_zero_matrix_pseudo; + def NAME # _2Z_PSEUDO : sem2p1_zero_matrix_pseudo; + def NAME # _VG2_2Z_PSEUDO : sem2p1_zero_matrix_pseudo; + def NAME # _VG4_2Z_PSEUDO : sem2p1_zero_matrix_pseudo; + def NAME # _4Z_PSEUDO : sem2p1_zero_matrix_pseudo; + def NAME # _VG2_4Z_PSEUDO : sem2p1_zero_matrix_pseudo; + def NAME # _VG4_4Z_PSEUDO : sem2p1_zero_matrix_pseudo; + + def : SME2_Zero_Matrix_Pat; + def : SME2_Zero_Matrix_Pat; + def : SME2_Zero_Matrix_Pat; + def : SME2_Zero_Matrix_Pat; + def : SME2_Zero_Matrix_Pat; + def : SME2_Zero_Matrix_Pat; + def : SME2_Zero_Matrix_Pat; + def : SME2_Zero_Matrix_Pat; +} //===----------------------------------------------------------------------===// // SME2.1 lookup table expand two non-contiguous registers diff --git a/llvm/test/CodeGen/AArch64/sme2p1-intrinsics-zero.ll b/llvm/test/CodeGen/AArch64/sme2p1-intrinsics-zero.ll new file mode 100644 index 00000000000000..ba77637580f4cb --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2p1-intrinsics-zero.ll @@ -0,0 +1,190 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -verify-machineinstrs < %s | FileCheck %s + +target triple = "aarch64-linux" + +define void @test_svzero_za64_vg1x2(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg1x2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 0, vgx2] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sme.zero.za64.vg1x2(i32 %slice) + ret void +} + +define void @test_svzero_za64_vg1x2_offset(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg1x2_offset: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 7, vgx2] +; CHECK-NEXT: ret +entry: + %slice.max = add i32 %slice, 7 + tail call void @llvm.aarch64.sme.zero.za64.vg1x2(i32 %slice.max) + ret void +} + +define void @test_svzero_za64_vg1x4(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg1x4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; 
CHECK-NEXT: zero za.d[w8, 0, vgx4] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sme.zero.za64.vg1x4(i32 %slice) + ret void +} + +define void @test_svzero_za64_vg1x4_offset(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg1x4_offset: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 1, vgx4] +; CHECK-NEXT: ret +entry: + %slice.min = add i32 %slice, 1 + tail call void @llvm.aarch64.sme.zero.za64.vg1x4(i32 %slice.min) + ret void +} + +define void @test_svzero_za64_vg2x1(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg2x1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 0:1] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sme.zero.za64.vg2x1(i32 %slice) + ret void +} + +define void @test_svzero_za64_vg2x1_offset(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg2x1_offset: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 6:7] +; CHECK-NEXT: ret +entry: + %slice.max = add i32 %slice, 6 + tail call void @llvm.aarch64.sme.zero.za64.vg2x1(i32 %slice.max) + ret void +} + +define void @test_svzero_za64_vg2x2(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg2x2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 0:1, vgx2] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sme.zero.za64.vg2x2(i32 %slice) + ret void +} + +define void @test_svzero_za64_vg2x2_offset(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg2x2_offset: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 2:3, vgx2] +; CHECK-NEXT: ret +entry: + %slice.max = add i32 %slice, 2 + tail call void @llvm.aarch64.sme.zero.za64.vg2x2(i32 %slice.max) + ret void +} + +define void @test_svzero_za64_vg2x4(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg2x4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 0:1, vgx4] +; CHECK-NEXT: ret +entry: + tail call void 
@llvm.aarch64.sme.zero.za64.vg2x4(i32 %slice) + ret void +} + +define void @test_svzero_za64_vg2x4_offset(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg2x4_offset: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: add w8, w0, #1 +; CHECK-NEXT: zero za.d[w8, 0:1, vgx4] +; CHECK-NEXT: ret +entry: + %slice.min = add i32 %slice, 1 + tail call void @llvm.aarch64.sme.zero.za64.vg2x4(i32 %slice.min) + ret void +} + +define void @test_svzero_za64_vg4x1(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg4x1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 0:3] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sme.zero.za64.vg4x1(i32 %slice) + ret void +} + +define void @test_svzero_za64_vg4x1_offset(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg4x1_offset: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 4:7] +; CHECK-NEXT: ret +entry: + %slice.max = add i32 %slice, 4 + tail call void @llvm.aarch64.sme.zero.za64.vg4x1(i32 %slice.max) + ret void +} + +define void @test_svzero_za64_vg4x2(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg4x2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 0:3, vgx2] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sme.zero.za64.vg4x2(i32 %slice) + ret void +} + +define void @test_svzero_za64_vg4x2_offset(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg4x2_offset: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 0:3, vgx2] +; CHECK-NEXT: ret +entry: + %slice.max = add i32 %slice, 0 + tail call void @llvm.aarch64.sme.zero.za64.vg4x2(i32 %slice.max) + ret void +} + +define void @test_svzero_za64_vg4x4(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg4x4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 0:3, vgx4] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sme.zero.za64.vg4x4(i32 %slice) + ret void +} + +define void 
@test_svzero_za64_vg4x4_offset(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg4x4_offset: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: add w8, w0, #1 +; CHECK-NEXT: zero za.d[w8, 0:3, vgx4] +; CHECK-NEXT: ret +entry: + %slice.min = add i32 %slice, 1 + tail call void @llvm.aarch64.sme.zero.za64.vg4x4(i32 %slice.min) + ret void +} + +attributes #0 = { nounwind "target-features" = "+sme2p1"}