From ebe7b9c00ae562024a0dbbaf06f516504602a3b5 Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Thu, 18 Jul 2024 20:49:53 +0800 Subject: [PATCH] [SLP][REVEC] Make Instruction::Call support vector instructions. (#99317) --- .../Transforms/Vectorize/SLPVectorizer.cpp | 2 +- llvm/test/Transforms/SLPVectorizer/revec.ll | 20 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index b994645cece61ec..d8c3bae06e932f5 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -13499,7 +13499,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { } ScalarArg = CEI->getArgOperand(I); if (cast<VectorType>(OpVec->getType())->getElementType() != - ScalarArg->getType() && + ScalarArg->getType()->getScalarType() && It == MinBWs.end()) { auto *CastTy = getWidenedType(ScalarArg->getType(), VecTy->getNumElements()); diff --git a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll index 4b37b100763a956..c2dc6d0ab73b719 100644 --- a/llvm/test/Transforms/SLPVectorizer/revec.ll +++ b/llvm/test/Transforms/SLPVectorizer/revec.ll @@ -38,3 +38,23 @@ entry: store <4 x i32> %add.i65, ptr %arrayidx42, align 4 ret void } + +define void @test2(ptr %in, ptr %out) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i16>, ptr [[IN:%.*]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> [[TMP0]], <16 x i16> [[TMP0]]) +; CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[OUT:%.*]], align 2 +; CHECK-NEXT: ret void +; +entry: + %0 = getelementptr i16, ptr %in, i64 8 + %1 = load <8 x i16>, ptr %in, align 2 + %2 = load <8 x i16>, ptr %0, align 2 + %3 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %1, <8 x i16> %1) + %4 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %2, <8 x i16> %2) + %5 = getelementptr i16, 
ptr %out, i64 8 + store <8 x i16> %3, ptr %out, align 2 + store <8 x i16> %4, ptr %5, align 2 + ret void +}