diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index e45fcb2b5c790c..893fef4095b27c 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5443,6 +5443,22 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { if (!TE.ReorderIndices.empty()) return TE.ReorderIndices; + SmallVector UserBVHead(TE.Scalars.size()); + for (auto [I, V] : zip(UserBVHead, TE.Scalars)) { + if (!V->hasNUsesOrMore(1)) + continue; + auto *II = dyn_cast(*V->user_begin()); + if (!II) + continue; + Instruction *BVHead = nullptr; + BasicBlock *BB = II->getParent(); + while (II && II->hasOneUse() && II->getParent() == BB) { + BVHead = II; + II = dyn_cast(II->getOperand(0)); + } + I = BVHead; + } + auto PHICompare = [&](unsigned I1, unsigned I2) { Value *V1 = TE.Scalars[I1]; Value *V2 = TE.Scalars[I2]; @@ -5454,21 +5470,60 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { return false; auto *FirstUserOfPhi1 = cast(*V1->user_begin()); auto *FirstUserOfPhi2 = cast(*V2->user_begin()); - if (auto *IE1 = dyn_cast(FirstUserOfPhi1)) - if (auto *IE2 = dyn_cast(FirstUserOfPhi2)) { - if (!areTwoInsertFromSameBuildVector( - IE1, IE2, - [](InsertElementInst *II) { return II->getOperand(0); })) - return I1 < I2; + if (FirstUserOfPhi1->getParent() != FirstUserOfPhi2->getParent()) + return DT->dominates(FirstUserOfPhi1->getParent(), + FirstUserOfPhi2->getParent()); + auto *IE1 = dyn_cast(FirstUserOfPhi1); + auto *IE2 = dyn_cast(FirstUserOfPhi2); + auto *EE1 = dyn_cast(FirstUserOfPhi1); + auto *EE2 = dyn_cast(FirstUserOfPhi2); + if (IE1 && !IE2) + return true; + if (!IE1 && IE2) + return false; + if (IE1 && IE2) { + if (UserBVHead[I1] && !UserBVHead[I2]) + return true; + if (!UserBVHead[I1]) + return false; + if (UserBVHead[I1] == UserBVHead[I2]) return getElementIndex(IE1) < getElementIndex(IE2); - } - if (auto *EE1 = dyn_cast(FirstUserOfPhi1)) - if 
(auto *EE2 = dyn_cast(FirstUserOfPhi2)) { - if (EE1->getOperand(0) != EE2->getOperand(0)) - return I1 < I2; + if (UserBVHead[I1]->getParent() != UserBVHead[I2]->getParent()) + return DT->dominates(UserBVHead[I1]->getParent(), + UserBVHead[I2]->getParent()); + return UserBVHead[I1]->comesBefore(UserBVHead[I2]); + } + if (EE1 && !EE2) + return true; + if (!EE1 && EE2) + return false; + if (EE1 && EE2) { + if (EE1->getOperand(0) == EE2->getOperand(0)) return getElementIndex(EE1) < getElementIndex(EE2); + auto *Inst1 = dyn_cast(EE1->getOperand(0)); + auto *Inst2 = dyn_cast(EE2->getOperand(0)); + if (Inst1 && !Inst2) + return true; + if (!Inst1 && Inst2) + return false; + if (Inst1 && Inst2) { + if (Inst1->getParent() != Inst2->getParent()) + return DT->dominates(Inst1->getParent(), Inst2->getParent()); + return Inst1->comesBefore(Inst2); } - return I1 < I2; + auto *P1 = dyn_cast(EE1->getOperand(0)); + auto *P2 = dyn_cast(EE2->getOperand(0)); + if (P1 && !P2) + return true; + if (!P1 && P2) + return false; + if (P1 && P2) + return P1->getArgNo() < P2->getArgNo(); + // TODO: add analysis for other value kinds. 
+ return EE1->getOperand(0)->getValueID() < + EE2->getOperand(0)->getValueID(); + } + return false; }; DenseMap PhiToId; SmallVector Phis(TE.Scalars.size()); diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll index dbc4f3d59d4f9b..d6073ea4bbbae6 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll @@ -33,40 +33,40 @@ define void @test() { ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x float> [[TMP6]], float [[I68]], i32 6 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x float> [[TMP7]], float [[I66]], i32 7 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x float> [[TMP8]], float [[I72]], i32 13 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x float> [[TMP9]], float [[I69]], i32 14 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x float> [[TMP10]], float [[I67]], i32 15 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x float> [[TMP9]], float [[I67]], i32 14 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x float> [[TMP10]], float [[I69]], i32 15 ; CHECK-NEXT: br i1 poison, label %[[BB167:.*]], label %[[BB77:.*]] ; CHECK: [[BB77]]: -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> ; CHECK-NEXT: br label %[[BB78:.*]] ; CHECK: [[BB78]]: ; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP12]], %[[BB77]] ], [ [[TMP30:%.*]], %[[BB78]] ] ; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x float> [ poison, %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ] -; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x float> 
[[TMP15]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = fmul fast <16 x float> [[TMP17]], [[TMP13]] -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP20]], <16 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> [[TMP22]], <16 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> [[TMP22]], <16 x i32> ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x float> [[TMP23]], <16 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP25:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v2f32(<16 x float> [[TMP14]], <2 x float> [[TMP0]], i64 2) ; CHECK-NEXT: [[TMP26:%.*]] = fmul fast <16 x float> [[TMP24]], [[TMP25]] ; CHECK-NEXT: [[TMP27:%.*]] = fadd fast <16 x float> [[TMP26]], [[TMP18]] ; CHECK-NEXT: [[TMP28:%.*]] = fadd fast <16 x float> [[TMP27]], poison ; CHECK-NEXT: [[TMP29:%.*]] = fadd fast <16 x float> [[TMP28]], poison -; CHECK-NEXT: [[TMP30]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP30]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> ; CHECK-NEXT: [[TMP31]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <2 x i32> ; CHECK-NEXT: br i1 poison, label %[[BB78]], label %[[BB167]] ; CHECK: [[BB167]]: ; CHECK-NEXT: [[TMP32:%.*]] = phi <16 x float> [ [[TMP11]], %[[BB64]] ], [ [[TMP29]], %[[BB78]] ] -; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x float> [[TMP32]], i32 15 +; 
CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x float> [[TMP32]], i32 14 ; CHECK-NEXT: store float [[TMP33]], ptr poison, align 1 ; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x float> [[TMP32]], i32 13 ; CHECK-NEXT: store float [[TMP34]], ptr poison, align 1 -; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x float> [[TMP32]], i32 14 +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x float> [[TMP32]], i32 15 ; CHECK-NEXT: br i1 poison, label %[[BB186:.*]], label %[[BB184:.*]] ; CHECK: [[BB184]]: ; CHECK-NEXT: br label %[[BB185:.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/root-trunc-extract-reuse.ll b/llvm/test/Transforms/SLPVectorizer/root-trunc-extract-reuse.ll index 34c068478c5f5e..d4b737a6bc4211 100644 --- a/llvm/test/Transforms/SLPVectorizer/root-trunc-extract-reuse.ll +++ b/llvm/test/Transforms/SLPVectorizer/root-trunc-extract-reuse.ll @@ -10,9 +10,9 @@ define i1 @test() { ; CHECK-NEXT: br label [[ELSE]] ; CHECK: else: ; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ zeroinitializer, [[THEN]] ], [ zeroinitializer, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1 ; CHECK-NEXT: [[BF_CAST162:%.*]] = and i32 [[TMP1]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> zeroinitializer, <2 x i32> [[TMP0]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> zeroinitializer, <2 x i32> [[TMP0]], <2 x i32> ; CHECK-NEXT: [[T13:%.*]] = and <2 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: br label [[ELSE1:%.*]] ; CHECK: else1: