[VectorCombine] Add Cmp and Select for shuffleToIdentity #92794
Conversation
@llvm/pr-subscribers-llvm-transforms

Author: David Green (davemgreen)

Changes: Other than some additional checks needed for compare predicates and selects with scalar condition operands, these are relatively simple additions to what already exists. I will rebase over #92766, but already had the patch for this version.

Full diff: https://github.com/llvm/llvm-project/pull/92794.diff

2 Files Affected:
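To illustrate what the fold does, here is a plain C++ model (not LLVM code; the function names are illustrative) of the `@icmpsel` test below: lanewise ops like `icmp` and `select` commute with lane permutations, so splitting the vectors into reversed halves, doing the compare and select per half, and recombining with the inverse shuffle computes the same result as one whole-vector compare and select, which is what the fold rewrites it to.

```cpp
#include <array>
#include <cstdint>

using V8 = std::array<int8_t, 8>;
using V4 = std::array<int8_t, 4>;

// The split form from the @icmpsel test: reverse each half, icmp slt +
// select on the 4-lane halves, then recombine with the reversing shuffle.
V8 icmpsel_split(V8 a, V8 b, V8 c, V8 d) {
  auto rev_half = [](V8 v, int base) {  // shufflevector <3,2,1,0> / <7,6,5,4>
    V4 h{};
    for (int i = 0; i < 4; ++i)
      h[i] = v[base + 3 - i];
    return h;
  };
  V4 ab = rev_half(a, 0), at = rev_half(a, 4);
  V4 bb = rev_half(b, 0), bt = rev_half(b, 4);
  V4 cb = rev_half(c, 0), ct = rev_half(c, 4);
  V4 db = rev_half(d, 0), dt = rev_half(d, 4);
  V4 abt{}, abb{};
  for (int i = 0; i < 4; ++i) {
    abt[i] = at[i] < bt[i] ? ct[i] : dt[i];  // icmp slt + select, top half
    abb[i] = ab[i] < bb[i] ? cb[i] : db[i];  // icmp slt + select, bottom half
  }
  V8 r{};
  for (int i = 0; i < 4; ++i) {  // shufflevector <7,6,5,4,3,2,1,0>
    r[i] = abb[3 - i];
    r[4 + i] = abt[3 - i];
  }
  return r;
}

// What the fold produces: the same icmp + select applied lanewise to the
// whole 8-lane vectors, with no shuffles at all.
V8 icmpsel_folded(V8 a, V8 b, V8 c, V8 d) {
  V8 r{};
  for (int i = 0; i < 8; ++i)
    r[i] = a[i] < b[i] ? c[i] : d[i];
  return r;
}
```

Both functions agree on every input, which is the identity that lets the pass delete the shuffles.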
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 15deaf908422d..981e17fa9aa2c 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1757,6 +1757,13 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
return false;
if (IL.first->getValueID() != Item[0].first->getValueID())
return false;
+ if (auto *CI = dyn_cast<CmpInst>(IL.first))
+ if (CI->getPredicate() !=
+ cast<CmpInst>(Item[0].first)->getPredicate())
+ return false;
+ if (auto *SI = dyn_cast<SelectInst>(IL.first))
+ if (!isa<VectorType>(SI->getOperand(0)->getType()))
+ return false;
if (isa<CallInst>(IL.first) && !isa<IntrinsicInst>(IL.first))
return false;
auto *II = dyn_cast<IntrinsicInst>(IL.first);
@@ -1769,12 +1776,17 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
// Check the operator is one that we support. We exclude div/rem in case
// they hit UB from poison lanes.
- if (isa<BinaryOperator>(Item[0].first) &&
- !cast<BinaryOperator>(Item[0].first)->isIntDivRem()) {
+ if ((isa<BinaryOperator>(Item[0].first) &&
+ !cast<BinaryOperator>(Item[0].first)->isIntDivRem()) ||
+ isa<CmpInst>(Item[0].first)) {
Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 1));
} else if (isa<UnaryOperator>(Item[0].first)) {
Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
+ } else if (isa<SelectInst>(Item[0].first)) {
+ Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
+ Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 1));
+ Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 2));
} else if (auto *II = dyn_cast<IntrinsicInst>(Item[0].first);
II && isTriviallyVectorizable(II->getIntrinsicID())) {
for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {
@@ -1834,6 +1846,10 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
if (auto BI = dyn_cast<BinaryOperator>(I))
return Builder.CreateBinOp((Instruction::BinaryOps)BI->getOpcode(),
Ops[0], Ops[1]);
+ if (auto CI = dyn_cast<CmpInst>(I))
+ return Builder.CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
+ if (auto SI = dyn_cast<SelectInst>(I))
+ return Builder.CreateSelect(Ops[0], Ops[1], Ops[2], "", SI);
if (II)
return Builder.CreateIntrinsic(DstTy, II->getIntrinsicID(), Ops);
assert(isa<UnaryInstruction>(I) &&
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
index eb368471b1d84..523c0a476de63 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
@@ -399,19 +399,8 @@ define <8 x i8> @extrause_shuffle(<8 x i8> %a, <8 x i8> %b) {
define <8 x i8> @icmpsel(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: @icmpsel(
-; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[CB:%.*]] = shufflevector <8 x i8> [[C:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[CT:%.*]] = shufflevector <8 x i8> [[C]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[DB:%.*]] = shufflevector <8 x i8> [[D:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[DT:%.*]] = shufflevector <8 x i8> [[D]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[ABT1:%.*]] = icmp slt <4 x i8> [[AT]], [[BT]]
-; CHECK-NEXT: [[ABB1:%.*]] = icmp slt <4 x i8> [[AB]], [[BB]]
-; CHECK-NEXT: [[ABT:%.*]] = select <4 x i1> [[ABT1]], <4 x i8> [[CT]], <4 x i8> [[DT]]
-; CHECK-NEXT: [[ABB:%.*]] = select <4 x i1> [[ABB1]], <4 x i8> [[CB]], <4 x i8> [[DB]]
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <8 x i8> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[R:%.*]] = select <8 x i1> [[TMP1]], <8 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]
; CHECK-NEXT: ret <8 x i8> [[R]]
;
%ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -465,19 +454,8 @@ define <8 x i8> @icmpsel_diffentcond(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x
define <8 x i8> @fcmpsel(<8 x half> %a, <8 x half> %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: @fcmpsel(
-; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x half> [[B]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[CB:%.*]] = shufflevector <8 x i8> [[C:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[CT:%.*]] = shufflevector <8 x i8> [[C]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[DB:%.*]] = shufflevector <8 x i8> [[D:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[DT:%.*]] = shufflevector <8 x i8> [[D]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[ABT1:%.*]] = fcmp olt <4 x half> [[AT]], [[BT]]
-; CHECK-NEXT: [[ABB1:%.*]] = fcmp olt <4 x half> [[AB]], [[BB]]
-; CHECK-NEXT: [[ABT:%.*]] = select <4 x i1> [[ABT1]], <4 x i8> [[CT]], <4 x i8> [[DT]]
-; CHECK-NEXT: [[ABB:%.*]] = select <4 x i1> [[ABB1]], <4 x i8> [[CB]], <4 x i8> [[DB]]
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <8 x half> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[R:%.*]] = select <8 x i1> [[TMP1]], <8 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]
; CHECK-NEXT: ret <8 x i8> [[R]]
;
%ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
rebase?
Force-pushed from a8e1d40 to 650a4a2.
✅ With the latest revision this PR passed the C/C++ code formatter.
Force-pushed from 650a4a2 to db5f5e0.
LGTM with a couple of minors
@@ -1742,6 +1742,10 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
  if (auto *BI = dyn_cast<BinaryOperator>(I))
    return Builder.CreateBinOp((Instruction::BinaryOps)BI->getOpcode(), Ops[0],
                               Ops[1]);
  if (auto CI = dyn_cast<CmpInst>(I))
    return Builder.CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
  if (auto SI = dyn_cast<SelectInst>(I))
auto *SI
In the future, to catch such nits, I would recommend the official clangd extension from LLVM (if you use VSCode to contribute to LLVM): it will automatically yellow-underline such instances and prompt you with the fix.
It'd be nice for the CI code_formatter stage to pick these up
@@ -1742,6 +1742,10 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
  if (auto *BI = dyn_cast<BinaryOperator>(I))
    return Builder.CreateBinOp((Instruction::BinaryOps)BI->getOpcode(), Ops[0],
                               Ops[1]);
  if (auto CI = dyn_cast<CmpInst>(I))
auto *CI
Force-pushed from db5f5e0 to d294768.
if ((isa<BinaryOperator>(FrontV) &&
     !cast<BinaryOperator>(FrontV)->isIntDivRem()) ||
Use `dyn_cast` in place of `isa` + `cast`.
This appears to have introduced a codegen regression in Halide -- before this change, an expression of the form

now it does a much more complex operation:

We will temporarily disable this test on our side, but the apparent regression in codegen looks bad -- we should probably either revert this or plan a fix-forward.
Hi - Yeah, I can take a look. Do you have a reproducer? I don't read X86 very fluently, but the hope was that this would only remove unnecessary shuffles.
Enclosed are before-and-after (BAD and GOOD) IR generated by Halide. |
Thanks, it looks like these, if there are no other optimizations going on: https://godbolt.org/z/MfETGqdse

Bad:
Apparently vblendvb with ymm registers is only available with avx2: https://godbolt.org/z/d38hbxMaP
The failure in question appears with avx (but not avx2) codegen enabled.
@davemgreen I'll take a look at this - we have a few cases on AVX1 where we don't split 256-bit vectors when we probably should.
Thanks - I can also add a cost model, which seems to help too with no avx (although it does rely on splat shuffles/concats looking expensive). It was something I might need in the future. I've upstreamed patches for all the parts I had, but there was another motivating example that needed "piecewise splats", which should be cheap if we can recognize them. I will run some tests to see what happens with a cost model added.
llvm/llvm-project#92794 broke generation of pblend*b in some situations. A fix is underway; this just comments out those failures temporarily.
There is a quick cost model added in #93937.
…s where we have to split/concat 128-bit subvectors We'd be better off consistently using 128-bit instructions Based off a regression reported after #92794
…h of the operands are free to split. Often on AVX1 we're better off consistently using 128-bit instructions, so recognise when the operands are loads that can be freely/cheaply split - ideally this functionality needs to be moved to isFreeToSplitVector but we're using it in a few places where we don't want to split loads yet. Based off a regression reported after #92794
@steven-johnson Please can you tell me if b52962d addresses your perf regression?
Testing now.
b52962d appears to address it, please let me know when it lands.
Already committed to trunk. I'll probably try to extend it further as 256-bit integer ops are tricky on AVX1, so please report any other perf issues you see. |
This looks to cause an assertion with the following reproducer:
Thanks for the reproducer - it looks like the test we have for scalar-select conditions isn't working as it should. |
It is hopefully fixed in 76c8e1d. Let me know if not! Thanks
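For context, a plain C++ model (not LLVM code; names are illustrative) of the distinction the scalar-condition check in the patch guards:

```cpp
#include <array>

using V4 = std::array<int, 4>;

// A vector-condition select picks lanewise, so it commutes with lane
// shuffles and can be rebuilt per lane by the fold.
V4 select_vector_cond(std::array<bool, 4> c, V4 a, V4 b) {
  V4 r{};
  for (int i = 0; i < 4; ++i)
    r[i] = c[i] ? a[i] : b[i];
  return r;
}

// A scalar-condition select picks one whole operand: there is no per-lane
// structure to recurse into, so the fold has to reject this form, which is
// what the isa<VectorType> check on the condition operand is for.
V4 select_scalar_cond(bool c, V4 a, V4 b) {
  return c ? a : b;
}
```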