Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[VectorCombine] Add Cmp and Select for shuffleToIdentity #92794

Merged
merged 2 commits into from
May 28, 2024

Conversation

davemgreen
Copy link
Collaborator

@davemgreen davemgreen commented May 20, 2024

Other than some additional checks needed for compare predicates and selects with scalar condition operands, these are relatively simple additions to what already exists.

@llvmbot
Copy link
Collaborator

llvmbot commented May 20, 2024

@llvm/pr-subscribers-llvm-transforms

Author: David Green (davemgreen)

Changes

Other than some additional checks needed for compare predicates and selects with scalar condition operands, these are relatively simple additions to what already exists.

I will rebase over #92766, but already had the patch for this version.


Full diff: https://github.com/llvm/llvm-project/pull/92794.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Vectorize/VectorCombine.cpp (+18-2)
  • (modified) llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll (+4-26)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 15deaf908422d..981e17fa9aa2c 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1757,6 +1757,13 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
             return false;
           if (IL.first->getValueID() != Item[0].first->getValueID())
             return false;
+          if (auto *CI = dyn_cast<CmpInst>(IL.first))
+            if (CI->getPredicate() !=
+                cast<CmpInst>(Item[0].first)->getPredicate())
+              return false;
+          if (auto *SI = dyn_cast<SelectInst>(IL.first))
+            if (!isa<VectorType>(SI->getOperand(0)->getType()))
+              return false;
           if (isa<CallInst>(IL.first) && !isa<IntrinsicInst>(IL.first))
             return false;
           auto *II = dyn_cast<IntrinsicInst>(IL.first);
@@ -1769,12 +1776,17 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
 
     // Check the operator is one that we support. We exclude div/rem in case
     // they hit UB from poison lanes.
-    if (isa<BinaryOperator>(Item[0].first) &&
-        !cast<BinaryOperator>(Item[0].first)->isIntDivRem()) {
+    if ((isa<BinaryOperator>(Item[0].first) &&
+         !cast<BinaryOperator>(Item[0].first)->isIntDivRem()) ||
+        isa<CmpInst>(Item[0].first)) {
       Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
       Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 1));
     } else if (isa<UnaryOperator>(Item[0].first)) {
       Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
+    } else if (isa<SelectInst>(Item[0].first)) {
+      Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
+      Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 1));
+      Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 2));
     } else if (auto *II = dyn_cast<IntrinsicInst>(Item[0].first);
                II && isTriviallyVectorizable(II->getIntrinsicID())) {
       for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {
@@ -1834,6 +1846,10 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
     if (auto BI = dyn_cast<BinaryOperator>(I))
       return Builder.CreateBinOp((Instruction::BinaryOps)BI->getOpcode(),
                                  Ops[0], Ops[1]);
+    if (auto CI = dyn_cast<CmpInst>(I))
+      return Builder.CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
+    if (auto SI = dyn_cast<SelectInst>(I))
+      return Builder.CreateSelect(Ops[0], Ops[1], Ops[2], "", SI);
     if (II)
       return Builder.CreateIntrinsic(DstTy, II->getIntrinsicID(), Ops);
     assert(isa<UnaryInstruction>(I) &&
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
index eb368471b1d84..523c0a476de63 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
@@ -399,19 +399,8 @@ define <8 x i8> @extrause_shuffle(<8 x i8> %a, <8 x i8> %b) {
 
 define <8 x i8> @icmpsel(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
 ; CHECK-LABEL: @icmpsel(
-; CHECK-NEXT:    [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:    [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:    [[CB:%.*]] = shufflevector <8 x i8> [[C:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[CT:%.*]] = shufflevector <8 x i8> [[C]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:    [[DB:%.*]] = shufflevector <8 x i8> [[D:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[DT:%.*]] = shufflevector <8 x i8> [[D]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:    [[ABT1:%.*]] = icmp slt <4 x i8> [[AT]], [[BT]]
-; CHECK-NEXT:    [[ABB1:%.*]] = icmp slt <4 x i8> [[AB]], [[BB]]
-; CHECK-NEXT:    [[ABT:%.*]] = select <4 x i1> [[ABT1]], <4 x i8> [[CT]], <4 x i8> [[DT]]
-; CHECK-NEXT:    [[ABB:%.*]] = select <4 x i1> [[ABB1]], <4 x i8> [[CB]], <4 x i8> [[DB]]
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <8 x i8> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = select <8 x i1> [[TMP1]], <8 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]
 ; CHECK-NEXT:    ret <8 x i8> [[R]]
 ;
   %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -465,19 +454,8 @@ define <8 x i8> @icmpsel_diffentcond(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x
 
 define <8 x i8> @fcmpsel(<8 x half> %a, <8 x half> %b, <8 x i8> %c, <8 x i8> %d) {
 ; CHECK-LABEL: @fcmpsel(
-; CHECK-NEXT:    [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:    [[BB:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[BT:%.*]] = shufflevector <8 x half> [[B]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:    [[CB:%.*]] = shufflevector <8 x i8> [[C:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[CT:%.*]] = shufflevector <8 x i8> [[C]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:    [[DB:%.*]] = shufflevector <8 x i8> [[D:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[DT:%.*]] = shufflevector <8 x i8> [[D]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:    [[ABT1:%.*]] = fcmp olt <4 x half> [[AT]], [[BT]]
-; CHECK-NEXT:    [[ABB1:%.*]] = fcmp olt <4 x half> [[AB]], [[BB]]
-; CHECK-NEXT:    [[ABT:%.*]] = select <4 x i1> [[ABT1]], <4 x i8> [[CT]], <4 x i8> [[DT]]
-; CHECK-NEXT:    [[ABB:%.*]] = select <4 x i1> [[ABB1]], <4 x i8> [[CB]], <4 x i8> [[DB]]
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt <8 x half> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = select <8 x i1> [[TMP1]], <8 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]
 ; CHECK-NEXT:    ret <8 x i8> [[R]]
 ;
   %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>

@RKSimon
Copy link
Collaborator

RKSimon commented May 22, 2024

rebase?

@davemgreen davemgreen force-pushed the gh-shuffleToIdentity-cmpsel branch from a8e1d40 to 650a4a2 Compare May 23, 2024 17:18
Copy link

github-actions bot commented May 23, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

@davemgreen davemgreen force-pushed the gh-shuffleToIdentity-cmpsel branch from 650a4a2 to db5f5e0 Compare May 23, 2024 17:45
Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM with a couple of minor comments

@@ -1742,6 +1742,10 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
if (auto *BI = dyn_cast<BinaryOperator>(I))
return Builder.CreateBinOp((Instruction::BinaryOps)BI->getOpcode(), Ops[0],
Ops[1]);
if (auto CI = dyn_cast<CmpInst>(I))
return Builder.CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
if (auto SI = dyn_cast<SelectInst>(I))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

auto *SI

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the future, to catch such nits, I would recommend getting the official clangd extension from LLVM in VSCode (if you use VSCode to contribute to LLVM): it will automatically yellow-underline such instances, and prompt you about the fix.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It'd be nice for the CI code_formatter stage to pick these up

@@ -1742,6 +1742,10 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
if (auto *BI = dyn_cast<BinaryOperator>(I))
return Builder.CreateBinOp((Instruction::BinaryOps)BI->getOpcode(), Ops[0],
Ops[1]);
if (auto CI = dyn_cast<CmpInst>(I))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

auto *CI

Other than some additional checks needed for compare predicates and selects
with scalar condition operands, these are relatively simple additions to what
already exists.
@davemgreen davemgreen force-pushed the gh-shuffleToIdentity-cmpsel branch from db5f5e0 to d294768 Compare May 28, 2024 11:28
Comment on lines +1845 to +1846
if ((isa<BinaryOperator>(FrontV) &&
!cast<BinaryOperator>(FrontV)->isIntDivRem()) ||
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use dyn_cast in place of isa + cast.

@davemgreen davemgreen merged commit 516a9f5 into llvm:main May 28, 2024
4 of 7 checks passed
vg0204 pushed a commit to vg0204/llvm-project that referenced this pull request May 29, 2024
Other than some additional checks needed for compare predicates and
selects with scalar condition operands, these are relatively simple
additions to what already exists.
@steven-johnson
Copy link

This appears to have introduced a codegen regression in Halide -- before this, an expression of the form select(u8_1 > 7, u8_1, u8_2) (values are uint8) would generate a pblend*b instruction on x86, e.g.:

    movdqa  -16(%edi,%edx), %xmm2
    movdqa  (%edi,%edx), %xmm3
    movdqa  %xmm2, %xmm0
    pmaxub  %xmm1, %xmm0
    pcmpeqb %xmm2, %xmm0
    pblendvb    %xmm0, %xmm2, %xmm3

now, it does a much more complex operation:

    vmovdqa -16(%r14,%rdx), %xmm1
    vmovdqa (%r14,%rdx), %xmm2
    vinsertf128 $1, %xmm2, %ymm1, %ymm3
    vpmaxub %xmm0, %xmm2, %xmm4
    vpcmpeqb    %xmm4, %xmm2, %xmm4
    vpmaxub %xmm0, %xmm1, %xmm5
    vpcmpeqb    %xmm5, %xmm1, %xmm1
    vinsertf128 $1, %xmm4, %ymm1, %ymm1
    vinsertf128 $1, 16(%r14,%rdx), %ymm2, %ymm2
    vandnps %ymm2, %ymm1, %ymm2
    vandps  %ymm1, %ymm3, %ymm1
    vorps   %ymm2, %ymm1, %ymm1
    vmovaps %ymm1, (%rcx,%rdx)

We will temporarily disable this test on our side, but the apparent regression in codegen looks bad -- we should probably either revert this or plan a fix-forward.

@davemgreen
Copy link
Collaborator Author

davemgreen commented May 30, 2024

Hi - Yeah I can take a look. Do you have a reproducer? I don't read X86 very fluently but the hope was that this would only remove unnecessary shuffles.

@davemgreen davemgreen deleted the gh-shuffleToIdentity-cmpsel branch May 30, 2024 16:55
@steven-johnson
Copy link

Enclosed are before-and-after (BAD and GOOD) IR generated by Halide.
export.zip

@davemgreen
Copy link
Collaborator Author

Thanks, it looks like these, if there are no other optimizations going on: https://godbolt.org/z/MfETGqdse
Good:

.LBB0_2:                                # %"2_for_op_pblend_b_0.s0.x.x"
        vmovdqa xmm1, xmmword ptr [r14 + rdx - 16]
        vmovdqa xmm2, xmmword ptr [r14 + rdx]
        vmovdqa xmm3, xmmword ptr [r14 + rdx + 16]
        vpmaxub xmm4, xmm1, xmm0
        vpcmpeqb        xmm4, xmm1, xmm4
        vpmaxub xmm5, xmm2, xmm0
        vpcmpeqb        xmm5, xmm2, xmm5
        vpblendvb       xmm1, xmm2, xmm1, xmm4
        vpblendvb       xmm2, xmm3, xmm2, xmm5
        vmovdqa xmmword ptr [rcx + rdx + 16], xmm2
        vmovdqa xmmword ptr [rcx + rdx], xmm1
        add     rdx, 32
        cmp     rdx, 768
        jne     .LBB0_2

Bad:

.LBB0_2:                                # %"2_for_op_pblend_b_0.s0.x.x"
        vmovdqa xmm1, xmmword ptr [r14 + rdx - 16]
        vmovdqa xmm2, xmmword ptr [r14 + rdx]
        vinsertf128     ymm3, ymm1, xmm2, 1
        vpmaxub xmm4, xmm2, xmm0
        vpcmpeqb        xmm4, xmm2, xmm4
        vpmaxub xmm5, xmm1, xmm0
        vpcmpeqb        xmm1, xmm1, xmm5
        vinsertf128     ymm1, ymm1, xmm4, 1
        vinsertf128     ymm2, ymm2, xmmword ptr [r14 + rdx + 16], 1
        vandnps ymm2, ymm1, ymm2
        vandps  ymm1, ymm3, ymm1
        vorps   ymm1, ymm1, ymm2
        vmovaps ymmword ptr [rcx + rdx], ymm1
        add     rdx, 32
        cmp     rdx, 768
        jne     .LBB0_2

@davemgreen
Copy link
Collaborator Author

Apparently vpblendvb with ymm registers is only available with avx2: https://godbolt.org/z/d38hbxMaP
It looks quite nice then:

.LBB0_2:                                # %"2_for_op_pblend_b_0.s0.x.x"
        vmovdqa xmm1, xmmword ptr [r14 + rdx - 16]
        vmovdqa xmm2, xmmword ptr [r14 + rdx]
        vinserti128     ymm1, ymm1, xmm2, 1
        vpmaxub ymm3, ymm1, ymm0
        vpcmpeqb        ymm3, ymm1, ymm3
        vinserti128     ymm2, ymm2, xmmword ptr [r14 + rdx + 16], 1
        vpblendvb       ymm1, ymm2, ymm1, ymm3
        vmovdqa ymmword ptr [rcx + rdx], ymm1
        add     rdx, 32
        cmp     rdx, 768
        jne     .LBB0_2

@steven-johnson
Copy link

only available with avx2

The failure in question appears with avx (but not avx2) codegen enabled

@RKSimon
Copy link
Collaborator

RKSimon commented May 30, 2024

@davemgreen I'll take a look at this - we have a few cases on AVX1 where we don't split 256-bit vectors when we probably should.

@davemgreen
Copy link
Collaborator Author

Thanks - I can also add a cost model, which seems to help too with no AVX (although it does rely on splat shuffles/concats looking expensive). It was something I might need in the future. I've upstreamed patches for all the parts I had, but there was another motivating example I had that needed "piecewise splats", which should be cheap if we can recognize them. I will run some tests to see what happens with a cost model added.

steven-johnson added a commit to halide/Halide that referenced this pull request May 30, 2024
llvm/llvm-project#92794 broke generation of pblend*b in some situations. A fix is underway; this just comments out those failures temporarily.
@davemgreen
Copy link
Collaborator Author

There is a quick cost-model added in #93937.

RKSimon added a commit that referenced this pull request May 31, 2024
…s where we have to split/concat 128-bit subvectors

We'd be better off consistently using 128-bit instructions

Based off a regression reported after #92794
RKSimon added a commit that referenced this pull request May 31, 2024
…h of the operands are free to split.

Often on AVX1 we're better off consistently using 128-bit instructions, so recognise when the operands are loads that can be freely/cheaply split - ideally this functionality needs to be moved to isFreeToSplitVector but we're using it in a few places where we don't want to split loads yet.

Based off a regression reported after #92794
@RKSimon
Copy link
Collaborator

RKSimon commented May 31, 2024

@steven-johnson Please can you tell me if b52962d addresses your perf regression?

@steven-johnson
Copy link

steven-johnson commented May 31, 2024 via email

@steven-johnson
Copy link

b52962d appears to address it, please let me know when it lands.

@RKSimon
Copy link
Collaborator

RKSimon commented May 31, 2024

Already committed to trunk. I'll probably try to extend it further as 256-bit integer ops are tricky on AVX1, so please report any other perf issues you see.

qiaojbao pushed a commit to GPUOpen-Drivers/llvm-project that referenced this pull request Jun 26, 2024
…897600144

Local branch amd-gfx 2958976 Merged main:cbed9a64491d82d6c4a3a7d0cd97cdee32ff2301 into amd-gfx:fcac22e68cfc
Remote branch main 516a9f5 [VectorCombine] Add Cmp and Select for shuffleToIdentity (llvm#92794)
@SLTozer
Copy link
Contributor

SLTozer commented Jun 28, 2024

This looks to cause an assertion with the following reproducer:

$ cat repro.ll
define void @foo(ptr %this) {
entry:
  %0 = load <2 x float>, ptr inttoptr (i64 -60 to ptr), align 4
  %1 = load <2 x float>, ptr inttoptr (i64 -72 to ptr), align 4
  %2 = extractelement <2 x float> %0, i64 0
  %cmp.i903 = fcmp ogt float %2, 0.000000e+00
  %sel1639 = select i1 %cmp.i903, <2 x float> %0, <2 x float> %1
  %3 = fcmp ogt <2 x float> %0, zeroinitializer
  %sel48.i913 = select <2 x i1> %3, <2 x float> %0, <2 x float> %1
  %4 = shufflevector <2 x float> %sel1639, <2 x float> %sel48.i913, <2 x i32> <i32 0, i32 3>
  %5 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %4, <2 x float> zeroinitializer)
  %6 = getelementptr i8, ptr %this, i64 1424
  %7 = extractelement <2 x float> %5, i64 0
  store float %7, ptr %6, align 16
  ret void
}
$ ./build/bin/opt -passes=vector-combine repro.ll -S
opt: /home/gbtozers/dev/upstream-llvm/llvm/include/llvm/Support/Casting.h:578: decltype(auto) llvm::cast(From *) [To = llvm::FixedVectorType, From = llvm::Type]: Assertion `isa<To>(Val) && "cast<Ty>() argument of incompatible type!"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.      Program arguments: ./build/bin/opt -passes=vector-combine repro.ll -S
 #0 0x0000557d91364278 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (./build/bin/opt+0x3ee2278)
 #1 0x0000557d913622a0 llvm::sys::RunSignalHandlers() (./build/bin/opt+0x3ee02a0)
 #2 0x0000557d91364c18 SignalHandler(int) Signals.cpp:0:0
 #3 0x00007f756af8a520 (/lib/x86_64-linux-gnu/libc.so.6+0x42520)
 #4 0x00007f756afde9fc __pthread_kill_implementation ./nptl/pthread_kill.c:44:76
 #5 0x00007f756afde9fc __pthread_kill_internal ./nptl/pthread_kill.c:78:10
 #6 0x00007f756afde9fc pthread_kill ./nptl/pthread_kill.c:89:10
 #7 0x00007f756af8a476 gsignal ./signal/../sysdeps/posix/raise.c:27:6
 #8 0x00007f756af707f3 abort ./stdlib/abort.c:81:7
 #9 0x00007f756af7071b _nl_load_domain ./intl/loadmsgcat.c:1177:9
#10 0x00007f756af81e96 (/lib/x86_64-linux-gnu/libc.so.6+0x39e96)
#11 0x0000557d927a119f (anonymous namespace)::VectorCombine::run()::$_0::operator()(llvm::Instruction&) const VectorCombine.cpp:0:0
#12 0x0000557d927980d1 llvm::VectorCombinePass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (./build/bin/opt+0x53160d1)
#13 0x0000557d924d4cfd llvm::detail::PassModel<llvm::Function, llvm::VectorCombinePass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) PassBuilderPipelines.cpp:0:0
#14 0x0000557d9152fd5b llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (./build/bin/opt+0x40add5b)
#15 0x0000557d924d688d llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) PassBuilderPipelines.cpp:0:0
#16 0x0000557d9153479e llvm::ModuleToFunctionPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (./build/bin/opt+0x40b279e)
#17 0x0000557d924cfd7d llvm::detail::PassModel<llvm::Module, llvm::ModuleToFunctionPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) PassBuilderPipelines.cpp:0:0
#18 0x0000557d9152eaab llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (./build/bin/opt+0x40acaab)
#19 0x0000557d92475fcc llvm::runPassPipeline(llvm::StringRef, llvm::Module&, llvm::TargetMachine*, llvm::TargetLibraryInfoImpl*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::StringRef, llvm::ArrayRef<llvm::PassPlugin>, llvm::ArrayRef<std::function<void (llvm::PassBuilder&)>>, llvm::opt_tool::OutputKind, llvm::opt_tool::VerifierKind, bool, bool, bool, bool, bool, bool, bool) (./build/bin/opt+0x4ff3fcc)
#20 0x0000557d9132d12b optMain (./build/bin/opt+0x3eab12b)
#21 0x00007f756af71d90 __libc_start_call_main ./csu/../sysdeps/nptl/libc_start_call_main.h:58:16
#22 0x00007f756af71e40 call_init ./csu/../csu/libc-start.c:128:20
#23 0x00007f756af71e40 __libc_start_main ./csu/../csu/libc-start.c:379:5
#24 0x0000557d9132afe5 _start (./build/bin/opt+0x3ea8fe5)
Aborted

@davemgreen
Copy link
Collaborator Author

Thanks for the reproducer - it looks like the test we have for scalar-select conditions isn't working as it should.

@davemgreen
Copy link
Collaborator Author

It is hopefully fixed in 76c8e1d. Let me know if not! Thanks

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

6 participants