diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index bf205b1706a6c9..eae0200f37f043 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3403,6 +3403,11 @@ static bool isLegalArithImmed(uint64_t C) {
   return IsLegal;
 }
 
+static bool cannotBeIntMin(SDValue CheckedVal, SelectionDAG &DAG) {
+  KnownBits KnownSrc = DAG.computeKnownBits(CheckedVal);
+  return !KnownSrc.getSignedMinValue().isMinSignedValue();
+}
+
 // Can a (CMP op1, (sub 0, op2) be turned into a CMN instruction on
 // the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags
 // can be set differently by this operation. It comes down to whether
@@ -3410,12 +3415,14 @@ static bool isLegalArithImmed(uint64_t C) {
 // everything is fine. If not then the optimization is wrong. Thus general
 // comparisons are only valid if op2 != 0.
 //
-// So, finally, the only LLVM-native comparisons that don't mention C and V
-// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
-// the absence of information about op2.
-static bool isCMN(SDValue Op, ISD::CondCode CC) {
+// So, finally, the only LLVM-native comparisons that don't mention C or V
+// are the ones that aren't unsigned comparisons. They're the only ones we can
+// safely use CMN for in the absence of information about op2.
+static bool isCMN(SDValue Op, ISD::CondCode CC, SelectionDAG &DAG) {
   return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
-         (CC == ISD::SETEQ || CC == ISD::SETNE);
+         (isIntEqualitySetCC(CC) ||
+          (isUnsignedIntSetCC(CC) && DAG.isKnownNeverZero(Op.getOperand(1))) ||
+          (isSignedIntSetCC(CC) && cannotBeIntMin(Op.getOperand(1), DAG)));
 }
 
 static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
@@ -3460,11 +3467,12 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
   // register to WZR/XZR if it ends up being unused.
   unsigned Opcode = AArch64ISD::SUBS;
 
-  if (isCMN(RHS, CC)) {
+  if (isCMN(RHS, CC, DAG)) {
     // Can we combine a (CMP op1, (sub 0, op2) into a CMN instruction ?
     Opcode = AArch64ISD::ADDS;
     RHS = RHS.getOperand(1);
-  } else if (isCMN(LHS, CC)) {
+  } else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
+             isIntEqualitySetCC(CC)) {
     // As we are looking for EQ/NE compares, the operands can be commuted ; can
     // we combine a (CMP (sub 0, op1), op2) into a CMN instruction ?
     Opcode = AArch64ISD::ADDS;
@@ -3566,13 +3574,15 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
       Opcode = AArch64ISD::CCMN;
       RHS = DAG.getConstant(Imm.abs(), DL, Const->getValueType(0));
     }
-  } else if (RHS.getOpcode() == ISD::SUB) {
-    SDValue SubOp0 = RHS.getOperand(0);
-    if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
-      // See emitComparison() on why we can only do this for SETEQ and SETNE.
-      Opcode = AArch64ISD::CCMN;
-      RHS = RHS.getOperand(1);
-    }
+  } else if (isCMN(RHS, CC, DAG)) {
+    Opcode = AArch64ISD::CCMN;
+    RHS = RHS.getOperand(1);
+  } else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
+             isIntEqualitySetCC(CC)) {
+    // As we are looking for EQ/NE compares, the operands can be commuted ; can
+    // we combine a (CCMP (sub 0, op1), op2) into a CCMN instruction ?
+    Opcode = AArch64ISD::CCMN;
+    LHS = LHS.getOperand(1);
   }
   if (Opcode == 0)
     Opcode = AArch64ISD::CCMP;
@@ -3890,8 +3900,8 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
   //    cmp     w12, w11, lsl #1
   if (!isa<ConstantSDNode>(RHS) ||
       !isLegalArithImmed(RHS->getAsAPIntVal().abs().getZExtValue())) {
-    bool LHSIsCMN = isCMN(LHS, CC);
-    bool RHSIsCMN = isCMN(RHS, CC);
+    bool LHSIsCMN = isCMN(LHS, CC, DAG);
+    bool RHSIsCMN = isCMN(RHS, CC, DAG);
     SDValue TheLHS = LHSIsCMN ? LHS.getOperand(1) : LHS;
     SDValue TheRHS = RHSIsCMN ? RHS.getOperand(1) : RHS;
 
@@ -3904,7 +3914,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
 
   SDValue Cmp;
   AArch64CC::CondCode AArch64CC;
-  if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
+  if (isIntEqualitySetCC(CC) && isa<ConstantSDNode>(RHS)) {
     const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
 
     // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
diff --git a/llvm/test/CodeGen/AArch64/cmp-chains.ll b/llvm/test/CodeGen/AArch64/cmp-chains.ll
index 14cb0c82b1c039..4b816df75a730e 100644
--- a/llvm/test/CodeGen/AArch64/cmp-chains.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-chains.ll
@@ -258,3 +258,246 @@ define i32 @neg_range_int(i32 %a, i32 %b, i32 %c) {
   ret i32 %retval.0
 }
 
+; (b > -(d | 1) && a < c)
+define i32 @neg_range_int_comp(i32 %a, i32 %b, i32 %c, i32 %d) {
+; SDISEL-LABEL: neg_range_int_comp:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    orr w8, w3, #0x1
+; SDISEL-NEXT:    cmp w0, w2
+; SDISEL-NEXT:    ccmn w1, w8, #4, lt
+; SDISEL-NEXT:    csel w0, w1, w0, gt
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: neg_range_int_comp:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    orr w8, w3, #0x1
+; GISEL-NEXT:    cmp w0, w2
+; GISEL-NEXT:    neg w8, w8
+; GISEL-NEXT:    ccmp w1, w8, #4, lt
+; GISEL-NEXT:    csel w0, w1, w0, gt
+; GISEL-NEXT:    ret
+  %dor = or i32 %d, 1
+  %negd = sub i32 0, %dor
+  %cmp = icmp sgt i32 %b, %negd
+  %cmp1 = icmp slt i32 %a, %c
+  %or.cond = and i1 %cmp, %cmp1
+  %retval.0 = select i1 %or.cond, i32 %b, i32 %a
+  ret i32 %retval.0
+}
+
+; (b >u -(d | 1) && a < c)
+define i32 @neg_range_int_comp_u(i32 %a, i32 %b, i32 %c, i32 %d) {
+; SDISEL-LABEL: neg_range_int_comp_u:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    orr w8, w3, #0x1
+; SDISEL-NEXT:    cmp w0, w2
+; SDISEL-NEXT:    ccmn w1, w8, #0, lt
+; SDISEL-NEXT:    csel w0, w1, w0, hi
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: neg_range_int_comp_u:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    orr w8, w3, #0x1
+; GISEL-NEXT:    cmp w0, w2
+; GISEL-NEXT:    neg w8, w8
+; GISEL-NEXT:    ccmp w1, w8, #0, lt
+; GISEL-NEXT:    csel w0, w1, w0, hi
+; GISEL-NEXT:    ret
+  %dor = or i32 %d, 1
+  %negd = sub i32 0, %dor
+  %cmp = icmp ugt i32 %b, %negd
+  %cmp1 = icmp slt i32 %a, %c
+  %or.cond = and i1 %cmp, %cmp1
+  %retval.0 = select i1 %or.cond, i32 %b, i32 %a
+  ret i32 %retval.0
+}
+
+; (b > -(d | 1) && a u < c)
+define i32 @neg_range_int_comp_ua(i32 %a, i32 %b, i32 %c, i32 %d) {
+; SDISEL-LABEL: neg_range_int_comp_ua:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    orr w8, w3, #0x1
+; SDISEL-NEXT:    cmp w0, w2
+; SDISEL-NEXT:    ccmn w1, w8, #4, lo
+; SDISEL-NEXT:    csel w0, w1, w0, gt
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: neg_range_int_comp_ua:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    orr w8, w3, #0x1
+; GISEL-NEXT:    cmp w0, w2
+; GISEL-NEXT:    neg w8, w8
+; GISEL-NEXT:    ccmp w1, w8, #4, lo
+; GISEL-NEXT:    csel w0, w1, w0, gt
+; GISEL-NEXT:    ret
+  %dor = or i32 %d, 1
+  %negd = sub i32 0, %dor
+  %cmp = icmp sgt i32 %b, %negd
+  %cmp1 = icmp ult i32 %a, %c
+  %or.cond = and i1 %cmp, %cmp1
+  %retval.0 = select i1 %or.cond, i32 %b, i32 %a
+  ret i32 %retval.0
+}
+
+; (b >= -3 && a > c)
+define i32 @neg_range_int_2(i32 %a, i32 %b, i32 %c) {
+; SDISEL-LABEL: neg_range_int_2:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    cmp w0, w2
+; SDISEL-NEXT:    ccmn w1, #4, #4, gt
+; SDISEL-NEXT:    csel w0, w1, w0, gt
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: neg_range_int_2:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    cmp w0, w2
+; GISEL-NEXT:    ccmn w1, #3, #8, gt
+; GISEL-NEXT:    csel w0, w1, w0, ge
+; GISEL-NEXT:    ret
+  %cmp = icmp sge i32 %b, -3
+  %cmp1 = icmp sgt i32 %a, %c
+  %or.cond = and i1 %cmp, %cmp1
+  %retval.0 = select i1 %or.cond, i32 %b, i32 %a
+  ret i32 %retval.0
+}
+
+; (b < -(d | 1) && a >= c)
+define i32 @neg_range_int_comp2(i32 %a, i32 %b, i32 %c, i32 %d) {
+; SDISEL-LABEL: neg_range_int_comp2:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    orr w8, w3, #0x1
+; SDISEL-NEXT:    cmp w0, w2
+; SDISEL-NEXT:    ccmn w1, w8, #0, ge
+; SDISEL-NEXT:    csel w0, w1, w0, lt
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: neg_range_int_comp2:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    orr w8, w3, #0x1
+; GISEL-NEXT:    cmp w0, w2
+; GISEL-NEXT:    neg w8, w8
+; GISEL-NEXT:    ccmp w1, w8, #0, ge
+; GISEL-NEXT:    csel w0, w1, w0, lt
+; GISEL-NEXT:    ret
+  %dor = or i32 %d, 1
+  %negd = sub i32 0, %dor
+  %cmp = icmp slt i32 %b, %negd
+  %cmp1 = icmp sge i32 %a, %c
+  %or.cond = and i1 %cmp, %cmp1
+  %retval.0 = select i1 %or.cond, i32 %b, i32 %a
+  ret i32 %retval.0
+}
+
+; (b <u -(d | 1) && a > c)
+define i32 @neg_range_int_comp_u2(i32 %a, i32 %b, i32 %c, i32 %d) {
+; SDISEL-LABEL: neg_range_int_comp_u2:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    orr w8, w3, #0x1
+; SDISEL-NEXT:    cmp w0, w2
+; SDISEL-NEXT:    ccmn w1, w8, #2, gt
+; SDISEL-NEXT:    csel w0, w1, w0, lo
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: neg_range_int_comp_u2:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    orr w8, w3, #0x1
+; GISEL-NEXT:    cmp w0, w2
+; GISEL-NEXT:    neg w8, w8
+; GISEL-NEXT:    ccmp w1, w8, #2, gt
+; GISEL-NEXT:    csel w0, w1, w0, lo
+; GISEL-NEXT:    ret
+  %dor = or i32 %d, 1
+  %negd = sub i32 0, %dor
+  %cmp = icmp ult i32 %b, %negd
+  %cmp1 = icmp sgt i32 %a, %c
+  %or.cond = and i1 %cmp, %cmp1
+  %retval.0 = select i1 %or.cond, i32 %b, i32 %a
+  ret i32 %retval.0
+}
+
+; (b > -(d | 1) && a u > c)
+define i32 @neg_range_int_comp_ua2(i32 %a, i32 %b, i32 %c, i32 %d) {
+; SDISEL-LABEL: neg_range_int_comp_ua2:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    orr w8, w3, #0x1
+; SDISEL-NEXT:    cmp w0, w2
+; SDISEL-NEXT:    ccmn w1, w8, #4, hi
+; SDISEL-NEXT:    csel w0, w1, w0, gt
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: neg_range_int_comp_ua2:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    orr w8, w3, #0x1
+; GISEL-NEXT:    cmp w0, w2
+; GISEL-NEXT:    neg w8, w8
+; GISEL-NEXT:    ccmp w1, w8, #4, hi
+; GISEL-NEXT:    csel w0, w1, w0, gt
+; GISEL-NEXT:    ret
+  %dor = or i32 %d, 1
+  %negd = sub i32 0, %dor
+  %cmp = icmp sgt i32 %b, %negd
+  %cmp1 = icmp ugt i32 %a, %c
+  %or.cond = and i1 %cmp, %cmp1
+  %retval.0 = select i1 %or.cond, i32 %b, i32 %a
+  ret i32 %retval.0
+}
+
+; (b > -(d | 1) && a u == c)
+define i32 @neg_range_int_comp_ua3(i32 %a, i32 %b, i32 %c, i32 %d) {
+; SDISEL-LABEL: neg_range_int_comp_ua3:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    orr w8, w3, #0x1
+; SDISEL-NEXT:    cmp w0, w2
+; SDISEL-NEXT:    ccmn w1, w8, #4, eq
+; SDISEL-NEXT:    csel w0, w1, w0, gt
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: neg_range_int_comp_ua3:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    orr w8, w3, #0x1
+; GISEL-NEXT:    cmp w0, w2
+; GISEL-NEXT:    neg w8, w8
+; GISEL-NEXT:    ccmp w1, w8, #4, eq
+; GISEL-NEXT:    csel w0, w1, w0, gt
+; GISEL-NEXT:    ret
+  %dor = or i32 %d, 1
+  %negd = sub i32 0, %dor
+  %cmp = icmp sgt i32 %b, %negd
+  %cmp1 = icmp eq i32 %a, %c
+  %or.cond = and i1 %cmp, %cmp1
+  %retval.0 = select i1 %or.cond, i32 %b, i32 %a
+  ret i32 %retval.0
+}
+
+; -(a | 1) > (b | 3) && c < a
+define i32 @neg_range_int_c(i32 %a, i32 %b, i32 %c) {
+; SDISEL-LABEL: neg_range_int_c:
+; SDISEL:       // %bb.0: // %entry
+; SDISEL-NEXT:    orr w8, w0, #0x1
+; SDISEL-NEXT:    orr w9, w1, #0x3
+; SDISEL-NEXT:    cmn w9, w8
+; SDISEL-NEXT:    ccmp w2, w0, #2, lo
+; SDISEL-NEXT:    cset w0, lo
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: neg_range_int_c:
+; GISEL:       // %bb.0: // %entry
+; GISEL-NEXT:    orr w8, w0, #0x1
+; GISEL-NEXT:    orr w9, w1, #0x3
+; GISEL-NEXT:    neg w8, w8
+; GISEL-NEXT:    cmp w9, w8
+; GISEL-NEXT:    cset w8, lo
+; GISEL-NEXT:    cmp w2, w0
+; GISEL-NEXT:    cset w9, lo
+; GISEL-NEXT:    and w0, w8, w9
+; GISEL-NEXT:    ret
+entry:
+  %or = or i32 %a, 1
+  %sub = sub i32 0, %or
+  %or1 = or i32 %b, 3
+  %cmp = icmp ult i32 %or1, %sub
+  %cmp2 = icmp ult i32 %c, %a
+  %0 = and i1 %cmp, %cmp2
+  %land.ext = zext i1 %0 to i32
+  ret i32 %land.ext
+}
diff --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
index 09a6e26fe5a403..22440b79bdcd46 100644
--- a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
@@ -262,4 +262,210 @@ define <4 x i65> @sign_4xi65(<4 x i65> %a) {
   ret <4 x i65> %res
 }
 
+define i32 @or_neg(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w0, #0x1
+; CHECK-NEXT:    cmn w1, w8
+; CHECK-NEXT:    cset w0, lt
+; CHECK-NEXT:    ret
+  %3 = or i32 %x, 1
+  %4 = sub i32 0, %3
+  %5 = icmp sgt i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+define i32 @or_neg_ugt(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_ugt:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w0, #0x1
+; CHECK-NEXT:    cmn w1, w8
+; CHECK-NEXT:    cset w0, lo
+; CHECK-NEXT:    ret
+  %3 = or i32 %x, 1
+  %4 = sub i32 0, %3
+  %5 = icmp ugt i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+; Negative test
+
+define i32 @or_neg_no_smin(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_no_smin:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    cmp w8, w1
+; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    ret
+  %4 = sub i32 0, %x
+  %5 = icmp sgt i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+; Negative test
+
+define i32 @or_neg_ult_no_zero(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_ult_no_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    cmp w8, w1
+; CHECK-NEXT:    cset w0, lo
+; CHECK-NEXT:    ret
+  %4 = sub i32 0, %x
+  %5 = icmp ult i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+define i32 @or_neg_no_smin_but_zero(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_no_smin_but_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bic w8, w0, w0, asr #31
+; CHECK-NEXT:    cmn w1, w8
+; CHECK-NEXT:    cset w0, lt
+; CHECK-NEXT:    ret
+  %3 = call i32 @llvm.smax.i32(i32 %x, i32 0)
+  %4 = sub i32 0, %3
+  %5 = icmp sgt i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+define i32 @or_neg_slt_zero_but_no_smin(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_slt_zero_but_no_smin:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #9 // =0x9
+; CHECK-NEXT:    cmp w0, #9
+; CHECK-NEXT:    csel w8, w0, w8, lo
+; CHECK-NEXT:    neg w8, w8
+; CHECK-NEXT:    cmp w8, w1
+; CHECK-NEXT:    cset w0, hi
+; CHECK-NEXT:    ret
+  %3 = call i32 @llvm.umin.i32(i32 %x, i32 9)
+  %4 = sub i32 0, %3
+  %5 = icmp ugt i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+define i32 @or_neg2(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w0, #0x1
+; CHECK-NEXT:    cmn w1, w8
+; CHECK-NEXT:    cset w0, le
+; CHECK-NEXT:    ret
+  %3 = or i32 %x, 1
+  %4 = sub i32 0, %3
+  %5 = icmp sge i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+define i32 @or_neg3(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w0, #0x1
+; CHECK-NEXT:    cmn w1, w8
+; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    ret
+  %3 = or i32 %x, 1
+  %4 = sub i32 0, %3
+  %5 = icmp slt i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+define i32 @or_neg4(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w0, #0x1
+; CHECK-NEXT:    cmn w1, w8
+; CHECK-NEXT:    cset w0, ge
+; CHECK-NEXT:    ret
+  %3 = or i32 %x, 1
+  %4 = sub i32 0, %3
+  %5 = icmp sle i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+define i32 @or_neg_ult(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_ult:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w0, #0x1
+; CHECK-NEXT:    cmn w1, w8
+; CHECK-NEXT:    cset w0, lo
+; CHECK-NEXT:    ret
+  %3 = or i32 %x, 1
+  %4 = sub i32 0, %3
+  %5 = icmp ugt i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+define i32 @or_neg_no_smin2(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_no_smin2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    cmp w8, w1
+; CHECK-NEXT:    cset w0, ge
+; CHECK-NEXT:    ret
+  %4 = sub i32 0, %x
+  %5 = icmp sge i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+; Negative test
+
+define i32 @or_neg_ult_no_zero2(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_ult_no_zero2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    cmp w8, w1
+; CHECK-NEXT:    cset w0, lo
+; CHECK-NEXT:    ret
+  %4 = sub i32 0, %x
+  %5 = icmp ult i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+define i32 @or_neg_no_smin_but_zero2(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_no_smin_but_zero2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bic w8, w0, w0, asr #31
+; CHECK-NEXT:    cmn w1, w8
+; CHECK-NEXT:    cset w0, ge
+; CHECK-NEXT:    ret
+  %3 = call i32 @llvm.smax.i32(i32 %x, i32 0)
+  %4 = sub i32 0, %3
+  %5 = icmp sle i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+define i32 @or_neg_slt_zero_but_no_smin2(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_slt_zero_but_no_smin2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #9 // =0x9
+; CHECK-NEXT:    cmp w0, #9
+; CHECK-NEXT:    csel w8, w0, w8, lo
+; CHECK-NEXT:    neg w8, w8
+; CHECK-NEXT:    cmp w8, w1
+; CHECK-NEXT:    cset w0, hs
+; CHECK-NEXT:    ret
+  %3 = call i32 @llvm.umin.i32(i32 %x, i32 9)
+  %4 = sub i32 0, %3
+  %5 = icmp uge i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+declare i32 @llvm.smax.i32(i32, i32)
+declare i32 @llvm.umax.i32(i32, i32)
 declare void @use_4xi1(<4 x i1>)