From bf63243d53586c3798b446c8a4e0695d4893ae37 Mon Sep 17 00:00:00 2001
From: Marius Kamp
Date: Sat, 27 Apr 2024 19:46:42 +0200
Subject: [PATCH] [InstCombine] Fold Minimum over Trailing/Leading Bits Counts
 (#90000)

The new transformation folds `umin(cttz(x), c)` to `cttz(x | (1 << c))`
and `umin(ctlz(x), c)` to `ctlz(x | ((1 << (bitwidth - 1)) >> c))`. The
transformation is only implemented for constant `c` to not increase the
number of instructions.

The idea of the transformation is to set the c-th lowest (for `cttz`) or
highest (for `ctlz`) bit in the operand. In this way, the `cttz` or
`ctlz` instruction always returns at most `c`.

Alive2 proofs: https://alive2.llvm.org/ce/z/y8Hdb8
---
 .../InstCombine/InstCombineCalls.cpp          | 64 ++++++++++
 .../Transforms/InstCombine/umin_cttz_ctlz.ll  | 96 +++++++++----------
 2 files changed, 112 insertions(+), 48 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 1913ef92c16c0e..f216cf1647acf1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1428,6 +1428,60 @@ static Instruction *foldBitOrderCrossLogicOp(Value *V,
   return nullptr;
 }
 
+/// Fold an unsigned minimum of a trailing/leading zero-bit count and a
+/// constant into a single count on a modified operand:
+///   umin(cttz(CtOp, ZeroUndef), ConstOp) --> cttz(CtOp | (1 << ConstOp))
+///   umin(ctlz(CtOp, ZeroUndef), ConstOp) --> ctlz(CtOp | ((1 << (bitwidth-1))
+///                                              >> ConstOp))
+/// Setting the ConstOp-th lowest (cttz) or highest (ctlz) bit of the operand
+/// caps the count at ConstOp, which makes the umin redundant.
+///
+/// \p IntrID must be Intrinsic::cttz or Intrinsic::ctlz. \p I0 is the umin
+/// operand expected to be the single-use count intrinsic and \p I1 the one
+/// expected to be the constant. Returns the replacement value, or nullptr
+/// (without creating any instruction) when the pattern does not match.
+static Value *
+foldMinimumOverTrailingOrLeadingZeroCount(Intrinsic::ID IntrID, Value *I0,
+                                          Value *I1, const DataLayout &DL,
+                                          InstCombiner::BuilderTy &Builder) {
+  assert((IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz) &&
+         "This helper only supports cttz and ctlz intrinsics");
+
+  // The count must die with the umin; otherwise the fold adds instructions.
+  if (!I0->hasOneUse())
+    return nullptr;
+
+  auto *II0 = dyn_cast<IntrinsicInst>(I0);
+  if (!II0 || II0->getIntrinsicID() != IntrID)
+    return nullptr;
+
+  // Only constants whose every element is < BitWidth are folded. A constant
+  // >= BitWidth can be handled by CVP, and a non-splat vector mixing elements
+  // < and >= BitWidth is rejected as a whole.
+  unsigned BitWidth = I1->getType()->getScalarSizeInBits();
+  auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
+  if (!match(I1, m_CheckedInt(LessBitWidth)))
+    return nullptr;
+
+  // cttz: 1 << ConstOp; ctlz: SignedMin >> ConstOp (logical shift).
+  Type *Ty = I1->getType();
+  Constant *NewConst = ConstantFoldBinaryOpOperands(
+      IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr,
+      IntrID == Intrinsic::cttz
+          ? ConstantInt::get(Ty, 1)
+          : ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)),
+      cast<Constant>(I1), DL);
+  // Folding may fail; bail out before any instruction has been created.
+  if (!NewConst)
+    return nullptr;
+
+  // ConstOp < BitWidth, so the mask is non-zero and so is the OR-ed operand:
+  // the "zero is poison" flag can be set regardless of the original flag.
+  return Builder.CreateBinaryIntrinsic(
+      IntrID, Builder.CreateOr(II0->getArgOperand(0), NewConst),
+      Builder.getTrue());
+}
+
 /// CallInst simplification. This mostly only handles folding of intrinsic
 /// instructions. For normal calls, it allows visitCallBase to do the heavy
 /// lifting.
@@ -1633,6 +1687,16 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       Value *Cmp = Builder.CreateICmpNE(I0, Zero);
       return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
     }
+    // umin(cttz(x), const) --> cttz(x | (1 << const))
+    if (Value *FoldedCttz = foldMinimumOverTrailingOrLeadingZeroCount(
+            Intrinsic::cttz, I0, I1, DL, Builder)) {
+      return replaceInstUsesWith(*II, FoldedCttz);
+    }
+    // umin(ctlz(x), const) --> ctlz(x | ((1 << (bitwidth - 1)) >> const))
+    if (Value *FoldedCtlz = foldMinimumOverTrailingOrLeadingZeroCount(
+            Intrinsic::ctlz, I0, I1, DL, Builder)) {
+      return replaceInstUsesWith(*II, FoldedCtlz);
+    }
     [[fallthrough]];
   }
   case Intrinsic::umax: {
diff --git a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
index e6e0215e920c57..0d87122660cfa1 100644
--- a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
+++ b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
@@ -4,8 +4,8 @@
 define i8 @umin_cttz_i8_zero_undefined(i8 %X) {
 ; CHECK-LABEL: define i8 @umin_cttz_i8_zero_undefined(
 ; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 
0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 true) -; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6) +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 64 +; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true) ; CHECK-NEXT: ret i8 [[RET]] ; %cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true) @@ -16,8 +16,8 @@ define i8 @umin_cttz_i8_zero_undefined(i8 %X) { define i8 @umin_cttz_i8_zero_defined(i8 %X) { ; CHECK-LABEL: define i8 @umin_cttz_i8_zero_defined( ; CHECK-SAME: i8 [[X:%.*]]) { -; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 false) -; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6) +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 64 +; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true) ; CHECK-NEXT: ret i8 [[RET]] ; %cttz = call i8 @llvm.cttz.i8(i8 %X, i1 false) @@ -28,8 +28,8 @@ define i8 @umin_cttz_i8_zero_defined(i8 %X) { define i8 @umin_cttz_i8_commuted_zero_undefined(i8 %X) { ; CHECK-LABEL: define i8 @umin_cttz_i8_commuted_zero_undefined( ; CHECK-SAME: i8 [[X:%.*]]) { -; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 true) -; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6) +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 64 +; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true) ; CHECK-NEXT: ret i8 [[RET]] ; %cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true) @@ -51,8 +51,8 @@ define i8 @umin_cttz_i8_negative_ge_bitwidth_zero_undefined(i8 %X) { define i16 @umin_cttz_i16_zero_undefined(i16 %X) { ; CHECK-LABEL: define i16 @umin_cttz_i16_zero_undefined( ; CHECK-SAME: i16 [[X:%.*]]) { -; CHECK-NEXT: [[CTTZ:%.*]] = call range(i16 0, 17) i16 @llvm.cttz.i16(i16 [[X]], i1 true) -; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTTZ]], i16 6) +; CHECK-NEXT: [[TMP1:%.*]] = or i16 [[X]], 64 +; CHECK-NEXT: [[RET:%.*]] = call range(i16 0, 7) i16 @llvm.cttz.i16(i16 [[TMP1]], i1 true) 
; CHECK-NEXT: ret i16 [[RET]] ; %cttz = call i16 @llvm.cttz.i16(i16 %X, i1 true) @@ -63,8 +63,8 @@ define i16 @umin_cttz_i16_zero_undefined(i16 %X) { define i32 @umin_cttz_i32_zero_undefined(i32 %X) { ; CHECK-LABEL: define i32 @umin_cttz_i32_zero_undefined( ; CHECK-SAME: i32 [[X:%.*]]) { -; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[X]], i1 true) -; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.umin.i32(i32 [[CTTZ]], i32 6) +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X]], 64 +; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) i32 @llvm.cttz.i32(i32 [[TMP1]], i1 true) ; CHECK-NEXT: ret i32 [[RET]] ; %cttz = call i32 @llvm.cttz.i32(i32 %X, i1 true) @@ -75,8 +75,8 @@ define i32 @umin_cttz_i32_zero_undefined(i32 %X) { define i64 @umin_cttz_i64_zero_undefined(i64 %X) { ; CHECK-LABEL: define i64 @umin_cttz_i64_zero_undefined( ; CHECK-SAME: i64 [[X:%.*]]) { -; CHECK-NEXT: [[CTTZ:%.*]] = call range(i64 0, 65) i64 @llvm.cttz.i64(i64 [[X]], i1 true) -; CHECK-NEXT: [[RET:%.*]] = call i64 @llvm.umin.i64(i64 [[CTTZ]], i64 6) +; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[X]], 64 +; CHECK-NEXT: [[RET:%.*]] = call range(i64 0, 7) i64 @llvm.cttz.i64(i64 [[TMP1]], i1 true) ; CHECK-NEXT: ret i64 [[RET]] ; %cttz = call i64 @llvm.cttz.i64(i64 %X, i1 true) @@ -108,8 +108,8 @@ define i1 @umin_cttz_i1_zero_defined(i1 %X) { define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(<2 x i32> %X) { ; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined( ; CHECK-SAME: <2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true) -; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], +; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true) ; CHECK-NEXT: ret <2 x i32> [[RET]] ; %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true) @@ -120,8 +120,8 @@ define <2 x 
i32> @umin_cttz_2xi32_splat_zero_undefined(<2 x i32> %X) { define <2 x i32> @umin_cttz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) { ; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_splat_poison_zero_undefined( ; CHECK-SAME: <2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true) -; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], +; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true) ; CHECK-NEXT: ret <2 x i32> [[RET]] ; %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true) @@ -132,8 +132,8 @@ define <2 x i32> @umin_cttz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) { define <2 x i32> @umin_cttz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(<2 x i32> %X) { ; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_no_splat_all_lt_bitwidth_zero_undefined( ; CHECK-SAME: <2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true) -; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], +; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true) ; CHECK-NEXT: ret <2 x i32> [[RET]] ; %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true) @@ -144,9 +144,9 @@ define <2 x i32> @umin_cttz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(<2 x i define <2 x i32> @umin_cttz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefined(<2 x i32> %X) { ; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefined( ; CHECK-SAME: <2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true) -; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> 
[[CTTZ]], <2 x i32> ) -; CHECK-NEXT: ret <2 x i32> [[RET]] +; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true) +; CHECK-NEXT: [[RET1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[RET]], <2 x i32> ) +; CHECK-NEXT: ret <2 x i32> [[RET1]] ; %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true) %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %cttz, <2 x i32> ) @@ -156,9 +156,9 @@ define <2 x i32> @umin_cttz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefi define <2 x i32> @umin_cttz_2xi32_negative_no_splat_none_lt_bitwidth_zero_undefined(<2 x i32> %X) { ; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_negative_no_splat_none_lt_bitwidth_zero_undefined( ; CHECK-SAME: <2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true) -; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> ) -; CHECK-NEXT: ret <2 x i32> [[RET]] +; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true) +; CHECK-NEXT: [[RET1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[RET]], <2 x i32> ) +; CHECK-NEXT: ret <2 x i32> [[RET1]] ; %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true) %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %cttz, <2 x i32> ) @@ -194,8 +194,8 @@ define i16 @umin_cttz_i16_negative_two_uses(i16 %X) { define i8 @umin_ctlz_i8_zero_undefined(i8 %X) { ; CHECK-LABEL: define i8 @umin_ctlz_i8_zero_undefined( ; CHECK-SAME: i8 [[X:%.*]]) { -; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 true) -; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6) +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 2 +; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true) ; CHECK-NEXT: ret i8 [[RET]] ; %ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true) @@ -206,8 +206,8 @@ define i8 
@umin_ctlz_i8_zero_undefined(i8 %X) { define i8 @umin_ctlz_i8_zero_defined(i8 %X) { ; CHECK-LABEL: define i8 @umin_ctlz_i8_zero_defined( ; CHECK-SAME: i8 [[X:%.*]]) { -; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 false) -; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6) +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 2 +; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true) ; CHECK-NEXT: ret i8 [[RET]] ; %ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 false) @@ -218,8 +218,8 @@ define i8 @umin_ctlz_i8_zero_defined(i8 %X) { define i8 @umin_ctlz_i8_commuted_zero_undefined(i8 %X) { ; CHECK-LABEL: define i8 @umin_ctlz_i8_commuted_zero_undefined( ; CHECK-SAME: i8 [[X:%.*]]) { -; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 true) -; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6) +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 2 +; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true) ; CHECK-NEXT: ret i8 [[RET]] ; %ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true) @@ -241,8 +241,8 @@ define i8 @umin_ctlz_i8_negative_ge_bitwidth_zero_undefined(i8 %X) { define i16 @umin_ctlz_i16_zero_undefined(i16 %X) { ; CHECK-LABEL: define i16 @umin_ctlz_i16_zero_undefined( ; CHECK-SAME: i16 [[X:%.*]]) { -; CHECK-NEXT: [[CTLZ:%.*]] = call range(i16 0, 17) i16 @llvm.ctlz.i16(i16 [[X]], i1 true) -; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTLZ]], i16 6) +; CHECK-NEXT: [[TMP1:%.*]] = or i16 [[X]], 512 +; CHECK-NEXT: [[RET:%.*]] = call range(i16 0, 7) i16 @llvm.ctlz.i16(i16 [[TMP1]], i1 true) ; CHECK-NEXT: ret i16 [[RET]] ; %ctlz = call i16 @llvm.ctlz.i16(i16 %X, i1 true) @@ -253,8 +253,8 @@ define i16 @umin_ctlz_i16_zero_undefined(i16 %X) { define i32 @umin_ctlz_i32_zero_undefined(i32 %X) { ; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_undefined( ; CHECK-SAME: i32 [[X:%.*]]) { -; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) i32 
@llvm.ctlz.i32(i32 [[X]], i1 true) -; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.umin.i32(i32 [[CTLZ]], i32 6) +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X]], 33554432 +; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 true) ; CHECK-NEXT: ret i32 [[RET]] ; %ctlz = call i32 @llvm.ctlz.i32(i32 %X, i1 true) @@ -265,8 +265,8 @@ define i32 @umin_ctlz_i32_zero_undefined(i32 %X) { define i64 @umin_ctlz_i64_zero_undefined(i64 %X) { ; CHECK-LABEL: define i64 @umin_ctlz_i64_zero_undefined( ; CHECK-SAME: i64 [[X:%.*]]) { -; CHECK-NEXT: [[CTLZ:%.*]] = call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[X]], i1 true) -; CHECK-NEXT: [[RET:%.*]] = call i64 @llvm.umin.i64(i64 [[CTLZ]], i64 6) +; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[X]], 144115188075855872 +; CHECK-NEXT: [[RET:%.*]] = call range(i64 0, 7) i64 @llvm.ctlz.i64(i64 [[TMP1]], i1 true) ; CHECK-NEXT: ret i64 [[RET]] ; %ctlz = call i64 @llvm.ctlz.i64(i64 %X, i1 true) @@ -298,8 +298,8 @@ define i1 @umin_ctlz_i1_zero_defined(i1 %X) { define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(<2 x i32> %X) { ; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined( ; CHECK-SAME: <2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true) -; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], +; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true) ; CHECK-NEXT: ret <2 x i32> [[RET]] ; %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true) @@ -310,8 +310,8 @@ define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(<2 x i32> %X) { define <2 x i32> @umin_ctlz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) { ; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_splat_poison_zero_undefined( ; CHECK-SAME: <2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> 
@llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true) -; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], +; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true) ; CHECK-NEXT: ret <2 x i32> [[RET]] ; %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true) @@ -322,8 +322,8 @@ define <2 x i32> @umin_ctlz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) { define <2 x i32> @umin_ctlz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(<2 x i32> %X) { ; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_no_splat_all_lt_bitwidth_zero_undefined( ; CHECK-SAME: <2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true) -; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], +; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true) ; CHECK-NEXT: ret <2 x i32> [[RET]] ; %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true) @@ -334,9 +334,9 @@ define <2 x i32> @umin_ctlz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(<2 x i define <2 x i32> @umin_ctlz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefined(<2 x i32> %X) { ; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefined( ; CHECK-SAME: <2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true) -; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> ) -; CHECK-NEXT: ret <2 x i32> [[RET]] +; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true) +; CHECK-NEXT: [[RET1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[RET]], <2 x i32> ) +; CHECK-NEXT: ret <2 x i32> [[RET1]] ; %ctlz = call <2 x i32> 
@llvm.ctlz.v2i32(<2 x i32> %X, i1 true) %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %ctlz, <2 x i32> ) @@ -346,9 +346,9 @@ define <2 x i32> @umin_ctlz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefi define <2 x i32> @umin_ctlz_2xi32_negative_no_splat_none_lt_bitwidth_zero_undefined(<2 x i32> %X) { ; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_negative_no_splat_none_lt_bitwidth_zero_undefined( ; CHECK-SAME: <2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true) -; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> ) -; CHECK-NEXT: ret <2 x i32> [[RET]] +; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true) +; CHECK-NEXT: [[RET1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[RET]], <2 x i32> ) +; CHECK-NEXT: ret <2 x i32> [[RET1]] ; %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true) %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %ctlz, <2 x i32> )