[InstCombine] Fold umin(cttz(x), C) and umin(ctlz(x), C) into one count intrinsic

PatternMatch.h: adds two-operand matchers m_Ctlz and m_Cttz, thin wrappers
around m_Intrinsic for Intrinsic::ctlz and Intrinsic::cttz.

InstCombineCalls.cpp (umin case of visitCallInst): when operand 0 is a
single-use cttz/ctlz call and operand 1 is a constant matched by m_APInt:
  umin(cttz(x, z), C) --> cttz(x | (1 << C), z)
  umin(ctlz(x, z), C) --> ctlz(x | (SignedMin >> C), z)
The OR is emitted only when C is less than the scalar bit width; otherwise
the count intrinsic is re-created on x unchanged and the umin is dropped.

umin_cttz_ctlz.ll: CHECK lines updated for the new output.

NOTE(review): the two folds differ only in the intrinsic ID and in how the
guard bit is built; they repeat the bit-width query and the m_APInt match, so
they look like candidates for a shared helper.

diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 0b13b4aad9c326..36d64c88427883 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -2466,6 +2466,18 @@ inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BSwap(const Opnd0 &Op0) {
   return m_Intrinsic<Intrinsic::bswap>(Op0);
 }
 
+template <typename Opnd0, typename Opnd1>
+inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_Ctlz(const Opnd0 &Op0,
+                                                        const Opnd1 &Op1) {
+  return m_Intrinsic<Intrinsic::ctlz>(Op0, Op1);
+}
+
+template <typename Opnd0, typename Opnd1>
+inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_Cttz(const Opnd0 &Op0,
+                                                        const Opnd1 &Op1) {
+  return m_Intrinsic<Intrinsic::cttz>(Op0, Op1);
+}
+
 template <typename Opnd0>
 inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FAbs(const Opnd0 &Op0) {
   return m_Intrinsic<Intrinsic::fabs>(Op0);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e5652458f150b5..db742fbe668cc3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1633,6 +1633,39 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       Value *Cmp = Builder.CreateICmpNE(I0, Zero);
       return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
     }
+    // umin(cttz(x), const) --> cttz(x | (1 << const))
+    Value *X;
+    const APInt *Y;
+    Value *Z;
+    if (match(I0, m_OneUse(m_Cttz(m_Value(X), m_Value(Z)))) &&
+        match(I1, m_APInt(Y))) {
+      Value *CttzOp = X;
+      if (Y->ult(I1->getType()->getScalarType()->getIntegerBitWidth())) {
+        auto One = APInt::getOneBitSet(
+            I1->getType()->getScalarType()->getIntegerBitWidth(), 0);
+        Value *NewConst = ConstantInt::get(I1->getType(), One << *Y);
+        CttzOp = Builder.CreateOr(X, NewConst);
+      }
+      return CallInst::Create(Intrinsic::getDeclaration(II->getModule(),
+                                                        Intrinsic::cttz,
+                                                        II->getType()),
+                              {CttzOp, Z});
+    }
+    // umin(ctlz(x), const) --> ctlz(x | ((1 << (bitwidth - 1) >> const)))
+    if (match(I0, m_OneUse(m_Ctlz(m_Value(X), m_Value(Z)))) &&
+        match(I1, m_APInt(Y))) {
+      Value *CtlzOp = X;
+      if (Y->ult(I1->getType()->getScalarType()->getIntegerBitWidth())) {
+        auto Min = APInt::getSignedMinValue(
+            I1->getType()->getScalarType()->getIntegerBitWidth());
+        Value *NewConst = ConstantInt::get(I1->getType(), Min.lshr(*Y));
+        CtlzOp = Builder.CreateOr(X, NewConst);
+      }
+      return CallInst::Create(Intrinsic::getDeclaration(II->getModule(),
+                                                        Intrinsic::ctlz,
+                                                        II->getType()),
+                              {CtlzOp, Z});
+    }
     [[fallthrough]];
   }
   case Intrinsic::umax: {
diff --git a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
index 25c9d75c2bbdc2..91f5b818c7ff9a 100644
--- a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
+++ b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
@@ -25,8 +25,8 @@ declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1)
 define i8 @umin_cttz_i8_zero_undefined(i8 %X) {
 ; CHECK-LABEL: define i8 @umin_cttz_i8_zero_undefined(
 ; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 true)
-; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X]], 64
+; CHECK-NEXT:    [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
   %cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true)
@@ -37,8 +37,8 @@ define i8 @umin_cttz_i8_zero_undefined(i8 %X) {
 define i8 @umin_cttz_i8_zero_defined(i8 %X) {
 ; CHECK-LABEL: define i8 @umin_cttz_i8_zero_defined(
 ; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 false)
-; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X]], 64
+; CHECK-NEXT:    [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
   %cttz = call i8 @llvm.cttz.i8(i8 %X, i1 false)
@@ -49,8 +49,8 @@ define i8 @umin_cttz_i8_zero_defined(i8 %X) {
 define i8 @umin_cttz_i8_commuted_zero_undefined(i8 %X) {
 ; CHECK-LABEL: define i8 @umin_cttz_i8_commuted_zero_undefined(
 ; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 true)
-; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X]], 64
+; CHECK-NEXT:    [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
   %cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true)
@@ -72,8 +72,8 @@ define i8 @umin_cttz_i8_ge_bitwidth_zero_undefined(i8 %X) {
 define i16 @umin_cttz_i16_zero_undefined(i16 %X) {
 ; CHECK-LABEL: define i16 @umin_cttz_i16_zero_undefined(
 ; CHECK-SAME: i16 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i16 0, 17) i16 @llvm.cttz.i16(i16 [[X]], i1 true)
-; CHECK-NEXT:    [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTTZ]], i16 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = or i16 [[X]], 64
+; CHECK-NEXT:    [[RET:%.*]] = call range(i16 0, 7) i16 @llvm.cttz.i16(i16 [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret i16 [[RET]]
 ;
   %cttz = call i16 @llvm.cttz.i16(i16 %X, i1 true)
@@ -84,8 +84,8 @@ define i16 @umin_cttz_i16_zero_undefined(i16 %X) {
 define i32 @umin_cttz_i32_zero_undefined(i32 %X) {
 ; CHECK-LABEL: define i32 @umin_cttz_i32_zero_undefined(
 ; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[X]], i1 true)
-; CHECK-NEXT:    [[RET:%.*]] = call i32 @llvm.umin.i32(i32 [[CTTZ]], i32 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[X]], 64
+; CHECK-NEXT:    [[RET:%.*]] = call range(i32 0, 7) i32 @llvm.cttz.i32(i32 [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
   %cttz = call i32 @llvm.cttz.i32(i32 %X, i1 true)
@@ -96,8 +96,8 @@ define i32 @umin_cttz_i32_zero_undefined(i32 %X) {
 define i64 @umin_cttz_i64_zero_undefined(i64 %X) {
 ; CHECK-LABEL: define i64 @umin_cttz_i64_zero_undefined(
 ; CHECK-SAME: i64 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i64 0, 65) i64 @llvm.cttz.i64(i64 [[X]], i1 true)
-; CHECK-NEXT:    [[RET:%.*]] = call i64 @llvm.umin.i64(i64 [[CTTZ]], i64 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[X]], 64
+; CHECK-NEXT:    [[RET:%.*]] = call range(i64 0, 7) i64 @llvm.cttz.i64(i64 [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret i64 [[RET]]
 ;
   %cttz = call i64 @llvm.cttz.i64(i64 %X, i1 true)
@@ -129,8 +129,8 @@ define i1 @umin_cttz_i1_zero_defined(i1 %X) {
 define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(<2 x i32> %X) {
 ; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(
 ; CHECK-SAME: <2 x i32> [[X:%.*]]) {
-; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
-; CHECK-NEXT:    [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 6>)
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 64, i32 64>
+; CHECK-NEXT:    [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
   %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
@@ -191,8 +191,8 @@ define i16 @umin_cttz_i16_negative_two_uses(i16 %X) {
 define i8 @umin_ctlz_i8_zero_undefined(i8 %X) {
 ; CHECK-LABEL: define i8 @umin_ctlz_i8_zero_undefined(
 ; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 true)
-; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X]], 2
+; CHECK-NEXT:    [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
   %ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true)
@@ -203,8 +203,8 @@ define i8 @umin_ctlz_i8_zero_undefined(i8 %X) {
 define i8 @umin_ctlz_i8_zero_defined(i8 %X) {
 ; CHECK-LABEL: define i8 @umin_ctlz_i8_zero_defined(
 ; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 false)
-; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X]], 2
+; CHECK-NEXT:    [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
   %ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 false)
@@ -215,8 +215,8 @@ define i8 @umin_ctlz_i8_zero_defined(i8 %X) {
 define i8 @umin_ctlz_i8_commuted_zero_undefined(i8 %X) {
 ; CHECK-LABEL: define i8 @umin_ctlz_i8_commuted_zero_undefined(
 ; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 true)
-; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X]], 2
+; CHECK-NEXT:    [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
   %ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true)
@@ -238,8 +238,8 @@ define i8 @umin_ctlz_i8_ge_bitwidth_zero_undefined(i8 %X) {
 define i16 @umin_ctlz_i16_zero_undefined(i16 %X) {
 ; CHECK-LABEL: define i16 @umin_ctlz_i16_zero_undefined(
 ; CHECK-SAME: i16 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i16 0, 17) i16 @llvm.ctlz.i16(i16 [[X]], i1 true)
-; CHECK-NEXT:    [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTLZ]], i16 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = or i16 [[X]], 512
+; CHECK-NEXT:    [[RET:%.*]] = call range(i16 0, 7) i16 @llvm.ctlz.i16(i16 [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret i16 [[RET]]
 ;
   %ctlz = call i16 @llvm.ctlz.i16(i16 %X, i1 true)
@@ -250,8 +250,8 @@ define i16 @umin_ctlz_i16_zero_undefined(i16 %X) {
 define i32 @umin_ctlz_i32_zero_undefined(i32 %X) {
 ; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_undefined(
 ; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
-; CHECK-NEXT:    [[RET:%.*]] = call i32 @llvm.umin.i32(i32 [[CTLZ]], i32 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[X]], 33554432
+; CHECK-NEXT:    [[RET:%.*]] = call range(i32 0, 7) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
   %ctlz = call i32 @llvm.ctlz.i32(i32 %X, i1 true)
@@ -262,8 +262,8 @@ define i32 @umin_ctlz_i32_zero_undefined(i32 %X) {
 define i64 @umin_ctlz_i64_zero_undefined(i64 %X) {
 ; CHECK-LABEL: define i64 @umin_ctlz_i64_zero_undefined(
 ; CHECK-SAME: i64 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
-; CHECK-NEXT:    [[RET:%.*]] = call i64 @llvm.umin.i64(i64 [[CTLZ]], i64 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[X]], 144115188075855872
+; CHECK-NEXT:    [[RET:%.*]] = call range(i64 0, 7) i64 @llvm.ctlz.i64(i64 [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret i64 [[RET]]
 ;
   %ctlz = call i64 @llvm.ctlz.i64(i64 %X, i1 true)
@@ -295,8 +295,8 @@ define i1 @umin_ctlz_i1_zero_defined(i1 %X) {
 define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(<2 x i32> %X) {
 ; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(
 ; CHECK-SAME: <2 x i32> [[X:%.*]]) {
-; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
-; CHECK-NEXT:    [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 6>)
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 33554432, i32 33554432>
+; CHECK-NEXT:    [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
   %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)