Skip to content

Commit

Permalink
[InstCombine] Fold Minimum over Trailing/Leading Bits Counts (#90000)
Browse files Browse the repository at this point in the history
The new transformation folds `umin(cttz(x), c)` to `cttz(x | (1 << c))`
and `umin(ctlz(x), c)` to `ctlz(x | ((1 << (bitwidth - 1)) >> c))`. The
transformation is only implemented for constant `c` to not increase the
number of instructions.

The idea of the transformation is to set bit `c` of the operand, counting
from zero at the least-significant end (for `cttz`) or at the
most-significant end (for `ctlz`). In this way, the `cttz` or
`ctlz` instruction always returns at most `c`.

Alive2 proofs: https://alive2.llvm.org/ce/z/xRZTE7
  • Loading branch information
mskamp committed Apr 28, 2024
1 parent 9fdd0b1 commit c727b10
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 28 deletions.
12 changes: 12 additions & 0 deletions llvm/include/llvm/IR/PatternMatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -2466,6 +2466,18 @@ inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BSwap(const Opnd0 &Op0) {
return m_Intrinsic<Intrinsic::bswap>(Op0);
}

/// Build a matcher for a call to the `ctlz` intrinsic.
///
/// \p Op0 is the sub-matcher applied to the first call argument and \p Op1 is
/// the sub-matcher applied to the second call argument. Both are forwarded
/// unchanged to the generic two-operand m_Intrinsic helper.
template <typename Opnd0, typename Opnd1>
inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_Ctlz(const Opnd0 &Op0,
                                                        const Opnd1 &Op1) {
  auto CtlzMatcher = m_Intrinsic<Intrinsic::ctlz>(Op0, Op1);
  return CtlzMatcher;
}

/// Build a matcher for a call to the `cttz` intrinsic.
///
/// \p Op0 is the sub-matcher applied to the first call argument and \p Op1 is
/// the sub-matcher applied to the second call argument. Both are forwarded
/// unchanged to the generic two-operand m_Intrinsic helper.
template <typename Opnd0, typename Opnd1>
inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_Cttz(const Opnd0 &Op0,
                                                        const Opnd1 &Op1) {
  auto CttzMatcher = m_Intrinsic<Intrinsic::cttz>(Op0, Op1);
  return CttzMatcher;
}

template <typename Opnd0>
inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FAbs(const Opnd0 &Op0) {
return m_Intrinsic<Intrinsic::fabs>(Op0);
Expand Down
33 changes: 33 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1633,6 +1633,39 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Value *Cmp = Builder.CreateICmpNE(I0, Zero);
return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
}
// umin(cttz(x), const) --> cttz(x | (1 << const))
Value *X;
const APInt *Y;
Value *Z;
if (match(I0, m_OneUse(m_Cttz(m_Value(X), m_Value(Z)))) &&
match(I1, m_APInt(Y))) {
Value *CttzOp = X;
if (Y->ult(I1->getType()->getScalarType()->getIntegerBitWidth())) {
auto One = APInt::getOneBitSet(
I1->getType()->getScalarType()->getIntegerBitWidth(), 0);
Value *NewConst = ConstantInt::get(I1->getType(), One << *Y);
CttzOp = Builder.CreateOr(X, NewConst);
}
return CallInst::Create(Intrinsic::getDeclaration(II->getModule(),
Intrinsic::cttz,
II->getType()),
{CttzOp, Z});
}
// umin(ctlz(x), const) --> ctlz(x | ((1 << (bitwidth - 1)) >> const))
if (match(I0, m_OneUse(m_Ctlz(m_Value(X), m_Value(Z)))) &&
match(I1, m_APInt(Y))) {
Value *CtlzOp = X;
if (Y->ult(I1->getType()->getScalarType()->getIntegerBitWidth())) {
auto Min = APInt::getSignedMinValue(
I1->getType()->getScalarType()->getIntegerBitWidth());
Value *NewConst = ConstantInt::get(I1->getType(), Min.lshr(*Y));
CtlzOp = Builder.CreateOr(X, NewConst);
}
return CallInst::Create(Intrinsic::getDeclaration(II->getModule(),
Intrinsic::ctlz,
II->getType()),
{CtlzOp, Z});
}
[[fallthrough]];
}
case Intrinsic::umax: {
Expand Down
56 changes: 28 additions & 28 deletions llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1)
define i8 @umin_cttz_i8_zero_undefined(i8 %X) {
; CHECK-LABEL: define i8 @umin_cttz_i8_zero_undefined(
; CHECK-SAME: i8 [[X:%.*]]) {
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 true)
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 64
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
; CHECK-NEXT: ret i8 [[RET]]
;
%cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true)
Expand All @@ -37,8 +37,8 @@ define i8 @umin_cttz_i8_zero_undefined(i8 %X) {
define i8 @umin_cttz_i8_zero_defined(i8 %X) {
; CHECK-LABEL: define i8 @umin_cttz_i8_zero_defined(
; CHECK-SAME: i8 [[X:%.*]]) {
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 false)
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 64
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
; CHECK-NEXT: ret i8 [[RET]]
;
%cttz = call i8 @llvm.cttz.i8(i8 %X, i1 false)
Expand All @@ -49,8 +49,8 @@ define i8 @umin_cttz_i8_zero_defined(i8 %X) {
define i8 @umin_cttz_i8_commuted_zero_undefined(i8 %X) {
; CHECK-LABEL: define i8 @umin_cttz_i8_commuted_zero_undefined(
; CHECK-SAME: i8 [[X:%.*]]) {
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 true)
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 64
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
; CHECK-NEXT: ret i8 [[RET]]
;
%cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true)
Expand All @@ -72,8 +72,8 @@ define i8 @umin_cttz_i8_ge_bitwidth_zero_undefined(i8 %X) {
define i16 @umin_cttz_i16_zero_undefined(i16 %X) {
; CHECK-LABEL: define i16 @umin_cttz_i16_zero_undefined(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i16 0, 17) i16 @llvm.cttz.i16(i16 [[X]], i1 true)
; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTTZ]], i16 6)
; CHECK-NEXT: [[TMP1:%.*]] = or i16 [[X]], 64
; CHECK-NEXT: [[RET:%.*]] = call range(i16 0, 7) i16 @llvm.cttz.i16(i16 [[TMP1]], i1 true)
; CHECK-NEXT: ret i16 [[RET]]
;
%cttz = call i16 @llvm.cttz.i16(i16 %X, i1 true)
Expand All @@ -84,8 +84,8 @@ define i16 @umin_cttz_i16_zero_undefined(i16 %X) {
define i32 @umin_cttz_i32_zero_undefined(i32 %X) {
; CHECK-LABEL: define i32 @umin_cttz_i32_zero_undefined(
; CHECK-SAME: i32 [[X:%.*]]) {
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[X]], i1 true)
; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.umin.i32(i32 [[CTTZ]], i32 6)
; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X]], 64
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) i32 @llvm.cttz.i32(i32 [[TMP1]], i1 true)
; CHECK-NEXT: ret i32 [[RET]]
;
%cttz = call i32 @llvm.cttz.i32(i32 %X, i1 true)
Expand All @@ -96,8 +96,8 @@ define i32 @umin_cttz_i32_zero_undefined(i32 %X) {
define i64 @umin_cttz_i64_zero_undefined(i64 %X) {
; CHECK-LABEL: define i64 @umin_cttz_i64_zero_undefined(
; CHECK-SAME: i64 [[X:%.*]]) {
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i64 0, 65) i64 @llvm.cttz.i64(i64 [[X]], i1 true)
; CHECK-NEXT: [[RET:%.*]] = call i64 @llvm.umin.i64(i64 [[CTTZ]], i64 6)
; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[X]], 64
; CHECK-NEXT: [[RET:%.*]] = call range(i64 0, 7) i64 @llvm.cttz.i64(i64 [[TMP1]], i1 true)
; CHECK-NEXT: ret i64 [[RET]]
;
%cttz = call i64 @llvm.cttz.i64(i64 %X, i1 true)
Expand Down Expand Up @@ -129,8 +129,8 @@ define i1 @umin_cttz_i1_zero_defined(i1 %X) {
define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(<2 x i32> %X) {
; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 6>)
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 64, i32 64>
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true)
; CHECK-NEXT: ret <2 x i32> [[RET]]
;
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
Expand Down Expand Up @@ -191,8 +191,8 @@ define i16 @umin_cttz_i16_negative_two_uses(i16 %X) {
define i8 @umin_ctlz_i8_zero_undefined(i8 %X) {
; CHECK-LABEL: define i8 @umin_ctlz_i8_zero_undefined(
; CHECK-SAME: i8 [[X:%.*]]) {
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 true)
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 2
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true)
; CHECK-NEXT: ret i8 [[RET]]
;
%ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true)
Expand All @@ -203,8 +203,8 @@ define i8 @umin_ctlz_i8_zero_undefined(i8 %X) {
define i8 @umin_ctlz_i8_zero_defined(i8 %X) {
; CHECK-LABEL: define i8 @umin_ctlz_i8_zero_defined(
; CHECK-SAME: i8 [[X:%.*]]) {
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 false)
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 2
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true)
; CHECK-NEXT: ret i8 [[RET]]
;
%ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 false)
Expand All @@ -215,8 +215,8 @@ define i8 @umin_ctlz_i8_zero_defined(i8 %X) {
define i8 @umin_ctlz_i8_commuted_zero_undefined(i8 %X) {
; CHECK-LABEL: define i8 @umin_ctlz_i8_commuted_zero_undefined(
; CHECK-SAME: i8 [[X:%.*]]) {
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 true)
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 2
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true)
; CHECK-NEXT: ret i8 [[RET]]
;
%ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true)
Expand All @@ -238,8 +238,8 @@ define i8 @umin_ctlz_i8_ge_bitwidth_zero_undefined(i8 %X) {
define i16 @umin_ctlz_i16_zero_undefined(i16 %X) {
; CHECK-LABEL: define i16 @umin_ctlz_i16_zero_undefined(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i16 0, 17) i16 @llvm.ctlz.i16(i16 [[X]], i1 true)
; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTLZ]], i16 6)
; CHECK-NEXT: [[TMP1:%.*]] = or i16 [[X]], 512
; CHECK-NEXT: [[RET:%.*]] = call range(i16 0, 7) i16 @llvm.ctlz.i16(i16 [[TMP1]], i1 true)
; CHECK-NEXT: ret i16 [[RET]]
;
%ctlz = call i16 @llvm.ctlz.i16(i16 %X, i1 true)
Expand All @@ -250,8 +250,8 @@ define i16 @umin_ctlz_i16_zero_undefined(i16 %X) {
define i32 @umin_ctlz_i32_zero_undefined(i32 %X) {
; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_undefined(
; CHECK-SAME: i32 [[X:%.*]]) {
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.umin.i32(i32 [[CTLZ]], i32 6)
; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X]], 33554432
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 true)
; CHECK-NEXT: ret i32 [[RET]]
;
%ctlz = call i32 @llvm.ctlz.i32(i32 %X, i1 true)
Expand All @@ -262,8 +262,8 @@ define i32 @umin_ctlz_i32_zero_undefined(i32 %X) {
define i64 @umin_ctlz_i64_zero_undefined(i64 %X) {
; CHECK-LABEL: define i64 @umin_ctlz_i64_zero_undefined(
; CHECK-SAME: i64 [[X:%.*]]) {
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
; CHECK-NEXT: [[RET:%.*]] = call i64 @llvm.umin.i64(i64 [[CTLZ]], i64 6)
; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[X]], 144115188075855872
; CHECK-NEXT: [[RET:%.*]] = call range(i64 0, 7) i64 @llvm.ctlz.i64(i64 [[TMP1]], i1 true)
; CHECK-NEXT: ret i64 [[RET]]
;
%ctlz = call i64 @llvm.ctlz.i64(i64 %X, i1 true)
Expand Down Expand Up @@ -295,8 +295,8 @@ define i1 @umin_ctlz_i1_zero_defined(i1 %X) {
define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(<2 x i32> %X) {
; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 6>)
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 33554432, i32 33554432>
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true)
; CHECK-NEXT: ret <2 x i32> [[RET]]
;
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
Expand Down

0 comments on commit c727b10

Please sign in to comment.