[InstCombine] Fold Minimum over Trailing/Leading Bits Counts (#90402)
The new transformation folds `umin(cttz(x), c)` to `cttz(x | (1 << c))`
and `umin(ctlz(x), c)` to `ctlz(x | ((1 << (bitwidth - 1)) >> c))`. The
transformation is implemented only for a constant `c` so that it does
not increase the number of instructions.
    
The idea of the transformation is to set the c-th lowest (for `cttz`) or
highest (for `ctlz`) bit in the operand. In this way, the `cttz` or
`ctlz` instruction always returns at most `c`.
    
Alive2 proofs: https://alive2.llvm.org/ce/z/y8Hdb8

Fixes #90000
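
A quick standalone check of the two identities (a sketch for illustration,
not part of the commit): `std::countr_zero`/`std::countl_zero` from C++20
`<bit>` play the roles of `cttz`/`ctlz`, and the sample values are arbitrary.

#include <algorithm>
#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X : {0u, 1u, 0x10u, 0xdeadbeefu, 0x80000000u}) {
    for (unsigned C = 0; C < 32; ++C) {
      // umin(cttz(X), C) == cttz(X | (1 << C))
      assert(std::min<unsigned>(std::countr_zero(X), C) ==
             unsigned(std::countr_zero(X | (uint32_t{1} << C))));
      // umin(ctlz(X), C) == ctlz(X | (SignedMin >> C)), SignedMin = 1 << 31
      assert(std::min<unsigned>(std::countl_zero(X), C) ==
             unsigned(std::countl_zero(X | (uint32_t{0x80000000} >> C))));
    }
  }
}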
mskamp authored Jul 13, 2024
1 parent b22adf0 commit 949bbdc
Showing 2 changed files with 429 additions and 0 deletions.
47 changes: 47 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1456,6 +1456,43 @@ static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
  return UsedIndices.all() ? V : nullptr;
}

/// Fold an unsigned minimum of trailing or leading zero bits counts:
///   umin(cttz(CtOp, ZeroUndef), ConstOp) --> cttz(CtOp | (1 << ConstOp))
///   umin(ctlz(CtOp, ZeroUndef), ConstOp) --> ctlz(CtOp | (SignedMin
///                                                         >> ConstOp))
template <Intrinsic::ID IntrID>
static Value *
foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1,
                                          const DataLayout &DL,
                                          InstCombiner::BuilderTy &Builder) {
  static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz,
                "This helper only supports cttz and ctlz intrinsics");

  Value *CtOp;
  Value *ZeroUndef;
  if (!match(I0,
             m_OneUse(m_Intrinsic<IntrID>(m_Value(CtOp), m_Value(ZeroUndef)))))
    return nullptr;

  unsigned BitWidth = I1->getType()->getScalarSizeInBits();
  auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
  if (!match(I1, m_CheckedInt(LessBitWidth)))
    // We have a constant >= BitWidth (which can be handled by CVP)
    // or a non-splat vector with elements < and >= BitWidth
    return nullptr;

  Type *Ty = I1->getType();
  Constant *NewConst = ConstantFoldBinaryOpOperands(
      IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr,
      IntrID == Intrinsic::cttz
          ? ConstantInt::get(Ty, 1)
          : ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)),
      cast<Constant>(I1), DL);
  return Builder.CreateBinaryIntrinsic(
      IntrID, Builder.CreateOr(CtOp, NewConst),
      ConstantInt::getTrue(ZeroUndef->getType()));
}
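
// Worked example for the fold above (illustrative; not part of the diff):
// on i32, umin(cttz(X), 5) becomes cttz(X | (1 << 5)), i.e. X | 0x20, and
// umin(ctlz(X), 5) becomes ctlz(X | (0x80000000u >> 5)), i.e. X | 0x04000000.
// Setting bit 5 (resp. bit 26) caps the count at 5, and the OR makes the
// operand nonzero, which is why the new call can pass true as the
// zero-is-poison argument.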

/// CallInst simplification. This mostly only handles folding of intrinsic
/// instructions. For normal calls, it allows visitCallBase to do the heavy
/// lifting.
@@ -1661,6 +1698,16 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
      Value *Cmp = Builder.CreateICmpNE(I0, Zero);
      return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
    }
    // umin(cttz(x), const) --> cttz(x | (1 << const))
    if (Value *FoldedCttz =
            foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::cttz>(
                I0, I1, DL, Builder))
      return replaceInstUsesWith(*II, FoldedCttz);
    // umin(ctlz(x), const) --> ctlz(x | (SignedMin >> const))
    if (Value *FoldedCtlz =
            foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::ctlz>(
                I0, I1, DL, Builder))
      return replaceInstUsesWith(*II, FoldedCtlz);
    [[fallthrough]];
  }
  case Intrinsic::umax: {