-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Reduce multiplicands of even numbers when a shift is involved #92475
Conversation
I do want to expand this to do things like splat vectors though the code for that will be a bit more involved. I want to at least get something working and then do baby steps. |
@llvm/pr-subscribers-llvm-transforms Author: AtariDreams (AtariDreams) ChangesWe can improve analysis, codegen, and enable other folds if we can take expressions like (x * 6) >> 2 and replace them with (x * 3) >> 1 (assuming no overflow of course). Because every shift is a division of 2, we can replace a multiplication with an even number with that number divided by 2 and require one less shift and keep going until we get 0 or an odd number for the multiplicand. Alive2 Proof: https://alive2.llvm.org/ce/z/C9FvwB Full diff: https://github.com/llvm/llvm-project/pull/92475.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index ba297111d945f..473d997afbb3f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1469,17 +1469,32 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
// able to invert the transform and perf may suffer with an extra mul
// instruction.
if (Op0->hasOneUse()) {
- APInt NewMulC = MulC->lshr(ShAmtC);
- // if c is divisible by (1 << ShAmtC):
- // lshr (mul nuw x, MulC), ShAmtC -> mul nuw nsw x, (MulC >> ShAmtC)
- if (MulC->eq(NewMulC.shl(ShAmtC))) {
- auto *NewMul =
- BinaryOperator::CreateNUWMul(X, ConstantInt::get(Ty, NewMulC));
- assert(ShAmtC != 0 &&
- "lshr X, 0 should be handled by simplifyLShrInst.");
- NewMul->setHasNoSignedWrap(true);
- return NewMul;
+ unsigned CommonZeros = std::min(MulC->countr_zero(), ShAmtC);
+ if (CommonZeros != 0) {
+ APInt NewMulC = MulC->lshr(CommonZeros);
+ unsigned NewShAmtC = ShAmtC - CommonZeros;
+ // if c is divisible by (1 << ShAmtC):
+ // lshr (mul nuw x, MulC), ShAmtC -> mul nuw nsw x, (MulC >> ShAmtC)
+ if (NewShAmtC == 0) {
+ auto *NewMul =
+ BinaryOperator::CreateNUWMul(X, ConstantInt::get(Ty, NewMulC));
+ NewMul->setHasNoSignedWrap(true);
+ return NewMul;
+ }
+
+ // We can reduce things like lshr (mul nuw x, 6), 2 to lshr (mul nuw
+ // nsw x, 3), 1
+ // TODO: What about if ALL uses can be simplified in this way? Is that
+ // likely enough to happen to justify even caring?
+ auto *NewMul = Builder.CreateMul(X, ConstantInt::get(Ty, NewMulC), "",
+ /*NUW*/ true, /*NSW*/ true);
+ auto *NewLshr = BinaryOperator::CreateLShr(
+ NewMul, ConstantInt::get(Ty, NewShAmtC));
+ NewLshr->copyIRFlags(&I); // We can preserve 'exact'-ness.
+ return NewLshr;
}
+ assert(ShAmtC != 0 &&
+ "lshr X, 0 should be handled by simplifyLShrInst.");
}
}
diff --git a/llvm/test/Transforms/InstCombine/lshr.ll b/llvm/test/Transforms/InstCombine/lshr.ll
index fa92c1c4b3be4..6b4595209864c 100644
--- a/llvm/test/Transforms/InstCombine/lshr.ll
+++ b/llvm/test/Transforms/InstCombine/lshr.ll
@@ -591,8 +591,8 @@ define i32 @shl_add_lshr_neg(i32 %x, i32 %y, i32 %z) {
define i32 @mul_splat_fold_wrong_mul_const(i32 %x) {
; CHECK-LABEL: @mul_splat_fold_wrong_mul_const(
-; CHECK-NEXT: [[M:%.*]] = mul nuw i32 [[X:%.*]], 65538
-; CHECK-NEXT: [[T:%.*]] = lshr i32 [[M]], 16
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i32 [[X:%.*]], 32769
+; CHECK-NEXT: [[T:%.*]] = lshr i32 [[TMP1]], 15
; CHECK-NEXT: ret i32 [[T]]
;
%m = mul nuw i32 %x, 65538
@@ -1403,3 +1403,51 @@ define <2 x i8> @bool_add_lshr_vec_wrong_shift_amt(<2 x i1> %a, <2 x i1> %b) {
%lshr = lshr <2 x i8> %add, <i8 1, i8 2>
ret <2 x i8> %lshr
}
+
+define i32 @reduce_shift(i32 %x) {
+; CHECK-LABEL: @reduce_shift(
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i32 [[X:%.*]], 3
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP1]], 2
+; CHECK-NEXT: ret i32 [[SHR]]
+;
+ %mul = mul nuw i32 %x, 12
+ %shr = lshr i32 %mul, 4
+ ret i32 %shr
+}
+
+; Negative test
+
+define i32 @reduce_shift_no_nuw(i32 %x) {
+; CHECK-LABEL: @reduce_shift_no_nuw(
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[X:%.*]], 12
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[MUL]], 4
+; CHECK-NEXT: ret i32 [[SHR]]
+;
+ %mul = mul nsw i32 %x, 12
+ %shr = lshr i32 %mul, 4
+ ret i32 %shr
+}
+
+; Negative test
+
+define i32 @reduce_shift_wrong_mul(i32 %x) {
+; CHECK-LABEL: @reduce_shift_wrong_mul(
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw i32 [[X:%.*]], 11
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[MUL]], 4
+; CHECK-NEXT: ret i32 [[SHR]]
+;
+ %mul = mul nuw i32 %x, 11
+ %shr = lshr i32 %mul, 4
+ ret i32 %shr
+}
+
+define i32 @reduce_shift_exact(i32 %x) {
+; CHECK-LABEL: @reduce_shift_exact(
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw i32 [[X:%.*]], 11
+; CHECK-NEXT: [[SHR:%.*]] = lshr exact i32 [[MUL]], 4
+; CHECK-NEXT: ret i32 [[SHR]]
+;
+ %mul = mul nuw i32 %x, 11
+ %shr = lshr exact i32 %mul, 4
+ ret i32 %shr
+}
|
200ff14
to
bf74be0
Compare
@nikic What do you think about this? |
013e4e4
to
4571fac
Compare
…volved We can improve analysis, codegen, and enable other folds if we can take expressions like (x * 6) >> 2 and replace them with (x * 3) >> 1 (assuming no overflow of course). Because every shift is a division of 2, we can replace a multiplication with an even number with that number divided by 2 and require one less shift, and keep going until we get 0 or an odd number for the multiplicand. Alive2 Proofs: https://alive2.llvm.org/ce/z/C9FvwB https://alive2.llvm.org/ce/z/7Zsx3b
We can improve analysis, codegen, and enable other folds if we can take expressions like (x * 6) >> 2 and replace them with (x * 3) >> 1 (assuming no overflow of course).
Because every shift is a division of 2, we can replace a multiplication with an even number with that number divided by 2 and require one less shift and keep going until we get 0 or an odd number for the multiplicand.
Alive2 Proofs:
https://alive2.llvm.org/ce/z/C9FvwB
https://alive2.llvm.org/ce/z/7Zsx3b