-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Check for undef before transforming lshr (mul (X, 2^N + 1)), N -> add (X, lshr(X, N)) #96324
Conversation
@llvm/pr-subscribers-llvm-transforms Author: AtariDreams (AtariDreams) ChangesFull diff: https://github.com/llvm/llvm-project/pull/96324.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 06b434857c657..2c6e9723a56ac 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1500,6 +1500,8 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
// lshr (mul nuw (X, 2^N + 1)), N -> add nuw (X, lshr(X, N))
if (Op0->hasOneUse()) {
+ if (!isGuaranteedNotToBeUndef(X, &AC, &I, &DT))
+ X = Builder.CreateFreeze(X, ".fr");
auto *NewAdd = BinaryOperator::CreateNUWAdd(
X, Builder.CreateLShr(X, ConstantInt::get(Ty, ShAmtC), "",
I.isExact()));
@@ -1531,6 +1533,8 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
if (match(Op0, m_OneUse(m_NSWMul(m_Value(X), m_APInt(MulC))))) {
if (BitWidth > 2 && (*MulC - 1).isPowerOf2() &&
MulC->logBase2() == ShAmtC) {
+ if (!isGuaranteedNotToBeUndef(X, &AC, &I, &DT))
+ X = Builder.CreateFreeze(X, ".fr");
return BinaryOperator::CreateNSWAdd(
X, Builder.CreateLShr(X, ConstantInt::get(Ty, ShAmtC), "",
I.isExact()));
diff --git a/llvm/test/Transforms/InstCombine/ashr-lshr.ll b/llvm/test/Transforms/InstCombine/ashr-lshr.ll
index c2a4f35412670..2532ce3975e80 100644
--- a/llvm/test/Transforms/InstCombine/ashr-lshr.ll
+++ b/llvm/test/Transforms/InstCombine/ashr-lshr.ll
@@ -607,8 +607,9 @@ define <2 x i8> @ashr_known_pos_exact_vec(<2 x i8> %x, <2 x i8> %y) {
define i32 @lshr_mul_times_3_div_2(i32 %0) {
; CHECK-LABEL: @lshr_mul_times_3_div_2(
-; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 1
-; CHECK-NEXT: [[LSHR:%.*]] = add nuw nsw i32 [[TMP2]], [[TMP0]]
+; CHECK-NEXT: [[DOTFR:%.*]] = freeze i32 [[TMP0:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[DOTFR]], 1
+; CHECK-NEXT: [[LSHR:%.*]] = add nuw nsw i32 [[DOTFR]], [[TMP2]]
; CHECK-NEXT: ret i32 [[LSHR]]
;
%mul = mul nsw nuw i32 %0, 3
@@ -618,8 +619,9 @@ define i32 @lshr_mul_times_3_div_2(i32 %0) {
define i32 @lshr_mul_times_3_div_2_exact(i32 %x) {
; CHECK-LABEL: @lshr_mul_times_3_div_2_exact(
-; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 1
-; CHECK-NEXT: [[LSHR:%.*]] = add nsw i32 [[TMP1]], [[X]]
+; CHECK-NEXT: [[DOTFR:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[DOTFR]], 1
+; CHECK-NEXT: [[LSHR:%.*]] = add nsw i32 [[DOTFR]], [[TMP1]]
; CHECK-NEXT: ret i32 [[LSHR]]
;
%mul = mul nsw i32 %x, 3
@@ -657,8 +659,9 @@ define i32 @mul_times_3_div_2_multiuse_lshr(i32 %x) {
define i32 @lshr_mul_times_3_div_2_exact_2(i32 %x) {
; CHECK-LABEL: @lshr_mul_times_3_div_2_exact_2(
-; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 1
-; CHECK-NEXT: [[LSHR:%.*]] = add nuw i32 [[TMP1]], [[X]]
+; CHECK-NEXT: [[DOTFR:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[DOTFR]], 1
+; CHECK-NEXT: [[LSHR:%.*]] = add nuw i32 [[DOTFR]], [[TMP1]]
; CHECK-NEXT: ret i32 [[LSHR]]
;
%mul = mul nuw i32 %x, 3
@@ -668,8 +671,9 @@ define i32 @lshr_mul_times_3_div_2_exact_2(i32 %x) {
define i32 @lshr_mul_times_5_div_4(i32 %0) {
; CHECK-LABEL: @lshr_mul_times_5_div_4(
-; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 2
-; CHECK-NEXT: [[LSHR:%.*]] = add nuw nsw i32 [[TMP2]], [[TMP0]]
+; CHECK-NEXT: [[DOTFR:%.*]] = freeze i32 [[TMP0:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[DOTFR]], 2
+; CHECK-NEXT: [[LSHR:%.*]] = add nuw nsw i32 [[DOTFR]], [[TMP2]]
; CHECK-NEXT: ret i32 [[LSHR]]
;
%mul = mul nsw nuw i32 %0, 5
@@ -679,8 +683,9 @@ define i32 @lshr_mul_times_5_div_4(i32 %0) {
define i32 @lshr_mul_times_5_div_4_exact(i32 %x) {
; CHECK-LABEL: @lshr_mul_times_5_div_4_exact(
-; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 2
-; CHECK-NEXT: [[LSHR:%.*]] = add nsw i32 [[TMP1]], [[X]]
+; CHECK-NEXT: [[DOTFR:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[DOTFR]], 2
+; CHECK-NEXT: [[LSHR:%.*]] = add nsw i32 [[DOTFR]], [[TMP1]]
; CHECK-NEXT: ret i32 [[LSHR]]
;
%mul = mul nsw i32 %x, 5
@@ -718,8 +723,9 @@ define i32 @mul_times_5_div_4_multiuse_lshr(i32 %x) {
define i32 @lshr_mul_times_5_div_4_exact_2(i32 %x) {
; CHECK-LABEL: @lshr_mul_times_5_div_4_exact_2(
-; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 2
-; CHECK-NEXT: [[LSHR:%.*]] = add nuw i32 [[TMP1]], [[X]]
+; CHECK-NEXT: [[DOTFR:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[DOTFR]], 2
+; CHECK-NEXT: [[LSHR:%.*]] = add nuw i32 [[DOTFR]], [[TMP1]]
; CHECK-NEXT: ret i32 [[LSHR]]
;
%mul = mul nuw i32 %x, 5
diff --git a/llvm/test/Transforms/InstCombine/lshr.ll b/llvm/test/Transforms/InstCombine/lshr.ll
index 01e07985ba6ab..a300bc71a84eb 100644
--- a/llvm/test/Transforms/InstCombine/lshr.ll
+++ b/llvm/test/Transforms/InstCombine/lshr.ll
@@ -741,8 +741,9 @@ define i32 @mul_splat_fold_wrong_lshr_const(i32 %x) {
define i32 @mul_splat_fold_no_nuw(i32 %x) {
; CHECK-LABEL: @mul_splat_fold_no_nuw(
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 16
-; CHECK-NEXT: [[T:%.*]] = add nsw i32 [[TMP1]], [[X]]
+; CHECK-NEXT: [[DOTFR:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[DOTFR]], 16
+; CHECK-NEXT: [[T:%.*]] = add nsw i32 [[DOTFR]], [[TMP1]]
; CHECK-NEXT: ret i32 [[T]]
;
%m = mul nsw i32 %x, 65537
|
@nikic Given how this is a multi-use, it would make sense to put freeze here, no? |
@dtcxzyw Thoughts? |
b4bb27f
to
39ae61a
Compare
@@ -1499,7 +1499,7 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { | |||
return replaceInstUsesWith(I, X); | |||
|
|||
// lshr (mul nuw (X, 2^N + 1)), N -> add nuw (X, lshr(X, N)) | |||
if (Op0->hasOneUse()) { | |||
if (Op0->hasOneUse() && isGuaranteedNotToBeUndef(X, &AC, &I, &DT)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should do if (!isGuaranteedNotToBeUndef) CreateFreeze
instead.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should do
if (!isGuaranteedNotToBeUndef) CreateFreeze
instead.
That creates an extra instruction, so we end up with more instructions than we started with.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's ok if the extra instruction is freeze.
TBH I don't really want to fix partial-undef multi-use miscompiles. We don't have any evidence that they have any read-world relevance. We typically do not even fix full-undef multi-use miscompiles in pre-existing transforms (we only don't accept them in new ones). |
Honestly, that's valid. I will just close this for now and do some more research. |
@nikic Can you clarify this policy in instcombine contributor guide? |
Alive2 didn't catch the partial undef.
https://alive2.llvm.org/ce/z/VkqNSN