Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[InstCombine] Improve bitfield addition #77184

Open
wants to merge 4 commits into
base: main
Choose a base branch
from

Conversation

ParkHanbum
Copy link
Contributor

@ParkHanbum ParkHanbum commented Jan 6, 2024

Fixes #33874.

@llvmbot
Copy link
Collaborator

llvmbot commented Jan 6, 2024

@llvm/pr-subscribers-llvm-transforms

Author: hanbeom (ParkHanbum)

Changes

Patch is 21.95 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/77184.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp (+146)
  • (modified) llvm/test/Transforms/InstCombine/or.ll (+398)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index c03f50d75814d8..b25d4fd9605788 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3314,6 +3314,149 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
   return foldAndOrOfICmpsUsingRanges(LHS, RHS, IsAnd);
 }
 
+struct BitFieldAddBitMask {
+  const APInt *Lower;
+  const APInt *Upper;
+};
+struct BitFieldOptBitMask {
+  const APInt *Lower;
+  const APInt *Upper;
+  const APInt *New;
+};
+struct BitFieldAddInfo {
+  Value *X;
+  Value *Y;
+  bool opt;
+  union {
+    BitFieldAddBitMask AddMask;
+    BitFieldOptBitMask OptMask;
+  };
+};
+
+static Value *foldBitFieldArithmetic(BinaryOperator &I,
+                                     InstCombiner::BuilderTy &Builder) {
+  auto *Disjoint = dyn_cast<PossiblyDisjointInst>(&I);
+  if (!Disjoint || !Disjoint->isDisjoint())
+    return nullptr;
+
+  unsigned BitWidth = I.getType()->getScalarSizeInBits();
+  auto AccumulateY = [&](Value *LoY, Value *UpY, APInt LoMask,
+                         APInt UpMask) -> Value * {
+    Value *Y = nullptr;
+    auto CLoY = dyn_cast_or_null<Constant>(LoY);
+    auto CUpY = dyn_cast_or_null<Constant>(UpY);
+    if ((CLoY == nullptr) ^ (CUpY == nullptr))
+      return nullptr;
+
+    if (CLoY && CUpY) {
+      APInt IUpY = CUpY->getUniqueInteger();
+      APInt ILoY = CLoY->getUniqueInteger();
+      if (!(IUpY.isSubsetOf(UpMask) && ILoY.isSubsetOf(LoMask)))
+        return nullptr;
+      Y = ConstantInt::get(CLoY->getType(), ILoY + IUpY);
+    } else if (LoY == UpY) {
+      Y = LoY;
+    }
+
+    return Y;
+  };
+
+  auto MatchBitFieldAdd =
+      [&](BinaryOperator &I) -> std::optional<BitFieldAddInfo> {
+    const APInt *OptLoMask, *OptUpMask, *LoMask, *UpMask, *UpMask2 = nullptr;
+    Value *X, *Y, *UpY;
+    auto BitFieldAddUpper = m_CombineOr(
+        m_And(m_c_Add(m_And(m_Value(X), m_APInt(UpMask)), m_Value(UpY)),
+              m_APInt(UpMask2)),
+        m_c_Add(m_And(m_Value(X), m_APInt(UpMask)), m_Value(UpY)));
+    auto BitFieldAdd =
+        m_c_Or(BitFieldAddUpper,
+               m_And(m_c_Add(m_Deferred(X), m_Value(Y)), m_APInt(LoMask)));
+    auto BitFieldAddIC =
+        m_c_Or(m_And(m_c_Add(m_Value(X), m_Value(Y)), m_APInt(LoMask)),
+               m_And(m_c_Add(m_Deferred(X), m_Value(UpY)), m_APInt(UpMask)));
+    auto OptBitFieldAdd = m_c_Or(
+        m_c_Xor(m_CombineOr(
+                    m_c_Add(m_And(m_Value(X), m_APInt(OptLoMask)),
+                            m_And(m_Value(Y), m_APInt(OptLoMask))),
+                    m_c_Add(m_And(m_Value(X), m_APInt(OptLoMask)), m_Value(Y))),
+                m_CombineOr(m_And(m_Deferred(X), m_APInt(OptUpMask)),
+                            m_And(m_c_Xor(m_Deferred(X), m_Value(UpY)),
+                                  m_APInt(OptUpMask)))),
+        BitFieldAddUpper);
+
+    if (match(&I, BitFieldAdd) || match(&I, BitFieldAddIC)) {
+      APInt Mask = APInt::getBitsSet(BitWidth, BitWidth - UpMask->countl_zero(),
+                                     BitWidth);
+      if (!((UpMask2 == nullptr || *UpMask == *UpMask2) &&
+            (LoMask->popcount() >= 2 && UpMask->popcount() >= 2) &&
+            (LoMask->isShiftedMask() && UpMask->isShiftedMask()) &&
+            ((*LoMask & *UpMask) == 0) &&
+            ((Mask ^ *LoMask ^ *UpMask).isAllOnes())))
+        return std::nullopt;
+
+      if (!(Y = AccumulateY(Y, UpY, *LoMask, *UpMask)))
+        return std::nullopt;
+
+      return {{X, Y, false, {{LoMask, UpMask}}}};
+    }
+
+    if (match(&I, OptBitFieldAdd)) {
+      APInt Mask = APInt::getBitsSet(
+          BitWidth, BitWidth - OptUpMask->countl_zero(), BitWidth);
+      APInt Mask2 = APInt::getBitsSet(
+          BitWidth, BitWidth - UpMask->countl_zero(), BitWidth);
+      if (!((UpMask2 == nullptr || *UpMask == *UpMask2) &&
+            (UpMask->isShiftedMask() && UpMask->popcount() >= 2) &&
+            ((*UpMask & (*OptLoMask | *OptUpMask)) == 0) &&
+            ((~*OptLoMask ^ Mask) == *OptUpMask) &&
+            (Mask2 ^ *UpMask ^ (*OptLoMask ^ *OptUpMask)).isAllOnes()))
+        return std::nullopt;
+
+      if (!(Y = AccumulateY(Y, UpY, (*OptLoMask + *OptUpMask), *UpMask)))
+        return std::nullopt;
+
+      struct BitFieldAddInfo Info = {X, Y, true, {{OptLoMask, OptUpMask}}};
+      Info.OptMask.New = UpMask;
+      return {Info};
+    }
+
+    return std::nullopt;
+  };
+
+  auto Info = MatchBitFieldAdd(I);
+  if (Info) {
+    Value *X = Info->X;
+    Value *Y = Info->Y;
+    APInt BitLoMask, BitUpMask;
+    if (Info->opt) {
+      unsigned NewHiBit = BitWidth - (Info->OptMask.New->countl_zero() + 1);
+      BitLoMask = *Info->OptMask.Lower | *Info->OptMask.New;
+      BitLoMask.clearBit(NewHiBit);
+      BitUpMask = *Info->OptMask.Upper;
+      BitUpMask.setBit(NewHiBit);
+    } else {
+      unsigned LowerHiBit = BitWidth - (Info->AddMask.Lower->countl_zero() + 1);
+      unsigned UpperHiBit = BitWidth - (Info->AddMask.Upper->countl_zero() + 1);
+      BitLoMask = *Info->AddMask.Lower | *Info->AddMask.Upper;
+      BitLoMask.clearBit(LowerHiBit);
+      BitLoMask.clearBit(UpperHiBit);
+      BitUpMask = APInt::getOneBitSet(BitWidth, LowerHiBit);
+      BitUpMask.setBit(UpperHiBit);
+    }
+
+    auto AndXLower = Builder.CreateAnd(X, BitLoMask);
+    auto AndYLower = Builder.CreateAnd(Y, BitLoMask);
+    auto Add = Builder.CreateNUWAdd(AndXLower, AndYLower);
+    auto Xor1 = Builder.CreateXor(X, Y);
+    auto AndUpper = Builder.CreateAnd(Xor1, BitUpMask);
+    auto Xor = Builder.CreateXor(Add, AndUpper);
+    return Xor;
+  }
+
+  return nullptr;
+}
+
 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
 // here. We should standardize that construct where it is needed or choose some
 // other way to ensure that commutated variants of patterns are not missed.
@@ -3884,6 +4027,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
       return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *C1 | *C2));
   }
 
+  if (Value *Res = foldBitFieldArithmetic(I, Builder))
+    return replaceInstUsesWith(I, Res);
+
   return nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll
index 573a11599141a7..8eafd45466b994 100644
--- a/llvm/test/Transforms/InstCombine/or.ll
+++ b/llvm/test/Transforms/InstCombine/or.ll
@@ -1777,3 +1777,401 @@ if.then:
 if.else:
   ret i32 0
 }
+
+; test or disjoint which used for BitField Arithmetic.
+; Positive
+define i8 @src_2_bitfield_op(i8 %x, i8 %y) {
+; CHECK-LABEL: @src_2_bitfield_op(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 11
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[Y:%.*]], 11
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i8 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[TMP4:%.*]] = and i8 [[TMP3]], 20
+; CHECK-NEXT:    [[BF_SET20:%.*]] = xor i8 [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    ret i8 [[BF_SET20]]
+;
+entry:
+  %narrow = add i8 %y, %x
+  %bf.value = and i8 %narrow, 7
+  %bf.lshr = and i8 %x, 24
+  %bf.lshr1228 = add i8 %bf.lshr, %y
+  %bf.shl = and i8 %bf.lshr1228, 24
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  ret i8 %bf.set20
+}
+
+define i8 @src_2_bitfield_const(i8 %x) {
+; CHECK-LABEL: @src_2_bitfield_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 11
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i8 [[TMP0]], 9
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[X]], 20
+; CHECK-NEXT:    [[BF_SET20:%.*]] = xor i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i8 [[BF_SET20]]
+;
+entry:
+  %narrow = add i8 %x, 1
+  %bf.value = and i8 %narrow, 7
+  %bf.lshr = and i8 %x, 24
+  %bf.lshr1228 = add i8 %bf.lshr, 8
+  %bf.shl = and i8 %bf.lshr1228, 24
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  ret i8 %bf.set20
+}
+
+define i8 @src_3_bitfield_op(i8 %x, i8 %y) {
+; CHECK-LABEL: @src_3_bitfield_op(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 107
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[Y:%.*]], 107
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw i8 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[TMP4:%.*]] = and i8 [[TMP3]], -108
+; CHECK-NEXT:    [[BF_SET33:%.*]] = xor i8 [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    ret i8 [[BF_SET33]]
+;
+entry:
+  %narrow = add i8 %y, %x
+  %bf.value = and i8 %narrow, 7
+  %bf.lshr = and i8 %x, 24
+  %bf.lshr1244 = add i8 %bf.lshr, %y
+  %bf.shl = and i8 %bf.lshr1244, 24
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  %bf.lshr22 = and i8 %x, -32
+  %bf.lshr2547 = add i8 %bf.lshr22, %y
+  %bf.value30 = and i8 %bf.lshr2547, -32
+  %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+  ret i8 %bf.set33
+}
+
+define i8 @src_3_bitfield_const(i8 %x) {
+; CHECK-LABEL: @src_3_bitfield_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 107
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw i8 [[TMP0]], 41
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[X]], -108
+; CHECK-NEXT:    [[BF_SET33:%.*]] = xor i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i8 [[BF_SET33]]
+;
+entry:
+  %narrow = add i8 %x, 1
+  %bf.value = and i8 %narrow, 7
+  %bf.lshr = and i8 %x, 24
+  %bf.lshr1244 = add i8 %bf.lshr, 8
+  %bf.shl = and i8 %bf.lshr1244, 24
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  %bf.lshr22 = and i8 %x, -32
+  %bf.lshr2547 = add i8 %bf.lshr22, 32
+  %bf.value30 = and i8 %bf.lshr2547, -32
+  %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+  ret i8 %bf.set33
+}
+
+; test or disjoint which used for BitField Arithmetic.
+; Negative
+define i8 @src_bit_arithmetic_bitsize_1_low(i8 %x, i8 %y) {
+; CHECK-LABEL: @src_bit_arithmetic_bitsize_1_low(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[BF_VALUE:%.*]] = and i8 [[NARROW]], 1
+; CHECK-NEXT:    [[BF_LSHR:%.*]] = and i8 [[X]], 30
+; CHECK-NEXT:    [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]]
+; CHECK-NEXT:    [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 30
+; CHECK-NEXT:    [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT:    [[BF_LSHR22:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT:    [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT:    [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32
+; CHECK-NEXT:    [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT:    ret i8 [[BF_SET33]]
+;
+entry:
+  %narrow = add i8 %y, %x
+  %bf.value = and i8 %narrow, 1
+  %bf.lshr = and i8 %x, 30
+  %bf.lshr1244 = add i8 %bf.lshr, %y
+  %bf.shl = and i8 %bf.lshr1244, 30
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  %bf.lshr22 = and i8 %x, -32
+  %bf.lshr2547 = add i8 %bf.lshr22, %y
+  %bf.value30 = and i8 %bf.lshr2547, -32
+  %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+  ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_bitsize_1_mid(i8 %x, i8 %y) {
+; CHECK-LABEL: @src_bit_arithmetic_bitsize_1_mid(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[BF_VALUE:%.*]] = and i8 [[NARROW]], 15
+; CHECK-NEXT:    [[BF_LSHR:%.*]] = and i8 [[X]], 16
+; CHECK-NEXT:    [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]]
+; CHECK-NEXT:    [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 16
+; CHECK-NEXT:    [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT:    [[BF_LSHR22:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT:    [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT:    [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32
+; CHECK-NEXT:    [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT:    ret i8 [[BF_SET33]]
+;
+entry:
+  %narrow = add i8 %y, %x
+  %bf.value = and i8 %narrow, 15
+  %bf.lshr = and i8 %x, 16
+  %bf.lshr1244 = add i8 %bf.lshr, %y
+  %bf.shl = and i8 %bf.lshr1244, 16
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  %bf.lshr22 = and i8 %x, -32
+  %bf.lshr2547 = add i8 %bf.lshr22, %y
+  %bf.value30 = and i8 %bf.lshr2547, -32
+  %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+  ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_bitsize_1_high(i8 %x, i8 %y) {
+; CHECK-LABEL: @src_bit_arithmetic_bitsize_1_high(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 59
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[Y:%.*]], 59
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i8 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[TMP4:%.*]] = and i8 [[TMP3]], 68
+; CHECK-NEXT:    [[BF_SET20:%.*]] = xor i8 [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[BF_LSHR22:%.*]] = and i8 [[X]], -128
+; CHECK-NEXT:    [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT:    [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -128
+; CHECK-NEXT:    [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT:    ret i8 [[BF_SET33]]
+;
+entry:
+  %narrow = add i8 %y, %x
+  %bf.value = and i8 %narrow, 7
+  %bf.lshr = and i8 %x, 120
+  %bf.lshr1244 = add i8 %bf.lshr, %y
+  %bf.shl = and i8 %bf.lshr1244, 120
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  %bf.lshr22 = and i8 %x, -128
+  %bf.lshr2547 = add i8 %bf.lshr22, %y
+  %bf.value30 = and i8 %bf.lshr2547, -128
+  %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+  ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_bitmask_low_over_mid(i8 %x, i8 %y) {
+; CHECK-LABEL: @src_bit_arithmetic_bitmask_low_over_mid(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[BF_VALUE:%.*]] = and i8 [[NARROW]], 17
+; CHECK-NEXT:    [[BF_LSHR:%.*]] = and i8 [[X]], 24
+; CHECK-NEXT:    [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]]
+; CHECK-NEXT:    [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24
+; CHECK-NEXT:    [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT:    [[BF_LSHR22:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT:    [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT:    [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32
+; CHECK-NEXT:    [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT:    ret i8 [[BF_SET33]]
+;
+entry:
+  %narrow = add i8 %y, %x
+  %bf.value = and i8 %narrow, 17
+  %bf.lshr = and i8 %x, 24
+  %bf.lshr1244 = add i8 %bf.lshr, %y
+  %bf.shl = and i8 %bf.lshr1244, 24
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  %bf.lshr22 = and i8 %x, -32
+  %bf.lshr2547 = add i8 %bf.lshr22, %y
+  %bf.value30 = and i8 %bf.lshr2547, -32
+  %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+  ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_bitmask_mid_over_high(i8 %x, i8 %y) {
+; CHECK-LABEL: @src_bit_arithmetic_bitmask_mid_over_high(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 27
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[Y:%.*]], 27
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i8 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[TMP4:%.*]] = and i8 [[TMP3]], 36
+; CHECK-NEXT:    [[BF_SET20:%.*]] = xor i8 [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[BF_LSHR22:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT:    [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT:    [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32
+; CHECK-NEXT:    [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT:    ret i8 [[BF_SET33]]
+;
+entry:
+  %narrow = add i8 %y, %x
+  %bf.value = and i8 %narrow, 7
+  %bf.lshr = and i8 %x, 56
+  %bf.lshr1244 = add i8 %bf.lshr, %y
+  %bf.shl = and i8 %bf.lshr1244, 56
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  %bf.lshr22 = and i8 %x, -32
+  %bf.lshr2547 = add i8 %bf.lshr22, %y
+  %bf.value30 = and i8 %bf.lshr2547, -32
+  %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+  ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_bitmask_mid_under_lower(i8 %x, i8 %y) {
+; CHECK-LABEL: @src_bit_arithmetic_bitmask_mid_under_lower(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7
+; CHECK-NEXT:    [[BF_LSHR:%.*]] = and i8 [[X]], 28
+; CHECK-NEXT:    [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]]
+; CHECK-NEXT:    [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24
+; CHECK-NEXT:    [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT:    [[BF_LSHR22:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT:    [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT:    [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32
+; CHECK-NEXT:    [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT:    ret i8 [[BF_SET33]]
+;
+entry:
+  %narrow = add i8 %y, %x
+  %bf.value = and i8 %narrow, 7
+  %bf.lshr = and i8 %x, 28
+  %bf.lshr1244 = add i8 %bf.lshr, %y
+  %bf.shl = and i8 %bf.lshr1244, 24
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  %bf.lshr22 = and i8 %x, -32
+  %bf.lshr2547 = add i8 %bf.lshr22, %y
+  %bf.value30 = and i8 %bf.lshr2547, -32
+  %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+  ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_bitmask_high_under_mid(i8 %x, i8 %y) {
+; CHECK-LABEL: @src_bit_arithmetic_bitmask_high_under_mid(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 11
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[Y:%.*]], 11
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i8 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[TMP4:%.*]] = and i8 [[TMP3]], 20
+; CHECK-NEXT:    [[BF_SET20:%.*]] = xor i8 [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[BF_LSHR22:%.*]] = and i8 [[X]], -16
+; CHECK-NEXT:    [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT:    [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -16
+; CHECK-NEXT:    [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT:    ret i8 [[BF_SET33]]
+;
+entry:
+  %narrow = add i8 %y, %x
+  %bf.value = and i8 %narrow, 7
+  %bf.lshr = and i8 %x, 24
+  %bf.lshr1244 = add i8 %bf.lshr, %y
+  %bf.shl = and i8 %bf.lshr1244, 24
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  %bf.lshr22 = and i8 %x, -16
+  %bf.lshr2547 = add i8 %bf.lshr22, %y
+  %bf.value30 = and i8 %bf.lshr2547, -16
+  %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+  ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_addition_over_bitmask_low(i8 %x) {
+; CHECK-LABEL: @src_bit_arithmetic_addition_over_bitmask_low(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BF_VALUE:%.*]] = and i8 [[X:%.*]], 7
+; CHECK-NEXT:    [[BF_LSHR1244:%.*]] = add i8 [[X]], 8
+; CHECK-NEXT:    [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24
+; CHECK-NEXT:    [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT:    [[BF_VALUE30:%.*]] = add i8 [[TMP0]], 32
+; CHECK-NEXT:    [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT:    ret i8 [[BF_SET33]]
+;
+entry:
+  %narrow = add i8 %x, 8
+  %bf.value = and i8 %narrow, 7
+  %bf.lshr = and i8 %x, 24
+  %bf.lshr1244 = add i8 %bf.lshr, 8
+  %bf.shl = and i8 %bf.lshr1244, 24
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  %bf.lshr22 = and i8 %x, -32
+  %bf.lshr2547 = add i8 %bf.lshr22, 32
+  %bf.value30 = and i8 %bf.lshr2547, -32
+  %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+  ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_addition_over_bitmask_mid(i8 %x) {
+; CHECK-LABEL: @src_bit_arithmetic_addition_over_bitmask_mid(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[NARROW:%.*]] = add i8 [[X:%.*]], 1
+; CHECK-NEXT:    [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7
+; CHECK-NEXT:    [[BF_LSHR:%.*]] = and i8 [[X]], 24
+; CHECK-NEXT:    [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_LSHR]]
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT:    [[BF_VALUE30:%.*]] = add i8 [[TMP0]], 32
+; CHECK-NEXT:    [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT:    ret i8 [[BF_SET33]]
+;
+entry:
+  %narrow = add i8 %x, 1
+  %bf.value = and i8 %narrow, 7
+  %bf.lshr = and i8 %x, 24
+  %bf.lshr1244 = add i8 %bf.lshr, 32
+  %bf.shl = and i8 %bf.lshr1244, 24
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  %bf.lshr22 = and i8 %x, -32
+  %bf.lshr2547 = add i8 %bf.lshr22, 32
+  %bf.value30 = and i8 %bf.lshr2547, -32
+  %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+  ret i8 %bf.set33
+}
+
+define i8 @...
[truncated]

@dtcxzyw dtcxzyw changed the title Issue 33874 [InstCombine] Improve bitfield addition Jan 6, 2024
@RKSimon RKSimon self-requested a review January 8, 2024 12:50
@ParkHanbum
Copy link
Contributor Author

@RKSimon is this need to update for conflicts?

@RKSimon
Copy link
Collaborator

RKSimon commented Jan 23, 2024

Yes, the patch doesn't currently merge cleanly with trunk.

@RKSimon
Copy link
Collaborator

RKSimon commented Jan 23, 2024

@ParkHanbum reopen this? It still needs quite a bit of work tbh

@ParkHanbum
Copy link
Contributor Author

@RKSimon sorry, some problems occurred while rebasing. I'll recover this ASAP.

@RKSimon
Copy link
Collaborator

RKSimon commented Jan 23, 2024

@nikic can hopefully advise here but I think we can refactor this in more generic terms than exact bitfield patterns.

@ParkHanbum ParkHanbum reopened this Jan 23, 2024
@ParkHanbum ParkHanbum force-pushed the issue_33874 branch 2 times, most recently from cb2b2b6 to 1db7e0c Compare January 23, 2024 12:54
@ParkHanbum
Copy link
Contributor Author

I think it is ok now.

@RKSimon
Copy link
Collaborator

RKSimon commented Feb 2, 2024

@ParkHanbum Sorry for slow response - please can you fix the merge conflicts?

@ParkHanbum
Copy link
Contributor Author

@ParkHanbum Sorry for slow response - please can you fix the merge conflicts?

sure, I'll do it ASAP!!

@ParkHanbum ParkHanbum force-pushed the issue_33874 branch 2 times, most recently from d5ae641 to a10b733 Compare February 3, 2024 08:56
@ParkHanbum
Copy link
Contributor Author

It seems weird: I see only 2 commits pushed, but there are many changes in the comparison view. If this is a problem, let me know.

dtcxzyw added a commit to dtcxzyw/llvm-opt-benchmark that referenced this pull request Feb 4, 2024
llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp Outdated Show resolved Hide resolved
llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp Outdated Show resolved Hide resolved
llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp Outdated Show resolved Hide resolved
};
};

static Value *foldBitFieldArithmetic(BinaryOperator &I,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To address the reported issues, this has been written in terms of improving bitfield math, but I'm not certain if we should be addressing this in terms of more generic canonicalizations or not - are we likely to hit sub-parts of this in other places do you think?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Honestly, I have no idea. I focused on how to implement your optimization idea within the bounds of my LLVM knowledge. Please give me some advice so that I can think about it — what do you mean by "more generic canonicalizations"?

@ParkHanbum ParkHanbum force-pushed the issue_33874 branch 2 times, most recently from ea35412 to ba6f4a8 Compare February 25, 2024 07:37
// OptUpMask is its result.
m_CombineOr(m_And(m_Deferred(X), m_APInt(OptUpMask)),
m_And(m_c_Xor(m_Deferred(X), m_Value(UpY)),
m_APInt(OptUpMask)))),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean to be overwriting OptUpMask here and OptLoMask above?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but I made a mistake and fixed it

Copy link

github-actions bot commented Jul 21, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

Improve bitfield arithmetic
5 participants