
[AArch64] Take cmn into account when adjusting compare constants #97774

Closed · wants to merge 1 commit

Conversation

@AreaZR (Contributor) commented Jul 4, 2024

No description provided.

@llvmbot (Collaborator) commented Jul 4, 2024

@llvm/pr-subscribers-backend-aarch64

Author: AtariDreams (AtariDreams)

Changes

Patch is 36.50 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/97774.diff

12 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+9-1)
  • (modified) llvm/test/CodeGen/AArch64/arm64-csel.ll (+2-3)
  • (modified) llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll (+10-10)
  • (modified) llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll (+12-8)
  • (modified) llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll (+41-31)
  • (modified) llvm/test/CodeGen/AArch64/select-constant-xor.ll (+2-2)
  • (modified) llvm/test/CodeGen/AArch64/signbit-shift.ll (+4-4)
  • (modified) llvm/test/CodeGen/AArch64/signbit-test.ll (+15-15)
  • (modified) llvm/test/CodeGen/AArch64/tbz-tbnz.ll (+240-66)
  • (modified) llvm/test/CodeGen/AArch64/typepromotion-signed.ll (+8-8)
  • (modified) llvm/test/CodeGen/AArch64/use-cr-result-of-dom-icmp-st.ll (+8-12)
  • (modified) llvm/test/CodeGen/AArch64/win64_vararg.ll (+4-4)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e0c3cc5eddb82..948e1af3372b2 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3813,7 +3813,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              const SDLoc &dl) {
   if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
     EVT VT = RHS.getValueType();
-    uint64_t C = RHSC->getZExtValue();
+    uint64_t C = RHSC->getAPIntValue().abs().getZExtValue();
     if (!isLegalArithImmed(C)) {
       // Constant does not fit, try adjusting it by one?
       switch (CC) {
@@ -3826,6 +3826,8 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
             (VT == MVT::i64 && C != 0x80000000ULL &&
              isLegalArithImmed(C - 1ULL))) {
           CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
+          if (C != RHSC->getAPIntValue())
+            C = (C ^ ~0) + 1;
           C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
           RHS = DAG.getConstant(C, dl, VT);
         }
@@ -3836,6 +3838,8 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
              isLegalArithImmed((uint32_t)(C - 1))) ||
             (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
           CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
+          if (C != RHSC->getAPIntValue())
+            C = (C ^ ~0) + 1;
           C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
           RHS = DAG.getConstant(C, dl, VT);
         }
@@ -3847,6 +3851,8 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
             (VT == MVT::i64 && C != INT64_MAX &&
              isLegalArithImmed(C + 1ULL))) {
           CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
+          if (C != RHSC->getAPIntValue())
+            C = (C ^ ~0) + 1;
           C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
           RHS = DAG.getConstant(C, dl, VT);
         }
@@ -3858,6 +3864,8 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
             (VT == MVT::i64 && C != UINT64_MAX &&
              isLegalArithImmed(C + 1ULL))) {
           CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+          if (C != RHSC->getAPIntValue())
+            C = (C ^ ~0) + 1;
           C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
           RHS = DAG.getConstant(C, dl, VT);
         }
diff --git a/llvm/test/CodeGen/AArch64/arm64-csel.ll b/llvm/test/CodeGen/AArch64/arm64-csel.ll
index 1cf99d1b31a8b..69fad57a683ac 100644
--- a/llvm/test/CodeGen/AArch64/arm64-csel.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-csel.ll
@@ -100,9 +100,8 @@ define i32 @foo7(i32 %a, i32 %b) nounwind {
 ; CHECK-NEXT:    subs w8, w0, w1
 ; CHECK-NEXT:    cneg w9, w8, mi
 ; CHECK-NEXT:    cmn w8, #1
-; CHECK-NEXT:    csel w10, w9, w0, lt
-; CHECK-NEXT:    cmp w8, #0
-; CHECK-NEXT:    csel w0, w10, w9, ge
+; CHECK-NEXT:    csel w8, w9, w0, lt
+; CHECK-NEXT:    csel w0, w8, w9, gt
 ; CHECK-NEXT:    ret
 entry:
   %sub = sub nsw i32 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll b/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll
index 8fbed8bfdb3fd..8c1ed80c44367 100644
--- a/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll
+++ b/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll
@@ -13,9 +13,9 @@
 define i32 @f_i8_sign_extend_inreg(i8 %in, i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: f_i8_sign_extend_inreg:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sxtb w8, w0
-; CHECK-NEXT:    cmp w8, #0
-; CHECK-NEXT:    csel w8, w1, w2, ge
+; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
+; CHECK-NEXT:    cmp w8, w0, sxtb
+; CHECK-NEXT:    csel w8, w1, w2, lt
 ; CHECK-NEXT:    add w0, w8, w0, uxtb
 ; CHECK-NEXT:    ret
 entry:
@@ -35,9 +35,9 @@ B:
 define i32 @f_i16_sign_extend_inreg(i16 %in, i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: f_i16_sign_extend_inreg:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sxth w8, w0
-; CHECK-NEXT:    cmp w8, #0
-; CHECK-NEXT:    csel w8, w1, w2, ge
+; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
+; CHECK-NEXT:    cmp w8, w0, sxth
+; CHECK-NEXT:    csel w8, w1, w2, lt
 ; CHECK-NEXT:    add w0, w8, w0, uxth
 ; CHECK-NEXT:    ret
 entry:
@@ -57,8 +57,8 @@ B:
 define i64 @f_i32_sign_extend_inreg(i32 %in, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: f_i32_sign_extend_inreg:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csel x8, x1, x2, ge
+; CHECK-NEXT:    cmn w0, #1
+; CHECK-NEXT:    csel x8, x1, x2, gt
 ; CHECK-NEXT:    add x0, x8, w0, uxtw
 ; CHECK-NEXT:    ret
 entry:
@@ -145,8 +145,8 @@ define i64 @f_i32_sign_extend_i64(i32 %in, i64 %a, i64 %b) nounwind {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sxtw x8, w0
-; CHECK-NEXT:    cmp x8, #0
-; CHECK-NEXT:    csel x8, x1, x2, ge
+; CHECK-NEXT:    cmn x8, #1
+; CHECK-NEXT:    csel x8, x1, x2, gt
 ; CHECK-NEXT:    add x0, x8, w0, uxtw
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
index eeb1504d8dc77..9a2ca55c42fd8 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
@@ -21,8 +21,9 @@ define i1 @test_signed_i1_f32(float %f) nounwind {
 ; CHECK-LABEL: test_signed_i1_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtzs w8, s0
-; CHECK-NEXT:    ands w8, w8, w8, asr #31
-; CHECK-NEXT:    csinv w8, w8, wzr, ge
+; CHECK-NEXT:    and w8, w8, w8, asr #31
+; CHECK-NEXT:    cmn w8, #1
+; CHECK-NEXT:    csinv w8, w8, wzr, gt
 ; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
     %x = call i1 @llvm.fptosi.sat.i1.f32(float %f)
@@ -197,8 +198,9 @@ define i1 @test_signed_i1_f64(double %f) nounwind {
 ; CHECK-LABEL: test_signed_i1_f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtzs w8, d0
-; CHECK-NEXT:    ands w8, w8, w8, asr #31
-; CHECK-NEXT:    csinv w8, w8, wzr, ge
+; CHECK-NEXT:    and w8, w8, w8, asr #31
+; CHECK-NEXT:    cmn w8, #1
+; CHECK-NEXT:    csinv w8, w8, wzr, gt
 ; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
     %x = call i1 @llvm.fptosi.sat.i1.f64(double %f)
@@ -376,16 +378,18 @@ define i1 @test_signed_i1_f16(half %f) nounwind {
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    fcvt s0, h0
 ; CHECK-CVT-NEXT:    fcvtzs w8, s0
-; CHECK-CVT-NEXT:    ands w8, w8, w8, asr #31
-; CHECK-CVT-NEXT:    csinv w8, w8, wzr, ge
+; CHECK-CVT-NEXT:    and w8, w8, w8, asr #31
+; CHECK-CVT-NEXT:    cmn w8, #1
+; CHECK-CVT-NEXT:    csinv w8, w8, wzr, gt
 ; CHECK-CVT-NEXT:    and w0, w8, #0x1
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_signed_i1_f16:
 ; CHECK-FP16:       // %bb.0:
 ; CHECK-FP16-NEXT:    fcvtzs w8, h0
-; CHECK-FP16-NEXT:    ands w8, w8, w8, asr #31
-; CHECK-FP16-NEXT:    csinv w8, w8, wzr, ge
+; CHECK-FP16-NEXT:    and w8, w8, w8, asr #31
+; CHECK-FP16-NEXT:    cmn w8, #1
+; CHECK-FP16-NEXT:    csinv w8, w8, wzr, gt
 ; CHECK-FP16-NEXT:    and w0, w8, #0x1
 ; CHECK-FP16-NEXT:    ret
     %x = call i1 @llvm.fptosi.sat.i1.f16(half %f)
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index d620a8851ee44..19e1958243928 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -1293,10 +1293,12 @@ define <2 x i1> @test_signed_v2f64_v2i1(<2 x double> %f) {
 ; CHECK-NEXT:    mov d1, v0.d[1]
 ; CHECK-NEXT:    fcvtzs w9, d0
 ; CHECK-NEXT:    fcvtzs w8, d1
-; CHECK-NEXT:    ands w8, w8, w8, asr #31
-; CHECK-NEXT:    csinv w8, w8, wzr, ge
-; CHECK-NEXT:    ands w9, w9, w9, asr #31
-; CHECK-NEXT:    csinv w9, w9, wzr, ge
+; CHECK-NEXT:    and w9, w9, w9, asr #31
+; CHECK-NEXT:    and w8, w8, w8, asr #31
+; CHECK-NEXT:    cmn w8, #1
+; CHECK-NEXT:    csinv w8, w8, wzr, gt
+; CHECK-NEXT:    cmn w9, #1
+; CHECK-NEXT:    csinv w9, w9, wzr, gt
 ; CHECK-NEXT:    fmov s0, w9
 ; CHECK-NEXT:    mov v0.s[1], w8
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -2047,42 +2049,50 @@ define <8 x i1> @test_signed_v8f16_v8i1(<8 x half> %f) {
 ; CHECK-CVT-NEXT:    fcvtl2 v1.4s, v0.8h
 ; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-CVT-NEXT:    mov s2, v1.s[1]
+; CHECK-CVT-NEXT:    mov s3, v1.s[2]
+; CHECK-CVT-NEXT:    mov s4, v1.s[3]
 ; CHECK-CVT-NEXT:    fcvtzs w9, s1
-; CHECK-CVT-NEXT:    fcvtzs w13, s0
+; CHECK-CVT-NEXT:    mov s1, v0.s[1]
+; CHECK-CVT-NEXT:    fcvtzs w12, s0
 ; CHECK-CVT-NEXT:    fcvtzs w8, s2
-; CHECK-CVT-NEXT:    mov s2, v1.s[2]
-; CHECK-CVT-NEXT:    mov s1, v1.s[3]
-; CHECK-CVT-NEXT:    ands w8, w8, w8, asr #31
-; CHECK-CVT-NEXT:    fcvtzs w10, s2
-; CHECK-CVT-NEXT:    mov s2, v0.s[1]
-; CHECK-CVT-NEXT:    fcvtzs w11, s1
+; CHECK-CVT-NEXT:    fcvtzs w10, s3
+; CHECK-CVT-NEXT:    fcvtzs w11, s4
+; CHECK-CVT-NEXT:    fcvtzs w13, s1
+; CHECK-CVT-NEXT:    and w9, w9, w9, asr #31
 ; CHECK-CVT-NEXT:    mov s1, v0.s[2]
+; CHECK-CVT-NEXT:    and w12, w12, w12, asr #31
 ; CHECK-CVT-NEXT:    mov s0, v0.s[3]
-; CHECK-CVT-NEXT:    csinv w8, w8, wzr, ge
-; CHECK-CVT-NEXT:    ands w9, w9, w9, asr #31
-; CHECK-CVT-NEXT:    csinv w9, w9, wzr, ge
-; CHECK-CVT-NEXT:    ands w10, w10, w10, asr #31
-; CHECK-CVT-NEXT:    fcvtzs w12, s2
+; CHECK-CVT-NEXT:    and w8, w8, w8, asr #31
+; CHECK-CVT-NEXT:    and w10, w10, w10, asr #31
+; CHECK-CVT-NEXT:    and w11, w11, w11, asr #31
+; CHECK-CVT-NEXT:    and w13, w13, w13, asr #31
 ; CHECK-CVT-NEXT:    fcvtzs w14, s1
+; CHECK-CVT-NEXT:    cmn w8, #1
+; CHECK-CVT-NEXT:    csinv w8, w8, wzr, gt
+; CHECK-CVT-NEXT:    cmn w9, #1
+; CHECK-CVT-NEXT:    csinv w9, w9, wzr, gt
+; CHECK-CVT-NEXT:    cmn w10, #1
+; CHECK-CVT-NEXT:    csinv w10, w10, wzr, gt
+; CHECK-CVT-NEXT:    cmn w11, #1
 ; CHECK-CVT-NEXT:    fmov s1, w9
-; CHECK-CVT-NEXT:    fcvtzs w9, s0
-; CHECK-CVT-NEXT:    csinv w10, w10, wzr, ge
-; CHECK-CVT-NEXT:    ands w11, w11, w11, asr #31
-; CHECK-CVT-NEXT:    csinv w11, w11, wzr, ge
-; CHECK-CVT-NEXT:    ands w12, w12, w12, asr #31
+; CHECK-CVT-NEXT:    csinv w11, w11, wzr, gt
+; CHECK-CVT-NEXT:    cmn w13, #1
+; CHECK-CVT-NEXT:    csinv w13, w13, wzr, gt
+; CHECK-CVT-NEXT:    cmn w12, #1
+; CHECK-CVT-NEXT:    csinv w9, w12, wzr, gt
 ; CHECK-CVT-NEXT:    mov v1.s[1], w8
-; CHECK-CVT-NEXT:    csinv w12, w12, wzr, ge
-; CHECK-CVT-NEXT:    ands w13, w13, w13, asr #31
-; CHECK-CVT-NEXT:    csinv w13, w13, wzr, ge
-; CHECK-CVT-NEXT:    ands w8, w14, w14, asr #31
+; CHECK-CVT-NEXT:    and w8, w14, w14, asr #31
+; CHECK-CVT-NEXT:    fmov s2, w9
+; CHECK-CVT-NEXT:    fcvtzs w9, s0
+; CHECK-CVT-NEXT:    cmn w8, #1
+; CHECK-CVT-NEXT:    csinv w8, w8, wzr, gt
+; CHECK-CVT-NEXT:    mov v2.s[1], w13
 ; CHECK-CVT-NEXT:    mov v1.s[2], w10
-; CHECK-CVT-NEXT:    fmov s2, w13
-; CHECK-CVT-NEXT:    csinv w8, w8, wzr, ge
-; CHECK-CVT-NEXT:    mov v2.s[1], w12
-; CHECK-CVT-NEXT:    mov v1.s[3], w11
 ; CHECK-CVT-NEXT:    mov v2.s[2], w8
-; CHECK-CVT-NEXT:    ands w8, w9, w9, asr #31
-; CHECK-CVT-NEXT:    csinv w8, w8, wzr, ge
+; CHECK-CVT-NEXT:    and w8, w9, w9, asr #31
+; CHECK-CVT-NEXT:    mov v1.s[3], w11
+; CHECK-CVT-NEXT:    cmn w8, #1
+; CHECK-CVT-NEXT:    csinv w8, w8, wzr, gt
 ; CHECK-CVT-NEXT:    mov v2.s[3], w8
 ; CHECK-CVT-NEXT:    uzp1 v0.8h, v2.8h, v1.8h
 ; CHECK-CVT-NEXT:    xtn v0.8b, v0.8h
diff --git a/llvm/test/CodeGen/AArch64/select-constant-xor.ll b/llvm/test/CodeGen/AArch64/select-constant-xor.ll
index 3adf48e84b44c..7188932dbdff2 100644
--- a/llvm/test/CodeGen/AArch64/select-constant-xor.ll
+++ b/llvm/test/CodeGen/AArch64/select-constant-xor.ll
@@ -110,8 +110,8 @@ define i32 @icmpasreq(i32 %input, i32 %a, i32 %b) {
 define i32 @icmpasrne(i32 %input, i32 %a, i32 %b) {
 ; CHECK-LABEL: icmpasrne:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csel w0, w1, w2, ge
+; CHECK-NEXT:    cmn w0, #1
+; CHECK-NEXT:    csel w0, w1, w2, gt
 ; CHECK-NEXT:    ret
   %sh = ashr i32 %input, 31
   %c = icmp ne i32 %sh, -1
diff --git a/llvm/test/CodeGen/AArch64/signbit-shift.ll b/llvm/test/CodeGen/AArch64/signbit-shift.ll
index 253ea1cab91fb..0e6da326a31f4 100644
--- a/llvm/test/CodeGen/AArch64/signbit-shift.ll
+++ b/llvm/test/CodeGen/AArch64/signbit-shift.ll
@@ -43,8 +43,8 @@ define i32 @sel_ifpos_tval_bigger(i32 %x) {
 ; CHECK-LABEL: sel_ifpos_tval_bigger:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #41 // =0x29
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    cinc w0, w8, ge
+; CHECK-NEXT:    cmn w0, #1
+; CHECK-NEXT:    cinc w0, w8, gt
 ; CHECK-NEXT:    ret
   %c = icmp sgt i32 %x, -1
   %r = select i1 %c, i32 42, i32 41
@@ -91,8 +91,8 @@ define i32 @sel_ifpos_fval_bigger(i32 %x) {
 ; CHECK-LABEL: sel_ifpos_fval_bigger:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #41 // =0x29
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    cinc w0, w8, lt
+; CHECK-NEXT:    cmn w0, #1
+; CHECK-NEXT:    cinc w0, w8, le
 ; CHECK-NEXT:    ret
   %c = icmp sgt i32 %x, -1
   %r = select i1 %c, i32 41, i32 42
diff --git a/llvm/test/CodeGen/AArch64/signbit-test.ll b/llvm/test/CodeGen/AArch64/signbit-test.ll
index f5eaf80cf7f8d..c74a934ee09d8 100644
--- a/llvm/test/CodeGen/AArch64/signbit-test.ll
+++ b/llvm/test/CodeGen/AArch64/signbit-test.ll
@@ -4,9 +4,9 @@
 define i64 @test_clear_mask_i64_i32(i64 %x) nounwind {
 ; CHECK-LABEL: test_clear_mask_i64_i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #42
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csel x0, x8, x0, ge
+; CHECK-NEXT:    mov w8, #42 // =0x2a
+; CHECK-NEXT:    cmn w0, #1
+; CHECK-NEXT:    csel x0, x8, x0, gt
 ; CHECK-NEXT:    ret
 entry:
   %a = and i64 %x, 2147483648
@@ -22,7 +22,7 @@ f:
 define i64 @test_set_mask_i64_i32(i64 %x) nounwind {
 ; CHECK-LABEL: test_set_mask_i64_i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #42
+; CHECK-NEXT:    mov w8, #42 // =0x2a
 ; CHECK-NEXT:    tst x0, #0x80000000
 ; CHECK-NEXT:    csel x0, x8, x0, ne
 ; CHECK-NEXT:    ret
@@ -40,7 +40,7 @@ f:
 define i64 @test_clear_mask_i64_i16(i64 %x) nounwind {
 ; CHECK-LABEL: test_clear_mask_i64_i16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #42
+; CHECK-NEXT:    mov w8, #42 // =0x2a
 ; CHECK-NEXT:    tst x0, #0x8000
 ; CHECK-NEXT:    csel x0, x8, x0, eq
 ; CHECK-NEXT:    ret
@@ -58,7 +58,7 @@ f:
 define i64 @test_set_mask_i64_i16(i64 %x) nounwind {
 ; CHECK-LABEL: test_set_mask_i64_i16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #42
+; CHECK-NEXT:    mov w8, #42 // =0x2a
 ; CHECK-NEXT:    tst x0, #0x8000
 ; CHECK-NEXT:    csel x0, x8, x0, ne
 ; CHECK-NEXT:    ret
@@ -76,7 +76,7 @@ f:
 define i64 @test_clear_mask_i64_i8(i64 %x) nounwind {
 ; CHECK-LABEL: test_clear_mask_i64_i8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #42
+; CHECK-NEXT:    mov w8, #42 // =0x2a
 ; CHECK-NEXT:    tst x0, #0x80
 ; CHECK-NEXT:    csel x0, x8, x0, eq
 ; CHECK-NEXT:    ret
@@ -94,7 +94,7 @@ f:
 define i64 @test_set_mask_i64_i8(i64 %x) nounwind {
 ; CHECK-LABEL: test_set_mask_i64_i8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #42
+; CHECK-NEXT:    mov w8, #42 // =0x2a
 ; CHECK-NEXT:    tst x0, #0x80
 ; CHECK-NEXT:    csel x0, x8, x0, ne
 ; CHECK-NEXT:    ret
@@ -112,7 +112,7 @@ f:
 define i32 @test_clear_mask_i32_i16(i32 %x) nounwind {
 ; CHECK-LABEL: test_clear_mask_i32_i16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #42
+; CHECK-NEXT:    mov w8, #42 // =0x2a
 ; CHECK-NEXT:    tst w0, #0x8000
 ; CHECK-NEXT:    csel w0, w8, w0, eq
 ; CHECK-NEXT:    ret
@@ -130,7 +130,7 @@ f:
 define i32 @test_set_mask_i32_i16(i32 %x) nounwind {
 ; CHECK-LABEL: test_set_mask_i32_i16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #42
+; CHECK-NEXT:    mov w8, #42 // =0x2a
 ; CHECK-NEXT:    tst w0, #0x8000
 ; CHECK-NEXT:    csel w0, w8, w0, ne
 ; CHECK-NEXT:    ret
@@ -148,7 +148,7 @@ f:
 define i32 @test_clear_mask_i32_i8(i32 %x) nounwind {
 ; CHECK-LABEL: test_clear_mask_i32_i8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #42
+; CHECK-NEXT:    mov w8, #42 // =0x2a
 ; CHECK-NEXT:    tst w0, #0x80
 ; CHECK-NEXT:    csel w0, w8, w0, eq
 ; CHECK-NEXT:    ret
@@ -166,7 +166,7 @@ f:
 define i32 @test_set_mask_i32_i8(i32 %x) nounwind {
 ; CHECK-LABEL: test_set_mask_i32_i8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #42
+; CHECK-NEXT:    mov w8, #42 // =0x2a
 ; CHECK-NEXT:    tst w0, #0x80
 ; CHECK-NEXT:    csel w0, w8, w0, ne
 ; CHECK-NEXT:    ret
@@ -184,7 +184,7 @@ f:
 define i16 @test_clear_mask_i16_i8(i16 %x) nounwind {
 ; CHECK-LABEL: test_clear_mask_i16_i8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #42
+; CHECK-NEXT:    mov w8, #42 // =0x2a
 ; CHECK-NEXT:    tst w0, #0x80
 ; CHECK-NEXT:    csel w0, w8, w0, eq
 ; CHECK-NEXT:    ret
@@ -202,7 +202,7 @@ f:
 define i16 @test_set_mask_i16_i8(i16 %x) nounwind {
 ; CHECK-LABEL: test_set_mask_i16_i8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #42
+; CHECK-NEXT:    mov w8, #42 // =0x2a
 ; CHECK-NEXT:    tst w0, #0x80
 ; CHECK-NEXT:    csel w0, w8, w0, ne
 ; CHECK-NEXT:    ret
@@ -220,7 +220,7 @@ f:
 define i16 @test_set_mask_i16_i7(i16 %x) nounwind {
 ; CHECK-LABEL: test_set_mask_i16_i7:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #42
+; CHECK-NEXT:    mov w8, #42 // =0x2a
 ; CHECK-NEXT:    tst w0, #0x40
 ; CHECK-NEXT:    csel w0, w8, w0, ne
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/tbz-tbnz.ll b/llvm/test/CodeGen/AArch64/tbz-tbnz.ll
index d301a380dbb84..86a0f45030f9c 100644
--- a/llvm/test/CodeGen/AArch64/tbz-tbnz.ll
+++ b/llvm/test/CodeGen/AArch64/tbz-tbnz.ll
@@ -1,16 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -O1 -mtriple=aarch64 -aarch64-enable-cond-br-tune=false | FileCheck %s
 
 declare void @t()
 
 define void @test1(i32 %a) {
-; CHECK-LABEL: @test1
+; CHECK-LABEL: test1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub w8, w0, #12
+; CHECK-NEXT:    tbnz w8, #31, .LBB0_2
+; CHECK-NEXT:  // %bb.1: // %if.end
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB0_2: // %if.then
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl t
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
 entry:
   %sub = add nsw i32 %a, -12
   %cmp = icmp slt i32 %sub, 0
   br i1 %cmp, label %if.then, label %if.end
 
-; CHECK: sub [[CMP:w[0-9]+]], w0, #12
-; CHECK: tbnz [[CMP]], #31
 
 if.then:
   call void @t()
@@ -21,14 +32,24 @@ if.end:
 }
 
 define void @test2(i64 %a) {
-; CHECK-LABEL: @test2
+; CHECK-LABEL: test2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub x8, x0, #12
+; CHECK-NEXT:    tbnz x8, #63, .LBB1_2
+; CHECK-NEXT:  // %bb.1: // %if.end
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB1_2: // %if.then
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl t
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
 entry:
   %sub = add nsw i64 %a, -12
   %cmp = icmp slt i64 %sub, 0
   br i1 %cmp, label %if.then, label %if.end
 
-; CHECK: sub [[CMP:x[0-9]+]], x0, #12
-; CHECK: tbnz [[CMP]], #63
 
 if.then:
   call void @t()
@@ -39,14 +60,23 @@ if.end:
 }
 
 define void @test3(i32 %a) {
-; CHECK-LABEL: @test3
+; CHECK-LABEL: test3:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub w8, w0, #12
+; CHECK-NEXT:    tbnz w8, #31, .LBB2_2
+; CHECK-NEXT:  // %bb.1: // %if.then
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl t
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:  .LBB2_2: // %if.end
+; CHECK-NEXT:    ret
 entry:
   %sub = add nsw i32 %a, -12
   %cmp = icmp sgt i32 %sub, -1
   br i1 %cmp, label %if.then, label %if.end
 
-; CHECK: sub [[CMP:w[0-9]+]], w0, #12
-; CHECK: tbnz [[CMP]], #31
 
 if.then:
   call void @t()
@@ -57,14 +87,23 @@ if.end:
 }
 
 define void @test4(i64 %a) {
-; CHECK-LABEL: @test4
+; CHECK-LABEL: test4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub x8, x0, #12
+; CHECK-NEXT:    tbnz x8, #63, .LBB3_2...
[truncated]
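For orientation, the adjustment logic in the first hunk hinges on which immediates an AArch64 cmp/cmn can encode. The following standalone sketch is ours, not LLVM's actual isLegalArithImmed, though it mirrors the same 12-bit-optionally-shifted rule; it shows why nudging an out-of-range constant by one can make it encodable:

#include <cassert>
#include <cstdint>

// AArch64 cmp/cmn accept a 12-bit unsigned immediate, optionally shifted
// left by 12 bits; anything else needs the constant materialized in a register.
static bool isLegalArithImmedSketch(uint64_t C) {
  return (C >> 12) == 0 || ((C & 0xfffULL) == 0 && (C >> 24) == 0);
}

int main() {
  assert(!isLegalArithImmedSketch(4097)); // cmp w0, #4097 is not encodable
  assert(isLegalArithImmedSketch(4096));  // cmp w0, #4096 is (1 << 12)
  // Hence "x < 4097" can be lowered as "x <= 4096" with the condition
  // flipped from lt to le -- the adjust-by-one logic in getAArch64Cmp above.
  return 0;
}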

@AreaZR marked this pull request as draft July 4, 2024 23:34
@AreaZR (Contributor, Author) commented Jul 4, 2024

Another note: it would be nice if we could merge compares such as cmp x1, #0 and cmn x1, #1 when the values they test overlap completely, but I don't know whether that should be done here or somewhere else.
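
For concreteness (our sketch, not part of the original comment): cmn x1, #1 sets flags for x1 + 1, so cmn x1, #1 with gt tests x1 > -1, which is the same predicate as cmp x1, #0 with ge:

#include <cassert>
#include <cstdint>

int main() {
  const int64_t Samples[] = {INT64_MIN, -2, -1, 0, 1, INT64_MAX};
  for (int64_t X : Samples)
    assert((X >= 0) == (X > -1)); // cmp ..., #0 + ge  ==  cmn ..., #1 + gt
  return 0;
}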

There are also some regressions for vectors here for now, but those may be rectified once #97773 and/or #96349 is merged.

…olding

Turning a cmp into a cmn saves an extra mov and negate instruction, so take that into account when deciding whether to flip the compare operands.

Also, do not adjust right-hand operands whose absolute value can already be encoded directly into a cmn.
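
The added lines negate the adjusted constant back when the original right-hand side was negative. A minimal sketch (ours; C stands for the absolute value taken earlier via getAPIntValue().abs()) of the two's-complement identity they rely on:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t C = 5; // absolute value of an original RHS constant of -5
  // XOR with all-ones is bitwise NOT, so (C ^ ~0) + 1 == ~C + 1 == -C.
  uint64_t Negated = (C ^ ~0ULL) + 1;
  assert(Negated == static_cast<uint64_t>(int64_t{-5}));
  return 0;
}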

Revert "[AArch64] Take cmn folding of the right hand side into account when folding"

This reverts commit 213d1cb.

[AArch64] Take cmn into account when adjusting compare constants