From 88dddd61c8cc5dd6e9615832d9ab7f265389939e Mon Sep 17 00:00:00 2001 From: Rose Date: Fri, 21 Jun 2024 15:26:02 -0400 Subject: [PATCH] [AArch64] Use isKnownNonZero to optimize to cmn instead of cmp --- .../Target/AArch64/AArch64ISelLowering.cpp | 38 +++++++++++++++---- llvm/test/CodeGen/AArch64/cmp-chains.ll | 3 +- llvm/test/CodeGen/AArch64/cmp-select-sign.ll | 3 +- llvm/test/CodeGen/AArch64/urem-seteq.ll | 3 +- 4 files changed, 33 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 61de30cdcaef6b..84b0020e66b574 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3394,9 +3394,11 @@ static bool isLegalArithImmed(uint64_t C) { // So, finally, the only LLVM-native comparisons that don't mention C and V // are SETEQ and SETNE. They're the only ones we can safely use CMN for in // the absence of information about op2. -static bool isCMN(SDValue Op, ISD::CondCode CC) { +static bool isCMN(SDValue Op, SDValue CheckedVal, ISD::CondCode CC, + SelectionDAG &DAG) { return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) && - (CC == ISD::SETEQ || CC == ISD::SETNE); + (CC == ISD::SETEQ || CC == ISD::SETNE || + DAG.isKnownNeverZero(CheckedVal)); } static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl, @@ -3441,15 +3443,24 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, // register to WZR/XZR if it ends up being unused. unsigned Opcode = AArch64ISD::SUBS; - if (isCMN(RHS, CC)) { + if (RHS.getOpcode() == ISD::SUB && isCMN(RHS, RHS.getOperand(1), CC, DAG)) { // Can we combine a (CMP op1, (sub 0, op2) into a CMN instruction ? 
Opcode = AArch64ISD::ADDS; RHS = RHS.getOperand(1); - } else if (isCMN(LHS, CC)) { + } else if (isCMN(LHS, RHS, CC, DAG)) { // As we are looking for EQ/NE compares, the operands can be commuted ; can // we combine a (CMP (sub 0, op1), op2) into a CMN instruction ? + // Not swapping operands, but negation requires inversion. NOTE(review): CC is a by-value parameter of emitComparison, so this swap is invisible to the caller that maps CC to an AArch64 condition code — confirm the swapped CC actually takes effect for the non-EQ/NE cases this patch enables. + CC = ISD::getSetCCSwappedOperands(CC); Opcode = AArch64ISD::ADDS; LHS = LHS.getOperand(1); + } else if (LHS.getOpcode() == ISD::SUB && isCMN(LHS, LHS.getOperand(1), CC, DAG)) { + // As we are looking for EQ/NE compares, the operands can be commuted ; can + // we combine a (CMP (sub 0, op1), op2) into a CMN instruction ? + std::swap(LHS, RHS); + CC = ISD::getSetCCSwappedOperands(CC); + Opcode = AArch64ISD::ADDS; + RHS = RHS.getOperand(1); } else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) { if (LHS.getOpcode() == ISD::AND) { // Similarly, (CMP (and X, Y), 0) can be implemented with a TST @@ -3549,11 +3560,22 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, } } else if (RHS.getOpcode() == ISD::SUB) { SDValue SubOp0 = RHS.getOperand(0); - if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { + if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE || + DAG.isKnownNeverZero(RHS.getOperand(1)))) { // See emitComparison() on why we can only do this for SETEQ and SETNE. Opcode = AArch64ISD::CCMN; RHS = RHS.getOperand(1); } + } else if (LHS.getOpcode() == ISD::SUB) { + SDValue SubOp0 = LHS.getOperand(0); + if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE || + DAG.isKnownNeverZero(LHS.getOperand(1)))) { + // See emitComparison() on why we can do this for SETEQ/SETNE or a known non-zero negated operand. NOTE(review): the std::swap below also swaps CC, but OutCC was derived from the original CC by the caller — confirm OutCC is still the correct condition after this swap. 
+ std::swap(LHS, RHS); + CC = ISD::getSetCCSwappedOperands(CC); + Opcode = AArch64ISD::CCMN; + RHS = RHS.getOperand(1); + } } if (Opcode == 0) Opcode = AArch64ISD::CCMP; @@ -3870,9 +3892,9 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, // can be turned into: // cmp w12, w11, lsl #1 if (!isa<ConstantSDNode>(RHS) || !isLegalArithImmed(RHS->getAsZExtVal())) { - SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS; - - if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) { + SDValue TheLHS = (LHS.getOpcode() == ISD::SUB && isCMN(LHS, LHS.getOperand(1), CC, DAG)) ? LHS.getOperand(1) : LHS; + SDValue TheRHS = (RHS.getOpcode() == ISD::SUB && isCMN(RHS, RHS.getOperand(1), CC, DAG)) ? RHS.getOperand(1) : RHS; + if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(TheRHS)) { std::swap(LHS, RHS); CC = ISD::getSetCCSwappedOperands(CC); } diff --git a/llvm/test/CodeGen/AArch64/cmp-chains.ll b/llvm/test/CodeGen/AArch64/cmp-chains.ll index d51c9c946f4677..4ea515911b0c51 100644 --- a/llvm/test/CodeGen/AArch64/cmp-chains.ll +++ b/llvm/test/CodeGen/AArch64/cmp-chains.ll @@ -263,8 +263,7 @@ define i32 @neg_range_int_cmn(i32 %a, i32 %b, i32 %c) { ; SDISEL-LABEL: neg_range_int_cmn: ; SDISEL: // %bb.0: ; SDISEL-NEXT: orr w8, w2, #0x1 -; SDISEL-NEXT: neg w8, w8 -; SDISEL-NEXT: cmp w8, w0 +; SDISEL-NEXT: cmn w0, w8 ; SDISEL-NEXT: ccmn w1, #3, #0, le ; SDISEL-NEXT: csel w0, w1, w0, gt ; SDISEL-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll index ca20a7a435a648..036d8202a22b31 100644 --- a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll +++ b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll @@ -266,8 +266,7 @@ define i32 @or_neg(i32 %x, i32 %y) { ; CHECK-LABEL: or_neg: ; CHECK: // %bb.0: ; CHECK-NEXT: orr w8, w0, #0x1 -; CHECK-NEXT: neg w8, w8 -; CHECK-NEXT: cmp w8, w1 +; CHECK-NEXT: cmn w1, w8 ; CHECK-NEXT: cset w0, gt ; CHECK-NEXT: ret %3 = or i32 %x, 1 diff --git a/llvm/test/CodeGen/AArch64/urem-seteq.ll 
b/llvm/test/CodeGen/AArch64/urem-seteq.ll index df87e60c4f8d53..51a74544050095 100644 --- a/llvm/test/CodeGen/AArch64/urem-seteq.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq.ll @@ -242,8 +242,7 @@ define i32 @test_urem_int_min(i32 %X) nounwind { define i32 @test_urem_allones(i32 %X) nounwind { ; CHECK-LABEL: test_urem_allones: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w0 -; CHECK-NEXT: cmp w8, #2 +; CHECK-NEXT: cmn w0, #2 ; CHECK-NEXT: cset w0, lo ; CHECK-NEXT: ret %urem = urem i32 %X, 4294967295