From 74e39ddcf5d5e039e804cdd3c4943e32ed11af5f Mon Sep 17 00:00:00 2001 From: Rose Date: Tue, 16 Jul 2024 12:12:29 -0400 Subject: [PATCH] [AArch64] Use isKnownNonZero to optimize eligible compares to cmn Turning a cmp into cmn saves an extra mov and negate instruction, so take that into account when choosing when to flip the compare operands. Also do not consider right-hand operands whose absolute value can be encoded into a cmn. adds 0 and sub 0 differ in their carry handling, which matters for unsigned comparisons. The problematic case for unsigned comparisons occurs only when the second argument is zero. Source: https://devblogs.microsoft.com/oldnewthing/20210607-00/?p=105288 --- .../Target/AArch64/AArch64ISelLowering.cpp | 44 +++++++----- llvm/test/CodeGen/AArch64/cmp-chains.ll | 72 ++++++++++++------- llvm/test/CodeGen/AArch64/cmp-select-sign.ll | 15 ++-- 3 files changed, 81 insertions(+), 50 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index df9b0ae1a632f3..4c8726d6534501 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3385,6 +3385,11 @@ static bool isLegalArithImmed(uint64_t C) { return IsLegal; } +static bool cannotBeIntMin(SDValue CheckedVal, SelectionDAG &DAG) { + KnownBits KnownSrc = DAG.computeKnownBits(CheckedVal); + return !KnownSrc.getSignedMinValue().isMinSignedValue(); +} + // Can a (CMP op1, (sub 0, op2) be turned into a CMN instruction on // the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags // can be set differently by this operation. It comes down to whether @@ -3392,12 +3397,14 @@ static bool isLegalArithImmed(uint64_t C) { // everything is fine. If not then the optimization is wrong. Thus general // comparisons are only valid if op2 != 0. // -// So, finally, the only LLVM-native comparisons that don't mention C and V -// are SETEQ and SETNE. 
They're the only ones we can safely use CMN for in -// the absence of information about op2. -static bool isCMN(SDValue Op, ISD::CondCode CC) { +// So, finally, the only LLVM-native comparisons that don't mention C or V +// are the ones that aren't unsigned comparisons. They're the only ones we can +// safely use CMN for in the absence of information about op2. +static bool isCMN(SDValue Op, ISD::CondCode CC, SelectionDAG &DAG) { return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) && - (CC == ISD::SETEQ || CC == ISD::SETNE); + (isIntEqualitySetCC(CC) || + (isUnsignedIntSetCC(CC) ? DAG.isKnownNeverZero(Op.getOperand(1)) + : cannotBeIntMin(Op.getOperand(1), DAG))); } static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl, @@ -3442,11 +3449,12 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, // register to WZR/XZR if it ends up being unused. unsigned Opcode = AArch64ISD::SUBS; - if (isCMN(RHS, CC)) { + if (isCMN(RHS, CC, DAG)) { // Can we combine a (CMP op1, (sub 0, op2) into a CMN instruction ? Opcode = AArch64ISD::ADDS; RHS = RHS.getOperand(1); - } else if (isCMN(LHS, CC)) { + } else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) && + isIntEqualitySetCC(CC)) { // As we are looking for EQ/NE compares, the operands can be commuted ; can // we combine a (CMP (sub 0, op1), op2) into a CMN instruction ? Opcode = AArch64ISD::ADDS; @@ -3548,13 +3556,15 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, Opcode = AArch64ISD::CCMN; RHS = DAG.getConstant(Imm.abs(), DL, Const->getValueType(0)); } - } else if (RHS.getOpcode() == ISD::SUB) { - SDValue SubOp0 = RHS.getOperand(0); - if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { - // See emitComparison() on why we can only do this for SETEQ and SETNE. 
- Opcode = AArch64ISD::CCMN; - RHS = RHS.getOperand(1); - } + } else if (isCMN(RHS, CC, DAG)) { + Opcode = AArch64ISD::CCMN; + RHS = RHS.getOperand(1); + } else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) && + isIntEqualitySetCC(CC)) { + // As we are looking for EQ/NE compares, the operands can be commuted ; can + // we combine a (CMP (sub 0, op1), op2) into a CMN instruction ? + Opcode = AArch64ISD::CCMN; + LHS = LHS.getOperand(1); } if (Opcode == 0) Opcode = AArch64ISD::CCMP; @@ -3872,8 +3882,8 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, // cmp w12, w11, lsl #1 if (!isa<ConstantSDNode>(RHS) || !isLegalArithImmed(RHS->getAsAPIntVal().abs().getZExtValue())) { - bool LHSIsCMN = isCMN(LHS, CC); - bool RHSIsCMN = isCMN(RHS, CC); + bool LHSIsCMN = isCMN(LHS, CC, DAG); + bool RHSIsCMN = isCMN(RHS, CC, DAG); SDValue TheLHS = LHSIsCMN ? LHS.getOperand(1) : LHS; SDValue TheRHS = RHSIsCMN ? RHS.getOperand(1) : RHS; @@ -3886,7 +3896,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Cmp; AArch64CC::CondCode AArch64CC; - if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) { + if (isIntEqualitySetCC(CC) && isa<ConstantSDNode>(RHS)) { const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS); // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095. 
diff --git a/llvm/test/CodeGen/AArch64/cmp-chains.ll b/llvm/test/CodeGen/AArch64/cmp-chains.ll index 8178f10bbe5f00..dc6d97c29ce7ef 100644 --- a/llvm/test/CodeGen/AArch64/cmp-chains.ll +++ b/llvm/test/CodeGen/AArch64/cmp-chains.ll @@ -260,14 +260,22 @@ define i32 @neg_range_int(i32 %a, i32 %b, i32 %c) { ; (b > -(d | 1) && a < c) define i32 @neg_range_int_comp(i32 %a, i32 %b, i32 %c, i32 %d) { -; CHECK-LABEL: neg_range_int_comp: -; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w3, #0x1 -; CHECK-NEXT: cmp w0, w2 -; CHECK-NEXT: neg w8, w8 -; CHECK-NEXT: ccmp w1, w8, #4, lt -; CHECK-NEXT: csel w0, w1, w0, gt -; CHECK-NEXT: ret +; SDISEL-LABEL: neg_range_int_comp: +; SDISEL: // %bb.0: +; SDISEL-NEXT: orr w8, w3, #0x1 +; SDISEL-NEXT: cmp w0, w2 +; SDISEL-NEXT: ccmn w1, w8, #4, lt +; SDISEL-NEXT: csel w0, w1, w0, gt +; SDISEL-NEXT: ret +; +; GISEL-LABEL: neg_range_int_comp: +; GISEL: // %bb.0: +; GISEL-NEXT: orr w8, w3, #0x1 +; GISEL-NEXT: cmp w0, w2 +; GISEL-NEXT: neg w8, w8 +; GISEL-NEXT: ccmp w1, w8, #4, lt +; GISEL-NEXT: csel w0, w1, w0, gt +; GISEL-NEXT: ret %dor = or i32 %d, 1 %negd = sub i32 0, %dor %cmp = icmp sgt i32 %b, %negd @@ -279,14 +287,22 @@ define i32 @neg_range_int_comp(i32 %a, i32 %b, i32 %c, i32 %d) { ; (b >u -(d | 1) && a < c) define i32 @neg_range_int_comp_u(i32 %a, i32 %b, i32 %c, i32 %d) { -; CHECK-LABEL: neg_range_int_comp_u: -; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w3, #0x1 -; CHECK-NEXT: cmp w0, w2 -; CHECK-NEXT: neg w8, w8 -; CHECK-NEXT: ccmp w1, w8, #0, lt -; CHECK-NEXT: csel w0, w1, w0, hi -; CHECK-NEXT: ret +; SDISEL-LABEL: neg_range_int_comp_u: +; SDISEL: // %bb.0: +; SDISEL-NEXT: orr w8, w3, #0x1 +; SDISEL-NEXT: cmp w0, w2 +; SDISEL-NEXT: ccmn w1, w8, #0, lt +; SDISEL-NEXT: csel w0, w1, w0, hi +; SDISEL-NEXT: ret +; +; GISEL-LABEL: neg_range_int_comp_u: +; GISEL: // %bb.0: +; GISEL-NEXT: orr w8, w3, #0x1 +; GISEL-NEXT: cmp w0, w2 +; GISEL-NEXT: neg w8, w8 +; GISEL-NEXT: ccmp w1, w8, #0, lt +; GISEL-NEXT: csel w0, w1, w0, hi +; GISEL-NEXT: ret 
%dor = or i32 %d, 1 %negd = sub i32 0, %dor %cmp = icmp ugt i32 %b, %negd @@ -298,14 +314,22 @@ define i32 @neg_range_int_comp_u(i32 %a, i32 %b, i32 %c, i32 %d) { ; (b > -(d | 1) && a u < c) define i32 @neg_range_int_comp_ua(i32 %a, i32 %b, i32 %c, i32 %d) { -; CHECK-LABEL: neg_range_int_comp_ua: -; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w3, #0x1 -; CHECK-NEXT: cmp w0, w2 -; CHECK-NEXT: neg w8, w8 -; CHECK-NEXT: ccmp w1, w8, #4, lo -; CHECK-NEXT: csel w0, w1, w0, gt -; CHECK-NEXT: ret +; SDISEL-LABEL: neg_range_int_comp_ua: +; SDISEL: // %bb.0: +; SDISEL-NEXT: orr w8, w3, #0x1 +; SDISEL-NEXT: cmp w0, w2 +; SDISEL-NEXT: ccmn w1, w8, #4, lo +; SDISEL-NEXT: csel w0, w1, w0, gt +; SDISEL-NEXT: ret +; +; GISEL-LABEL: neg_range_int_comp_ua: +; GISEL: // %bb.0: +; GISEL-NEXT: orr w8, w3, #0x1 +; GISEL-NEXT: cmp w0, w2 +; GISEL-NEXT: neg w8, w8 +; GISEL-NEXT: ccmp w1, w8, #4, lo +; GISEL-NEXT: csel w0, w1, w0, gt +; GISEL-NEXT: ret %dor = or i32 %d, 1 %negd = sub i32 0, %dor %cmp = icmp sgt i32 %b, %negd diff --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll index 6a9b03ec51cc93..a3861804bed158 100644 --- a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll +++ b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll @@ -266,9 +266,8 @@ define i32 @or_neg(i32 %x, i32 %y) { ; CHECK-LABEL: or_neg: ; CHECK: // %bb.0: ; CHECK-NEXT: orr w8, w0, #0x1 -; CHECK-NEXT: neg w8, w8 -; CHECK-NEXT: cmp w8, w1 -; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: cmn w1, w8 +; CHECK-NEXT: cset w0, lt ; CHECK-NEXT: ret %3 = or i32 %x, 1 %4 = sub i32 0, %3 @@ -281,9 +280,8 @@ define i32 @or_neg_ult(i32 %x, i32 %y) { ; CHECK-LABEL: or_neg_ult: ; CHECK: // %bb.0: ; CHECK-NEXT: orr w8, w0, #0x1 -; CHECK-NEXT: neg w8, w8 -; CHECK-NEXT: cmp w8, w1 -; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: cmn w1, w8 +; CHECK-NEXT: cset w0, lo ; CHECK-NEXT: ret %3 = or i32 %x, 1 %4 = sub i32 0, %3 @@ -326,9 +324,8 @@ define i32 @or_neg_no_smin_but_zero(i32 %x, i32 %y) { ; CHECK-LABEL: 
or_neg_no_smin_but_zero: ; CHECK: // %bb.0: ; CHECK-NEXT: bic w8, w0, w0, asr #31 -; CHECK-NEXT: neg w8, w8 -; CHECK-NEXT: cmp w8, w1 -; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: cmn w1, w8 +; CHECK-NEXT: cset w0, lt ; CHECK-NEXT: ret %3 = call i32 @llvm.smax.i32(i32 %x, i32 0) %4 = sub i32 0, %3