From 3ca975521e928d8ac7cc8dcff7c2f4f9cd750317 Mon Sep 17 00:00:00 2001 From: Rose Date: Sat, 20 Jul 2024 11:28:34 -0400 Subject: [PATCH] [AArch64] Use isKnownNonZero to optimize eligible compares to cmn Turning a cmp into cmn saves an extra mov and negate instruction, so take that into account when choosing when to flip the compare operands. Also do not consider right-hand operands whose absolute value can be encoded into a cmn. ADDS with 0 and SUBS with 0 differ in how they set the carry flag, which unsigned comparisons depend on. The problematic case for unsigned comparisons occurs only when the second argument is zero. Source: https://devblogs.microsoft.com/oldnewthing/20210607-00/?p=105288 --- .../Target/AArch64/AArch64ISelLowering.cpp | 44 +++-- llvm/test/CodeGen/AArch64/cmp-chains.ll | 168 ++++++++++++------ llvm/test/CodeGen/AArch64/cmp-select-sign.ll | 40 ++--- 3 files changed, 155 insertions(+), 97 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 84de1ee8f8923d..bfbf6dde927e57 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3403,6 +3403,11 @@ static bool isLegalArithImmed(uint64_t C) { return IsLegal; } +static bool cannotBeIntMin(SDValue CheckedVal, SelectionDAG &DAG) { + KnownBits KnownSrc = DAG.computeKnownBits(CheckedVal); + return !KnownSrc.getSignedMinValue().isMinSignedValue(); +} + // Can a (CMP op1, (sub 0, op2) be turned into a CMN instruction on // the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags // can be set differently by this operation. It comes down to whether @@ -3410,12 +3415,14 @@ static bool isLegalArithImmed(uint64_t C) { // everything is fine. If not then the optimization is wrong. Thus general // comparisons are only valid if op2 != 0. // -// So, finally, the only LLVM-native comparisons that don't mention C and V -// are SETEQ and SETNE. 
They're the only ones we can safely use CMN for in -// the absence of information about op2. -static bool isCMN(SDValue Op, ISD::CondCode CC) { +// So, finally, the only LLVM-native comparisons that read neither C nor V are +// SETEQ and SETNE; they are safe with no information about op2. Unsigned ones +// read C and need op2 != 0; signed ones read V and need op2 != INT_MIN. +static bool isCMN(SDValue Op, ISD::CondCode CC, SelectionDAG &DAG) { return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) && - (CC == ISD::SETEQ || CC == ISD::SETNE); + (isIntEqualitySetCC(CC) || + (isUnsignedIntSetCC(CC) && DAG.isKnownNeverZero(Op.getOperand(1))) || + (isSignedIntSetCC(CC) && cannotBeIntMin(Op.getOperand(1), DAG))); } static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl, @@ -3460,11 +3467,12 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, // register to WZR/XZR if it ends up being unused. unsigned Opcode = AArch64ISD::SUBS; - if (isCMN(RHS, CC)) { + if (isCMN(RHS, CC, DAG)) { // Can we combine a (CMP op1, (sub 0, op2) into a CMN instruction ? Opcode = AArch64ISD::ADDS; RHS = RHS.getOperand(1); - } else if (isCMN(LHS, CC)) { + } else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) && + isIntEqualitySetCC(CC)) { // As we are looking for EQ/NE compares, the operands can be commuted ; can // we combine a (CMP (sub 0, op1), op2) into a CMN instruction ? Opcode = AArch64ISD::ADDS; @@ -3566,13 +3574,15 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, Opcode = AArch64ISD::CCMN; RHS = DAG.getConstant(Imm.abs(), DL, Const->getValueType(0)); } - } else if (RHS.getOpcode() == ISD::SUB) { - SDValue SubOp0 = RHS.getOperand(0); - if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { - // See emitComparison() on why we can only do this for SETEQ and SETNE. 
- Opcode = AArch64ISD::CCMN; - RHS = RHS.getOperand(1); - } + } else if (isCMN(RHS, CC, DAG)) { + Opcode = AArch64ISD::CCMN; + RHS = RHS.getOperand(1); + } else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) && + isIntEqualitySetCC(CC)) { + // As we are looking for EQ/NE compares, the operands can be commuted ; can + // we combine a (CMP (sub 0, op1), op2) into a CMN instruction ? + Opcode = AArch64ISD::CCMN; + LHS = LHS.getOperand(1); } if (Opcode == 0) Opcode = AArch64ISD::CCMP; @@ -3890,8 +3900,8 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, // cmp w12, w11, lsl #1 if (!isa<ConstantSDNode>(RHS) || !isLegalArithImmed(RHS->getAsAPIntVal().abs().getZExtValue())) { - bool LHSIsCMN = isCMN(LHS, CC); - bool RHSIsCMN = isCMN(RHS, CC); + bool LHSIsCMN = isCMN(LHS, CC, DAG); + bool RHSIsCMN = isCMN(RHS, CC, DAG); SDValue TheLHS = LHSIsCMN ? LHS.getOperand(1) : LHS; SDValue TheRHS = RHSIsCMN ? RHS.getOperand(1) : RHS; @@ -3904,7 +3914,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Cmp; AArch64CC::CondCode AArch64CC; - if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) { + if (isIntEqualitySetCC(CC) && isa<ConstantSDNode>(RHS)) { const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS); // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095. 
diff --git a/llvm/test/CodeGen/AArch64/cmp-chains.ll b/llvm/test/CodeGen/AArch64/cmp-chains.ll index bf2072aebfb055..1dfe29be61cc44 100644 --- a/llvm/test/CodeGen/AArch64/cmp-chains.ll +++ b/llvm/test/CodeGen/AArch64/cmp-chains.ll @@ -260,14 +260,22 @@ define i32 @neg_range_int(i32 %a, i32 %b, i32 %c) { ; (b > -(d | 1) && a < c) define i32 @neg_range_int_comp(i32 %a, i32 %b, i32 %c, i32 %d) { -; CHECK-LABEL: neg_range_int_comp: -; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w3, #0x1 -; CHECK-NEXT: cmp w0, w2 -; CHECK-NEXT: neg w8, w8 -; CHECK-NEXT: ccmp w1, w8, #4, lt -; CHECK-NEXT: csel w0, w1, w0, gt -; CHECK-NEXT: ret +; SDISEL-LABEL: neg_range_int_comp: +; SDISEL: // %bb.0: +; SDISEL-NEXT: orr w8, w3, #0x1 +; SDISEL-NEXT: cmp w0, w2 +; SDISEL-NEXT: ccmn w1, w8, #4, lt +; SDISEL-NEXT: csel w0, w1, w0, gt +; SDISEL-NEXT: ret +; +; GISEL-LABEL: neg_range_int_comp: +; GISEL: // %bb.0: +; GISEL-NEXT: orr w8, w3, #0x1 +; GISEL-NEXT: cmp w0, w2 +; GISEL-NEXT: neg w8, w8 +; GISEL-NEXT: ccmp w1, w8, #4, lt +; GISEL-NEXT: csel w0, w1, w0, gt +; GISEL-NEXT: ret %dor = or i32 %d, 1 %negd = sub i32 0, %dor %cmp = icmp sgt i32 %b, %negd @@ -279,14 +287,22 @@ define i32 @neg_range_int_comp(i32 %a, i32 %b, i32 %c, i32 %d) { ; (b >u -(d | 1) && a < c) define i32 @neg_range_int_comp_u(i32 %a, i32 %b, i32 %c, i32 %d) { -; CHECK-LABEL: neg_range_int_comp_u: -; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w3, #0x1 -; CHECK-NEXT: cmp w0, w2 -; CHECK-NEXT: neg w8, w8 -; CHECK-NEXT: ccmp w1, w8, #0, lt -; CHECK-NEXT: csel w0, w1, w0, hi -; CHECK-NEXT: ret +; SDISEL-LABEL: neg_range_int_comp_u: +; SDISEL: // %bb.0: +; SDISEL-NEXT: orr w8, w3, #0x1 +; SDISEL-NEXT: cmp w0, w2 +; SDISEL-NEXT: ccmn w1, w8, #0, lt +; SDISEL-NEXT: csel w0, w1, w0, hi +; SDISEL-NEXT: ret +; +; GISEL-LABEL: neg_range_int_comp_u: +; GISEL: // %bb.0: +; GISEL-NEXT: orr w8, w3, #0x1 +; GISEL-NEXT: cmp w0, w2 +; GISEL-NEXT: neg w8, w8 +; GISEL-NEXT: ccmp w1, w8, #0, lt +; GISEL-NEXT: csel w0, w1, w0, hi +; GISEL-NEXT: ret 
%dor = or i32 %d, 1 %negd = sub i32 0, %dor %cmp = icmp ugt i32 %b, %negd @@ -298,14 +314,22 @@ define i32 @neg_range_int_comp_u(i32 %a, i32 %b, i32 %c, i32 %d) { ; (b > -(d | 1) && a u < c) define i32 @neg_range_int_comp_ua(i32 %a, i32 %b, i32 %c, i32 %d) { -; CHECK-LABEL: neg_range_int_comp_ua: -; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w3, #0x1 -; CHECK-NEXT: cmp w0, w2 -; CHECK-NEXT: neg w8, w8 -; CHECK-NEXT: ccmp w1, w8, #4, lo -; CHECK-NEXT: csel w0, w1, w0, gt -; CHECK-NEXT: ret +; SDISEL-LABEL: neg_range_int_comp_ua: +; SDISEL: // %bb.0: +; SDISEL-NEXT: orr w8, w3, #0x1 +; SDISEL-NEXT: cmp w0, w2 +; SDISEL-NEXT: ccmn w1, w8, #4, lo +; SDISEL-NEXT: csel w0, w1, w0, gt +; SDISEL-NEXT: ret +; +; GISEL-LABEL: neg_range_int_comp_ua: +; GISEL: // %bb.0: +; GISEL-NEXT: orr w8, w3, #0x1 +; GISEL-NEXT: cmp w0, w2 +; GISEL-NEXT: neg w8, w8 +; GISEL-NEXT: ccmp w1, w8, #4, lo +; GISEL-NEXT: csel w0, w1, w0, gt +; GISEL-NEXT: ret %dor = or i32 %d, 1 %negd = sub i32 0, %dor %cmp = icmp sgt i32 %b, %negd @@ -339,14 +363,22 @@ define i32 @neg_range_int_2(i32 %a, i32 %b, i32 %c) { ; (b < -(d | 1) && a >= c) define i32 @neg_range_int_comp2(i32 %a, i32 %b, i32 %c, i32 %d) { -; CHECK-LABEL: neg_range_int_comp2: -; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w3, #0x1 -; CHECK-NEXT: cmp w0, w2 -; CHECK-NEXT: neg w8, w8 -; CHECK-NEXT: ccmp w1, w8, #0, ge -; CHECK-NEXT: csel w0, w1, w0, lt -; CHECK-NEXT: ret +; SDISEL-LABEL: neg_range_int_comp2: +; SDISEL: // %bb.0: +; SDISEL-NEXT: orr w8, w3, #0x1 +; SDISEL-NEXT: cmp w0, w2 +; SDISEL-NEXT: ccmn w1, w8, #0, ge +; SDISEL-NEXT: csel w0, w1, w0, lt +; SDISEL-NEXT: ret +; +; GISEL-LABEL: neg_range_int_comp2: +; GISEL: // %bb.0: +; GISEL-NEXT: orr w8, w3, #0x1 +; GISEL-NEXT: cmp w0, w2 +; GISEL-NEXT: neg w8, w8 +; GISEL-NEXT: ccmp w1, w8, #0, ge +; GISEL-NEXT: csel w0, w1, w0, lt +; GISEL-NEXT: ret %dor = or i32 %d, 1 %negd = sub i32 0, %dor %cmp = icmp slt i32 %b, %negd @@ -358,14 +390,22 @@ define i32 @neg_range_int_comp2(i32 %a, i32 %b, 
i32 %c, i32 %d) { ; (b <u -(d | 1) && a > c) define i32 @neg_range_int_comp_u2(i32 %a, i32 %b, i32 %c, i32 %d) { -; CHECK-LABEL: neg_range_int_comp_u2: -; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w3, #0x1 -; CHECK-NEXT: cmp w0, w2 -; CHECK-NEXT: neg w8, w8 -; CHECK-NEXT: ccmp w1, w8, #2, gt -; CHECK-NEXT: csel w0, w1, w0, lo -; CHECK-NEXT: ret +; SDISEL-LABEL: neg_range_int_comp_u2: +; SDISEL: // %bb.0: +; SDISEL-NEXT: orr w8, w3, #0x1 +; SDISEL-NEXT: cmp w0, w2 +; SDISEL-NEXT: ccmn w1, w8, #2, gt +; SDISEL-NEXT: csel w0, w1, w0, lo +; SDISEL-NEXT: ret +; +; GISEL-LABEL: neg_range_int_comp_u2: +; GISEL: // %bb.0: +; GISEL-NEXT: orr w8, w3, #0x1 +; GISEL-NEXT: cmp w0, w2 +; GISEL-NEXT: neg w8, w8 +; GISEL-NEXT: ccmp w1, w8, #2, gt +; GISEL-NEXT: csel w0, w1, w0, lo +; GISEL-NEXT: ret %dor = or i32 %d, 1 %negd = sub i32 0, %dor %cmp = icmp ult i32 %b, %negd @@ -377,14 +417,22 @@ define i32 @neg_range_int_comp_u2(i32 %a, i32 %b, i32 %c, i32 %d) { ; (b > -(d | 1) && a u > c) define i32 @neg_range_int_comp_ua2(i32 %a, i32 %b, i32 %c, i32 %d) { -; CHECK-LABEL: neg_range_int_comp_ua2: -; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w3, #0x1 -; CHECK-NEXT: cmp w0, w2 -; CHECK-NEXT: neg w8, w8 -; CHECK-NEXT: ccmp w1, w8, #4, hi -; CHECK-NEXT: csel w0, w1, w0, gt -; CHECK-NEXT: ret +; SDISEL-LABEL: neg_range_int_comp_ua2: +; SDISEL: // %bb.0: +; SDISEL-NEXT: orr w8, w3, #0x1 +; SDISEL-NEXT: cmp w0, w2 +; SDISEL-NEXT: ccmn w1, w8, #4, hi +; SDISEL-NEXT: csel w0, w1, w0, gt +; SDISEL-NEXT: ret +; +; GISEL-LABEL: neg_range_int_comp_ua2: +; GISEL: // %bb.0: +; GISEL-NEXT: orr w8, w3, #0x1 +; GISEL-NEXT: cmp w0, w2 +; GISEL-NEXT: neg w8, w8 +; GISEL-NEXT: ccmp w1, w8, #4, hi +; GISEL-NEXT: csel w0, w1, w0, gt +; GISEL-NEXT: ret %dor = or i32 %d, 1 %negd = sub i32 0, %dor %cmp = icmp sgt i32 %b, %negd @@ -396,14 +444,22 @@ define i32 @neg_range_int_comp_ua2(i32 %a, i32 %b, i32 %c, i32 %d) { ; (b > -(d | 1) && a u == c) define i32 @neg_range_int_comp_ua3(i32 %a, i32 %b, i32 %c, i32 %d) { -; CHECK-LABEL: 
neg_range_int_comp_ua3: -; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w3, #0x1 -; CHECK-NEXT: cmp w0, w2 -; CHECK-NEXT: neg w8, w8 -; CHECK-NEXT: ccmp w1, w8, #4, eq -; CHECK-NEXT: csel w0, w1, w0, gt -; CHECK-NEXT: ret +; SDISEL-LABEL: neg_range_int_comp_ua3: +; SDISEL: // %bb.0: +; SDISEL-NEXT: orr w8, w3, #0x1 +; SDISEL-NEXT: cmp w0, w2 +; SDISEL-NEXT: ccmn w1, w8, #4, eq +; SDISEL-NEXT: csel w0, w1, w0, gt +; SDISEL-NEXT: ret +; +; GISEL-LABEL: neg_range_int_comp_ua3: +; GISEL: // %bb.0: +; GISEL-NEXT: orr w8, w3, #0x1 +; GISEL-NEXT: cmp w0, w2 +; GISEL-NEXT: neg w8, w8 +; GISEL-NEXT: ccmp w1, w8, #4, eq +; GISEL-NEXT: csel w0, w1, w0, gt +; GISEL-NEXT: ret %dor = or i32 %d, 1 %negd = sub i32 0, %dor %cmp = icmp sgt i32 %b, %negd diff --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll index a16528ef871a45..22440b79bdcd46 100644 --- a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll +++ b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll @@ -266,9 +266,8 @@ define i32 @or_neg(i32 %x, i32 %y) { ; CHECK-LABEL: or_neg: ; CHECK: // %bb.0: ; CHECK-NEXT: orr w8, w0, #0x1 -; CHECK-NEXT: neg w8, w8 -; CHECK-NEXT: cmp w8, w1 -; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: cmn w1, w8 +; CHECK-NEXT: cset w0, lt ; CHECK-NEXT: ret %3 = or i32 %x, 1 %4 = sub i32 0, %3 @@ -281,9 +280,8 @@ define i32 @or_neg_ugt(i32 %x, i32 %y) { ; CHECK-LABEL: or_neg_ugt: ; CHECK: // %bb.0: ; CHECK-NEXT: orr w8, w0, #0x1 -; CHECK-NEXT: neg w8, w8 -; CHECK-NEXT: cmp w8, w1 -; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: cmn w1, w8 +; CHECK-NEXT: cset w0, lo ; CHECK-NEXT: ret %3 = or i32 %x, 1 %4 = sub i32 0, %3 @@ -326,9 +324,8 @@ define i32 @or_neg_no_smin_but_zero(i32 %x, i32 %y) { ; CHECK-LABEL: or_neg_no_smin_but_zero: ; CHECK: // %bb.0: ; CHECK-NEXT: bic w8, w0, w0, asr #31 -; CHECK-NEXT: neg w8, w8 -; CHECK-NEXT: cmp w8, w1 -; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: cmn w1, w8 +; CHECK-NEXT: cset w0, lt ; CHECK-NEXT: ret %3 = call i32 @llvm.smax.i32(i32 %x, i32 0) 
%4 = sub i32 0, %3 @@ -358,9 +355,8 @@ define i32 @or_neg2(i32 %x, i32 %y) { ; CHECK-LABEL: or_neg2: ; CHECK: // %bb.0: ; CHECK-NEXT: orr w8, w0, #0x1 -; CHECK-NEXT: neg w8, w8 -; CHECK-NEXT: cmp w8, w1 -; CHECK-NEXT: cset w0, ge +; CHECK-NEXT: cmn w1, w8 +; CHECK-NEXT: cset w0, le ; CHECK-NEXT: ret %3 = or i32 %x, 1 %4 = sub i32 0, %3 @@ -373,9 +369,8 @@ define i32 @or_neg3(i32 %x, i32 %y) { ; CHECK-LABEL: or_neg3: ; CHECK: // %bb.0: ; CHECK-NEXT: orr w8, w0, #0x1 -; CHECK-NEXT: neg w8, w8 -; CHECK-NEXT: cmp w8, w1 -; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: cmn w1, w8 +; CHECK-NEXT: cset w0, gt ; CHECK-NEXT: ret %3 = or i32 %x, 1 %4 = sub i32 0, %3 @@ -388,9 +383,8 @@ define i32 @or_neg4(i32 %x, i32 %y) { ; CHECK-LABEL: or_neg4: ; CHECK: // %bb.0: ; CHECK-NEXT: orr w8, w0, #0x1 -; CHECK-NEXT: neg w8, w8 -; CHECK-NEXT: cmp w8, w1 -; CHECK-NEXT: cset w0, le +; CHECK-NEXT: cmn w1, w8 +; CHECK-NEXT: cset w0, ge ; CHECK-NEXT: ret %3 = or i32 %x, 1 %4 = sub i32 0, %3 @@ -403,9 +397,8 @@ define i32 @or_neg_ult(i32 %x, i32 %y) { ; CHECK-LABEL: or_neg_ult: ; CHECK: // %bb.0: ; CHECK-NEXT: orr w8, w0, #0x1 -; CHECK-NEXT: neg w8, w8 -; CHECK-NEXT: cmp w8, w1 -; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: cmn w1, w8 +; CHECK-NEXT: cset w0, lo ; CHECK-NEXT: ret %3 = or i32 %x, 1 %4 = sub i32 0, %3 @@ -446,9 +439,8 @@ define i32 @or_neg_no_smin_but_zero2(i32 %x, i32 %y) { ; CHECK-LABEL: or_neg_no_smin_but_zero2: ; CHECK: // %bb.0: ; CHECK-NEXT: bic w8, w0, w0, asr #31 -; CHECK-NEXT: neg w8, w8 -; CHECK-NEXT: cmp w8, w1 -; CHECK-NEXT: cset w0, le +; CHECK-NEXT: cmn w1, w8 +; CHECK-NEXT: cset w0, ge ; CHECK-NEXT: ret %3 = call i32 @llvm.smax.i32(i32 %x, i32 0) %4 = sub i32 0, %3