Skip to content

Commit

Permalink
[AArch64] Use isKnownNonZero to optimize eligible compares to cmn
Browse files Browse the repository at this point in the history
Turning a cmp into cmn saves an extra mov and negate instruction, so take that into account when choosing when to flip the compare operands.

Also, do not consider flipping the operands when the right-hand operand is a constant whose absolute value can already be encoded as a cmp/cmn immediate.

adds 0 and subs 0 differ in how they set the carry flag, which matters for unsigned comparisons.

The problematic case for unsigned comparisons occurs only when the second argument is zero.

Source: https://devblogs.microsoft.com/oldnewthing/20210607-00/?p=105288
  • Loading branch information
AreaZR committed Jul 20, 2024
1 parent 69c1658 commit 3ca9755
Show file tree
Hide file tree
Showing 3 changed files with 155 additions and 97 deletions.
44 changes: 27 additions & 17 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3403,19 +3403,26 @@ static bool isLegalArithImmed(uint64_t C) {
return IsLegal;
}

// Returns true when known-bits analysis rules out CheckedVal being the
// minimum signed value of its type, i.e. the smallest signed value that is
// still consistent with the known bits is not INT_MIN.
static bool cannotBeIntMin(SDValue CheckedVal, SelectionDAG &DAG) {
  const auto Bits = DAG.computeKnownBits(CheckedVal);
  const auto SmallestSigned = Bits.getSignedMinValue();
  // If even the lower bound is INT_MIN, the value itself may be INT_MIN.
  if (SmallestSigned.isMinSignedValue())
    return false;
  return true;
}

// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
// the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags
// can be set differently by this operation. It comes down to whether
// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
// everything is fine. If not then the optimization is wrong. Thus general
// comparisons are only valid if op2 != 0.
//
// So, finally, the only LLVM-native comparisons that don't mention C and V
// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
// the absence of information about op2.
static bool isCMN(SDValue Op, ISD::CondCode CC) {
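// So, finally, the only LLVM-native comparisons that don't mention C or V
// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
// the absence of information about op2. Unsigned comparisons additionally
// require op2 to be known non-zero, and signed comparisons require op2 to be
// known not to be the minimum signed value.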
static bool isCMN(SDValue Op, ISD::CondCode CC, SelectionDAG &DAG) {
return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
(CC == ISD::SETEQ || CC == ISD::SETNE);
(isIntEqualitySetCC(CC) ||
(isUnsignedIntSetCC(CC) && DAG.isKnownNeverZero(Op.getOperand(1))) ||
(isSignedIntSetCC(CC) && cannotBeIntMin(Op.getOperand(1), DAG)));
}

static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
Expand Down Expand Up @@ -3460,11 +3467,12 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
// register to WZR/XZR if it ends up being unused.
unsigned Opcode = AArch64ISD::SUBS;

if (isCMN(RHS, CC)) {
if (isCMN(RHS, CC, DAG)) {
// Can we combine a (CMP op1, (sub 0, op2) into a CMN instruction ?
Opcode = AArch64ISD::ADDS;
RHS = RHS.getOperand(1);
} else if (isCMN(LHS, CC)) {
} else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
isIntEqualitySetCC(CC)) {
// As we are looking for EQ/NE compares, the operands can be commuted ; can
// we combine a (CMP (sub 0, op1), op2) into a CMN instruction ?
Opcode = AArch64ISD::ADDS;
Expand Down Expand Up @@ -3566,13 +3574,15 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
Opcode = AArch64ISD::CCMN;
RHS = DAG.getConstant(Imm.abs(), DL, Const->getValueType(0));
}
} else if (RHS.getOpcode() == ISD::SUB) {
SDValue SubOp0 = RHS.getOperand(0);
if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
// See emitComparison() on why we can only do this for SETEQ and SETNE.
Opcode = AArch64ISD::CCMN;
RHS = RHS.getOperand(1);
}
} else if (isCMN(RHS, CC, DAG)) {
Opcode = AArch64ISD::CCMN;
RHS = RHS.getOperand(1);
} else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
isIntEqualitySetCC(CC)) {
// As we are looking for EQ/NE compares, the operands can be commuted ; can
// we combine a (CMP (sub 0, op1), op2) into a CMN instruction ?
Opcode = AArch64ISD::CCMN;
LHS = LHS.getOperand(1);
}
if (Opcode == 0)
Opcode = AArch64ISD::CCMP;
Expand Down Expand Up @@ -3890,8 +3900,8 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
// cmp w12, w11, lsl #1
if (!isa<ConstantSDNode>(RHS) ||
!isLegalArithImmed(RHS->getAsAPIntVal().abs().getZExtValue())) {
bool LHSIsCMN = isCMN(LHS, CC);
bool RHSIsCMN = isCMN(RHS, CC);
bool LHSIsCMN = isCMN(LHS, CC, DAG);
bool RHSIsCMN = isCMN(RHS, CC, DAG);
SDValue TheLHS = LHSIsCMN ? LHS.getOperand(1) : LHS;
SDValue TheRHS = RHSIsCMN ? RHS.getOperand(1) : RHS;

Expand All @@ -3904,7 +3914,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,

SDValue Cmp;
AArch64CC::CondCode AArch64CC;
if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
if (isIntEqualitySetCC(CC) && isa<ConstantSDNode>(RHS)) {
const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);

// The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
Expand Down
168 changes: 112 additions & 56 deletions llvm/test/CodeGen/AArch64/cmp-chains.ll
Original file line number Diff line number Diff line change
Expand Up @@ -260,14 +260,22 @@ define i32 @neg_range_int(i32 %a, i32 %b, i32 %c) {

; (b > -(d | 1) && a < c)
define i32 @neg_range_int_comp(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: neg_range_int_comp:
; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w3, #0x1
; CHECK-NEXT: cmp w0, w2
; CHECK-NEXT: neg w8, w8
; CHECK-NEXT: ccmp w1, w8, #4, lt
; CHECK-NEXT: csel w0, w1, w0, gt
; CHECK-NEXT: ret
; SDISEL-LABEL: neg_range_int_comp:
; SDISEL: // %bb.0:
; SDISEL-NEXT: orr w8, w3, #0x1
; SDISEL-NEXT: cmp w0, w2
; SDISEL-NEXT: ccmn w1, w8, #4, lt
; SDISEL-NEXT: csel w0, w1, w0, gt
; SDISEL-NEXT: ret
;
; GISEL-LABEL: neg_range_int_comp:
; GISEL: // %bb.0:
; GISEL-NEXT: orr w8, w3, #0x1
; GISEL-NEXT: cmp w0, w2
; GISEL-NEXT: neg w8, w8
; GISEL-NEXT: ccmp w1, w8, #4, lt
; GISEL-NEXT: csel w0, w1, w0, gt
; GISEL-NEXT: ret
%dor = or i32 %d, 1
%negd = sub i32 0, %dor
%cmp = icmp sgt i32 %b, %negd
Expand All @@ -279,14 +287,22 @@ define i32 @neg_range_int_comp(i32 %a, i32 %b, i32 %c, i32 %d) {

; (b >u -(d | 1) && a < c)
define i32 @neg_range_int_comp_u(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: neg_range_int_comp_u:
; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w3, #0x1
; CHECK-NEXT: cmp w0, w2
; CHECK-NEXT: neg w8, w8
; CHECK-NEXT: ccmp w1, w8, #0, lt
; CHECK-NEXT: csel w0, w1, w0, hi
; CHECK-NEXT: ret
; SDISEL-LABEL: neg_range_int_comp_u:
; SDISEL: // %bb.0:
; SDISEL-NEXT: orr w8, w3, #0x1
; SDISEL-NEXT: cmp w0, w2
; SDISEL-NEXT: ccmn w1, w8, #0, lt
; SDISEL-NEXT: csel w0, w1, w0, hi
; SDISEL-NEXT: ret
;
; GISEL-LABEL: neg_range_int_comp_u:
; GISEL: // %bb.0:
; GISEL-NEXT: orr w8, w3, #0x1
; GISEL-NEXT: cmp w0, w2
; GISEL-NEXT: neg w8, w8
; GISEL-NEXT: ccmp w1, w8, #0, lt
; GISEL-NEXT: csel w0, w1, w0, hi
; GISEL-NEXT: ret
%dor = or i32 %d, 1
%negd = sub i32 0, %dor
%cmp = icmp ugt i32 %b, %negd
Expand All @@ -298,14 +314,22 @@ define i32 @neg_range_int_comp_u(i32 %a, i32 %b, i32 %c, i32 %d) {

; (b > -(d | 1) && a u < c)
define i32 @neg_range_int_comp_ua(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: neg_range_int_comp_ua:
; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w3, #0x1
; CHECK-NEXT: cmp w0, w2
; CHECK-NEXT: neg w8, w8
; CHECK-NEXT: ccmp w1, w8, #4, lo
; CHECK-NEXT: csel w0, w1, w0, gt
; CHECK-NEXT: ret
; SDISEL-LABEL: neg_range_int_comp_ua:
; SDISEL: // %bb.0:
; SDISEL-NEXT: orr w8, w3, #0x1
; SDISEL-NEXT: cmp w0, w2
; SDISEL-NEXT: ccmn w1, w8, #4, lo
; SDISEL-NEXT: csel w0, w1, w0, gt
; SDISEL-NEXT: ret
;
; GISEL-LABEL: neg_range_int_comp_ua:
; GISEL: // %bb.0:
; GISEL-NEXT: orr w8, w3, #0x1
; GISEL-NEXT: cmp w0, w2
; GISEL-NEXT: neg w8, w8
; GISEL-NEXT: ccmp w1, w8, #4, lo
; GISEL-NEXT: csel w0, w1, w0, gt
; GISEL-NEXT: ret
%dor = or i32 %d, 1
%negd = sub i32 0, %dor
%cmp = icmp sgt i32 %b, %negd
Expand Down Expand Up @@ -339,14 +363,22 @@ define i32 @neg_range_int_2(i32 %a, i32 %b, i32 %c) {

; (b < -(d | 1) && a >= c)
define i32 @neg_range_int_comp2(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: neg_range_int_comp2:
; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w3, #0x1
; CHECK-NEXT: cmp w0, w2
; CHECK-NEXT: neg w8, w8
; CHECK-NEXT: ccmp w1, w8, #0, ge
; CHECK-NEXT: csel w0, w1, w0, lt
; CHECK-NEXT: ret
; SDISEL-LABEL: neg_range_int_comp2:
; SDISEL: // %bb.0:
; SDISEL-NEXT: orr w8, w3, #0x1
; SDISEL-NEXT: cmp w0, w2
; SDISEL-NEXT: ccmn w1, w8, #0, ge
; SDISEL-NEXT: csel w0, w1, w0, lt
; SDISEL-NEXT: ret
;
; GISEL-LABEL: neg_range_int_comp2:
; GISEL: // %bb.0:
; GISEL-NEXT: orr w8, w3, #0x1
; GISEL-NEXT: cmp w0, w2
; GISEL-NEXT: neg w8, w8
; GISEL-NEXT: ccmp w1, w8, #0, ge
; GISEL-NEXT: csel w0, w1, w0, lt
; GISEL-NEXT: ret
%dor = or i32 %d, 1
%negd = sub i32 0, %dor
%cmp = icmp slt i32 %b, %negd
Expand All @@ -358,14 +390,22 @@ define i32 @neg_range_int_comp2(i32 %a, i32 %b, i32 %c, i32 %d) {

; (b <u -(d | 1) && a > c)
define i32 @neg_range_int_comp_u2(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: neg_range_int_comp_u2:
; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w3, #0x1
; CHECK-NEXT: cmp w0, w2
; CHECK-NEXT: neg w8, w8
; CHECK-NEXT: ccmp w1, w8, #2, gt
; CHECK-NEXT: csel w0, w1, w0, lo
; CHECK-NEXT: ret
; SDISEL-LABEL: neg_range_int_comp_u2:
; SDISEL: // %bb.0:
; SDISEL-NEXT: orr w8, w3, #0x1
; SDISEL-NEXT: cmp w0, w2
; SDISEL-NEXT: ccmn w1, w8, #2, gt
; SDISEL-NEXT: csel w0, w1, w0, lo
; SDISEL-NEXT: ret
;
; GISEL-LABEL: neg_range_int_comp_u2:
; GISEL: // %bb.0:
; GISEL-NEXT: orr w8, w3, #0x1
; GISEL-NEXT: cmp w0, w2
; GISEL-NEXT: neg w8, w8
; GISEL-NEXT: ccmp w1, w8, #2, gt
; GISEL-NEXT: csel w0, w1, w0, lo
; GISEL-NEXT: ret
%dor = or i32 %d, 1
%negd = sub i32 0, %dor
%cmp = icmp ult i32 %b, %negd
Expand All @@ -377,14 +417,22 @@ define i32 @neg_range_int_comp_u2(i32 %a, i32 %b, i32 %c, i32 %d) {

; (b > -(d | 1) && a u > c)
define i32 @neg_range_int_comp_ua2(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: neg_range_int_comp_ua2:
; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w3, #0x1
; CHECK-NEXT: cmp w0, w2
; CHECK-NEXT: neg w8, w8
; CHECK-NEXT: ccmp w1, w8, #4, hi
; CHECK-NEXT: csel w0, w1, w0, gt
; CHECK-NEXT: ret
; SDISEL-LABEL: neg_range_int_comp_ua2:
; SDISEL: // %bb.0:
; SDISEL-NEXT: orr w8, w3, #0x1
; SDISEL-NEXT: cmp w0, w2
; SDISEL-NEXT: ccmn w1, w8, #4, hi
; SDISEL-NEXT: csel w0, w1, w0, gt
; SDISEL-NEXT: ret
;
; GISEL-LABEL: neg_range_int_comp_ua2:
; GISEL: // %bb.0:
; GISEL-NEXT: orr w8, w3, #0x1
; GISEL-NEXT: cmp w0, w2
; GISEL-NEXT: neg w8, w8
; GISEL-NEXT: ccmp w1, w8, #4, hi
; GISEL-NEXT: csel w0, w1, w0, gt
; GISEL-NEXT: ret
%dor = or i32 %d, 1
%negd = sub i32 0, %dor
%cmp = icmp sgt i32 %b, %negd
Expand All @@ -396,14 +444,22 @@ define i32 @neg_range_int_comp_ua2(i32 %a, i32 %b, i32 %c, i32 %d) {

; (b > -(d | 1) && a u == c)
define i32 @neg_range_int_comp_ua3(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: neg_range_int_comp_ua3:
; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w3, #0x1
; CHECK-NEXT: cmp w0, w2
; CHECK-NEXT: neg w8, w8
; CHECK-NEXT: ccmp w1, w8, #4, eq
; CHECK-NEXT: csel w0, w1, w0, gt
; CHECK-NEXT: ret
; SDISEL-LABEL: neg_range_int_comp_ua3:
; SDISEL: // %bb.0:
; SDISEL-NEXT: orr w8, w3, #0x1
; SDISEL-NEXT: cmp w0, w2
; SDISEL-NEXT: ccmn w1, w8, #4, eq
; SDISEL-NEXT: csel w0, w1, w0, gt
; SDISEL-NEXT: ret
;
; GISEL-LABEL: neg_range_int_comp_ua3:
; GISEL: // %bb.0:
; GISEL-NEXT: orr w8, w3, #0x1
; GISEL-NEXT: cmp w0, w2
; GISEL-NEXT: neg w8, w8
; GISEL-NEXT: ccmp w1, w8, #4, eq
; GISEL-NEXT: csel w0, w1, w0, gt
; GISEL-NEXT: ret
%dor = or i32 %d, 1
%negd = sub i32 0, %dor
%cmp = icmp sgt i32 %b, %negd
Expand Down
Loading

0 comments on commit 3ca9755

Please sign in to comment.