Skip to content

Commit

Permalink
[AArch64] Use isKnownNonZero to optimize eligible compares to cmn and…
Browse files Browse the repository at this point in the history
… ccmn (#96349)

Summary:
The problematic case for unsigned comparisons occurs only when the
second argument is zero.

The problematic case for signed comparisons occurs only when the second
argument is the signed minimum value.

We can use KnownBits to know when we don't have to worry about this.

Source: https://devblogs.microsoft.com/oldnewthing/20210607-00/?p=105288

Test Plan: 

Reviewers: 

Subscribers: 

Tasks: 

Tags: 


Differential Revision: https://phabricator.intern.facebook.com/D60251645
  • Loading branch information
AreaZR authored and yuxuanchen1997 committed Jul 25, 2024
1 parent ee68c3e commit 27ec0f1
Show file tree
Hide file tree
Showing 3 changed files with 476 additions and 17 deletions.
44 changes: 27 additions & 17 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3403,19 +3403,26 @@ static bool isLegalArithImmed(uint64_t C) {
return IsLegal;
}

// Returns true when the known bits of CheckedVal show that the smallest
// signed value it could take is not the minimum signed value for its width,
// i.e. CheckedVal can never be INT_MIN.
static bool cannotBeIntMin(SDValue CheckedVal, SelectionDAG &DAG) {
  const auto SmallestSigned =
      DAG.computeKnownBits(CheckedVal).getSignedMinValue();
  if (SmallestSigned.isMinSignedValue())
    return false;
  return true;
}

// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
// the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags
// can be set differently by this operation. It comes down to whether
// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
// everything is fine. If not then the optimization is wrong. Thus unsigned
// comparisons are only valid if op2 != 0, and signed comparisons are only
// valid if op2 is not the minimum signed value.
//
// So, finally, the only LLVM-native comparisons that don't mention C and V
// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
// the absence of information about op2.
static bool isCMN(SDValue Op, ISD::CondCode CC) {
// So, finally, the only LLVM-native comparisons that don't mention C or V
// are the equality comparisons (SETEQ and SETNE). They're the only ones we
// can safely use CMN for in the absence of information about op2.
static bool isCMN(SDValue Op, ISD::CondCode CC, SelectionDAG &DAG) {
return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
(CC == ISD::SETEQ || CC == ISD::SETNE);
(isIntEqualitySetCC(CC) ||
(isUnsignedIntSetCC(CC) && DAG.isKnownNeverZero(Op.getOperand(1))) ||
(isSignedIntSetCC(CC) && cannotBeIntMin(Op.getOperand(1), DAG)));
}

static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
Expand Down Expand Up @@ -3460,11 +3467,12 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
// register to WZR/XZR if it ends up being unused.
unsigned Opcode = AArch64ISD::SUBS;

if (isCMN(RHS, CC)) {
if (isCMN(RHS, CC, DAG)) {
// Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction ?
Opcode = AArch64ISD::ADDS;
RHS = RHS.getOperand(1);
} else if (isCMN(LHS, CC)) {
} else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
isIntEqualitySetCC(CC)) {
// As we are looking for EQ/NE compares, the operands can be commuted ; can
// we combine a (CMP (sub 0, op1), op2) into a CMN instruction ?
Opcode = AArch64ISD::ADDS;
Expand Down Expand Up @@ -3566,13 +3574,15 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
Opcode = AArch64ISD::CCMN;
RHS = DAG.getConstant(Imm.abs(), DL, Const->getValueType(0));
}
} else if (RHS.getOpcode() == ISD::SUB) {
SDValue SubOp0 = RHS.getOperand(0);
if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
// See emitComparison() on why we can only do this for SETEQ and SETNE.
Opcode = AArch64ISD::CCMN;
RHS = RHS.getOperand(1);
}
} else if (isCMN(RHS, CC, DAG)) {
Opcode = AArch64ISD::CCMN;
RHS = RHS.getOperand(1);
} else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
isIntEqualitySetCC(CC)) {
// As we are looking for EQ/NE compares, the operands can be commuted ; can
// we combine a (CCMP (sub 0, op1), op2) into a CCMN instruction ?
Opcode = AArch64ISD::CCMN;
LHS = LHS.getOperand(1);
}
if (Opcode == 0)
Opcode = AArch64ISD::CCMP;
Expand Down Expand Up @@ -3890,8 +3900,8 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
// cmp w12, w11, lsl #1
if (!isa<ConstantSDNode>(RHS) ||
!isLegalArithImmed(RHS->getAsAPIntVal().abs().getZExtValue())) {
bool LHSIsCMN = isCMN(LHS, CC);
bool RHSIsCMN = isCMN(RHS, CC);
bool LHSIsCMN = isCMN(LHS, CC, DAG);
bool RHSIsCMN = isCMN(RHS, CC, DAG);
SDValue TheLHS = LHSIsCMN ? LHS.getOperand(1) : LHS;
SDValue TheRHS = RHSIsCMN ? RHS.getOperand(1) : RHS;

Expand All @@ -3904,7 +3914,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,

SDValue Cmp;
AArch64CC::CondCode AArch64CC;
if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
if (isIntEqualitySetCC(CC) && isa<ConstantSDNode>(RHS)) {
const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);

// The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
Expand Down
243 changes: 243 additions & 0 deletions llvm/test/CodeGen/AArch64/cmp-chains.ll
Original file line number Diff line number Diff line change
Expand Up @@ -258,3 +258,246 @@ define i32 @neg_range_int(i32 %a, i32 %b, i32 %c) {
ret i32 %retval.0
}

; (b > -(d | 1) && a < c)
define i32 @neg_range_int_comp(i32 %a, i32 %b, i32 %c, i32 %d) {
; SDISEL-LABEL: neg_range_int_comp:
; SDISEL: // %bb.0:
; SDISEL-NEXT: orr w8, w3, #0x1
; SDISEL-NEXT: cmp w0, w2
; SDISEL-NEXT: ccmn w1, w8, #4, lt
; SDISEL-NEXT: csel w0, w1, w0, gt
; SDISEL-NEXT: ret
;
; GISEL-LABEL: neg_range_int_comp:
; GISEL: // %bb.0:
; GISEL-NEXT: orr w8, w3, #0x1
; GISEL-NEXT: cmp w0, w2
; GISEL-NEXT: neg w8, w8
; GISEL-NEXT: ccmp w1, w8, #4, lt
; GISEL-NEXT: csel w0, w1, w0, gt
; GISEL-NEXT: ret
%dor = or i32 %d, 1
%negd = sub i32 0, %dor
%cmp = icmp sgt i32 %b, %negd
%cmp1 = icmp slt i32 %a, %c
%or.cond = and i1 %cmp, %cmp1
%retval.0 = select i1 %or.cond, i32 %b, i32 %a
ret i32 %retval.0
}

; (b >u -(d | 1) && a < c)
define i32 @neg_range_int_comp_u(i32 %a, i32 %b, i32 %c, i32 %d) {
; SDISEL-LABEL: neg_range_int_comp_u:
; SDISEL: // %bb.0:
; SDISEL-NEXT: orr w8, w3, #0x1
; SDISEL-NEXT: cmp w0, w2
; SDISEL-NEXT: ccmn w1, w8, #0, lt
; SDISEL-NEXT: csel w0, w1, w0, hi
; SDISEL-NEXT: ret
;
; GISEL-LABEL: neg_range_int_comp_u:
; GISEL: // %bb.0:
; GISEL-NEXT: orr w8, w3, #0x1
; GISEL-NEXT: cmp w0, w2
; GISEL-NEXT: neg w8, w8
; GISEL-NEXT: ccmp w1, w8, #0, lt
; GISEL-NEXT: csel w0, w1, w0, hi
; GISEL-NEXT: ret
%dor = or i32 %d, 1
%negd = sub i32 0, %dor
%cmp = icmp ugt i32 %b, %negd
%cmp1 = icmp slt i32 %a, %c
%or.cond = and i1 %cmp, %cmp1
%retval.0 = select i1 %or.cond, i32 %b, i32 %a
ret i32 %retval.0
}

; (b > -(d | 1) && a u < c)
define i32 @neg_range_int_comp_ua(i32 %a, i32 %b, i32 %c, i32 %d) {
; SDISEL-LABEL: neg_range_int_comp_ua:
; SDISEL: // %bb.0:
; SDISEL-NEXT: orr w8, w3, #0x1
; SDISEL-NEXT: cmp w0, w2
; SDISEL-NEXT: ccmn w1, w8, #4, lo
; SDISEL-NEXT: csel w0, w1, w0, gt
; SDISEL-NEXT: ret
;
; GISEL-LABEL: neg_range_int_comp_ua:
; GISEL: // %bb.0:
; GISEL-NEXT: orr w8, w3, #0x1
; GISEL-NEXT: cmp w0, w2
; GISEL-NEXT: neg w8, w8
; GISEL-NEXT: ccmp w1, w8, #4, lo
; GISEL-NEXT: csel w0, w1, w0, gt
; GISEL-NEXT: ret
%dor = or i32 %d, 1
%negd = sub i32 0, %dor
%cmp = icmp sgt i32 %b, %negd
%cmp1 = icmp ult i32 %a, %c
%or.cond = and i1 %cmp, %cmp1
%retval.0 = select i1 %or.cond, i32 %b, i32 %a
ret i32 %retval.0
}

; (b >= -3 && a > c)
define i32 @neg_range_int_2(i32 %a, i32 %b, i32 %c) {
; SDISEL-LABEL: neg_range_int_2:
; SDISEL: // %bb.0:
; SDISEL-NEXT: cmp w0, w2
; SDISEL-NEXT: ccmn w1, #4, #4, gt
; SDISEL-NEXT: csel w0, w1, w0, gt
; SDISEL-NEXT: ret
;
; GISEL-LABEL: neg_range_int_2:
; GISEL: // %bb.0:
; GISEL-NEXT: cmp w0, w2
; GISEL-NEXT: ccmn w1, #3, #8, gt
; GISEL-NEXT: csel w0, w1, w0, ge
; GISEL-NEXT: ret
%cmp = icmp sge i32 %b, -3
%cmp1 = icmp sgt i32 %a, %c
%or.cond = and i1 %cmp, %cmp1
%retval.0 = select i1 %or.cond, i32 %b, i32 %a
ret i32 %retval.0
}

; (b < -(d | 1) && a >= c)
define i32 @neg_range_int_comp2(i32 %a, i32 %b, i32 %c, i32 %d) {
; SDISEL-LABEL: neg_range_int_comp2:
; SDISEL: // %bb.0:
; SDISEL-NEXT: orr w8, w3, #0x1
; SDISEL-NEXT: cmp w0, w2
; SDISEL-NEXT: ccmn w1, w8, #0, ge
; SDISEL-NEXT: csel w0, w1, w0, lt
; SDISEL-NEXT: ret
;
; GISEL-LABEL: neg_range_int_comp2:
; GISEL: // %bb.0:
; GISEL-NEXT: orr w8, w3, #0x1
; GISEL-NEXT: cmp w0, w2
; GISEL-NEXT: neg w8, w8
; GISEL-NEXT: ccmp w1, w8, #0, ge
; GISEL-NEXT: csel w0, w1, w0, lt
; GISEL-NEXT: ret
%dor = or i32 %d, 1
%negd = sub i32 0, %dor
%cmp = icmp slt i32 %b, %negd
%cmp1 = icmp sge i32 %a, %c
%or.cond = and i1 %cmp, %cmp1
%retval.0 = select i1 %or.cond, i32 %b, i32 %a
ret i32 %retval.0
}

; (b <u -(d | 1) && a > c)
define i32 @neg_range_int_comp_u2(i32 %a, i32 %b, i32 %c, i32 %d) {
; SDISEL-LABEL: neg_range_int_comp_u2:
; SDISEL: // %bb.0:
; SDISEL-NEXT: orr w8, w3, #0x1
; SDISEL-NEXT: cmp w0, w2
; SDISEL-NEXT: ccmn w1, w8, #2, gt
; SDISEL-NEXT: csel w0, w1, w0, lo
; SDISEL-NEXT: ret
;
; GISEL-LABEL: neg_range_int_comp_u2:
; GISEL: // %bb.0:
; GISEL-NEXT: orr w8, w3, #0x1
; GISEL-NEXT: cmp w0, w2
; GISEL-NEXT: neg w8, w8
; GISEL-NEXT: ccmp w1, w8, #2, gt
; GISEL-NEXT: csel w0, w1, w0, lo
; GISEL-NEXT: ret
%dor = or i32 %d, 1
%negd = sub i32 0, %dor
%cmp = icmp ult i32 %b, %negd
%cmp1 = icmp sgt i32 %a, %c
%or.cond = and i1 %cmp, %cmp1
%retval.0 = select i1 %or.cond, i32 %b, i32 %a
ret i32 %retval.0
}

; (b > -(d | 1) && a u > c)
define i32 @neg_range_int_comp_ua2(i32 %a, i32 %b, i32 %c, i32 %d) {
; SDISEL-LABEL: neg_range_int_comp_ua2:
; SDISEL: // %bb.0:
; SDISEL-NEXT: orr w8, w3, #0x1
; SDISEL-NEXT: cmp w0, w2
; SDISEL-NEXT: ccmn w1, w8, #4, hi
; SDISEL-NEXT: csel w0, w1, w0, gt
; SDISEL-NEXT: ret
;
; GISEL-LABEL: neg_range_int_comp_ua2:
; GISEL: // %bb.0:
; GISEL-NEXT: orr w8, w3, #0x1
; GISEL-NEXT: cmp w0, w2
; GISEL-NEXT: neg w8, w8
; GISEL-NEXT: ccmp w1, w8, #4, hi
; GISEL-NEXT: csel w0, w1, w0, gt
; GISEL-NEXT: ret
%dor = or i32 %d, 1
%negd = sub i32 0, %dor
%cmp = icmp sgt i32 %b, %negd
%cmp1 = icmp ugt i32 %a, %c
%or.cond = and i1 %cmp, %cmp1
%retval.0 = select i1 %or.cond, i32 %b, i32 %a
ret i32 %retval.0
}

; (b > -(d | 1) && a == c)
define i32 @neg_range_int_comp_ua3(i32 %a, i32 %b, i32 %c, i32 %d) {
; SDISEL-LABEL: neg_range_int_comp_ua3:
; SDISEL: // %bb.0:
; SDISEL-NEXT: orr w8, w3, #0x1
; SDISEL-NEXT: cmp w0, w2
; SDISEL-NEXT: ccmn w1, w8, #4, eq
; SDISEL-NEXT: csel w0, w1, w0, gt
; SDISEL-NEXT: ret
;
; GISEL-LABEL: neg_range_int_comp_ua3:
; GISEL: // %bb.0:
; GISEL-NEXT: orr w8, w3, #0x1
; GISEL-NEXT: cmp w0, w2
; GISEL-NEXT: neg w8, w8
; GISEL-NEXT: ccmp w1, w8, #4, eq
; GISEL-NEXT: csel w0, w1, w0, gt
; GISEL-NEXT: ret
%dor = or i32 %d, 1
%negd = sub i32 0, %dor
%cmp = icmp sgt i32 %b, %negd
%cmp1 = icmp eq i32 %a, %c
%or.cond = and i1 %cmp, %cmp1
%retval.0 = select i1 %or.cond, i32 %b, i32 %a
ret i32 %retval.0
}

; -(a | 1) >u (b | 3) && c <u a
define i32 @neg_range_int_c(i32 %a, i32 %b, i32 %c) {
; SDISEL-LABEL: neg_range_int_c:
; SDISEL: // %bb.0: // %entry
; SDISEL-NEXT: orr w8, w0, #0x1
; SDISEL-NEXT: orr w9, w1, #0x3
; SDISEL-NEXT: cmn w9, w8
; SDISEL-NEXT: ccmp w2, w0, #2, lo
; SDISEL-NEXT: cset w0, lo
; SDISEL-NEXT: ret
;
; GISEL-LABEL: neg_range_int_c:
; GISEL: // %bb.0: // %entry
; GISEL-NEXT: orr w8, w0, #0x1
; GISEL-NEXT: orr w9, w1, #0x3
; GISEL-NEXT: neg w8, w8
; GISEL-NEXT: cmp w9, w8
; GISEL-NEXT: cset w8, lo
; GISEL-NEXT: cmp w2, w0
; GISEL-NEXT: cset w9, lo
; GISEL-NEXT: and w0, w8, w9
; GISEL-NEXT: ret
entry:
%or = or i32 %a, 1
%sub = sub i32 0, %or
%or1 = or i32 %b, 3
%cmp = icmp ult i32 %or1, %sub
%cmp2 = icmp ult i32 %c, %a
%0 = and i1 %cmp, %cmp2
%land.ext = zext i1 %0 to i32
ret i32 %land.ext
}
Loading

0 comments on commit 27ec0f1

Please sign in to comment.