Skip to content

Commit

Permalink
[SelectionDAG] Expand [US]CMP using arithmetic on boolean values inst…
Browse files Browse the repository at this point in the history
…ead of selects (llvm#98774)

The previous expansion of [US]CMP was done using two selects and two
compares. It produced decent code, but on many platforms it is better to
implement [US]CMP nodes by performing the following operation:

  ```
[us]cmp(x, y) = (x [us]> y) - (x [us]< y)
```

This patch adds this new expansion, as well as a hook in TargetLowering that allows a target to keep using the select-based approach. AArch64 and SystemZ are currently the only targets that opt in to the select-based approach, but other targets may also start to use it if it produces better codegen for them.
  • Loading branch information
Poseydon42 authored and sgundapa committed Jul 23, 2024
1 parent 2a807f1 commit ea90ac7
Show file tree
Hide file tree
Showing 20 changed files with 4,391 additions and 2,853 deletions.
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -3391,6 +3391,10 @@ class TargetLoweringBase {
return isOperationLegalOrCustom(Op, VT);
}

/// Should we expand [US]CMP nodes using two selects and two compares, or by
/// doing arithmetic on boolean types?
///
/// Return true to request the select-based expansion. The default returns
/// false, which lets the generic expansion compute the result as
/// (x > y) - (x < y) on the setcc results when that is possible.
virtual bool shouldExpandCmpUsingSelects() const { return false; }

/// Does this target support complex deinterleaving
virtual bool isComplexDeinterleavingSupported() const { return false; }

Expand Down
26 changes: 20 additions & 6 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10391,14 +10391,28 @@ SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {

auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);

SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
SDValue SelectZeroOrOne =
DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
DAG.getConstant(0, dl, ResVT));
return DAG.getSelect(dl, ResVT, IsLT, DAG.getConstant(-1, dl, ResVT),
SelectZeroOrOne);

// We can't perform arithmetic on i1 values. Extending them would
// probably result in worse codegen, so let's just use two selects instead.
// Some targets are also just better off using selects rather than subtraction
// because one of the conditions can be merged with one of the selects.
// And finally, if we don't know the contents of high bits of a boolean value
// we can't perform any arithmetic either.
if (shouldExpandCmpUsingSelects() || BoolVT.getScalarSizeInBits() == 1 ||
getBooleanContents(BoolVT) == UndefinedBooleanContent) {
SDValue SelectZeroOrOne =
DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
DAG.getConstant(0, dl, ResVT));
return DAG.getSelect(dl, ResVT, IsLT, DAG.getConstant(-1, dl, ResVT),
SelectZeroOrOne);
}

if (getBooleanContents(BoolVT) == ZeroOrNegativeOneBooleanContent)
std::swap(IsGT, IsLT);
return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
ResVT);
}

SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -907,6 +907,8 @@ class AArch64TargetLowering : public TargetLowering {

bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

/// Request the select-based expansion of [US]CMP nodes (two compares and two
/// selects) instead of arithmetic on boolean values.
bool shouldExpandCmpUsingSelects() const override { return true; }

bool isComplexDeinterleavingSupported() const override;
bool isComplexDeinterleavingOperationSupported(
ComplexDeinterleavingOperation Operation, Type *Ty) const override;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/SystemZ/SystemZISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,8 @@ class SystemZTargetLowering : public TargetLowering {

bool shouldConsiderGEPOffsetSplit() const override { return true; }

/// Request the select-based expansion of [US]CMP nodes (two compares and two
/// selects) instead of arithmetic on boolean values.
bool shouldExpandCmpUsingSelects() const override { return true; }

const char *getTargetNodeName(unsigned Opcode) const override;
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
Expand Down
143 changes: 143 additions & 0 deletions llvm/test/CodeGen/ARM/scmp.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=armv7-unknown-eabi %s -o - | FileCheck %s

define i8 @scmp_8_8(i8 signext %x, i8 signext %y) nounwind {
; CHECK-LABEL: scmp_8_8:
; CHECK: @ %bb.0:
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: movwgt r2, #1
; CHECK-NEXT: sub r0, r2, r0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.scmp(i8 %x, i8 %y)
ret i8 %1
}

define i8 @scmp_8_16(i16 signext %x, i16 signext %y) nounwind {
; CHECK-LABEL: scmp_8_16:
; CHECK: @ %bb.0:
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: movwgt r2, #1
; CHECK-NEXT: sub r0, r2, r0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.scmp(i16 %x, i16 %y)
ret i8 %1
}

define i8 @scmp_8_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: scmp_8_32:
; CHECK: @ %bb.0:
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: movwgt r2, #1
; CHECK-NEXT: sub r0, r2, r0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.scmp(i32 %x, i32 %y)
ret i8 %1
}

define i8 @scmp_8_64(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: scmp_8_64:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: subs lr, r0, r2
; CHECK-NEXT: mov r12, #0
; CHECK-NEXT: sbcs lr, r1, r3
; CHECK-NEXT: mov lr, #0
; CHECK-NEXT: movwlt lr, #1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: sbcs r0, r3, r1
; CHECK-NEXT: movwlt r12, #1
; CHECK-NEXT: sub r0, r12, lr
; CHECK-NEXT: pop {r11, pc}
%1 = call i8 @llvm.scmp(i64 %x, i64 %y)
ret i8 %1
}

define i8 @scmp_8_128(i128 %x, i128 %y) nounwind {
; CHECK-LABEL: scmp_8_128:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: ldr r4, [sp, #24]
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: ldr r6, [sp, #28]
; CHECK-NEXT: subs r7, r0, r4
; CHECK-NEXT: ldr r12, [sp, #32]
; CHECK-NEXT: sbcs r7, r1, r6
; CHECK-NEXT: ldr lr, [sp, #36]
; CHECK-NEXT: sbcs r7, r2, r12
; CHECK-NEXT: sbcs r7, r3, lr
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: movwlt r7, #1
; CHECK-NEXT: subs r0, r4, r0
; CHECK-NEXT: sbcs r0, r6, r1
; CHECK-NEXT: sbcs r0, r12, r2
; CHECK-NEXT: sbcs r0, lr, r3
; CHECK-NEXT: movwlt r5, #1
; CHECK-NEXT: sub r0, r5, r7
; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
%1 = call i8 @llvm.scmp(i128 %x, i128 %y)
ret i8 %1
}

define i32 @scmp_32_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: scmp_32_32:
; CHECK: @ %bb.0:
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: movwgt r2, #1
; CHECK-NEXT: sub r0, r2, r0
; CHECK-NEXT: bx lr
%1 = call i32 @llvm.scmp(i32 %x, i32 %y)
ret i32 %1
}

define i32 @scmp_32_64(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: scmp_32_64:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: subs lr, r0, r2
; CHECK-NEXT: mov r12, #0
; CHECK-NEXT: sbcs lr, r1, r3
; CHECK-NEXT: mov lr, #0
; CHECK-NEXT: movwlt lr, #1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: sbcs r0, r3, r1
; CHECK-NEXT: movwlt r12, #1
; CHECK-NEXT: sub r0, r12, lr
; CHECK-NEXT: pop {r11, pc}
%1 = call i32 @llvm.scmp(i64 %x, i64 %y)
ret i32 %1
}

define i64 @scmp_64_64(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: scmp_64_64:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: subs lr, r0, r2
; CHECK-NEXT: mov r12, #0
; CHECK-NEXT: sbcs lr, r1, r3
; CHECK-NEXT: mov lr, #0
; CHECK-NEXT: movwlt lr, #1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: sbcs r0, r3, r1
; CHECK-NEXT: movwlt r12, #1
; CHECK-NEXT: sub r0, r12, lr
; CHECK-NEXT: asr r1, r0, #31
; CHECK-NEXT: pop {r11, pc}
%1 = call i64 @llvm.scmp(i64 %x, i64 %y)
ret i64 %1
}
143 changes: 143 additions & 0 deletions llvm/test/CodeGen/ARM/ucmp.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=armv7-unknown-eabi %s -o - | FileCheck %s

define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind {
; CHECK-LABEL: ucmp_8_8:
; CHECK: @ %bb.0:
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlo r0, #1
; CHECK-NEXT: movwhi r2, #1
; CHECK-NEXT: sub r0, r2, r0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
ret i8 %1
}

define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind {
; CHECK-LABEL: ucmp_8_16:
; CHECK: @ %bb.0:
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlo r0, #1
; CHECK-NEXT: movwhi r2, #1
; CHECK-NEXT: sub r0, r2, r0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
ret i8 %1
}

define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: ucmp_8_32:
; CHECK: @ %bb.0:
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlo r0, #1
; CHECK-NEXT: movwhi r2, #1
; CHECK-NEXT: sub r0, r2, r0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
ret i8 %1
}

define i8 @ucmp_8_64(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: ucmp_8_64:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: subs lr, r0, r2
; CHECK-NEXT: mov r12, #0
; CHECK-NEXT: sbcs lr, r1, r3
; CHECK-NEXT: mov lr, #0
; CHECK-NEXT: movwlo lr, #1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: sbcs r0, r3, r1
; CHECK-NEXT: movwlo r12, #1
; CHECK-NEXT: sub r0, r12, lr
; CHECK-NEXT: pop {r11, pc}
%1 = call i8 @llvm.ucmp(i64 %x, i64 %y)
ret i8 %1
}

define i8 @ucmp_8_128(i128 %x, i128 %y) nounwind {
; CHECK-LABEL: ucmp_8_128:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: ldr r4, [sp, #24]
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: ldr r6, [sp, #28]
; CHECK-NEXT: subs r7, r0, r4
; CHECK-NEXT: ldr r12, [sp, #32]
; CHECK-NEXT: sbcs r7, r1, r6
; CHECK-NEXT: ldr lr, [sp, #36]
; CHECK-NEXT: sbcs r7, r2, r12
; CHECK-NEXT: sbcs r7, r3, lr
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: movwlo r7, #1
; CHECK-NEXT: subs r0, r4, r0
; CHECK-NEXT: sbcs r0, r6, r1
; CHECK-NEXT: sbcs r0, r12, r2
; CHECK-NEXT: sbcs r0, lr, r3
; CHECK-NEXT: movwlo r5, #1
; CHECK-NEXT: sub r0, r5, r7
; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
%1 = call i8 @llvm.ucmp(i128 %x, i128 %y)
ret i8 %1
}

define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: ucmp_32_32:
; CHECK: @ %bb.0:
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlo r0, #1
; CHECK-NEXT: movwhi r2, #1
; CHECK-NEXT: sub r0, r2, r0
; CHECK-NEXT: bx lr
%1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
ret i32 %1
}

define i32 @ucmp_32_64(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: ucmp_32_64:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: subs lr, r0, r2
; CHECK-NEXT: mov r12, #0
; CHECK-NEXT: sbcs lr, r1, r3
; CHECK-NEXT: mov lr, #0
; CHECK-NEXT: movwlo lr, #1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: sbcs r0, r3, r1
; CHECK-NEXT: movwlo r12, #1
; CHECK-NEXT: sub r0, r12, lr
; CHECK-NEXT: pop {r11, pc}
%1 = call i32 @llvm.ucmp(i64 %x, i64 %y)
ret i32 %1
}

define i64 @ucmp_64_64(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: ucmp_64_64:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: subs lr, r0, r2
; CHECK-NEXT: mov r12, #0
; CHECK-NEXT: sbcs lr, r1, r3
; CHECK-NEXT: mov lr, #0
; CHECK-NEXT: movwlo lr, #1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: sbcs r0, r3, r1
; CHECK-NEXT: movwlo r12, #1
; CHECK-NEXT: sub r0, r12, lr
; CHECK-NEXT: asr r1, r0, #31
; CHECK-NEXT: pop {r11, pc}
%1 = call i64 @llvm.ucmp(i64 %x, i64 %y)
ret i64 %1
}
Loading

0 comments on commit ea90ac7

Please sign in to comment.