Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SelectionDAG] Expand [US]CMP using arithmetic on boolean values instead of selects #98774

Merged
merged 5 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -3390,6 +3390,10 @@ class TargetLoweringBase {
return isOperationLegalOrCustom(Op, VT);
}

/// Selects how the generic expansion lowers [US]CMP nodes. Return true to
/// expand them using two compares and two selects; return false (the default)
/// to let the expansion do arithmetic on the boolean compare results instead.
virtual bool shouldExpandCmpUsingSelects() const { return false; }

/// Does this target support complex deinterleaving
virtual bool isComplexDeinterleavingSupported() const { return false; }

Expand Down
26 changes: 20 additions & 6 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10381,14 +10381,28 @@ SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {

auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);

SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
SDValue SelectZeroOrOne =
DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
DAG.getConstant(0, dl, ResVT));
return DAG.getSelect(dl, ResVT, IsLT, DAG.getConstant(-1, dl, ResVT),
SelectZeroOrOne);

// We can't perform arithmetic on i1 values. Extending them would
// probably result in worse codegen, so let's just use two selects instead.
// Some targets are also just better off using selects rather than subtraction
// because one of the conditions can be merged with one of the selects.
// And finally, if we don't know the contents of high bits of a boolean value
// we can't perform any arithmetic either.
if (shouldExpandCmpUsingSelects() || BoolVT.getScalarSizeInBits() == 1 ||
getBooleanContents(BoolVT) == UndefinedBooleanContent) {
SDValue SelectZeroOrOne =
DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
DAG.getConstant(0, dl, ResVT));
return DAG.getSelect(dl, ResVT, IsLT, DAG.getConstant(-1, dl, ResVT),
SelectZeroOrOne);
}

if (getBooleanContents(BoolVT) == ZeroOrNegativeOneBooleanContent)
std::swap(LHS, RHS);
Poseydon42 marked this conversation as resolved.
Show resolved Hide resolved
return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
ResVT);
}

SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -907,6 +907,8 @@ class AArch64TargetLowering : public TargetLowering {

bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

/// Request that [US]CMP nodes be expanded with selects rather than with
/// arithmetic on boolean values.
bool shouldExpandCmpUsingSelects() const override { return true; }

bool isComplexDeinterleavingSupported() const override;
bool isComplexDeinterleavingOperationSupported(
ComplexDeinterleavingOperation Operation, Type *Ty) const override;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/SystemZ/SystemZISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,8 @@ class SystemZTargetLowering : public TargetLowering {

bool shouldConsiderGEPOffsetSplit() const override { return true; }

/// Request that [US]CMP nodes be expanded with selects rather than with
/// arithmetic on boolean values.
bool shouldExpandCmpUsingSelects() const override { return true; }

const char *getTargetNodeName(unsigned Opcode) const override;
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
Expand Down
143 changes: 143 additions & 0 deletions llvm/test/CodeGen/ARM/scmp.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=armv7-unknown-eabi %s -o - | FileCheck %s

; llvm.scmp on sign-extended i8 operands, producing an i8 result.
define i8 @scmp_8_8(i8 signext %x, i8 signext %y) nounwind {
; CHECK-LABEL: scmp_8_8:
; CHECK: @ %bb.0:
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: movwgt r2, #1
; CHECK-NEXT: sub r0, r2, r0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.scmp(i8 %x, i8 %y)
ret i8 %1
}

; llvm.scmp on sign-extended i16 operands, producing an i8 result.
define i8 @scmp_8_16(i16 signext %x, i16 signext %y) nounwind {
; CHECK-LABEL: scmp_8_16:
; CHECK: @ %bb.0:
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: movwgt r2, #1
; CHECK-NEXT: sub r0, r2, r0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.scmp(i16 %x, i16 %y)
ret i8 %1
}

; llvm.scmp on i32 operands, producing an i8 result.
define i8 @scmp_8_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: scmp_8_32:
; CHECK: @ %bb.0:
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: movwgt r2, #1
; CHECK-NEXT: sub r0, r2, r0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.scmp(i32 %x, i32 %y)
ret i8 %1
}

; llvm.scmp on i64 operands, producing an i8 result.
define i8 @scmp_8_64(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: scmp_8_64:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: subs lr, r0, r2
; CHECK-NEXT: mov r12, #0
; CHECK-NEXT: sbcs lr, r1, r3
; CHECK-NEXT: mov lr, #0
; CHECK-NEXT: movwlt lr, #1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: sbcs r0, r3, r1
; CHECK-NEXT: movwlt r12, #1
; CHECK-NEXT: sub r0, r12, lr
; CHECK-NEXT: pop {r11, pc}
%1 = call i8 @llvm.scmp(i64 %x, i64 %y)
ret i8 %1
}

; llvm.scmp on i128 operands, producing an i8 result.
define i8 @scmp_8_128(i128 %x, i128 %y) nounwind {
; CHECK-LABEL: scmp_8_128:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: ldr r4, [sp, #24]
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: ldr r6, [sp, #28]
; CHECK-NEXT: subs r7, r0, r4
; CHECK-NEXT: ldr r12, [sp, #32]
; CHECK-NEXT: sbcs r7, r1, r6
; CHECK-NEXT: ldr lr, [sp, #36]
; CHECK-NEXT: sbcs r7, r2, r12
; CHECK-NEXT: sbcs r7, r3, lr
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: movwlt r7, #1
; CHECK-NEXT: subs r0, r4, r0
; CHECK-NEXT: sbcs r0, r6, r1
; CHECK-NEXT: sbcs r0, r12, r2
; CHECK-NEXT: sbcs r0, lr, r3
; CHECK-NEXT: movwlt r5, #1
; CHECK-NEXT: sub r0, r5, r7
; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
%1 = call i8 @llvm.scmp(i128 %x, i128 %y)
ret i8 %1
}

; llvm.scmp on i32 operands, producing an i32 result.
define i32 @scmp_32_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: scmp_32_32:
; CHECK: @ %bb.0:
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: movwgt r2, #1
; CHECK-NEXT: sub r0, r2, r0
; CHECK-NEXT: bx lr
%1 = call i32 @llvm.scmp(i32 %x, i32 %y)
ret i32 %1
}

; llvm.scmp on i64 operands, producing an i32 result.
define i32 @scmp_32_64(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: scmp_32_64:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: subs lr, r0, r2
; CHECK-NEXT: mov r12, #0
; CHECK-NEXT: sbcs lr, r1, r3
; CHECK-NEXT: mov lr, #0
; CHECK-NEXT: movwlt lr, #1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: sbcs r0, r3, r1
; CHECK-NEXT: movwlt r12, #1
; CHECK-NEXT: sub r0, r12, lr
; CHECK-NEXT: pop {r11, pc}
%1 = call i32 @llvm.scmp(i64 %x, i64 %y)
ret i32 %1
}

; llvm.scmp on i64 operands, producing an i64 result.
define i64 @scmp_64_64(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: scmp_64_64:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: subs lr, r0, r2
; CHECK-NEXT: mov r12, #0
; CHECK-NEXT: sbcs lr, r1, r3
; CHECK-NEXT: mov lr, #0
; CHECK-NEXT: movwlt lr, #1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: sbcs r0, r3, r1
; CHECK-NEXT: movwlt r12, #1
; CHECK-NEXT: sub r0, r12, lr
; CHECK-NEXT: asr r1, r0, #31
; CHECK-NEXT: pop {r11, pc}
%1 = call i64 @llvm.scmp(i64 %x, i64 %y)
ret i64 %1
}
143 changes: 143 additions & 0 deletions llvm/test/CodeGen/ARM/ucmp.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=armv7-unknown-eabi %s -o - | FileCheck %s

; llvm.ucmp on zero-extended i8 operands, producing an i8 result.
define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind {
; CHECK-LABEL: ucmp_8_8:
; CHECK: @ %bb.0:
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlo r0, #1
; CHECK-NEXT: movwhi r2, #1
; CHECK-NEXT: sub r0, r2, r0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
ret i8 %1
}

; llvm.ucmp on zero-extended i16 operands, producing an i8 result.
define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind {
; CHECK-LABEL: ucmp_8_16:
; CHECK: @ %bb.0:
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlo r0, #1
; CHECK-NEXT: movwhi r2, #1
; CHECK-NEXT: sub r0, r2, r0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
ret i8 %1
}

; llvm.ucmp on i32 operands, producing an i8 result.
define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: ucmp_8_32:
; CHECK: @ %bb.0:
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlo r0, #1
; CHECK-NEXT: movwhi r2, #1
; CHECK-NEXT: sub r0, r2, r0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
ret i8 %1
}

; llvm.ucmp on i64 operands, producing an i8 result.
define i8 @ucmp_8_64(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: ucmp_8_64:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: subs lr, r0, r2
; CHECK-NEXT: mov r12, #0
; CHECK-NEXT: sbcs lr, r1, r3
; CHECK-NEXT: mov lr, #0
; CHECK-NEXT: movwlo lr, #1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: sbcs r0, r3, r1
; CHECK-NEXT: movwlo r12, #1
; CHECK-NEXT: sub r0, r12, lr
; CHECK-NEXT: pop {r11, pc}
%1 = call i8 @llvm.ucmp(i64 %x, i64 %y)
ret i8 %1
}

; llvm.ucmp on i128 operands, producing an i8 result.
define i8 @ucmp_8_128(i128 %x, i128 %y) nounwind {
; CHECK-LABEL: ucmp_8_128:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: ldr r4, [sp, #24]
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: ldr r6, [sp, #28]
; CHECK-NEXT: subs r7, r0, r4
; CHECK-NEXT: ldr r12, [sp, #32]
; CHECK-NEXT: sbcs r7, r1, r6
; CHECK-NEXT: ldr lr, [sp, #36]
; CHECK-NEXT: sbcs r7, r2, r12
; CHECK-NEXT: sbcs r7, r3, lr
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: movwlo r7, #1
; CHECK-NEXT: subs r0, r4, r0
; CHECK-NEXT: sbcs r0, r6, r1
; CHECK-NEXT: sbcs r0, r12, r2
; CHECK-NEXT: sbcs r0, lr, r3
; CHECK-NEXT: movwlo r5, #1
; CHECK-NEXT: sub r0, r5, r7
; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
%1 = call i8 @llvm.ucmp(i128 %x, i128 %y)
ret i8 %1
}

; llvm.ucmp on i32 operands, producing an i32 result.
define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: ucmp_32_32:
; CHECK: @ %bb.0:
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlo r0, #1
; CHECK-NEXT: movwhi r2, #1
; CHECK-NEXT: sub r0, r2, r0
; CHECK-NEXT: bx lr
%1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
ret i32 %1
}

; llvm.ucmp on i64 operands, producing an i32 result.
define i32 @ucmp_32_64(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: ucmp_32_64:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: subs lr, r0, r2
; CHECK-NEXT: mov r12, #0
; CHECK-NEXT: sbcs lr, r1, r3
; CHECK-NEXT: mov lr, #0
; CHECK-NEXT: movwlo lr, #1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: sbcs r0, r3, r1
; CHECK-NEXT: movwlo r12, #1
; CHECK-NEXT: sub r0, r12, lr
; CHECK-NEXT: pop {r11, pc}
%1 = call i32 @llvm.ucmp(i64 %x, i64 %y)
ret i32 %1
}

; llvm.ucmp on i64 operands, producing an i64 result.
define i64 @ucmp_64_64(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: ucmp_64_64:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: subs lr, r0, r2
; CHECK-NEXT: mov r12, #0
; CHECK-NEXT: sbcs lr, r1, r3
; CHECK-NEXT: mov lr, #0
; CHECK-NEXT: movwlo lr, #1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: sbcs r0, r3, r1
; CHECK-NEXT: movwlo r12, #1
; CHECK-NEXT: sub r0, r12, lr
; CHECK-NEXT: asr r1, r0, #31
; CHECK-NEXT: pop {r11, pc}
%1 = call i64 @llvm.ucmp(i64 %x, i64 %y)
ret i64 %1
}
Loading
Loading