Skip to content

Commit

Permalink
[X86] LowerABD - simplify i32/i64 to use sub+sub+cmov instead of repeating nodes via abs (#102174)
Browse files Browse the repository at this point in the history

Using X86ISD::SUB nodes directly allows us to drive the X86ISD::CMOV node with exact flags instead of trying to clean up the generic codegen via ICMP/SUBO nodes.
  • Loading branch information
RKSimon authored Aug 19, 2024
1 parent 0cc6b46 commit b05c554
Show file tree
Hide file tree
Showing 5 changed files with 197 additions and 257 deletions.
20 changes: 17 additions & 3 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28443,13 +28443,27 @@ static SDValue LowerABD(SDValue Op, const X86Subtarget &Subtarget,
bool IsSigned = Op.getOpcode() == ISD::ABDS;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();

// TODO: Move to TargetLowering expandABD() once we have ABD promotion.
if (VT.isScalarInteger()) {
// abds(lhs, rhs) -> select(slt(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs))
// abdu(lhs, rhs) -> select(ult(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs))
if (Subtarget.canUseCMOV() && VT.bitsGE(MVT::i32)) {
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
X86::CondCode CC = IsSigned ? X86::COND_L : X86::COND_B;
SDValue LHS = DAG.getFreeze(Op.getOperand(0));
SDValue RHS = DAG.getFreeze(Op.getOperand(1));
SDValue Diff0 = DAG.getNode(X86ISD::SUB, dl, VTs, LHS, RHS);
SDValue Diff1 = DAG.getNode(X86ISD::SUB, dl, VTs, RHS, LHS);
return DAG.getNode(X86ISD::CMOV, dl, VT, Diff1, Diff0,
DAG.getTargetConstant(CC, dl, MVT::i8),
Diff1.getValue(1));
}

// TODO: Move to TargetLowering expandABD() once we have ABD promotion.
// abds(lhs, rhs) -> trunc(abs(sub(sext(lhs), sext(rhs))))
// abdu(lhs, rhs) -> trunc(abs(sub(zext(lhs), zext(rhs))))
unsigned WideBits = std::max<unsigned>(2 * VT.getScalarSizeInBits(), 32u);
MVT WideVT = MVT::getIntegerVT(WideBits);
if (TLI.isTypeLegal(WideVT)) {
// abds(lhs, rhs) -> trunc(abs(sub(sext(lhs), sext(rhs))))
// abdu(lhs, rhs) -> trunc(abs(sub(zext(lhs), zext(rhs))))
unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
SDValue LHS = DAG.getNode(ExtOpc, dl, WideVT, Op.getOperand(0));
SDValue RHS = DAG.getNode(ExtOpc, dl, WideVT, Op.getOperand(1));
Expand Down
99 changes: 41 additions & 58 deletions llvm/test/CodeGen/X86/abds-neg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -139,28 +139,25 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; X86-LABEL: abd_ext_i16_i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: subl %ecx, %edx
; X86-NEXT: negl %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovlel %edx, %eax
; X86-NEXT: cmovll %edx, %eax
; X86-NEXT: negl %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i16_i32:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: movswq %di, %rcx
; X64-NEXT: movslq %esi, %rax
; X64-NEXT: subq %rax, %rcx
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: negq %rax
; X64-NEXT: cmovsq %rcx, %rax
; X64-NEXT: movswl %di, %ecx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: subl %esi, %eax
; X64-NEXT: subl %ecx, %esi
; X64-NEXT: cmovgel %esi, %eax
; X64-NEXT: negl %eax
; X64-NEXT: # kill: def $ax killed $ax killed $rax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%aext = sext i16 %a to i64
%bext = sext i32 %b to i64
Expand Down Expand Up @@ -205,26 +202,22 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; X86-LABEL: abd_ext_i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: subl %ecx, %edx
; X86-NEXT: negl %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovlel %edx, %eax
; X86-NEXT: cmovll %edx, %eax
; X86-NEXT: negl %eax
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i32:
; X64: # %bb.0:
; X64-NEXT: movslq %esi, %rax
; X64-NEXT: movslq %edi, %rcx
; X64-NEXT: subq %rax, %rcx
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: negq %rax
; X64-NEXT: cmovsq %rcx, %rax
; X64-NEXT: movl %edi, %eax
; X64-NEXT: subl %esi, %eax
; X64-NEXT: subl %edi, %esi
; X64-NEXT: cmovgel %esi, %eax
; X64-NEXT: negl %eax
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
Expand All @@ -238,27 +231,23 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; X86-LABEL: abd_ext_i32_i16:
; X86: # %bb.0:
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: subl %ecx, %edx
; X86-NEXT: negl %edx
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovlel %edx, %eax
; X86-NEXT: cmovll %edx, %eax
; X86-NEXT: negl %eax
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i32_i16:
; X64: # %bb.0:
; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: movswq %si, %rax
; X64-NEXT: movslq %edi, %rcx
; X64-NEXT: subq %rax, %rcx
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: negq %rax
; X64-NEXT: cmovsq %rcx, %rax
; X64-NEXT: movswl %si, %eax
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: subl %eax, %ecx
; X64-NEXT: subl %edi, %eax
; X64-NEXT: cmovll %ecx, %eax
; X64-NEXT: negl %eax
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%aext = sext i32 %a to i64
%bext = sext i16 %b to i64
Expand All @@ -272,26 +261,22 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; X86-LABEL: abd_ext_i32_undef:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: subl %ecx, %edx
; X86-NEXT: negl %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovlel %edx, %eax
; X86-NEXT: cmovll %edx, %eax
; X86-NEXT: negl %eax
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i32_undef:
; X64: # %bb.0:
; X64-NEXT: movslq %esi, %rax
; X64-NEXT: movslq %edi, %rcx
; X64-NEXT: subq %rax, %rcx
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: negq %rax
; X64-NEXT: cmovsq %rcx, %rax
; X64-NEXT: movl %edi, %eax
; X64-NEXT: subl %esi, %eax
; X64-NEXT: subl %edi, %esi
; X64-NEXT: cmovgel %esi, %eax
; X64-NEXT: negl %eax
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
Expand Down Expand Up @@ -332,9 +317,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: subq %rsi, %rax
; X64-NEXT: negq %rax
; X64-NEXT: subq %rsi, %rdi
; X64-NEXT: cmovgq %rdi, %rax
; X64-NEXT: subq %rdi, %rsi
; X64-NEXT: cmovgeq %rsi, %rax
; X64-NEXT: negq %rax
; X64-NEXT: retq
%aext = sext i64 %a to i128
Expand Down Expand Up @@ -376,9 +360,8 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: subq %rsi, %rax
; X64-NEXT: negq %rax
; X64-NEXT: subq %rsi, %rdi
; X64-NEXT: cmovgq %rdi, %rax
; X64-NEXT: subq %rdi, %rsi
; X64-NEXT: cmovgeq %rsi, %rax
; X64-NEXT: negq %rax
; X64-NEXT: retq
%aext = sext i64 %a to i128
Expand Down
Loading

0 comments on commit b05c554

Please sign in to comment.