Skip to content

Commit

Permalink
[x86] Fix infinite loop inside DAG combiner with lzcnt feature.
Browse files Browse the repository at this point in the history
The issue affects targets supporting fast-lzcnt such as btver2.
This removes extraneous zext/trunc node insertions to fix the infinite
loop.
This fixes Issue llvm#54694

Differential Revision: https://reviews.llvm.org/D122900

Reviewed By: RKSimon, spatel, lebedev.ri

(cherry picked from commit a3d5f1c)
Signed-off-by: Warren Ristow <[email protected]>

In https://reviews.llvm.org/D122900 a new function (to exercise the
infinite-loop bug) was added to llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll.
In applying the fix in the main branch, two previously existing
functions in that test also changed behavior slightly, and in the review
it was noted:
    The instructions generated end up being reordered in some cases
    but I think it is equivalent.
That reordering did not happen in those pre-existing functions when
applying the fix to the slightly older code-base of the llvm14 branch,
and so they are suppressed here.  So the updated version of the test in
this commit has the additional function added to it, but it is otherwise
identical to the previous llvm14 version of the test.
  • Loading branch information
goussepi authored and tstellar committed Apr 21, 2022
1 parent 0fbe860 commit b83c4a2
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 13 deletions.
21 changes: 8 additions & 13 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47110,8 +47110,7 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
// into:
// srl(ctlz x), log2(bitsize(x))
// Input pattern is checked by caller.
static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy,
SelectionDAG &DAG) {
static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) {
SDValue Cmp = Op.getOperand(1);
EVT VT = Cmp.getOperand(0).getValueType();
unsigned Log2b = Log2_32(VT.getSizeInBits());
Expand All @@ -47122,7 +47121,7 @@ static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy,
SDValue Trunc = DAG.getZExtOrTrunc(Clz, dl, MVT::i32);
SDValue Scc = DAG.getNode(ISD::SRL, dl, MVT::i32, Trunc,
DAG.getConstant(Log2b, dl, MVT::i8));
return DAG.getZExtOrTrunc(Scc, dl, ExtTy);
return Scc;
}

// Try to transform:
Expand Down Expand Up @@ -47182,11 +47181,10 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
// or(srl(ctlz),srl(ctlz)).
// The dag combiner can then fold it into:
// srl(or(ctlz, ctlz)).
EVT VT = OR->getValueType(0);
SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, VT, DAG);
SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, DAG);
SDValue Ret, NewRHS;
if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG)))
Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, NewLHS, NewRHS);
if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, DAG)))
Ret = DAG.getNode(ISD::OR, SDLoc(OR), MVT::i32, NewLHS, NewRHS);

if (!Ret)
return SDValue();
Expand All @@ -47199,16 +47197,13 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
// Swap rhs with lhs to match or(setcc(eq, cmp, 0), or).
if (RHS->getOpcode() == ISD::OR)
std::swap(LHS, RHS);
NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG);
NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, DAG);
if (!NewRHS)
return SDValue();
Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, Ret, NewRHS);
Ret = DAG.getNode(ISD::OR, SDLoc(OR), MVT::i32, Ret, NewRHS);
}

if (Ret)
Ret = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);

return Ret;
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);
}

static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R,
Expand Down
34 changes: 34 additions & 0 deletions llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -335,3 +335,37 @@ entry:
%conv = zext i1 %0 to i32
ret i32 %conv
}

; PR54694 Fix an infinite loop in DAG combiner.
define i32 @test_zext_cmp12(i32 %0, i32 %1) {
; FASTLZCNT-LABEL: test_zext_cmp12:
; FASTLZCNT: # %bb.0:
; FASTLZCNT-NEXT: andl $131072, %edi # imm = 0x20000
; FASTLZCNT-NEXT: andl $131072, %esi # imm = 0x20000
; FASTLZCNT-NEXT: lzcntl %edi, %eax
; FASTLZCNT-NEXT: lzcntl %esi, %ecx
; FASTLZCNT-NEXT: orl %eax, %ecx
; FASTLZCNT-NEXT: movl $2, %eax
; FASTLZCNT-NEXT: shrl $5, %ecx
; FASTLZCNT-NEXT: subl %ecx, %eax
; FASTLZCNT-NEXT: retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp12:
; NOFASTLZCNT: # %bb.0:
; NOFASTLZCNT-NEXT: testl $131072, %edi # imm = 0x20000
; NOFASTLZCNT-NEXT: sete %al
; NOFASTLZCNT-NEXT: testl $131072, %esi # imm = 0x20000
; NOFASTLZCNT-NEXT: sete %cl
; NOFASTLZCNT-NEXT: orb %al, %cl
; NOFASTLZCNT-NEXT: movl $2, %eax
; NOFASTLZCNT-NEXT: movzbl %cl, %ecx
; NOFASTLZCNT-NEXT: subl %ecx, %eax
; NOFASTLZCNT-NEXT: retq
%3 = and i32 %0, 131072
%4 = icmp eq i32 %3, 0
%5 = and i32 %1, 131072
%6 = icmp eq i32 %5, 0
%7 = select i1 %4, i1 true, i1 %6
%8 = select i1 %7, i32 1, i32 2
ret i32 %8
}

0 comments on commit b83c4a2

Please sign in to comment.