From b83c4a2dc0fb620761301f57e92118a3971f66cd Mon Sep 17 00:00:00 2001 From: Pierre Gousseau Date: Tue, 5 Apr 2022 17:30:25 +0100 Subject: [PATCH] [x86] Fix infinite loop inside DAG combiner with lzcnt feature. The issue affects targets supporting fast-lzcnt such as btver2. This removes extraneous zext/trunc node insertions to fix the infinite loop. This fixes Issue https://github.com/llvm/llvm-project/issues/54694 Differential Revision: https://reviews.llvm.org/D122900 Reviewed By: RKSimon, spatel, lebedev.ri (cherry picked from commit a3d5f1cf5d88dfbbed931951e07f328d5ceba510) Signed-off-by: Warren Ristow In https://reviews.llvm.org/D122900 a new function (to exercise the infinite-loop bug) was added to llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll. In applying the fix in the main branch, two previously existing functions in that test also changed behavior slightly, and in the review it was noted: The instructions generated end up being reordered in some cases but I think it is equivalent. That reordering did not happen in those pre-existing functions when applying the fix to the slightly older code-base of the llvm14 branch, and so they are suppressed here. So the updated version of the test in this commit has the additional function added to it, but it is otherwise identical to the previous llvm14 version of the test. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 21 ++++++--------- llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll | 34 +++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 600d146cb1245a..682932b8f3e66c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -47110,8 +47110,7 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG, // into: // srl(ctlz x), log2(bitsize(x)) // Input pattern is checked by caller. -static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy, - SelectionDAG &DAG) { +static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) { SDValue Cmp = Op.getOperand(1); EVT VT = Cmp.getOperand(0).getValueType(); unsigned Log2b = Log2_32(VT.getSizeInBits()); @@ -47122,7 +47121,7 @@ static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy, SDValue Trunc = DAG.getZExtOrTrunc(Clz, dl, MVT::i32); SDValue Scc = DAG.getNode(ISD::SRL, dl, MVT::i32, Trunc, DAG.getConstant(Log2b, dl, MVT::i8)); - return DAG.getZExtOrTrunc(Scc, dl, ExtTy); + return Scc; } // Try to transform: @@ -47182,11 +47181,10 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG, // or(srl(ctlz),srl(ctlz)). // The dag combiner can then fold it into: // srl(or(ctlz, ctlz)). - EVT VT = OR->getValueType(0); - SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, VT, DAG); + SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, DAG); SDValue Ret, NewRHS; - if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG))) - Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, NewLHS, NewRHS); + if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, DAG))) + Ret = DAG.getNode(ISD::OR, SDLoc(OR), MVT::i32, NewLHS, NewRHS); if (!Ret) return SDValue(); @@ -47199,16 +47197,13 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG, // Swap rhs with lhs to match or(setcc(eq, cmp, 0), or). if (RHS->getOpcode() == ISD::OR) std::swap(LHS, RHS); - NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG); + NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, DAG); if (!NewRHS) return SDValue(); - Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, Ret, NewRHS); + Ret = DAG.getNode(ISD::OR, SDLoc(OR), MVT::i32, Ret, NewRHS); } - if (Ret) - Ret = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret); - - return Ret; + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret); } static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R, diff --git a/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll b/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll index 0c5450a59e4223..e291a1923f1dc2 100644 --- a/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll +++ b/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll @@ -335,3 +335,37 @@ entry: %conv = zext i1 %0 to i32 ret i32 %conv } + +; PR54694 Fix an infinite loop in DAG combiner. +define i32 @test_zext_cmp12(i32 %0, i32 %1) { +; FASTLZCNT-LABEL: test_zext_cmp12: +; FASTLZCNT: # %bb.0: +; FASTLZCNT-NEXT: andl $131072, %edi # imm = 0x20000 +; FASTLZCNT-NEXT: andl $131072, %esi # imm = 0x20000 +; FASTLZCNT-NEXT: lzcntl %edi, %eax +; FASTLZCNT-NEXT: lzcntl %esi, %ecx +; FASTLZCNT-NEXT: orl %eax, %ecx +; FASTLZCNT-NEXT: movl $2, %eax +; FASTLZCNT-NEXT: shrl $5, %ecx +; FASTLZCNT-NEXT: subl %ecx, %eax +; FASTLZCNT-NEXT: retq +; +; NOFASTLZCNT-LABEL: test_zext_cmp12: +; NOFASTLZCNT: # %bb.0: +; NOFASTLZCNT-NEXT: testl $131072, %edi # imm = 0x20000 +; NOFASTLZCNT-NEXT: sete %al +; NOFASTLZCNT-NEXT: testl $131072, %esi # imm = 0x20000 +; NOFASTLZCNT-NEXT: sete %cl +; NOFASTLZCNT-NEXT: orb %al, %cl +; NOFASTLZCNT-NEXT: movl $2, %eax +; NOFASTLZCNT-NEXT: movzbl %cl, %ecx +; NOFASTLZCNT-NEXT: subl %ecx, %eax +; NOFASTLZCNT-NEXT: retq + %3 = and i32 %0, 131072 + %4 = icmp eq i32 %3, 0 + %5 = and i32 %1, 131072 + %6 = icmp eq i32 %5, 0 + %7 = select i1 %4, i1 true, i1 %6 + %8 = select i1 %7, i32 1, i32 2 + ret i32 %8 +}