From b83c4a2dc0fb620761301f57e92118a3971f66cd Mon Sep 17 00:00:00 2001
From: Pierre Gousseau <pierre.gousseau@sony.com>
Date: Tue, 5 Apr 2022 17:30:25 +0100
Subject: [PATCH] [x86] Fix infinite loop inside DAG combiner with lzcnt
 feature.

The issue affects targets supporting fast-lzcnt such as btver2.
This removes extraneous zext/trunc node insertions to fix the infinite
loop.
This fixes Issue https://github.com/llvm/llvm-project/issues/54694

Differential Revision: https://reviews.llvm.org/D122900

Reviewed By: RKSimon, spatel, lebedev.ri

(cherry picked from commit a3d5f1cf5d88dfbbed931951e07f328d5ceba510)
Signed-off-by: Warren Ristow <warren.ristow@sony.com>

In https://reviews.llvm.org/D122900 a new function (to exercise the
infinite-loop bug) was added to llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll.
In applying the fix in the main branch, two previously existing
functions in that test also changed behavior slightly, and in the review
it was noted:
    The instructions generated end up being reordered in some cases
    but I think it is equivalent.
That reordering does not occur in those pre-existing functions when the
fix is applied to the slightly older code base of the llvm14 branch, so
the corresponding test changes are omitted here.  The updated version of
the test in this commit therefore only gains the additional function; it
is otherwise identical to the previous llvm14 version of the test.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 21 ++++++---------
 llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll | 34 +++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 600d146cb1245a..682932b8f3e66c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47110,8 +47110,7 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
 //   into:
 //   srl(ctlz x), log2(bitsize(x))
 // Input pattern is checked by caller.
-static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy,
-                                          SelectionDAG &DAG) {
+static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) {
   SDValue Cmp = Op.getOperand(1);
   EVT VT = Cmp.getOperand(0).getValueType();
   unsigned Log2b = Log2_32(VT.getSizeInBits());
@@ -47122,7 +47121,7 @@ static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy,
   SDValue Trunc = DAG.getZExtOrTrunc(Clz, dl, MVT::i32);
   SDValue Scc = DAG.getNode(ISD::SRL, dl, MVT::i32, Trunc,
                             DAG.getConstant(Log2b, dl, MVT::i8));
-  return DAG.getZExtOrTrunc(Scc, dl, ExtTy);
+  return Scc;
 }
 
 // Try to transform:
@@ -47182,11 +47181,10 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
   // or(srl(ctlz),srl(ctlz)).
   // The dag combiner can then fold it into:
   // srl(or(ctlz, ctlz)).
-  EVT VT = OR->getValueType(0);
-  SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, VT, DAG);
+  SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, DAG);
   SDValue Ret, NewRHS;
-  if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG)))
-    Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, NewLHS, NewRHS);
+  if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, DAG)))
+    Ret = DAG.getNode(ISD::OR, SDLoc(OR), MVT::i32, NewLHS, NewRHS);
 
   if (!Ret)
     return SDValue();
@@ -47199,16 +47197,13 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
     // Swap rhs with lhs to match or(setcc(eq, cmp, 0), or).
     if (RHS->getOpcode() == ISD::OR)
       std::swap(LHS, RHS);
-    NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG);
+    NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, DAG);
     if (!NewRHS)
       return SDValue();
-    Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, Ret, NewRHS);
+    Ret = DAG.getNode(ISD::OR, SDLoc(OR), MVT::i32, Ret, NewRHS);
   }
 
-  if (Ret)
-    Ret = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);
-
-  return Ret;
+  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);
 }
 
 static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R,
diff --git a/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll b/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll
index 0c5450a59e4223..e291a1923f1dc2 100644
--- a/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll
+++ b/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll
@@ -335,3 +335,37 @@ entry:
   %conv = zext i1 %0 to i32
   ret i32 %conv
 }
+
+; PR54694 Fix an infinite loop in DAG combiner.
+define i32 @test_zext_cmp12(i32 %0, i32 %1) {
+; FASTLZCNT-LABEL: test_zext_cmp12:
+; FASTLZCNT:       # %bb.0:
+; FASTLZCNT-NEXT:    andl $131072, %edi # imm = 0x20000
+; FASTLZCNT-NEXT:    andl $131072, %esi # imm = 0x20000
+; FASTLZCNT-NEXT:    lzcntl %edi, %eax
+; FASTLZCNT-NEXT:    lzcntl %esi, %ecx
+; FASTLZCNT-NEXT:    orl %eax, %ecx
+; FASTLZCNT-NEXT:    movl $2, %eax
+; FASTLZCNT-NEXT:    shrl $5, %ecx
+; FASTLZCNT-NEXT:    subl %ecx, %eax
+; FASTLZCNT-NEXT:    retq
+;
+; NOFASTLZCNT-LABEL: test_zext_cmp12:
+; NOFASTLZCNT:       # %bb.0:
+; NOFASTLZCNT-NEXT:    testl $131072, %edi # imm = 0x20000
+; NOFASTLZCNT-NEXT:    sete %al
+; NOFASTLZCNT-NEXT:    testl $131072, %esi # imm = 0x20000
+; NOFASTLZCNT-NEXT:    sete %cl
+; NOFASTLZCNT-NEXT:    orb %al, %cl
+; NOFASTLZCNT-NEXT:    movl $2, %eax
+; NOFASTLZCNT-NEXT:    movzbl %cl, %ecx
+; NOFASTLZCNT-NEXT:    subl %ecx, %eax
+; NOFASTLZCNT-NEXT:    retq
+  %3 = and i32 %0, 131072
+  %4 = icmp eq i32 %3, 0
+  %5 = and i32 %1, 131072
+  %6 = icmp eq i32 %5, 0
+  %7 = select i1 %4, i1 true, i1 %6
+  %8 = select i1 %7, i32 1, i32 2
+  ret i32 %8
+}