From 62a2a080dc2cbb56d0a63720e32ad05401416750 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Thu, 4 Jul 2024 20:54:43 +0100
Subject: [PATCH] [X86] matchAddressRecursively - don't fold zext(shl(x,c)) -> shl(zext(x),c) if the pattern has multiple uses

Fixes #97533, a crash where the root node still referenced the original
zext node after we had deleted it. Hopefully I can come up with a better
solution, but the codegen changes don't look too bad at the moment (they
pull a shift out of some complex LEA nodes that shared the scaled index).
---
 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp      |  2 +-
 llvm/test/CodeGen/X86/addr-mode-matcher-3.ll | 28 ++++++++++++++++++++
 llvm/test/CodeGen/X86/sttni.ll               | 20 ++++++++------
 llvm/test/CodeGen/X86/var-permute-128.ll     | 10 ++++---
 4 files changed, 47 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index f2b3855ebc5445..c91bd576dc9f6a 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2745,7 +2745,7 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
       Src = Src.getOperand(0);
     }

-    if (Src.getOpcode() == ISD::SHL && Src.hasOneUse()) {
+    if (Src.getOpcode() == ISD::SHL && Src.hasOneUse() && N->hasOneUse()) {
       // Give up if the shift is not a valid scale factor [1,2,3].
       SDValue ShlSrc = Src.getOperand(0);
       SDValue ShlAmt = Src.getOperand(1);
diff --git a/llvm/test/CodeGen/X86/addr-mode-matcher-3.ll b/llvm/test/CodeGen/X86/addr-mode-matcher-3.ll
index daa521d3917cdf..0c7275ec28677b 100644
--- a/llvm/test/CodeGen/X86/addr-mode-matcher-3.ll
+++ b/llvm/test/CodeGen/X86/addr-mode-matcher-3.ll
@@ -70,3 +70,31 @@ define i32 @mask_offset_scale_zext_i32_i64(ptr %base, i32 %i) {
   %load = load i32, ptr %arrayidx, align 4
   ret i32 %load
 }
+
+; PR97533 - multiple uses of shl node (add + gep) in the same dependency chain.
+define i64 @add_shl_zext(ptr %ptr, i8 %arg) nounwind {
+; X86-LABEL: add_shl_zext:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl 4(%esi,%ecx,4), %edx
+; X86-NEXT:    leal (,%ecx,8), %eax
+; X86-NEXT:    addl (%esi,%ecx,4), %eax
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; X64-LABEL: add_shl_zext:
+; X64:       # %bb.0:
+; X64-NEXT:    movzbl %sil, %eax
+; X64-NEXT:    shll $3, %eax
+; X64-NEXT:    addq (%rdi,%rax), %rax
+; X64-NEXT:    retq
+  %idx = zext i8 %arg to i64
+  %gep = getelementptr ptr, ptr %ptr, i64 %idx
+  %val = load i64, ptr %gep, align 8
+  %shl = shl i64 %idx, 3
+  %sum = add i64 %val, %shl
+  ret i64 %sum
+}
diff --git a/llvm/test/CodeGen/X86/sttni.ll b/llvm/test/CodeGen/X86/sttni.ll
index 870912bb6bb1be..39cbee54737c38 100644
--- a/llvm/test/CodeGen/X86/sttni.ll
+++ b/llvm/test/CodeGen/X86/sttni.ll
@@ -341,9 +341,10 @@ define i32 @pcmpestri_reg_diff_i16(<8 x i16> %lhs, i32 %lhs_len, <8 x i16> %rhs,
 ; X64-NEXT:  .LBB8_2: # %compare
 ; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    andl $7, %ecx
-; X64-NEXT:    movzwl -24(%rsp,%rcx,2), %eax
+; X64-NEXT:    addl %ecx, %ecx
+; X64-NEXT:    movzwl -24(%rsp,%rcx), %eax
 ; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    subw -40(%rsp,%rcx,2), %ax
+; X64-NEXT:    subw -40(%rsp,%rcx), %ax
 ; X64-NEXT:    movzwl %ax, %eax
 ; X64-NEXT:    retq
 entry:
@@ -481,9 +482,10 @@ define i32 @pcmpestri_mem_diff_i16(ptr %lhs_ptr, i32 %lhs_len, ptr %rhs_ptr, i32
 ; X64-NEXT:  .LBB11_2: # %compare
 ; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    andl $7, %ecx
-; X64-NEXT:    movzwl -24(%rsp,%rcx,2), %eax
+; X64-NEXT:    addl %ecx, %ecx
+; X64-NEXT:    movzwl -24(%rsp,%rcx), %eax
 ; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    subw -40(%rsp,%rcx,2), %ax
+; X64-NEXT:    subw -40(%rsp,%rcx), %ax
 ; X64-NEXT:    movzwl %ax, %eax
 ; X64-NEXT:    retq
 entry:
@@ -795,9 +797,10 @@ define i32 @pcmpistri_reg_diff_i16(<8 x i16> %lhs, <8 x i16> %rhs) nounwind {
 ; X64-NEXT:  .LBB20_2: # %compare
 ; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    andl $7, %ecx
-; X64-NEXT:    movzwl -24(%rsp,%rcx,2), %eax
+; X64-NEXT:    addl %ecx, %ecx
+; X64-NEXT:    movzwl -24(%rsp,%rcx), %eax
 ; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    subw -40(%rsp,%rcx,2), %ax
+; X64-NEXT:    subw -40(%rsp,%rcx), %ax
 ; X64-NEXT:    movzwl %ax, %eax
 ; X64-NEXT:    retq
 entry:
@@ -915,9 +918,10 @@ define i32 @pcmpistri_mem_diff_i16(ptr %lhs_ptr, ptr %rhs_ptr) nounwind {
 ; X64-NEXT:  .LBB23_2: # %compare
 ; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    andl $7, %ecx
-; X64-NEXT:    movzwl -24(%rsp,%rcx,2), %eax
+; X64-NEXT:    addl %ecx, %ecx
+; X64-NEXT:    movzwl -24(%rsp,%rcx), %eax
 ; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    subw -40(%rsp,%rcx,2), %ax
+; X64-NEXT:    subw -40(%rsp,%rcx), %ax
 ; X64-NEXT:    movzwl %ax, %eax
 ; X64-NEXT:    retq
 entry:
diff --git a/llvm/test/CodeGen/X86/var-permute-128.ll b/llvm/test/CodeGen/X86/var-permute-128.ll
index 46d253569be4db..6350344457d87a 100644
--- a/llvm/test/CodeGen/X86/var-permute-128.ll
+++ b/llvm/test/CodeGen/X86/var-permute-128.ll
@@ -1108,8 +1108,9 @@ define void @indices_convert() {
 ; SSE3-NEXT:    movaps %xmm0, -56(%rsp)
 ; SSE3-NEXT:    movaps %xmm0, -72(%rsp)
 ; SSE3-NEXT:    andl $3, %eax
-; SSE3-NEXT:    movsd -72(%rsp,%rax,8), %xmm0 # xmm0 = mem[0],zero
-; SSE3-NEXT:    movsd -40(%rsp,%rax,8), %xmm1 # xmm1 = mem[0],zero
+; SSE3-NEXT:    shll $3, %eax
+; SSE3-NEXT:    movsd -72(%rsp,%rax), %xmm0 # xmm0 = mem[0],zero
+; SSE3-NEXT:    movsd -40(%rsp,%rax), %xmm1 # xmm1 = mem[0],zero
 ; SSE3-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; SSE3-NEXT:    movups %xmm1, (%rax)
 ; SSE3-NEXT:    retq
@@ -1123,8 +1124,9 @@ define void @indices_convert() {
 ; SSSE3-NEXT:    movaps %xmm0, -56(%rsp)
 ; SSSE3-NEXT:    movaps %xmm0, -72(%rsp)
 ; SSSE3-NEXT:    andl $3, %eax
-; SSSE3-NEXT:    movsd -72(%rsp,%rax,8), %xmm0 # xmm0 = mem[0],zero
-; SSSE3-NEXT:    movsd -40(%rsp,%rax,8), %xmm1 # xmm1 = mem[0],zero
+; SSSE3-NEXT:    shll $3, %eax
+; SSSE3-NEXT:    movsd -72(%rsp,%rax), %xmm0 # xmm0 = mem[0],zero
+; SSSE3-NEXT:    movsd -40(%rsp,%rax), %xmm1 # xmm1 = mem[0],zero
 ; SSSE3-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; SSSE3-NEXT:    movups %xmm1, (%rax)
 ; SSSE3-NEXT:    retq
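
For contrast, here is a minimal single-use sketch in the style of the
addr-mode-matcher-3.ll tests (illustrative only, not part of the patch; the
function name and constants are made up). Because both the shl and the zext
have exactly one use, the guarded fold should still fire and the x4 scale
should fold straight into the addressing mode:

define i32 @zext_shl_single_use(ptr %base, i32 %i) {
  ; Masking clears the high bits, so the narrow shift below cannot shift
  ; out nonzero bits and is safe to widen past the zext.
  %mask = and i32 %i, 1023
  ; Single use (the zext), so Src.hasOneUse() holds.
  %scale = shl i32 %mask, 2
  ; Single use (the gep), so the new N->hasOneUse() check holds too.
  %idx = zext i32 %scale to i64
  %gep = getelementptr inbounds i8, ptr %base, i64 %idx
  %load = load i32, ptr %gep, align 4
  ret i32 %load
}

In add_shl_zext above, by contrast, the zext feeds both the gep and the
shl/add chain, so the fold now bails out: the shift is materialized once and
shared, instead of risking deletion of a zext node the root still references.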