From f92bfca9fc217cad9026598ef6755e711c0be070 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Fri, 5 Jul 2024 16:01:00 +0100 Subject: [PATCH] [AArch64] All bits of an exact right shift are demanded (#97448) When building a vector which contains zero elements, the AArch64 ISel replaces those elements with `undef`, if they are right shifted out. However, these elements need to stay zero if the right shift is exact, or otherwise we will be introducing undefined behavior. Should allow https://github.com/llvm/llvm-project/pull/92528 to be recommitted. --- .../Target/AArch64/AArch64ISelLowering.cpp | 4 +++ .../AArch64/shr-exact-demanded-bits.ll | 35 +++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/shr-exact-demanded-bits.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index e0c3cc5eddb827..341cf51173ccc2 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -22142,6 +22142,10 @@ static SDValue performVectorShiftCombine(SDNode *N, if (DCI.DAG.ComputeNumSignBits(Op.getOperand(0)) > ShiftImm) return Op.getOperand(0); + // If the shift is exact, the shifted out bits matter. 
+ if (N->getFlags().hasExact()) + return SDValue(); + APInt ShiftedOutBits = APInt::getLowBitsSet(OpScalarSize, ShiftImm); APInt DemandedMask = ~ShiftedOutBits; diff --git a/llvm/test/CodeGen/AArch64/shr-exact-demanded-bits.ll b/llvm/test/CodeGen/AArch64/shr-exact-demanded-bits.ll new file mode 100644 index 00000000000000..9698626aea655d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/shr-exact-demanded-bits.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s | FileCheck %s +target triple = "aarch64-linux" + +define <2 x i32> @f(i8 %0, i8 %1) { +; CHECK-LABEL: f: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.b[3], w0 +; CHECK-NEXT: mov v0.b[7], w1 +; CHECK-NEXT: sshr v0.2s, v0.2s, #24 +; CHECK-NEXT: ret + %3 = insertelement <2 x i8> poison, i8 %0, i64 0 + %4 = insertelement <2 x i8> %3, i8 %1, i64 1 + %5 = shufflevector <2 x i8> %4, <2 x i8> <i8 0, i8 poison>, <8 x i32> <i32 2, i32 2, i32 2, i32 0, i32 2, i32 2, i32 2, i32 1> + %6 = bitcast <8 x i8> %5 to <2 x i32> + %7 = ashr exact <2 x i32> %6, <i32 24, i32 24> + ret <2 x i32> %7 +} + +define <2 x i32> @g(i8 %0, i8 %1) { +; CHECK-LABEL: g: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.b[3], w0 +; CHECK-NEXT: mov v0.b[7], w1 +; CHECK-NEXT: ushr v0.2s, v0.2s, #24 +; CHECK-NEXT: ret + %3 = insertelement <2 x i8> poison, i8 %0, i64 0 + %4 = insertelement <2 x i8> %3, i8 %1, i64 1 + %5 = shufflevector <2 x i8> %4, <2 x i8> <i8 0, i8 poison>, <8 x i32> <i32 2, i32 2, i32 2, i32 0, i32 2, i32 2, i32 2, i32 1> + %6 = bitcast <8 x i8> %5 to <2 x i32> + %7 = lshr exact <2 x i32> %6, <i32 24, i32 24> + ret <2 x i32> %7 +}