From 613544c5f83560328924ac14c5c71e1c441cb00c Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Tue, 16 Jul 2024 14:42:25 +0100 Subject: [PATCH] Reapply "[Intrinsics][PreISelInstrinsicLowering] llvm.memcpy.inline length no longer needs to be constant (#98281)" Summary: This reverts commit ac4b6b662630cd4d3bf6929f2b39ea203c0054a1. A test change was missing for mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir in the initial commit. Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60251743 --- llvm/docs/LangRef.rst | 2 +- llvm/include/llvm/IR/IntrinsicInst.h | 3 -- llvm/include/llvm/IR/Intrinsics.td | 3 +- llvm/lib/Analysis/Lint.cpp | 20 +------- llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 16 ++++++ .../X86/memcpy-inline-non-constant-len.ll | 49 +++++++++++++++++++ llvm/test/Verifier/intrinsic-immarg.ll | 8 --- .../test/Target/LLVMIR/llvmir-intrinsics.mlir | 4 +- 8 files changed, 71 insertions(+), 34 deletions(-) create mode 100644 llvm/test/Transforms/PreISelIntrinsicLowering/X86/memcpy-inline-non-constant-len.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index a04b5769f095fb..40c8b7f7695968 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -15026,7 +15026,7 @@ Arguments: """""""""" The first argument is a pointer to the destination, the second is a -pointer to the source. The third argument is a constant integer argument +pointer to the source. The third argument is an integer argument specifying the number of bytes to copy, and the fourth is a boolean indicating a volatile access. diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index a2ecf625ff61aa..fe3f92da400f8a 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -1296,9 +1296,6 @@ class MemMoveInst : public MemTransferInst { /// This class wraps the llvm.memcpy.inline intrinsic. class MemCpyInlineInst : public MemCpyInst { public: - ConstantInt *getLength() const { - return cast(MemCpyInst::getLength()); - } // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::memcpy_inline; diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 01e379dfcebcad..9d04256d593178 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -966,7 +966,6 @@ def int_memcpy : Intrinsic<[], // Memcpy semantic that is guaranteed to be inlined. // In particular this means that the generated code is not allowed to call any // external function. -// The third argument (specifying the size) must be a constant. def int_memcpy_inline : Intrinsic<[], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty], @@ -974,7 +973,7 @@ def int_memcpy_inline NoCapture>, NoCapture>, NoAlias>, NoAlias>, WriteOnly>, ReadOnly>, - ImmArg>, ImmArg>]>; + ImmArg>]>; def int_memmove : Intrinsic<[], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp index 496308a0c247a4..a44d5a3bbe462a 100644 --- a/llvm/lib/Analysis/Lint.cpp +++ b/llvm/lib/Analysis/Lint.cpp @@ -290,7 +290,8 @@ void Lint::visitCallBase(CallBase &I) { // TODO: Check more intrinsics - case Intrinsic::memcpy: { + case Intrinsic::memcpy: + case Intrinsic::memcpy_inline: { MemCpyInst *MCI = cast(&I); visitMemoryReference(I, MemoryLocation::getForDest(MCI), MCI->getDestAlign(), nullptr, MemRef::Write); @@ -311,23 +312,6 @@ void Lint::visitCallBase(CallBase &I) { "Undefined behavior: memcpy source and destination overlap", &I); break; } - case Intrinsic::memcpy_inline: { - MemCpyInlineInst *MCII = cast(&I); - const uint64_t Size = MCII->getLength()->getValue().getLimitedValue(); - visitMemoryReference(I, MemoryLocation::getForDest(MCII), - MCII->getDestAlign(), nullptr, MemRef::Write); - visitMemoryReference(I, MemoryLocation::getForSource(MCII), - MCII->getSourceAlign(), nullptr, MemRef::Read); - - // Check that the memcpy arguments don't overlap. The AliasAnalysis API - // isn't expressive enough for what we really want to do. Known partial - // overlap is not distinguished from the case where nothing is known. - const LocationSize LS = LocationSize::precise(Size); - Check(AA->alias(MCII->getSource(), LS, MCII->getDest(), LS) != - AliasResult::MustAlias, - "Undefined behavior: memcpy source and destination overlap", &I); - break; - } case Intrinsic::memmove: { MemMoveInst *MMI = cast(&I); visitMemoryReference(I, MemoryLocation::getForDest(MMI), diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index 8572cdc1604562..19950f3eb67bad 100644 --- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -230,6 +230,21 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const { break; } + case Intrinsic::memcpy_inline: { + // Only expand llvm.memcpy.inline with non-constant length in this + // codepath, leaving the current SelectionDAG expansion for constant + // length memcpy intrinsics undisturbed. + auto *Memcpy = cast(Inst); + if (isa(Memcpy->getLength())) + break; + + Function *ParentFunc = Memcpy->getFunction(); + const TargetTransformInfo &TTI = LookupTTI(*ParentFunc); + expandMemCpyAsLoop(Memcpy, TTI); + Changed = true; + Memcpy->eraseFromParent(); + break; + } case Intrinsic::memmove: { auto *Memmove = cast(Inst); Function *ParentFunc = Memmove->getFunction(); @@ -291,6 +306,7 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const { default: break; case Intrinsic::memcpy: + case Intrinsic::memcpy_inline: case Intrinsic::memmove: case Intrinsic::memset: case Intrinsic::memset_inline: diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memcpy-inline-non-constant-len.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memcpy-inline-non-constant-len.ll new file mode 100644 index 00000000000000..a4e049941030ef --- /dev/null +++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memcpy-inline-non-constant-len.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -mtriple=x86_64-pc-linux-gnu -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s + +; Constant length memcpy.inline should be left unmodified. +define void @memcpy_32(ptr %dst, ptr %src) nounwind { +; CHECK-LABEL: define void @memcpy_32( +; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 32, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.inline.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 32, i1 true) +; CHECK-NEXT: ret void +; + call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 32, i1 0) + tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 32, i1 1) + ret void +} + +define void @memcpy_x(ptr %dst, ptr %src, i64 %x) nounwind { +; CHECK-LABEL: define void @memcpy_x( +; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[X]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_MEMCPY_EXPANSION:.*]], label %[[POST_LOOP_MEMCPY_EXPANSION:.*]] +; CHECK: [[LOOP_MEMCPY_EXPANSION]]: +; CHECK-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP5:%.*]], %[[LOOP_MEMCPY_EXPANSION]] ] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[LOOP_INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[LOOP_INDEX]] +; CHECK-NEXT: store i8 [[TMP3]], ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP5]] = add i64 [[LOOP_INDEX]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP5]], [[X]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[LOOP_MEMCPY_EXPANSION]], label %[[POST_LOOP_MEMCPY_EXPANSION]] +; CHECK: [[POST_LOOP_MEMCPY_EXPANSION]]: +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[X]], 0 +; CHECK-NEXT: br i1 [[TMP7]], label %[[LOOP_MEMCPY_EXPANSION2:.*]], label %[[POST_LOOP_MEMCPY_EXPANSION1:.*]] +; CHECK: [[LOOP_MEMCPY_EXPANSION2]]: +; CHECK-NEXT: [[LOOP_INDEX3:%.*]] = phi i64 [ 0, %[[POST_LOOP_MEMCPY_EXPANSION]] ], [ [[TMP11:%.*]], %[[LOOP_MEMCPY_EXPANSION2]] ] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[LOOP_INDEX3]] +; CHECK-NEXT: [[TMP9:%.*]] = load volatile i8, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[LOOP_INDEX3]] +; CHECK-NEXT: store volatile i8 [[TMP9]], ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP11]] = add i64 [[LOOP_INDEX3]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = icmp ult i64 [[TMP11]], [[X]] +; CHECK-NEXT: br i1 [[TMP12]], label %[[LOOP_MEMCPY_EXPANSION2]], label %[[POST_LOOP_MEMCPY_EXPANSION1]] +; CHECK: [[POST_LOOP_MEMCPY_EXPANSION1]]: +; CHECK-NEXT: ret void +; + call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 %x, i1 0) + tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 %x, i1 1) + ret void +} diff --git a/llvm/test/Verifier/intrinsic-immarg.ll b/llvm/test/Verifier/intrinsic-immarg.ll index b1b9f7ee4be112..ad70b17e5fb727 100644 --- a/llvm/test/Verifier/intrinsic-immarg.ll +++ b/llvm/test/Verifier/intrinsic-immarg.ll @@ -36,14 +36,6 @@ define void @memcpy_inline_is_volatile(ptr %dest, ptr %src, i1 %is.volatile) { ret void } -define void @memcpy_inline_variable_size(ptr %dest, ptr %src, i32 %size) { - ; CHECK: immarg operand has non-immediate parameter - ; CHECK-NEXT: i32 %size - ; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr %dest, ptr %src, i32 %size, i1 true) - call void @llvm.memcpy.inline.p0.p0.i32(ptr %dest, ptr %src, i32 %size, i1 true) - ret void -} - declare void @llvm.memmove.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) define void @memmove(ptr %dest, ptr %src, i1 %is.volatile) { ; CHECK: immarg operand has non-immediate parameter diff --git a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir index 1e533aeacfb49d..7878aa5ee46d4f 100644 --- a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir +++ b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir @@ -1069,8 +1069,8 @@ llvm.func @experimental_constrained_fptrunc(%s: f64, %v: vector<4xf32>) { // CHECK-DAG: declare void @llvm.debugtrap() // CHECK-DAG: declare void @llvm.ubsantrap(i8 immarg) // CHECK-DAG: declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg) -// CHECK-DAG: declare void @llvm.memcpy.inline.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32 immarg, i1 immarg) -// CHECK-DAG: declare void @llvm.memcpy.inline.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64 immarg, i1 immarg) +// CHECK-DAG: declare void @llvm.memcpy.inline.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg) +// CHECK-DAG: declare void @llvm.memcpy.inline.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) // CHECK-DAG: declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) // CHECK-DAG: declare { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32>, <8 x i32>) // CHECK-DAG: declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)