Skip to content

Commit

Permalink
[Inliner] Propagate more attributes to params when inlining
Browse files Browse the repository at this point in the history
Add support for propagating:
    - `derefereancable`
    - `derefereancable_or_null`
    - `align`
    - `nonnull`

These are only propagated if the parameter to the to-be-inlined
callsite match the exact parameter used in the to-be-inlined function.
  • Loading branch information
goldsteinn committed Oct 14, 2024
1 parent 272c27c commit 60cbba6
Show file tree
Hide file tree
Showing 9 changed files with 116 additions and 36 deletions.
2 changes: 1 addition & 1 deletion clang/test/CodeGen/attr-counted-by-pr88931.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ void init(void * __attribute__((pass_dynamic_object_size(0))));
// CHECK-LABEL: define dso_local void @_ZN3foo3barC1Ev(
// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(1) [[THIS:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] align 2 {
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @_Z4initPvU25pass_dynamic_object_size0(ptr noundef nonnull [[THIS]], i64 noundef -1) #[[ATTR2:[0-9]+]]
// CHECK-NEXT: tail call void @_Z4initPvU25pass_dynamic_object_size0(ptr noundef nonnull align 4 dereferenceable(1) [[THIS]], i64 noundef -1) #[[ATTR2:[0-9]+]]
// CHECK-NEXT: ret void
//
foo::bar::bar() {
Expand Down
2 changes: 1 addition & 1 deletion clang/test/OpenMP/bug57757.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ void foo() {
// CHECK-NEXT: ]
// CHECK: .untied.jmp..i:
// CHECK-NEXT: store i32 1, ptr [[TMP2]], align 4, !tbaa [[TBAA16]], !alias.scope [[META13]], !noalias [[META17]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @__kmpc_omp_task(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]]), !noalias [[META13]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @__kmpc_omp_task(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr nonnull [[TMP1]]), !noalias [[META13]]
// CHECK-NEXT: br label [[DOTOMP_OUTLINED__EXIT]]
// CHECK: .untied.next..i:
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
Expand Down
7 changes: 7 additions & 0 deletions llvm/include/llvm/IR/Attributes.h
Original file line number Diff line number Diff line change
Expand Up @@ -947,6 +947,9 @@ class AttributeList {
/// arg.
uint64_t getParamDereferenceableOrNullBytes(unsigned ArgNo) const;

/// Get range (or std::nullopt if unknown) of an arg.
std::optional<ConstantRange> getParamRange(unsigned ArgNo) const;

/// Get the disallowed floating-point classes of the return value.
FPClassTest getRetNoFPClass() const;

Expand Down Expand Up @@ -1123,6 +1126,10 @@ class AttrBuilder {
/// invalid if the Kind is not present in the builder.
Attribute getAttribute(StringRef Kind) const;

/// Retrieve the range if the attribute exists (std::nullopt is returned
/// otherwise).
std::optional<ConstantRange> getRange() const;

/// Return raw (possibly packed/encoded) value of integer attribute or
/// std::nullopt if not set.
std::optional<uint64_t> getRawIntAttr(Attribute::AttrKind Kind) const;
Expand Down
15 changes: 15 additions & 0 deletions llvm/lib/IR/Attributes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1931,6 +1931,14 @@ AttributeList::getParamDereferenceableOrNullBytes(unsigned Index) const {
return getParamAttrs(Index).getDereferenceableOrNullBytes();
}

std::optional<ConstantRange>
AttributeList::getParamRange(unsigned Index) const {
auto RangeAttr = getParamAttrs(Index).getAttribute(Attribute::Range);
if (RangeAttr.isValid())
return RangeAttr.getRange();
return std::nullopt;
}

FPClassTest AttributeList::getRetNoFPClass() const {
return getRetAttrs().getNoFPClass();
}
Expand Down Expand Up @@ -2277,6 +2285,13 @@ Attribute AttrBuilder::getAttribute(StringRef A) const {
return {};
}

std::optional<ConstantRange> AttrBuilder::getRange() const {
const Attribute RangeAttr = getAttribute(Attribute::Range);
if (RangeAttr.isValid())
return RangeAttr.getRange();
return std::nullopt;
}

bool AttrBuilder::contains(Attribute::AttrKind A) const {
return getAttribute(A).isValid();
}
Expand Down
90 changes: 74 additions & 16 deletions llvm/lib/Transforms/Utils/InlineFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
Expand All @@ -59,6 +60,7 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
Expand Down Expand Up @@ -1358,18 +1360,36 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
auto &Context = CalledFunction->getContext();

// Collect valid attributes for all params.
SmallVector<AttrBuilder> ValidParamAttrs;
SmallVector<AttrBuilder> ValidObjParamAttrs, ValidExactParamAttrs;
bool HasAttrToPropagate = false;

// Attributes we can only propagate if the exact parameter is forwarded.
// We can propagate both poison generating and UB generating attributes
// without any extra checks. The only attribute that is tricky to propagate
// is `noundef` (skipped for now) as that can create new UB where previous
// behavior was just using a poison value.
static const Attribute::AttrKind ExactAttrsToPropagate[] = {
Attribute::Dereferenceable, Attribute::DereferenceableOrNull,
Attribute::NonNull, Attribute::Alignment, Attribute::Range};

for (unsigned I = 0, E = CB.arg_size(); I < E; ++I) {
ValidParamAttrs.emplace_back(AttrBuilder{CB.getContext()});
ValidObjParamAttrs.emplace_back(AttrBuilder{CB.getContext()});
ValidExactParamAttrs.emplace_back(AttrBuilder{CB.getContext()});
// Access attributes can be propagated to any param with the same underlying
// object as the argument.
if (CB.paramHasAttr(I, Attribute::ReadNone))
ValidParamAttrs.back().addAttribute(Attribute::ReadNone);
ValidObjParamAttrs.back().addAttribute(Attribute::ReadNone);
if (CB.paramHasAttr(I, Attribute::ReadOnly))
ValidParamAttrs.back().addAttribute(Attribute::ReadOnly);
HasAttrToPropagate |= ValidParamAttrs.back().hasAttributes();
ValidObjParamAttrs.back().addAttribute(Attribute::ReadOnly);

for (Attribute::AttrKind AK : ExactAttrsToPropagate) {
Attribute Attr = CB.getParamAttr(I, AK);
if (Attr.isValid())
ValidExactParamAttrs.back().addAttribute(Attr);
}

HasAttrToPropagate |= ValidObjParamAttrs.back().hasAttributes();
HasAttrToPropagate |= ValidExactParamAttrs.back().hasAttributes();
}

// Won't be able to propagate anything.
Expand All @@ -1391,22 +1411,60 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,

AttributeList AL = NewInnerCB->getAttributes();
for (unsigned I = 0, E = InnerCB->arg_size(); I < E; ++I) {
// Check if the underlying value for the parameter is an argument.
const Value *UnderlyingV =
getUnderlyingObject(InnerCB->getArgOperand(I));
const Argument *Arg = dyn_cast<Argument>(UnderlyingV);
if (!Arg)
// It's unsound or requires special handling to propagate
// attributes to byval arguments. Even if CalledFunction
// doesn't e.g. write to the argument (readonly), the call to
// NewInnerCB may write to its by-value copy.
if (AL.hasParamAttr(I, Attribute::ByVal))
continue;

if (AL.hasParamAttr(I, Attribute::ByVal))
// It's unsound to propagate memory attributes to byval arguments.
// Even if CalledFunction doesn't e.g. write to the argument,
// the call to NewInnerCB may write to its by-value copy.
// Don't bother propagating attrs to constants.
if (match(NewInnerCB->getArgOperand(I),
llvm::PatternMatch::m_ImmConstant()))
continue;

unsigned ArgNo = Arg->getArgNo();
// Check if the underlying value for the parameter is an argument.
const Argument *Arg = dyn_cast<Argument>(InnerCB->getArgOperand(I));
unsigned ArgNo;
if (Arg) {
ArgNo = Arg->getArgNo();
// For dereferenceable, dereferenceable_or_null, align, etc...
// we don't want to propagate if the existing param has the same
// attribute with "better" constraints. So remove from the
// new AL if the region of the existing param is larger than
// what we can propagate.
AttrBuilder NewAB{
Context, AttributeSet::get(Context, ValidExactParamAttrs[ArgNo])};
if (AL.getParamDereferenceableBytes(I) >
NewAB.getDereferenceableBytes())
NewAB.removeAttribute(Attribute::Dereferenceable);
if (AL.getParamDereferenceableOrNullBytes(I) >
NewAB.getDereferenceableOrNullBytes())
NewAB.removeAttribute(Attribute::DereferenceableOrNull);
if (AL.getParamAlignment(I).valueOrOne() >
NewAB.getAlignment().valueOrOne())
NewAB.removeAttribute(Attribute::Alignment);
if (auto ExistingRange = AL.getParamRange(I)) {
if (auto NewRange = NewAB.getRange()) {
ConstantRange CombinedRange =
ExistingRange->intersectWith(*NewRange);
NewAB.removeAttribute(Attribute::Range);
NewAB.addRangeAttr(CombinedRange);
}
}
AL = AL.addParamAttributes(Context, I, NewAB);
} else {
// Check if the underlying value for the parameter is an argument.
const Value *UnderlyingV =
getUnderlyingObject(InnerCB->getArgOperand(I));
Arg = dyn_cast<Argument>(UnderlyingV);
if (!Arg)
continue;
ArgNo = Arg->getArgNo();
}

// If so, propagate its access attributes.
AL = AL.addParamAttributes(Context, I, ValidParamAttrs[ArgNo]);
AL = AL.addParamAttributes(Context, I, ValidObjParamAttrs[ArgNo]);
// We can have conflicting attributes from the inner callsite and
// to-be-inlined callsite. In that case, choose the most
// restrictive.
Expand Down
28 changes: 14 additions & 14 deletions llvm/test/Transforms/Inline/access-attributes-prop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ define void @prop_param_callbase_def_1x_partial_3(ptr %p, ptr %p2) {
define void @prop_deref(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_deref
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: call void @bar1(ptr [[P]])
; CHECK-NEXT: call void @bar1(ptr dereferenceable(16) [[P]])
; CHECK-NEXT: ret void
;
call void @foo1(ptr dereferenceable(16) %p)
Expand All @@ -315,7 +315,7 @@ define void @prop_deref(ptr %p) {
define void @prop_deref_or_null(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_deref_or_null
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: call void @bar1(ptr [[P]])
; CHECK-NEXT: call void @bar1(ptr dereferenceable_or_null(256) [[P]])
; CHECK-NEXT: ret void
;
call void @foo1(ptr dereferenceable_or_null(256) %p)
Expand All @@ -325,7 +325,7 @@ define void @prop_deref_or_null(ptr %p) {
define void @prop_param_nonnull_and_align(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_nonnull_and_align
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: call void @bar1(ptr [[P]])
; CHECK-NEXT: call void @bar1(ptr nonnull align 32 [[P]])
; CHECK-NEXT: ret void
;
call void @foo1(ptr nonnull align 32 %p)
Expand All @@ -335,7 +335,7 @@ define void @prop_param_nonnull_and_align(ptr %p) {
define void @prop_param_nofree_and_align(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_nofree_and_align
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: call void @bar1(ptr [[P]])
; CHECK-NEXT: call void @bar1(ptr align 32 [[P]])
; CHECK-NEXT: ret void
;
call void @foo1(ptr nofree align 32 %p)
Expand All @@ -355,7 +355,7 @@ define void @prop_param_deref_align_no_update(ptr %p) {
define void @prop_param_deref_align_update(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_deref_align_update
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: call void @bar1(ptr align 64 dereferenceable(512) [[P]])
; CHECK-NEXT: call void @bar1(ptr align 128 dereferenceable(1024) [[P]])
; CHECK-NEXT: ret void
;
call void @foo1_bar_aligned64_deref512(ptr align 128 dereferenceable(1024) %p)
Expand All @@ -365,7 +365,7 @@ define void @prop_param_deref_align_update(ptr %p) {
define void @prop_param_deref_or_null_update(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_deref_or_null_update
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: call void @bar1(ptr align 512 dereferenceable_or_null(512) [[P]])
; CHECK-NEXT: call void @bar1(ptr align 512 dereferenceable_or_null(1024) [[P]])
; CHECK-NEXT: ret void
;
call void @foo1_bar_aligned512_deref_or_null512(ptr dereferenceable_or_null(1024) %p)
Expand Down Expand Up @@ -634,7 +634,7 @@ define dso_local void @foo4(i32 %v) {
define void @prop_range_empty_intersect(i32 %v) {
; CHECK-LABEL: define {{[^@]+}}@prop_range_empty_intersect
; CHECK-SAME: (i32 [[V:%.*]]) {
; CHECK-NEXT: call void @bar5(i32 range(i32 0, 10) [[V]])
; CHECK-NEXT: call void @bar5(i32 range(i32 0, 0) [[V]])
; CHECK-NEXT: ret void
;
call void @foo4_range_0_10(i32 range(i32 11, 50) %v)
Expand All @@ -644,7 +644,7 @@ define void @prop_range_empty_intersect(i32 %v) {
define void @prop_range_empty(i32 %v) {
; CHECK-LABEL: define {{[^@]+}}@prop_range_empty
; CHECK-SAME: (i32 [[V:%.*]]) {
; CHECK-NEXT: call void @bar5(i32 [[V]])
; CHECK-NEXT: call void @bar5(i32 range(i32 1, 0) [[V]])
; CHECK-NEXT: ret void
;
call void @foo4(i32 range(i32 1, 0) %v)
Expand All @@ -654,7 +654,7 @@ define void @prop_range_empty(i32 %v) {
define void @prop_range_empty_with_intersect(i32 %v) {
; CHECK-LABEL: define {{[^@]+}}@prop_range_empty_with_intersect
; CHECK-SAME: (i32 [[V:%.*]]) {
; CHECK-NEXT: call void @bar5(i32 range(i32 0, 10) [[V]])
; CHECK-NEXT: call void @bar5(i32 range(i32 1, 10) [[V]])
; CHECK-NEXT: ret void
;
call void @foo4_range_0_10(i32 range(i32 1, 0) %v)
Expand All @@ -664,7 +664,7 @@ define void @prop_range_empty_with_intersect(i32 %v) {
define void @prop_range_intersect1(i32 %v) {
; CHECK-LABEL: define {{[^@]+}}@prop_range_intersect1
; CHECK-SAME: (i32 [[V:%.*]]) {
; CHECK-NEXT: call void @bar5(i32 range(i32 0, 10) [[V]])
; CHECK-NEXT: call void @bar5(i32 range(i32 0, 9) [[V]])
; CHECK-NEXT: ret void
;
call void @foo4_range_0_10(i32 range(i32 0, 9) %v)
Expand All @@ -674,7 +674,7 @@ define void @prop_range_intersect1(i32 %v) {
define void @prop_range_intersect2(i32 %v) {
; CHECK-LABEL: define {{[^@]+}}@prop_range_intersect2
; CHECK-SAME: (i32 [[V:%.*]]) {
; CHECK-NEXT: call void @bar5(i32 range(i32 0, 10) [[V]])
; CHECK-NEXT: call void @bar5(i32 range(i32 1, 9) [[V]])
; CHECK-NEXT: ret void
;
call void @foo4_range_0_10(i32 range(i32 1, 9) %v)
Expand All @@ -684,7 +684,7 @@ define void @prop_range_intersect2(i32 %v) {
define void @prop_range_intersect3(i32 %v) {
; CHECK-LABEL: define {{[^@]+}}@prop_range_intersect3
; CHECK-SAME: (i32 [[V:%.*]]) {
; CHECK-NEXT: call void @bar5(i32 [[V]])
; CHECK-NEXT: call void @bar5(i32 range(i32 0, 11) [[V]])
; CHECK-NEXT: ret void
;
call void @foo4_2_range_0_10(i32 range(i32 0, 11) %v)
Expand All @@ -694,7 +694,7 @@ define void @prop_range_intersect3(i32 %v) {
define void @prop_range_intersect4(i32 %v) {
; CHECK-LABEL: define {{[^@]+}}@prop_range_intersect4
; CHECK-SAME: (i32 [[V:%.*]]) {
; CHECK-NEXT: call void @bar5(i32 range(i32 0, 10) [[V]])
; CHECK-NEXT: call void @bar5(i32 range(i32 0, 5) [[V]])
; CHECK-NEXT: ret void
;
call void @foo4_range_0_10(i32 range(i32 40, 5) %v)
Expand Down Expand Up @@ -724,7 +724,7 @@ define void @prop_range_keep(i32 %v) {
define void @prop_range_direct(i32 %v) {
; CHECK-LABEL: define {{[^@]+}}@prop_range_direct
; CHECK-SAME: (i32 [[V:%.*]]) {
; CHECK-NEXT: call void @bar5(i32 [[V]])
; CHECK-NEXT: call void @bar5(i32 range(i32 1, 11) [[V]])
; CHECK-NEXT: ret void
;
call void @foo4(i32 range(i32 1, 11) %v)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ declare void @h(ptr %p, ptr %q, ptr %z)
define void @f(ptr %p, ptr %q, ptr %z) {
; CHECK-LABEL: define void @f
; CHECK-SAME: (ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[Z:%.*]]) {
; CHECK-NEXT: call void @h(ptr [[P]], ptr [[Q]], ptr [[Z]])
; CHECK-NEXT: call void @h(ptr nonnull [[P]], ptr [[Q]], ptr nonnull [[Z]])
; CHECK-NEXT: ret void
;
call void @g(ptr nonnull %p, ptr %q, ptr nonnull %z)
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/Inline/byval.ll
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ define void @test3() nounwind {
; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS]], align 1
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[S1]])
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[S1]], ptr align 1 [[S]], i64 12, i1 false)
; CHECK-NEXT: call void @g3(ptr [[S1]]) #[[ATTR0]]
; CHECK-NEXT: call void @g3(ptr align 64 [[S1]]) #[[ATTR0]]
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[S1]])
; CHECK-NEXT: ret void
;
Expand All @@ -131,7 +131,7 @@ define i32 @test4() nounwind {
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 64
; CHECK-NEXT: call void @g3(ptr [[S]]) #[[ATTR0]]
; CHECK-NEXT: call void @g3(ptr align 64 [[S]]) #[[ATTR0]]
; CHECK-NEXT: ret i32 4
;
entry:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/PhaseOrdering/pr95152.ll
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ define void @f(ptr dead_on_unwind noalias %p) {
; CHECK-LABEL: define void @f(
; CHECK-SAME: ptr dead_on_unwind noalias [[P:%.*]]) local_unnamed_addr {
; CHECK-NEXT: store i64 3, ptr [[P]], align 4
; CHECK-NEXT: tail call void @j(ptr nonnull [[P]])
; CHECK-NEXT: tail call void @j(ptr nonnull align 8 dereferenceable(8) [[P]])
; CHECK-NEXT: store i64 43, ptr [[P]], align 4
; CHECK-NEXT: ret void
;
Expand Down

0 comments on commit 60cbba6

Please sign in to comment.