-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[FunctionAttrs] Add the "initializes" attribute inference #97373
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-pgo @llvm/pr-subscribers-coroutines Author: Haopeng Liu (haopliu) Changes: Add the "initializes" attribute inference. This change is expected to cause a ~0.09% compile-time regression, which seems acceptable for interprocedural DSE. Patch is 89.07 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/97373.diff 15 Files Affected:
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 7b419d0f098b5..507dbf4ef26f0 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -15,6 +15,7 @@
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -36,6 +37,7 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
+#include "llvm/IR/ConstantRangeList.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
@@ -580,6 +582,205 @@ struct ArgumentUsesTracker : public CaptureTracker {
const SCCNodeSet &SCCNodes;
};
+/// A single use of the argument, or of a pointer derived from the argument
+/// through a chain of GEPs.
+struct ArgumentUse {
+  /// The use being inspected; its user is the consumer of the pointer.
+  Use *U = nullptr;
+  /// Accumulated constant byte offset from the argument to the used pointer.
+  /// std::nullopt once any GEP on the way has a non-constant offset.
+  std::optional<int64_t> Offset;
+};
+
+// Describes how a single instruction accesses the argument.
+//
+// "Unknown" accesses are cases such as unrecognized instructions,
+// instructions that have more than one use of the argument, or volatile
+// memory accesses. "Unknown" implies "IsClobber" and an empty access range.
+// Write or Read accesses can be clobbers as well; for example, a Load with a
+// scalable type.
+struct ArgumentAccessInfo {
+  // Scoped so the enumerators cannot leak into, or implicitly convert in, the
+  // surrounding namespace. All users already spell the fully qualified name.
+  enum class AccessType { Write, Read, Unknown };
+  AccessType ArgAccessType;
+  // Byte ranges, relative to the argument, touched by the access. Empty when
+  // the range could not be determined.
+  ConstantRangeList AccessRanges;
+  bool IsClobber = false;
+};
+
+/// All accesses to the argument found inside one basic block.
+struct UsesPerBlockInfo {
+  /// Access info keyed by the accessing instruction. A small inline capacity
+  /// is used because most blocks contain only a couple of accesses, and a
+  /// plain DenseMap would jump straight to a large heap allocation.
+  SmallDenseMap<Instruction *, ArgumentAccessInfo, 4> Insts;
+  /// True if some instruction in the block is a Write with a known range.
+  /// Explicitly initialized so correctness does not depend on how the owning
+  /// container constructs this struct.
+  bool HasWrites = false;
+  /// True if some instruction in the block clobbers the argument.
+  bool HasClobber = false;
+};
+
+/// Classify the access that instruction \p I performs through the argument
+/// use \p IU.
+///
+/// Recognized accesses are non-volatile stores, loads, memsets and memory
+/// transfers (memcpy/memmove), plus calls whose parameter carries the
+/// "initializes" attribute. Returned ranges are byte ranges shifted by
+/// IU.Offset. Anything else, including volatile accesses, is reported as an
+/// Unknown clobber with an empty range.
+ArgumentAccessInfo GetArgmentAccessInfo(const Instruction *I,
+                                        const ArgumentUse &IU,
+                                        const DataLayout &DL) {
+  using AccessType = ArgumentAccessInfo::AccessType;
+
+  // Build the half-open byte range [Offset, Offset + Size). Fails when the
+  // offset is unknown or the size is not strictly positive: a zero-length
+  // range is not representable as a ConstantRange at an arbitrary offset, and
+  // a negative size would yield a wrapped range.
+  auto MakeRange = [](std::optional<int64_t> Offset,
+                      int64_t Size) -> std::optional<ConstantRange> {
+    if (!Offset || Size <= 0)
+      return std::nullopt;
+    return ConstantRange(APInt(64, *Offset, true),
+                         APInt(64, *Offset + Size, true));
+  };
+  // Range covered by a load/store of type Ty; unknown for scalable types.
+  auto GetTypeAccessRange = [&](Type *Ty) -> std::optional<ConstantRange> {
+    auto TypeSize = DL.getTypeStoreSize(Ty);
+    if (TypeSize.isScalable())
+      return std::nullopt;
+    return MakeRange(IU.Offset,
+                     static_cast<int64_t>(TypeSize.getFixedValue()));
+  };
+  // Range covered by a mem intrinsic; unknown unless the length is a
+  // constant that fits in 64 signed bits.
+  auto GetConstantIntRange =
+      [&](Value *Length) -> std::optional<ConstantRange> {
+    auto *ConstantLength = dyn_cast<ConstantInt>(Length);
+    if (!ConstantLength || !ConstantLength->getValue().isSignedIntN(64))
+      return std::nullopt;
+    return MakeRange(IU.Offset, ConstantLength->getSExtValue());
+  };
+  // Since the access ranges of writes are unioned, a write with an unknown
+  // range is simply recorded with an empty range and is not a clobber.
+  auto MakeWrite =
+      [](std::optional<ConstantRange> Range) -> ArgumentAccessInfo {
+    ConstantRangeList AccessRanges;
+    if (Range)
+      AccessRanges.insert(*Range);
+    return {AccessType::Write, AccessRanges, /*IsClobber=*/false};
+  };
+  // Unlike writes, a read with an unknown range is conservatively a clobber.
+  auto MakeRead =
+      [](std::optional<ConstantRange> Range) -> ArgumentAccessInfo {
+    if (Range)
+      return {AccessType::Read, {*Range}, /*IsClobber=*/false};
+    return {AccessType::Read, {}, /*IsClobber=*/true};
+  };
+
+  if (auto *SI = dyn_cast<StoreInst>(I)) {
+    // Only a store *to* the pointer is a write; storing the pointer value
+    // itself (operand 0) falls through to the Unknown case below.
+    if (!SI->isVolatile() && &SI->getOperandUse(1) == IU.U)
+      return MakeWrite(GetTypeAccessRange(SI->getAccessType()));
+  } else if (auto *LI = dyn_cast<LoadInst>(I)) {
+    if (!LI->isVolatile() && &LI->getOperandUse(0) == IU.U)
+      return MakeRead(GetTypeAccessRange(LI->getAccessType()));
+  } else if (auto *MemSet = dyn_cast<MemSetInst>(I)) {
+    if (!MemSet->isVolatile())
+      return MakeWrite(GetConstantIntRange(MemSet->getLength()));
+  } else if (auto *MTI = dyn_cast<MemTransferInst>(I)) {
+    // Covers both memcpy and memmove: operand 0 is the destination and
+    // operand 1 is the source.
+    if (!MTI->isVolatile()) {
+      if (&MTI->getOperandUse(0) == IU.U)
+        return MakeWrite(GetConstantIntRange(MTI->getLength()));
+      if (&MTI->getOperandUse(1) == IU.U)
+        return MakeRead(GetConstantIntRange(MTI->getLength()));
+    }
+  } else if (auto *CB = dyn_cast<CallBase>(I)) {
+    if (CB->isArgOperand(IU.U)) {
+      unsigned ArgNo = CB->getArgOperandNo(IU.U);
+      bool IsInitialize = CB->paramHasAttr(ArgNo, Attribute::Initializes);
+      if (IsInitialize && IU.Offset) {
+        // Argument is only not clobbered when parameter is
+        // writeonly/readnone and nocapture.
+        bool IsClobber = !(CB->onlyWritesMemory(ArgNo) &&
+                           CB->paramHasAttr(ArgNo, Attribute::NoCapture));
+        // The attribute may live on the call site or on the callee.
+        Attribute Attr = CB->getParamAttr(ArgNo, Attribute::Initializes);
+        if (!Attr.isValid()) {
+          if (const Function *Callee = CB->getCalledFunction())
+            Attr = Callee->getParamAttribute(ArgNo, Attribute::Initializes);
+        }
+        if (Attr.isValid()) {
+          ConstantRangeList CBCRL = Attr.getValueAsConstantRangeList();
+          ConstantRangeList AccessRanges;
+          // Translate the callee-relative ranges into argument-relative ones.
+          for (ConstantRange &CR : CBCRL)
+            AccessRanges.insert(ConstantRange(CR.getLower() + *IU.Offset,
+                                              CR.getUpper() + *IU.Offset));
+          return {AccessType::Write, AccessRanges, IsClobber};
+        }
+      }
+    }
+  }
+  // Unrecognized instructions are considered clobbers.
+  return {AccessType::Unknown, {}, /*IsClobber=*/true};
+}
+
+/// Collect every access to argument \p A in \p F, looking through GEPs, and
+/// record the accesses per basic block in \p UsesPerBlock.
+///
+/// \returns {HasAnyWrite, HasWriteOutsideEntryBB}: whether any instruction is
+/// a Write with a known, non-empty range, and whether any such write lives
+/// outside the entry block.
+std::pair<bool, bool> CollectArgumentUsesPerBlock(
+    Argument &A, Function &F,
+    DenseMap<const BasicBlock *, UsesPerBlockInfo> &UsesPerBlock) {
+  auto &DL = F.getParent()->getDataLayout();
+  auto PointerSize =
+      DL.getIndexSizeInBits(A.getType()->getPointerAddressSpace());
+
+  bool HasAnyWrite = false;
+  bool HasWriteOutsideEntryBB = false;
+
+  BasicBlock &EntryBB = F.getEntryBlock();
+  SmallVector<ArgumentUse, 4> Worklist;
+  for (Use &U : A.uses())
+    Worklist.push_back({&U, 0});
+
+  // Record Info for instruction I and update the per-block summary flags.
+  // Returns true iff I is a write with a known, non-empty range.
+  auto UpdateUseInfo = [&UsesPerBlock](Instruction *I,
+                                       ArgumentAccessInfo Info) {
+    auto &BBInfo = UsesPerBlock.getOrInsertDefault(I->getParent());
+    auto [It, Inserted] = BBInfo.Insts.try_emplace(I, std::move(Info));
+
+    // Instructions that have more than one use of the argument are considered
+    // as clobbers.
+    if (!Inserted) {
+      It->second = {ArgumentAccessInfo::AccessType::Unknown, {}, true};
+      BBInfo.HasClobber = true;
+      return false;
+    }
+
+    const ArgumentAccessInfo &IInfo = It->second;
+    // A read with a known range must not be reported as a write.
+    bool IsKnownWrite =
+        IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Write &&
+        !IInfo.AccessRanges.empty();
+    BBInfo.HasClobber |= IInfo.IsClobber;
+    BBInfo.HasWrites |= IsKnownWrite;
+    return IsKnownWrite;
+  };
+
+  // No need for a visited set because we don't look through phis, so there are
+  // no cycles.
+  while (!Worklist.empty()) {
+    ArgumentUse IU = Worklist.pop_back_val();
+    User *U = IU.U->getUser();
+    // Add GEP uses to worklist.
+    // If the GEP is not a constant GEP, the offset of everything derived from
+    // it becomes unknown.
+    if (auto *GEP = dyn_cast<GEPOperator>(U)) {
+      APInt Offset(PointerSize, 0, /*isSigned=*/true);
+      bool IsConstGEP = GEP->accumulateConstantOffset(DL, Offset);
+      std::optional<int64_t> NewOffset = std::nullopt;
+      if (IsConstGEP && IU.Offset)
+        NewOffset = *IU.Offset + Offset.getSExtValue();
+      for (Use &GEPUse : GEP->uses())
+        Worklist.push_back({&GEPUse, NewOffset});
+      continue;
+    }
+
+    auto *I = cast<Instruction>(U);
+    bool HasWrite = UpdateUseInfo(I, GetArgmentAccessInfo(I, IU, DL));
+
+    HasAnyWrite |= HasWrite;
+
+    if (HasWrite && I->getParent() != &EntryBB)
+      HasWriteOutsideEntryBB = true;
+  }
+  return {HasAnyWrite, HasWriteOutsideEntryBB};
+}
+
} // end anonymous namespace
namespace llvm {
@@ -866,9 +1067,132 @@ static bool addAccessAttr(Argument *A, Attribute::AttrKind R) {
return true;
}
+/// Infer the "initializes" attribute for argument \p A of \p F: the byte
+/// ranges of the pointee that are written on every path from function entry
+/// before being read or clobbered.
+///
+/// \returns true if the attribute on \p A was added or widened.
+static bool inferInitializes(Argument &A, Function &F) {
+  DenseMap<const BasicBlock *, UsesPerBlockInfo> UsesPerBlock;
+  auto [HasAnyWrite, HasWriteOutsideEntryBB] =
+      CollectArgumentUsesPerBlock(A, F, UsesPerBlock);
+  // No write anywhere in the function, bail.
+  if (!HasAnyWrite)
+    return false;
+
+  BasicBlock &EntryBB = F.getEntryBlock();
+  DenseMap<const BasicBlock *, ConstantRangeList> Initialized;
+
+  // Compute the ranges known to be initialized at the start of BB, given the
+  // ranges already computed for its successors in "Initialized". A block with
+  // only non-clobbering reads still inherits (minus the read ranges) whatever
+  // its successors initialize, so there is no early exit for such blocks.
+  auto VisitBlock = [&](const BasicBlock *BB) -> ConstantRangeList {
+    auto UPB = UsesPerBlock.find(BB);
+    ConstantRangeList CRL;
+
+    // Start with intersection of successors.
+    // If this block has any clobbering use, we're going to clear out the
+    // ranges at some point in this block anyway, so don't bother looking at
+    // successors.
+    if (UPB == UsesPerBlock.end() || !UPB->second.HasClobber) {
+      bool HasAddedSuccessor = false;
+      for (auto *Succ : successors(BB)) {
+        auto SuccI = Initialized.find(Succ);
+        // A successor without a recorded range initializes nothing, so the
+        // intersection is empty.
+        if (SuccI == Initialized.end()) {
+          CRL = ConstantRangeList();
+          break;
+        }
+        if (HasAddedSuccessor) {
+          CRL = CRL.intersectWith(SuccI->second);
+        } else {
+          CRL = SuccI->second;
+          HasAddedSuccessor = true;
+        }
+      }
+    }
+
+    if (UPB != UsesPerBlock.end()) {
+      // Sort uses in this block by instruction order.
+      SmallVector<std::pair<Instruction *, ArgumentAccessInfo>, 2> Insts;
+      append_range(Insts, UPB->second.Insts);
+      sort(Insts,
+           [](const std::pair<Instruction *, ArgumentAccessInfo> &LHS,
+              const std::pair<Instruction *, ArgumentAccessInfo> &RHS) {
+             return LHS.first->comesBefore(RHS.first);
+           });
+
+      // From the end of the block to the beginning of the block, set
+      // initializes ranges.
+      for (const auto &[_, Info] : reverse(Insts)) {
+        if (Info.IsClobber)
+          CRL = ConstantRangeList();
+        if (Info.AccessRanges.empty())
+          continue;
+        if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Write) {
+          CRL = CRL.unionWith(Info.AccessRanges);
+        } else {
+          assert(Info.ArgAccessType == ArgumentAccessInfo::AccessType::Read);
+          // Bytes read here are observed before any later write to them.
+          for (const auto &ReadRange : Info.AccessRanges)
+            CRL.subtract(ReadRange);
+        }
+      }
+    }
+    return CRL;
+  };
+
+  ConstantRangeList EntryCRL;
+  // If all write instructions are in the EntryBB, or if the EntryBB has
+  // a clobbering use, we only need to look at EntryBB.
+  bool OnlyScanEntryBlock = !HasWriteOutsideEntryBB;
+  if (!OnlyScanEntryBlock) {
+    if (auto EntryUPB = UsesPerBlock.find(&EntryBB);
+        EntryUPB != UsesPerBlock.end())
+      OnlyScanEntryBlock = EntryUPB->second.HasClobber;
+  }
+  if (OnlyScanEntryBlock) {
+    EntryCRL = VisitBlock(&EntryBB);
+    if (EntryCRL.empty())
+      return false;
+  } else {
+    // Visit successors before predecessors with a post-order walk of the
+    // blocks, so each block can consume the ranges of its successors.
+    for (const BasicBlock *BB : post_order(&F)) {
+      ConstantRangeList CRL = VisitBlock(BB);
+      if (!CRL.empty())
+        Initialized[BB] = CRL;
+    }
+
+    auto EntryCRLI = Initialized.find(&EntryBB);
+    if (EntryCRLI == Initialized.end())
+      return false;
+
+    EntryCRL = EntryCRLI->second;
+  }
+
+  assert(!EntryCRL.empty() &&
+         "should have bailed already if EntryCRL is empty");
+
+  if (A.hasAttribute(Attribute::Initializes)) {
+    ConstantRangeList PreviousCRL =
+        A.getAttribute(Attribute::Initializes).getValueAsConstantRangeList();
+    EntryCRL = EntryCRL.unionWith(PreviousCRL);
+    // Only report a change if the merged ranges add something to the
+    // attribute that is already present.
+    if (EntryCRL == PreviousCRL)
+      return false;
+  }
+
+  A.addAttr(Attribute::get(A.getContext(), Attribute::Initializes,
+                           EntryCRL.rangesRef()));
+
+  return true;
+}
+
/// Deduce nocapture attributes for the SCC.
static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
- SmallSet<Function *, 8> &Changed) {
+ SmallSet<Function *, 8> &Changed,
+ bool SkipInitializes) {
ArgumentGraph AG;
// Check each function in turn, determining which pointer arguments are not
@@ -936,6 +1260,10 @@ static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
if (addAccessAttr(&A, R))
Changed.insert(F);
}
+ if (!SkipInitializes && !A.onlyReadsMemory()) {
+ if (inferInitializes(A, *F))
+ Changed.insert(F);
+ }
}
}
@@ -1844,13 +2172,13 @@ deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter,
SmallSet<Function *, 8> Changed;
if (ArgAttrsOnly) {
- addArgumentAttrs(Nodes.SCCNodes, Changed);
+ addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/true);
return Changed;
}
addArgumentReturnedAttrs(Nodes.SCCNodes, Changed);
addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed);
- addArgumentAttrs(Nodes.SCCNodes, Changed);
+ addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/false);
inferConvergent(Nodes.SCCNodes, Changed);
addNoReturnAttrs(Nodes.SCCNodes, Changed);
addWillReturn(Nodes.SCCNodes, Changed);
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
index bea56a72bdeae..8615363a985d1 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
@@ -15,7 +15,7 @@ define void @test0_yes(ptr %p) nounwind {
ret void
}
-; CHECK: define void @test0_no(ptr nocapture writeonly %p) #1 {
+; CHECK: define void @test0_no(ptr nocapture writeonly initializes((0, 4)) %p) #1 {
define void @test0_no(ptr %p) nounwind {
store i32 0, ptr %p, !tbaa !2
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll
index 6b835bb4eef66..317a069eed26e 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll
@@ -10,7 +10,7 @@
; Should have call to sincos declarations, not calls to the asm pseudo-libcalls
define protected amdgpu_kernel void @swdev456865(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, float noundef %x) #0 {
; CHECK-LABEL: define protected amdgpu_kernel void @swdev456865(
-; CHECK-SAME: ptr addrspace(1) nocapture writeonly [[OUT0:%.*]], ptr addrspace(1) nocapture writeonly [[OUT1:%.*]], ptr addrspace(1) nocapture writeonly [[OUT2:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[OUT0:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[OUT1:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[OUT2:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5)
; CHECK-NEXT: [[I_I:%.*]] = call float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) #[[ATTR1:[0-9]+]]
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll
index a35fbaadddf9e..619124affff81 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll
@@ -49,7 +49,7 @@ declare float @_Z6sincosfPU3AS0f(float %x, ptr writeonly %ptr) #1
define void @sincos_f16_nocontract(half %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
; CHECK-LABEL: define void @sincos_f16_nocontract
-; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CALL:%.*]] = tail call half @_Z3sinDh(half [[X]])
; CHECK-NEXT: store half [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 2
@@ -68,7 +68,7 @@ entry:
define void @sincos_v2f16_nocontract(<2 x half> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
; CHECK-LABEL: define void @sincos_v2f16_nocontract
-; CHECK-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CALL:%.*]] = tail call <2 x half> @_Z3sinDv2_Dh(<2 x half> [[X]])
; CHECK-NEXT: store <2 x half> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
@@ -87,7 +87,7 @@ entry:
define void @sincos_f16(half %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
; CHECK-LABEL: define void @sincos_f16
-; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CALL:%.*]] = tail call contract half @_Z3sinDh(half [[X]])
; CHECK-NEXT: store half [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 2
@@ -105,7 +105,7 @@ entry:
define void @sincos_f16_order1(half %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
; CHECK-LABEL: define void @sincos_f16_order1
-; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CALL1:%.*]] = tail call contract half @_Z3cosDh(half [[X]])
; CHECK-NEXT: store half [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 2
@@ -123,7 +123,7 @@ entry:
define void @sincos_v2f16(<2 x half> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
; CHECK-LABEL: define void @sincos_v2f16
-; CHECK-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: (<2 x half> [[X:%.*]], p...
[truncated]
|
return {ArgumentAccessInfo::AccessType::Write, AccessRanges, | ||
/*IsClobber=*/false}; | ||
} | ||
} else if (auto *MemCpy = dyn_cast<MemCpyInst>(I)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can this be MemTransferInst to cover memmove too?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, good point! Changed to MemTransferInst and added unit test for memmove as well.
return {ArgumentAccessInfo::AccessType::Read, | ||
{AccessRange.value()}, | ||
/*IsClobber=*/false}; | ||
else |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No else after return
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!
@@ -580,6 +582,205 @@ struct ArgumentUsesTracker : public CaptureTracker { | |||
const SCCNodeSet &SCCNodes; | |||
}; | |||
|
|||
struct ArgumentUse { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
missing documentation.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for reminding! Done.
// Write or Read accesses can be clobbers as well for example, a Load with | ||
// scalable type. | ||
struct ArgumentAccessInfo { | ||
enum AccessType { Write, Read, Unknown }; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
enum class
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!
@@ -1844,13 +2172,13 @@ deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter, | |||
|
|||
SmallSet<Function *, 8> Changed; | |||
if (ArgAttrsOnly) { | |||
addArgumentAttrs(Nodes.SCCNodes, Changed); | |||
addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/true); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a comment as to why SkipInitializes in the ArgAttrsOnly case?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done :-D
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would simplify the comment a bit, this is too much detail. The important thing is that ArgAttrsOnly
means to only infer attributes that may aid optimizations on the current function. initializes
does not fall into that category.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!
ret void | ||
} | ||
|
||
define void @call_initializes_no_clobber_writeonly_capture(ptr %p) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: "nocapture" in the function name since it doesn't capture?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!
}; | ||
|
||
ArgumentAccessInfo GetArgmentAccessInfo(const Instruction *I, | ||
const ArgumentUse &IU, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: Would it make more sense to name the variable "ArgUse"? Unclear to me why "I" in the name?
Similar below "ArgumentUse IU = ..." at line 756
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, legacy names. Changed to "ArgUse".
return {ArgumentAccessInfo::AccessType::Unknown, {}, /*IsClobber=*/true}; | ||
} | ||
|
||
std::pair<bool, bool> CollectArgumentUsesPerBlock( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add some doc for the function? What does the return value represent? UsesPerBlock updated?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added the comment.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can this return UsesPerBlock
along with the two bools, rather than using an out parameter? and maybe change the return type to a struct with nice names rather than std::pair/tuple
}; | ||
|
||
struct UsesPerBlockInfo { | ||
DenseMap<Instruction *, ArgumentAccessInfo> Insts; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How large is Insts typically?
DenseMap jumps from 0 to 64: https://llvm.org/docs/ProgrammersManual.html#llvm-adt-densemap-h
SmallDenseMap<..., 8> or something might help make it more gradual 0 -> 8 -> 64
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point! According to an internal benchmark (muppet), 90% have a <= 2 size. Changed to SmallDenseMap<..., 2>.
Similarly to UsesPerBlock
, 90% have a <=8 size so changed UsesPerBlock
to SmallDenseMap<..., 8>
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If it's <= 2, and a non-trivial amount are == 2, you may want to provide 3 inline buckets to avoid having to grow() when the 2nd entry is inserted.
There's a max load factor of 3/4
llvm-project/llvm/include/llvm/ADT/DenseMap.h
Line 612 in c1f1aab
// If the load of the hash table is more than 3/4, or if fewer than 1/8 of |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, good point! Changed the SmallDenseMap size here to 4 and the size of UsesPerBlock
to 16.
With 4*3/4 (<=3) and 16*3/4 (<=12), they cover 96% and 94% of cases in a large internal benchmark. Thanks!
BTW, the size must be a power of 2, so I chose 4 and 16.
llvm-project/llvm/include/llvm/ADT/DenseMap.h
Line 933 in 8f15909
static_assert(isPowerOf2_64(InlineBuckets), |
BBInfo.HasWrites |= | ||
(IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Write && | ||
!IInfo.AccessRanges.empty()); | ||
return !IInfo.AccessRanges.empty(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does the return value mean "HasWrite" or is it more subtle than that? If it means "HasWrite", should it match the "BBInfo.HasWrites " above in that it also checks ArgAccessType == Write
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice catch! Updated.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
even when lgtm, we should land this only after the DSE patch lands as to not unnecessarily increase compile time without any benefit
can you add [FunctionAttrs]
to the beginning of the commit title?
} | ||
|
||
auto EntryCRLI = Initialized.find(&EntryBB); | ||
if (EntryCRLI == Initialized.end()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you cleanup the instances of https://llvm.org/docs/CodingStandards.html#don-t-use-braces-on-simple-single-statement-bodies-of-if-else-loop-statements ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, thanks for reminding! Done.
[&DL](Type *Ty, | ||
std::optional<int64_t> Offset) -> std::optional<ConstantRange> { | ||
auto TypeSize = DL.getTypeStoreSize(Ty); | ||
if (!TypeSize.isScalable() && Offset.has_value()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
in many if-statements, you write optional.has_value()
. optional
is sufficient and the preferred form.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. Thanks!
if (!TypeSize.isScalable() && Offset.has_value()) { | ||
int64_t Size = TypeSize.getFixedValue(); | ||
return ConstantRange(APInt(64, Offset.value(), true), | ||
APInt(64, Offset.value() + Size, true)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To access the value of optionals, you often write optional.value()
. The preferred form is *optional
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!
@@ -12,7 +12,8 @@ | |||
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -S | FileCheck %s | |||
|
|||
; CHECK: call {{.*}} @_Znam{{.*}} #[[ATTR:[0-9]+]] | |||
; CHECK: attributes #[[ATTR]] = { builtin allocsize(0) "memprof"="notcold" } | |||
; old: attributes #[[ATTR]] = { builtin allocsize(0) "memprof"="notcold" } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
unintentional test change? (the opt command doesn't run function-attrs)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the reminder! Our PR causes this test to fail and I'm not sure why.
Reached out to the owner of this test.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated the linkage number as suggested by the owner and fixed this test!
Thank you all for the comments! Addressed all except only one comment about "memprof_internal_linkage.ll". |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
will review more thoroughly soon, one thing jumped out at me
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
looks pretty good, just some minor comments
return {ArgumentAccessInfo::AccessType::Unknown, {}, /*IsClobber=*/true}; | ||
} | ||
|
||
std::pair<bool, bool> CollectArgumentUsesPerBlock( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can this return UsesPerBlock
along with the two bools, rather than using an out parameter? and maybe change the return type to a struct with nice names rather than std::pair/tuple
/*IsClobber=*/false, AccessRanges}; | ||
} | ||
} else if (auto *LI = dyn_cast<LoadInst>(I)) { | ||
if (!LI->isVolatile() && &LI->getOperandUse(0) == ArgUse.U) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
a load only has one operand, so the &LI->getOperandUse(0) == ArgUse.U
shouldn't be necessary?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, yes! Moved it to an assert.
} | ||
} | ||
} else if (auto *CB = dyn_cast<CallBase>(I)) { | ||
if (CB->isArgOperand(ArgUse.U)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we should add a test where the argument is passed as an operand bundle to a call, rather than an argument
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!
if (!Attr.isValid()) | ||
Attr = CB->getCalledFunction()->getParamAttribute( | ||
ArgNo, Attribute::Initializes); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
after a620697 this is no longer necessary
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks! Done.
@@ -1844,13 +2172,13 @@ deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter, | |||
|
|||
SmallSet<Function *, 8> Changed; | |||
if (ArgAttrsOnly) { | |||
addArgumentAttrs(Nodes.SCCNodes, Changed); | |||
addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/true); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would simplify the comment a bit; this is too much detail. The important thing is that `ArgAttrsOnly` means to only infer attributes that may aid optimizations on the current function. `initializes` does not fall into that category.
|
||
BasicBlock &EntryBB = F.getEntryBlock(); | ||
DenseMap<const BasicBlock *, ConstantRangeList> Initialized; | ||
auto VisitBlock = [&](const BasicBlock *BB) -> ConstantRangeList { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a comment on what `VisitBlock` does?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done and found a corner case :-)
"If this block has uses and none are writes, the argument is not initialized in this block."
Removed this early return. See the test.
if (EntryCRL.empty()) | ||
return false; | ||
} else { | ||
// Visit successors before predecessors with a post-order walk of the |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you expand the comment to include the reasoning behind this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!
declare void @g1(ptr initializes((0, 4)) %p) | ||
declare void @g2(ptr initializes((8, 12)) %p) | ||
declare void @g3(ptr initializes((0, 4)) writeonly nocapture %p) | ||
declare void @g4(ptr initializes((0, 4)) readnone nocapture %p) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems like "initializes" doesn't make much sense on a readnone param (not supposed to write to the memory)? Can the test still work without the 0,4 (still retain the 8,12 in a g4();g2(); sequence)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"readnone" indicates that the function does not dereference that pointer argument. I think you are correct.
If g4 does not have the `initializes` attribute, we consider this use as a clobber, so the g4();g2(); sequence doesn't have the `initializes` attribute. Removed this unit test.
ret void | ||
} | ||
|
||
define void @call_initializes_no_clobber_readnone_capture(ptr %p) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: "capture" -> "nocapture" also?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removed this unit test.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, let's wait until the DSE patch is also ready before submitting.
Thanks! Here is the PR to refactor DSE. Will apply the attribute in DSE after the refactoring. |
@@ -580,6 +582,206 @@ struct ArgumentUsesTracker : public CaptureTracker { | |||
const SCCNodeSet &SCCNodes; | |||
}; | |||
|
|||
// A struct of argument use: a Use and the offset it accesses. This struct | |||
// is to track uses inside function via GEP. If GEP has a non-constant index, | |||
// the Offset field is nullopt. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please use doxygen comments (`///`) for classes.
SmallDenseMap<const BasicBlock *, UsesPerBlockInfo, 16> UsesPerBlock; | ||
}; | ||
|
||
ArgumentAccessInfo GetArgmentAccessInfo(const Instruction *I, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ArgumentAccessInfo GetArgmentAccessInfo(const Instruction *I, | |
ArgumentAccessInfo getArgmentAccessInfo(const Instruction *I, |
} | ||
|
||
// Collect the uses of argument "A" in "F". | ||
ArgumentUsesSummary CollectArgumentUsesPerBlock(Argument &A, Function &F) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ArgumentUsesSummary CollectArgumentUsesPerBlock(Argument &A, Function &F) { | |
ArgumentUsesSummary collectArgumentUsesPerBlock(Argument &A, Function &F) { |
// Collect the uses of argument "A" in "F". | ||
ArgumentUsesSummary CollectArgumentUsesPerBlock(Argument &A, Function &F) { | ||
auto &DL = F.getParent()->getDataLayout(); | ||
auto PointerSize = |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
auto PointerSize = | |
unsigned PointerSize = |
// Add GEP uses to worklist. | ||
// If the GEP is not a constant GEP, set the ArgumentUse::Offset to nullopt. | ||
if (auto *GEP = dyn_cast<GEPOperator>(U)) { | ||
APInt Offset(PointerSize, 0, /*isSigned=*/true); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
APInt Offset(PointerSize, 0, /*isSigned=*/true); | |
APInt Offset(PointerSize, 0); |
The flag is not meaningful for a zero value.
APInt Offset(PointerSize, 0, /*isSigned=*/true); | ||
bool IsConstGEP = GEP->accumulateConstantOffset(DL, Offset); | ||
std::optional<int64_t> NewOffset = std::nullopt; | ||
if (IsConstGEP && ArgUse.Offset) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If ArgUse.Offset is already nullopt, you can skip the accumulateConstantOffset() call.
return std::nullopt; | ||
}; | ||
if (auto *SI = dyn_cast<StoreInst>(I)) { | ||
if (!SI->isVolatile() && &SI->getOperandUse(1) == ArgUse.U) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These isVolatile() checks should probably be isSimple() checks? Presumably you don't want to handle atomics.
GetTypeAccessRange(SI->getAccessType(), ArgUse.Offset)) | ||
AccessRanges.insert(*TypeAccessRange); | ||
return {ArgumentAccessInfo::AccessType::Write, | ||
/*IsClobber=*/false, AccessRanges}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
/*IsClobber=*/false, AccessRanges}; | |
/*IsClobber=*/false, std::move(AccessRanges)}; |
struct ArgumentAccessInfo { | ||
enum class AccessType : uint8_t { Write, Read, Unknown }; | ||
AccessType ArgAccessType; | ||
bool IsClobber = false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The "clobber" terminology here is pretty weird, because usually "clobber" means "write", while here it's more like "read".
Also, after looking at the code, I'm a bit unclear on whether we really need both AccessType::Unknown and IsClobber. Isn't one of them sufficient?
return false; | ||
} | ||
|
||
IInfo = Info; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IInfo = Info; | |
IInfo = std::move(Info); |
Add the "initializes" attribute inference.
This change is expected to have a ~0.09% compile-time regression, which seems acceptable for interprocedural DSE.
https://llvm-compile-time-tracker.com/compare.php?from=9f10252c4ad7cffbbcf692fa9c953698f82ac4f5&to=56345c1cee4375eb5c28b8e7abf4803d20216b3b&stat=instructions%3Au