From 637d17946a39fb30eb1fb5fcfa183b3b2dd62893 Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Mon, 30 Sep 2024 23:15:18 +0100 Subject: [PATCH] [NFC] Use initial-stack-allocations for more data structures (#110544) This replaces some of the most frequent offenders of using a DenseMap that cause a malloc, where the typical element-count is small enough to fit in an initial stack allocation. Most of these are fairly obvious, one to highlight is the collectOffset method of GEP instructions: if there's a GEP, of course it's going to have at least one offset, but every time we've called collectOffset we end up calling malloc as well for the DenseMap in the MapVector. --- llvm/include/llvm/IR/Instructions.h | 2 +- llvm/include/llvm/IR/Operator.h | 2 +- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 3 ++- llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h | 4 ++-- llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 5 +++-- llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 2 +- llvm/lib/IR/Instructions.cpp | 2 +- llvm/lib/IR/Operator.cpp | 2 +- llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 2 +- .../AggressiveInstCombine/AggressiveInstCombine.cpp | 2 +- llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 2 +- llvm/lib/Transforms/Scalar/ConstraintElimination.cpp | 4 ++-- llvm/lib/Transforms/Scalar/GVN.cpp | 2 +- llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp | 2 +- llvm/lib/Transforms/Utils/Local.cpp | 4 ++-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 ++-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 10 ++++++---- 17 files changed, 29 insertions(+), 25 deletions(-) diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index 75a059760f48fa6..695a7a6aa9f2548 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -1117,7 +1117,7 @@ class GetElementPtrInst : public Instruction { /// the base GEP pointer. 
bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset) const; bool collectOffset(const DataLayout &DL, unsigned BitWidth, - MapVector<Value *, APInt> &VariableOffsets, + SmallMapVector<Value *, APInt, 4> &VariableOffsets, APInt &ConstantOffset) const; // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h index 88b9bfc0be4b15b..0e9f6ed35dcb4ed 100644 --- a/llvm/include/llvm/IR/Operator.h +++ b/llvm/include/llvm/IR/Operator.h @@ -528,7 +528,7 @@ class GEPOperator /// Collect the offset of this GEP as a map of Values to their associated /// APInt multipliers, as well as a total Constant Offset. bool collectOffset(const DataLayout &DL, unsigned BitWidth, - MapVector<Value *, APInt> &VariableOffsets, + SmallMapVector<Value *, APInt, 4> &VariableOffsets, APInt &ConstantOffset) const; }; diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 6f211abb299e7a9..aa44d62da47be90 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -2831,7 +2831,8 @@ static void emitRangeList( // Gather all the ranges that apply to the same section so they can share // a base address entry. - MapVector<const MCSection *, std::vector<const RangeSpan *>> SectionRanges; + SmallMapVector<const MCSection *, std::vector<const RangeSpan *>, 16> + SectionRanges; for (const auto &Range : R) SectionRanges[&Range.Begin->getSection()].push_back(&Range); diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h index f157ffc6bcc2d73..68db65ace9a427f 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h @@ -1046,7 +1046,7 @@ class VLocTracker { /// transfer function for this block, as part of the dataflow analysis. The /// movement of values between locations inside of a block is handled at a /// much later stage, in the TransferTracker class.
- MapVector<DebugVariableID, DbgValue> Vars; + SmallMapVector<DebugVariableID, DbgValue, 8> Vars; SmallDenseMap<DebugVariableID, const DILocation *, 8> Scopes; MachineBasicBlock *MBB = nullptr; const OverlapMap &OverlappingFragments; @@ -1128,7 +1128,7 @@ class InstrRefBasedLDV : public LDVImpl { /// Live in/out structure for the variable values: a per-block map of /// variables to their values. - using LiveIdxT = DenseMap<const MachineBasicBlock *, DbgValue *>; + using LiveIdxT = SmallDenseMap<const MachineBasicBlock *, DbgValue *, 16>; using VarAndLoc = std::pair<DebugVariableID, DbgValue *>; diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 68dece6cf73e919..a0632eb17e65e64 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -621,7 +621,8 @@ void ScheduleDAGInstrs::initSUnits() { } } -class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> { +class ScheduleDAGInstrs::Value2SUsMap + : public SmallMapVector<ValueType, SUList, 4> { /// Current total number of SUs in map. unsigned NumNodes = 0; @@ -656,7 +657,7 @@ class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> { /// Clears map from all contents. void clear() { - MapVector<ValueType, SUList>::clear(); + SmallMapVector<ValueType, SUList, 4>::clear(); NumNodes = 0; } diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index e4ee3fd99f16e3b..9e5867c70d7b6d9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -183,7 +183,7 @@ class ScheduleDAGRRList : public ScheduleDAGSDNodes { // Hack to keep track of the inverse of FindCallSeqStart without more crazy // DAG crawling.
- DenseMap<SUnit *, SUnit *> CallSeqEndForStart; + SmallDenseMap<SUnit *, SUnit *, 16> CallSeqEndForStart; public: ScheduleDAGRRList(MachineFunction &mf, bool needlatency, diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index e95b98a6404432e..009e0c03957c97b 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -1584,7 +1584,7 @@ bool GetElementPtrInst::accumulateConstantOffset(const DataLayout &DL, bool GetElementPtrInst::collectOffset( const DataLayout &DL, unsigned BitWidth, - MapVector<Value *, APInt> &VariableOffsets, + SmallMapVector<Value *, APInt, 4> &VariableOffsets, APInt &ConstantOffset) const { // Delegate to the generic GEPOperator implementation. return cast<GEPOperator>(this)->collectOffset(DL, BitWidth, VariableOffsets, diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp index 6c9862556f55048..f93ff8f6fc8a253 100644 --- a/llvm/lib/IR/Operator.cpp +++ b/llvm/lib/IR/Operator.cpp @@ -201,7 +201,7 @@ bool GEPOperator::accumulateConstantOffset( bool GEPOperator::collectOffset( const DataLayout &DL, unsigned BitWidth, - MapVector<Value *, APInt> &VariableOffsets, + SmallMapVector<Value *, APInt, 4> &VariableOffsets, APInt &ConstantOffset) const { assert(BitWidth == DL.getIndexSizeInBits(getPointerAddressSpace()) && "The offset bit width does not match DL specification."); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 7bd618b2d9660cb..24bfbff41ec5c05 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -402,7 +402,7 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca, // TODO: Extracting a "multiple of X" from a GEP might be a useful generic // helper.
unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType()); - MapVector<Value *, APInt> VarOffsets; + SmallMapVector<Value *, APInt, 4> VarOffsets; APInt ConstOffset(BW, 0); if (GEP->getPointerOperand()->stripPointerCasts() != Alloca || !GEP->collectOffset(DL, BW, VarOffsets, ConstOffset)) diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 01642b0677aba39..9943c3cbb9fc7d3 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -843,7 +843,7 @@ getStrideAndModOffsetOfGEP(Value *PtrOp, const DataLayout &DL) { // Return a minimum gep stride, greatest common divisor of consective gep // index scales(c.f. Bézout's identity). while (auto *GEP = dyn_cast<GEPOperator>(PtrOp)) { - MapVector<Value *, APInt> VarOffsets; + SmallMapVector<Value *, APInt, 4> VarOffsets; if (!GEP->collectOffset(DL, BW, VarOffsets, ModOffset)) break; diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 416dd09ca874bfc..238bdf9c344b088 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -1557,7 +1557,7 @@ bool AAPointerInfoFloating::collectConstantsForGEP(Attributor &A, const OffsetInfo &PtrOI, const GEPOperator *GEP) { unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType()); - MapVector<Value *, APInt> VariableOffsets; + SmallMapVector<Value *, APInt, 4> VariableOffsets; APInt ConstantOffset(BitWidth, 0); assert(!UsrOI.isUnknown() && !PtrOI.isUnknown() && diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 7e2721d0c5a5e68..7c06e0c757e1cc4 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -385,7 +385,7 @@ struct Decomposition { struct OffsetResult { Value *BasePtr; APInt ConstantOffset; - MapVector<Value *, APInt> VariableOffsets; + 
SmallMapVector<Value *, APInt, 4> VariableOffsets; bool AllInbounds; OffsetResult() : BasePtr(nullptr), ConstantOffset(0, uint64_t(0)) {} @@ -410,7 +410,7 @@ static OffsetResult collectOffsets(GEPOperator &GEP, const DataLayout &DL) { // If we have a nested GEP, check if we can combine the constant offset of the // inner GEP with the outer GEP. if (auto *InnerGEP = dyn_cast<GEPOperator>(Result.BasePtr)) { - MapVector<Value *, APInt> VariableOffsets2; + SmallMapVector<Value *, APInt, 4> VariableOffsets2; APInt ConstantOffset2(BitWidth, 0); bool CanCollectInner = InnerGEP->collectOffset( DL, BitWidth, VariableOffsets2, ConstantOffset2); diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index db39d8621d07714..2ba600497e00d3c 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -422,7 +422,7 @@ GVNPass::Expression GVNPass::ValueTable::createGEPExpr(GetElementPtrInst *GEP) { Type *PtrTy = GEP->getType()->getScalarType(); const DataLayout &DL = GEP->getDataLayout(); unsigned BitWidth = DL.getIndexTypeSizeInBits(PtrTy); - MapVector<Value *, APInt> VariableOffsets; + SmallMapVector<Value *, APInt, 4> VariableOffsets; APInt ConstantOffset(BitWidth, 0); if (GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) { // Convert into offset representation, to recognize equivalent address diff --git a/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp b/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp index 2a4f68e12525235..7f99cd2060a9d81 100644 --- a/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp +++ b/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp @@ -56,7 +56,7 @@ static std::optional<JumpTableTy> parseJumpTable(GetElementPtrInst *GEP, const DataLayout &DL = F.getDataLayout(); const unsigned BitWidth = DL.getIndexSizeInBits(GEP->getPointerAddressSpace()); - MapVector<Value *, APInt> VariableOffsets; + SmallMapVector<Value *, APInt, 4> VariableOffsets; APInt ConstantOffset(BitWidth, 0); if (!GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) return std::nullopt; diff --git a/llvm/lib/Transforms/Utils/Local.cpp 
b/llvm/lib/Transforms/Utils/Local.cpp index 7659fc691961513..cfe40f91f9a5df7 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -925,7 +925,7 @@ CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ, } using PredBlockVector = SmallVector<BasicBlock *, 16>; -using IncomingValueMap = DenseMap<BasicBlock *, Value *>; +using IncomingValueMap = SmallDenseMap<BasicBlock *, Value *, 16>; /// Determines the value to use as the phi node input for a block. /// @@ -2467,7 +2467,7 @@ Value *getSalvageOpsForGEP(GetElementPtrInst *GEP, const DataLayout &DL, SmallVectorImpl<Value *> &AdditionalValues) { unsigned BitWidth = DL.getIndexSizeInBits(GEP->getPointerAddressSpace()); // Rewrite a GEP into a DIExpression. - MapVector<Value *, APInt> VariableOffsets; + SmallMapVector<Value *, APInt, 4> VariableOffsets; APInt ConstantOffset(BitWidth, 0); if (!GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) return nullptr; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 034765bee40e7b2..f5ef50934f59fd2 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5122,7 +5122,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) { // Each 'key' in the map opens a new interval. The values // of the map are the index of the 'last seen' usage of the // instruction that is the key. - using IntervalMap = DenseMap<Instruction *, unsigned>; + using IntervalMap = SmallDenseMap<Instruction *, unsigned, 16>; // Maps instruction to its index. SmallVector<Instruction *, 64> IdxToInstr; @@ -5165,7 +5165,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) { // Saves the list of intervals that end with the index in 'key'. using InstrList = SmallVector<Instruction *, 2>; - DenseMap<unsigned, InstrList> TransposeEnds; + SmallDenseMap<unsigned, InstrList, 16> TransposeEnds; // Transpose the EndPoints to a list of values that end at each index.
for (auto &Interval : EndPoint) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 6b8ec55b30426cb..68bf5c52814f560 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5470,7 +5470,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { } return I1 < I2; }; - DenseMap<const PHINode *, unsigned> PhiToId; + SmallDenseMap<const PHINode *, unsigned, 16> PhiToId; SmallVector<unsigned> Phis(TE.Scalars.size()); std::iota(Phis.begin(), Phis.end(), 0); OrdersType ResOrder(TE.Scalars.size()); @@ -10319,7 +10319,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals, E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode(); if (E->CombinedOp != TreeEntry::NotCombinedOp) ShuffleOrOp = E->CombinedOp; - SetVector<Value *> UniqueValues(VL.begin(), VL.end()); + SmallSetVector<Value *, 16> UniqueValues(VL.begin(), VL.end()); const unsigned Sz = UniqueValues.size(); SmallBitVector UsedScalars(Sz, false); for (unsigned I = 0; I < Sz; ++I) { @@ -18013,7 +18013,7 @@ class HorizontalReduction { /// List of possibly reduced values. SmallVector<SmallVector<Value *>> ReducedVals; /// Maps reduced value to the corresponding reduction operation. - DenseMap<Value *, SmallVector<Instruction *>> ReducedValsToOps; + SmallDenseMap<Value *, SmallVector<Instruction *>, 16> ReducedValsToOps; WeakTrackingVH ReductionRoot; /// The type of reduction operation. RecurKind RdxKind; @@ -18382,7 +18382,9 @@ class HorizontalReduction { // instruction op id and/or alternate op id, plus do extra analysis for // loads (grouping them by the distabce between pointers) and cmp // instructions (grouping them by the predicate). - MapVector<size_t, MapVector<size_t, MapVector<Value *, unsigned>>> + SmallMapVector< size_t, SmallMapVector<size_t, SmallMapVector<Value *, unsigned, 2>, 2>, 8> PossibleReducedVals; initReductionOps(Root); DenseMap<size_t, SmallVector<LoadInst *>> LoadsMap;