From 719557269e9f5206d954c87ef0cb3d9abdf49946 Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Thu, 8 Aug 2024 11:35:40 +0100 Subject: [PATCH] [SSAUpdater] Add a SmallPtrSet reserve method for IDFcalc (#97823) As per the LLVM Programmer's Manual, SmallPtrSets do linear scans on insertion and then turn into a hash-table if the set gets big. Here in the IDFCalculator, the SmallPtrSets have been configured to have 32 elements in each static allocation... which means that we linearly scan for all problems with up to 32 elements, which I feel is quite a large N. Shorten the SmallPtrSet size, and add a reserve method to avoid any repeated allocations, plus corresponding unit tests. Doing this yields a 0.13% compile-time improvement for debug-info builds, as we hit IDFCalculator pretty hard in InstrRefBasedLDV. --- llvm/include/llvm/ADT/SmallPtrSet.h | 23 +++++++++ .../GenericIteratedDominanceFrontier.h | 8 ++- llvm/unittests/ADT/SmallPtrSetTest.cpp | 49 +++++++++++++++++++ 3 files changed, 78 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/ADT/SmallPtrSet.h b/llvm/include/llvm/ADT/SmallPtrSet.h index 597cad8444f69f..4c85b1251bc40f 100644 --- a/llvm/include/llvm/ADT/SmallPtrSet.h +++ b/llvm/include/llvm/ADT/SmallPtrSet.h @@ -17,6 +17,7 @@ #include "llvm/ADT/EpochTracker.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/ReverseIteration.h" #include "llvm/Support/type_traits.h" #include <cassert> @@ -92,6 +93,7 @@ class SmallPtrSetImplBase : public DebugEpochBase { [[nodiscard]] bool empty() const { return size() == 0; } size_type size() const { return NumNonEmpty - NumTombstones; } + size_type capacity() const { return CurArraySize; } void clear() { incrementEpoch(); @@ -108,6 +110,27 @@ class SmallPtrSetImplBase : public DebugEpochBase { NumTombstones = 0; } + void reserve(size_type NumEntries) { + incrementEpoch(); + // Do nothing if we're given zero as a reservation size.
+ if (NumEntries == 0) + return; + // No need to expand if we're small and NumEntries will fit in the space. + if (isSmall() && NumEntries <= CurArraySize) + return; + // insert_imp_big will reallocate if stores is more than 75% full, on the + // /final/ insertion. + if (!isSmall() && ((NumEntries - 1) * 4) < (CurArraySize * 3)) + return; + // We must Grow -- find the size where we'd be 75% full, then round up to + // the next power of two. + size_type NewSize = NumEntries + (NumEntries / 3); + NewSize = 1 << (Log2_32_Ceil(NewSize) + 1); + // Like insert_imp_big, always allocate at least 128 elements. + NewSize = std::max(128u, NewSize); + Grow(NewSize); + } + protected: static void *getTombstoneMarker() { return reinterpret_cast<void*>(-2); } diff --git a/llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h b/llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h index cb18d5b0c265ad..c52b2cab377bf8 100644 --- a/llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h +++ b/llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h @@ -145,8 +145,12 @@ void IDFCalculatorBase<NodeTy, IsPostDom>::calculate( DT.updateDFSNumbers(); SmallVector<DomTreeNodeBase<NodeTy> *, 32> Worklist; - SmallPtrSet<DomTreeNodeBase<NodeTy> *, 32> VisitedPQ; - SmallPtrSet<DomTreeNodeBase<NodeTy> *, 32> VisitedWorklist; + SmallPtrSet<DomTreeNodeBase<NodeTy> *, 16> VisitedPQ; + SmallPtrSet<DomTreeNodeBase<NodeTy> *, 16> VisitedWorklist; + if (useLiveIn) { + VisitedPQ.reserve(LiveInBlocks->size()); + VisitedWorklist.reserve(LiveInBlocks->size()); + } for (NodeTy *BB : *DefBlocks) if (DomTreeNodeBase<NodeTy> *Node = DT.getNode(BB)) { diff --git a/llvm/unittests/ADT/SmallPtrSetTest.cpp b/llvm/unittests/ADT/SmallPtrSetTest.cpp index b45318d076a3d8..1d9a0d1725a92f 100644 --- a/llvm/unittests/ADT/SmallPtrSetTest.cpp +++ b/llvm/unittests/ADT/SmallPtrSetTest.cpp @@ -14,9 +14,11 @@ #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/PointerLikeTypeTraits.h" +#include "gmock/gmock.h" #include "gtest/gtest.h" using namespace llvm; +using testing::UnorderedElementsAre; TEST(SmallPtrSetTest,
Assignment) { int buf[8]; @@ -408,3 +410,50 @@ TEST(SmallPtrSetTest, RemoveIf) { Removed = Set.remove_if([](int *Ptr) { return false; }); EXPECT_FALSE(Removed); } + +TEST(SmallPtrSetTest, Reserve) { + // Check that we don't do anything silly when using reserve(). + SmallPtrSet<int *, 4> Set; + int Vals[8] = {0, 1, 2, 3, 4, 5, 6, 7}; + + Set.insert(&Vals[0]); + + // We shouldn't reallocate when this happens. + Set.reserve(4); + EXPECT_EQ(Set.capacity(), 4u); + + Set.insert(&Vals[1]); + Set.insert(&Vals[2]); + Set.insert(&Vals[3]); + + // We shouldn't reallocate this time either. + Set.reserve(4); + EXPECT_EQ(Set.capacity(), 4u); + EXPECT_EQ(Set.size(), 4u); + EXPECT_THAT(Set, + UnorderedElementsAre(&Vals[0], &Vals[1], &Vals[2], &Vals[3])); + + // Reserving further should lead to a reallocation. And matching the existing + // insertion approach, we immediately allocate up to 128 elements. + Set.reserve(5); + EXPECT_EQ(Set.capacity(), 128u); + EXPECT_EQ(Set.size(), 4u); + EXPECT_THAT(Set, + UnorderedElementsAre(&Vals[0], &Vals[1], &Vals[2], &Vals[3])); + + // And we should be able to insert another two or three elements without + // reallocating. + Set.insert(&Vals[4]); + Set.insert(&Vals[5]); + + // Calling a smaller reserve size should have no effect. + Set.reserve(1); + EXPECT_EQ(Set.capacity(), 128u); + EXPECT_EQ(Set.size(), 6u); + + // Reserving zero should have no effect either. + Set.reserve(0); + EXPECT_EQ(Set.capacity(), 128u); + EXPECT_EQ(Set.size(), 6u); + EXPECT_THAT(Set, UnorderedElementsAre(&Vals[0], &Vals[1], &Vals[2], &Vals[3], &Vals[4], &Vals[5])); +}