From 16c925ab5fd3d677792ce6575f81774c64b87cec Mon Sep 17 00:00:00 2001 From: Xuan Zhang <144393379+xuanzh-meta@users.noreply.github.com> Date: Mon, 3 Jun 2024 10:41:49 -0400 Subject: [PATCH] [MachineOutliner] Efficient Implementation of MachineOutliner::findCandidates() (#90260) This reduces the time complexity of the main loop of the `findCandidates()` method from $O(n^2)$ to $O(n \log n)$. For small $n$, the modification does not regress the build time, but it helps significantly when $n$ is large. For one application, this reduces the runtime of the main loop from 120 seconds to 28 seconds. This is the first commit for an enhanced version of the machine outliner -- see [RFC](https://discourse.llvm.org/t/rfc-enhanced-machine-outliner-part-1-fulllto-part-2-thinlto-nolto-to-come/78732). --- llvm/lib/CodeGen/MachineOutliner.cpp | 21 ++++++++++--------- .../AArch64/machine-outliner-overlap.mir | 12 +++++------ 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp index f174dd857def05..626e577a30bf32 100644 --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -584,7 +584,7 @@ void MachineOutliner::findCandidates( LLVM_DEBUG(dbgs() << "*** Discarding overlapping candidates *** \n"); LLVM_DEBUG( dbgs() << "Searching for overlaps in all repeated sequences...\n"); - for (const SuffixTree::RepeatedSubstring &RS : ST) { + for (SuffixTree::RepeatedSubstring &RS : ST) { CandidatesForRepeatedSeq.clear(); unsigned StringLen = RS.Length; LLVM_DEBUG(dbgs() << " Sequence length: " << StringLen << "\n"); @@ -593,6 +593,9 @@ void MachineOutliner::findCandidates( unsigned NumDiscarded = 0; unsigned NumKept = 0; #endif + // Sort the start indices so that we can efficiently check if candidates + // overlap with the ones we've already found for this sequence. 
+ llvm::sort(RS.StartIndices); for (const unsigned &StartIdx : RS.StartIndices) { // Trick: Discard some candidates that would be incompatible with the // ones we've already found for this sequence. This will save us some @@ -616,17 +619,15 @@ void MachineOutliner::findCandidates( // * End before the other starts // * Start after the other ends unsigned EndIdx = StartIdx + StringLen - 1; - auto FirstOverlap = find_if( - CandidatesForRepeatedSeq, [StartIdx, EndIdx](const Candidate &C) { - return EndIdx >= C.getStartIdx() && StartIdx <= C.getEndIdx(); - }); - if (FirstOverlap != CandidatesForRepeatedSeq.end()) { + if (!CandidatesForRepeatedSeq.empty() && + StartIdx <= CandidatesForRepeatedSeq.back().getEndIdx()) { #ifndef NDEBUG ++NumDiscarded; - LLVM_DEBUG(dbgs() << " .. DISCARD candidate @ [" << StartIdx - << ", " << EndIdx << "]; overlaps with candidate @ [" - << FirstOverlap->getStartIdx() << ", " - << FirstOverlap->getEndIdx() << "]\n"); + LLVM_DEBUG(dbgs() << " .. DISCARD candidate @ [" << StartIdx << ", " + << EndIdx << "]; overlaps with candidate @ [" + << CandidatesForRepeatedSeq.back().getStartIdx() + << ", " << CandidatesForRepeatedSeq.back().getEndIdx() + << "]\n"); #endif continue; } diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-overlap.mir b/llvm/test/CodeGen/AArch64/machine-outliner-overlap.mir index 649bb33828c32c..c6bd4c1d04d871 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-overlap.mir +++ b/llvm/test/CodeGen/AArch64/machine-outliner-overlap.mir @@ -8,27 +8,27 @@ # CHECK-NEXT: Candidates discarded: 0 # CHECK-NEXT: Candidates kept: 2 # CHECK-DAG: Sequence length: 8 -# CHECK-NEXT: .. DISCARD candidate @ [5, 12]; overlaps with candidate @ [12, 19] +# CHECK-NEXT: .. DISCARD candidate @ [12, 19]; overlaps with candidate @ [5, 12] # CHECK-NEXT: Candidates discarded: 1 # CHECK-NEXT: Candidates kept: 1 # CHECK-DAG: Sequence length: 9 -# CHECK-NEXT: .. DISCARD candidate @ [4, 12]; overlaps with candidate @ [11, 19] +# CHECK-NEXT: .. 
DISCARD candidate @ [11, 19]; overlaps with candidate @ [4, 12] # CHECK-NEXT: Candidates discarded: 1 # CHECK-NEXT: Candidates kept: 1 # CHECK-DAG: Sequence length: 10 -# CHECK-NEXT: .. DISCARD candidate @ [3, 12]; overlaps with candidate @ [10, 19] +# CHECK-NEXT: .. DISCARD candidate @ [10, 19]; overlaps with candidate @ [3, 12] # CHECK-NEXT: Candidates discarded: 1 # CHECK-NEXT: Candidates kept: 1 # CHECK-DAG: Sequence length: 11 -# CHECK-NEXT: .. DISCARD candidate @ [2, 12]; overlaps with candidate @ [9, 19] +# CHECK-NEXT: .. DISCARD candidate @ [9, 19]; overlaps with candidate @ [2, 12] # CHECK-NEXT: Candidates discarded: 1 # CHECK-NEXT: Candidates kept: 1 # CHECK-DAG: Sequence length: 12 -# CHECK-NEXT: .. DISCARD candidate @ [1, 12]; overlaps with candidate @ [8, 19] +# CHECK-NEXT: .. DISCARD candidate @ [8, 19]; overlaps with candidate @ [1, 12] # CHECK-NEXT: Candidates discarded: 1 # CHECK-NEXT: Candidates kept: 1 # CHECK-DAG: Sequence length: 13 -# CHECK-NEXT: .. DISCARD candidate @ [0, 12]; overlaps with candidate @ [7, 19] +# CHECK-NEXT: .. DISCARD candidate @ [7, 19]; overlaps with candidate @ [0, 12] # CHECK-NEXT: Candidates discarded: 1 # CHECK-NEXT: Candidates kept: 1