From 221c6a9525077a0ca5eb341c19ceb097360ff01d Mon Sep 17 00:00:00 2001
From: Matt Liberty <mliberty@precisioninno.com>
Date: Sun, 19 Nov 2023 17:25:43 -0800
Subject: [PATCH 1/2] mpl2: make run_thread initialization uniform

Signed-off-by: Matt Liberty <mliberty@precisioninno.com>
---
 src/mpl2/src/hier_rtlmp.cpp | 35 ++++++++++-------------------------
 1 file changed, 10 insertions(+), 25 deletions(-)
diff --git a/src/mpl2/src/hier_rtlmp.cpp b/src/mpl2/src/hier_rtlmp.cpp
index 58e8a5e17a7..357e5f06a41 100644
--- a/src/mpl2/src/hier_rtlmp.cpp
+++ b/src/mpl2/src/hier_rtlmp.cpp
@@ -3673,11 +3673,8 @@ void HierRTLMP::multiLevelMacroPlacement(Cluster* parent)
              "Start Simulated Annealing Core");
   while (remaining_runs > 0) {
     std::vector<SACoreSoftMacro*> sa_vector;
-    int run_thread
-        = (remaining_runs > num_threads_) ? num_threads_ : remaining_runs;
-    if (graphics_) {
-      run_thread = 1;
-    }
+    const int run_thread
+        = graphics_ ? 1 : std::min(remaining_runs, num_threads_);
     for (int i = 0; i < run_thread; i++) {
       debugPrint(logger_,
                  MPL,
@@ -3910,11 +3907,8 @@ void HierRTLMP::multiLevelMacroPlacement(Cluster* parent)
                "Start Simulated Annealing Core");
     while (remaining_runs > 0) {
       std::vector<SACoreSoftMacro*> sa_vector;
-      int run_thread
-          = (remaining_runs > num_threads_) ? num_threads_ : remaining_runs;
-      if (graphics_) {
-        run_thread = 1;
-      }
+      const int run_thread
+          = graphics_ ? 1 : std::min(remaining_runs, num_threads_);
       for (int i = 0; i < run_thread; i++) {
         debugPrint(logger_,
                    MPL,
@@ -4460,11 +4454,8 @@ void HierRTLMP::multiLevelMacroPlacementWithoutBusPlanning(Cluster* parent)
              "Start Simulated Annealing Core");
   while (remaining_runs > 0) {
     std::vector<SACoreSoftMacro*> sa_vector;
-    int run_thread
-        = (remaining_runs > num_threads_) ? num_threads_ : remaining_runs;
-    if (graphics_) {
-      run_thread = 1;
-    }
+    const int run_thread
+        = graphics_ ? 1 : std::min(remaining_runs, num_threads_);
     for (int i = 0; i < run_thread; i++) {
       debugPrint(logger_,
                  MPL,
@@ -4962,11 +4953,8 @@ void HierRTLMP::enhancedMacroPlacement(Cluster* parent)
              "Start Simulated Annealing Core");
   while (remaining_runs > 0) {
     std::vector<SACoreSoftMacro*> sa_vector;
-    int run_thread
-        = (remaining_runs > num_threads_) ? num_threads_ : remaining_runs;
-    if (graphics_) {
-      run_thread = 1;
-    }
+    const int run_thread
+        = graphics_ ? 1 : std::min(remaining_runs, num_threads_);
     for (int i = 0; i < run_thread; i++) {
       std::vector<SoftMacro> shaped_macros = macros;  // copy for multithread
       // determine the shape for each macro
@@ -5488,11 +5476,8 @@ void HierRTLMP::hardMacroClusterMacroPlacement(Cluster* cluster)
   float best_cost = std::numeric_limits<float>::max();
   while (remaining_runs > 0) {
     std::vector<SACoreHardMacro*> sa_vector;
-    int run_thread
-        = (remaining_runs > num_threads_) ? num_threads_ : remaining_runs;
-    if (graphics_) {
-      run_thread = 1;
-    }
+    const int run_thread
+        = graphics_ ? 1 : std::min(remaining_runs, num_threads_);
     for (int i = 0; i < run_thread; i++) {
       // change the aspect ratio
       const float width = outline_width * vary_factor_list[run_id++];

From 86772c49f9bcdd9776f035d9518c83c8558b47e1 Mon Sep 17 00:00:00 2001
From: Matt Liberty <mliberty@precisioninno.com>
Date: Mon, 20 Nov 2023 11:40:03 -0800
Subject: [PATCH 2/2] mpl2: update to get consistent results when varying
 thread count

Check the results for early stopping at a constant interval rather than
at one that depends on the number of threads.

Signed-off-by: Matt Liberty <mliberty@precisioninno.com>
---
 src/mpl2/src/hier_rtlmp.cpp | 120 ++++++++++++++++++++++++++----------
 1 file changed, 86 insertions(+), 34 deletions(-)

diff --git a/src/mpl2/src/hier_rtlmp.cpp b/src/mpl2/src/hier_rtlmp.cpp
index 357e5f06a41..686cabca98c 100644
--- a/src/mpl2/src/hier_rtlmp.cpp
+++ b/src/mpl2/src/hier_rtlmp.cpp
@@ -3666,6 +3666,11 @@ void HierRTLMP::multiLevelMacroPlacement(Cluster* parent)
   std::vector<SACoreSoftMacro*>
       sa_containers;  // store all the SA runs to avoid memory leakage
   float best_cost = std::numeric_limits<float>::max();
+  // To give consistency across threads we check the solutions
+  // at a fixed interval independent of how many threads we are using.
+  const int check_interval = 10;
+  int begin_check = 0;
+  int end_check = std::min(check_interval, remaining_runs);
   debugPrint(logger_,
              MPL,
              "hierarchical_macro_placement",
@@ -3772,20 +3777,29 @@ void HierRTLMP::multiLevelMacroPlacement(Cluster* parent)
         th.join();
       }
     }
+    remaining_runs -= run_thread;
     // add macro tilings
     for (auto& sa : sa_vector) {
-      sa_containers.push_back(sa);  // add SA to containers
-      if (sa->isValid() && sa->getNormCost() < best_cost) {
-        best_cost = sa->getNormCost();
-        best_sa = sa;
+      sa_containers.push_back(sa);
+    }
+    while (sa_containers.size() >= end_check) {
+      for (; begin_check < end_check; ++begin_check) {
+        auto& sa = sa_containers[begin_check];
+        if (sa->isValid() && sa->getNormCost() < best_cost) {
+          best_cost = sa->getNormCost();
+          best_sa = sa;
+        }
       }
+      // Stop early on a valid result, or once no unchecked run remains.
+      const int pending = sa_containers.size() - begin_check + remaining_runs;
+      if (best_sa || pending == 0) {
+        break;
+      }
+      end_check = begin_check + std::min(check_interval, pending);
     }
-    sa_vector.clear();
-    // add early stop mechanism
-    if (best_sa != nullptr) {
+    if (best_sa) {
       break;
     }
-    remaining_runs -= run_thread;
   }
   debugPrint(logger_,
              MPL,
@@ -3900,6 +3914,8 @@ void HierRTLMP::multiLevelMacroPlacement(Cluster* parent)
     best_sa = nullptr;
     sa_containers.clear();
     best_cost = std::numeric_limits<float>::max();
+    begin_check = 0;
+    end_check = std::min(check_interval, remaining_runs);
     debugPrint(logger_,
                MPL,
                "hierarchical_macro_placement",
@@ -4007,20 +4023,29 @@ void HierRTLMP::multiLevelMacroPlacement(Cluster* parent)
           th.join();
         }
       }
+      remaining_runs -= run_thread;
       // add macro tilings
       for (auto& sa : sa_vector) {
-        sa_containers.push_back(sa);  // add SA to containers
-        if (sa->isValid() && sa->getNormCost() < best_cost) {
-          best_cost = sa->getNormCost();
-          best_sa = sa;
+        sa_containers.push_back(sa);
+      }
+      while (sa_containers.size() >= end_check) {
+        for (; begin_check < end_check; ++begin_check) {
+          auto& sa = sa_containers[begin_check];
+          if (sa->isValid() && sa->getNormCost() < best_cost) {
+            best_cost = sa->getNormCost();
+            best_sa = sa;
+          }
+        }
+        // Stop early on a valid result, or once no unchecked run remains.
+        const int pending = sa_containers.size() - begin_check + remaining_runs;
+        if (best_sa || pending == 0) {
+          break;
         }
+        end_check = begin_check + std::min(check_interval, pending);
       }
-      sa_vector.clear();
-      // add early stop mechanism
-      if (best_sa != nullptr) {
+      if (best_sa) {
         break;
       }
-      remaining_runs -= run_thread;
     }
     debugPrint(logger_,
                MPL,
@@ -4444,8 +4469,12 @@ void HierRTLMP::multiLevelMacroPlacementWithoutBusPlanning(Cluster* parent)
   int remaining_runs = target_util_list.size();
   int run_id = 0;
   SACoreSoftMacro* best_sa = nullptr;
-  std::vector<SACoreSoftMacro*>
-      sa_containers;  // store all the SA runs to avoid memory leakage
+  std::vector<SACoreSoftMacro*> sa_containers;
+  // To give consistency across threads we check the solutions
+  // at a fixed interval independent of how many threads we are using.
+  const int check_interval = 10;
+  int begin_check = 0;
+  int end_check = std::min(check_interval, remaining_runs);
   float best_cost = std::numeric_limits<float>::max();
   debugPrint(logger_,
              MPL,
@@ -4553,20 +4582,29 @@ void HierRTLMP::multiLevelMacroPlacementWithoutBusPlanning(Cluster* parent)
         th.join();
       }
     }
+    remaining_runs -= run_thread;
     // add macro tilings
     for (auto& sa : sa_vector) {
-      sa_containers.push_back(sa);  // add SA to containers
-      if (sa->isValid() && sa->getNormCost() < best_cost) {
-        best_cost = sa->getNormCost();
-        best_sa = sa;
+      sa_containers.push_back(sa);
+    }
+    while (sa_containers.size() >= end_check) {
+      for (; begin_check < end_check; ++begin_check) {
+        auto& sa = sa_containers[begin_check];
+        if (sa->isValid() && sa->getNormCost() < best_cost) {
+          best_cost = sa->getNormCost();
+          best_sa = sa;
+        }
+      }
+      // Stop early on a valid result, or once no unchecked run remains.
+      const int pending = sa_containers.size() - begin_check + remaining_runs;
+      if (best_sa || pending == 0) {
+        break;
       }
+      end_check = begin_check + std::min(check_interval, pending);
     }
-    sa_vector.clear();
-    // add early stop mechanism
-    if (best_sa != nullptr) {
+    if (best_sa) {
       break;
     }
-    remaining_runs -= run_thread;
   }
   debugPrint(logger_,
              MPL,
@@ -4946,6 +4984,11 @@ void HierRTLMP::enhancedMacroPlacement(Cluster* parent)
   std::vector<SACoreSoftMacro*>
       sa_containers;  // store all the SA runs to avoid memory leakage
   float best_cost = std::numeric_limits<float>::max();
+  // To give consistency across threads we check the solutions
+  // at a fixed interval independent of how many threads we are using.
+  const int check_interval = 10;
+  int begin_check = 0;
+  int end_check = std::min(check_interval, remaining_runs);
   debugPrint(logger_,
              MPL,
              "hierarchical_macro_placement",
@@ -5049,20 +5092,29 @@ void HierRTLMP::enhancedMacroPlacement(Cluster* parent)
         th.join();
       }
     }
+    remaining_runs -= run_thread;
     // add macro tilings
     for (auto& sa : sa_vector) {
-      sa_containers.push_back(sa);  // add SA to containers
-      if (sa->isValid() && sa->getNormCost() < best_cost) {
-        best_cost = sa->getNormCost();
-        best_sa = sa;
+      sa_containers.push_back(sa);
+    }
+    while (sa_containers.size() >= end_check) {
+      for (; begin_check < end_check; ++begin_check) {
+        auto& sa = sa_containers[begin_check];
+        if (sa->isValid() && sa->getNormCost() < best_cost) {
+          best_cost = sa->getNormCost();
+          best_sa = sa;
+        }
       }
+      // Stop early on a valid result, or once no unchecked run remains.
+      const int pending = sa_containers.size() - begin_check + remaining_runs;
+      if (best_sa || pending == 0) {
+        break;
+      }
+      end_check = begin_check + std::min(check_interval, pending);
     }
-    sa_vector.clear();
-    // add early stop mechanism
-    if (best_sa != nullptr) {
+    if (best_sa) {
       break;
     }
-    remaining_runs -= run_thread;
   }
   debugPrint(logger_,
              MPL,