From 221c6a9525077a0ca5eb341c19ceb097360ff01d Mon Sep 17 00:00:00 2001 From: Matt Liberty Date: Sun, 19 Nov 2023 17:25:43 -0800 Subject: [PATCH 1/2] mpl2: make run_thread initialization uniform Signed-off-by: Matt Liberty --- src/mpl2/src/hier_rtlmp.cpp | 35 ++++++++++------------------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/src/mpl2/src/hier_rtlmp.cpp b/src/mpl2/src/hier_rtlmp.cpp index 58e8a5e17a7..357e5f06a41 100644 --- a/src/mpl2/src/hier_rtlmp.cpp +++ b/src/mpl2/src/hier_rtlmp.cpp @@ -3673,11 +3673,8 @@ void HierRTLMP::multiLevelMacroPlacement(Cluster* parent) "Start Simulated Annealing Core"); while (remaining_runs > 0) { std::vector sa_vector; - int run_thread - = (remaining_runs > num_threads_) ? num_threads_ : remaining_runs; - if (graphics_) { - run_thread = 1; - } + const int run_thread + = graphics_ ? 1 : std::min(remaining_runs, num_threads_); for (int i = 0; i < run_thread; i++) { debugPrint(logger_, MPL, @@ -3910,11 +3907,8 @@ void HierRTLMP::multiLevelMacroPlacement(Cluster* parent) "Start Simulated Annealing Core"); while (remaining_runs > 0) { std::vector sa_vector; - int run_thread - = (remaining_runs > num_threads_) ? num_threads_ : remaining_runs; - if (graphics_) { - run_thread = 1; - } + const int run_thread + = graphics_ ? 1 : std::min(remaining_runs, num_threads_); for (int i = 0; i < run_thread; i++) { debugPrint(logger_, MPL, @@ -4460,11 +4454,8 @@ void HierRTLMP::multiLevelMacroPlacementWithoutBusPlanning(Cluster* parent) "Start Simulated Annealing Core"); while (remaining_runs > 0) { std::vector sa_vector; - int run_thread - = (remaining_runs > num_threads_) ? num_threads_ : remaining_runs; - if (graphics_) { - run_thread = 1; - } + const int run_thread + = graphics_ ? 
1 : std::min(remaining_runs, num_threads_); for (int i = 0; i < run_thread; i++) { debugPrint(logger_, MPL, @@ -4962,11 +4953,8 @@ void HierRTLMP::enhancedMacroPlacement(Cluster* parent) "Start Simulated Annealing Core"); while (remaining_runs > 0) { std::vector sa_vector; - int run_thread - = (remaining_runs > num_threads_) ? num_threads_ : remaining_runs; - if (graphics_) { - run_thread = 1; - } + const int run_thread + = graphics_ ? 1 : std::min(remaining_runs, num_threads_); for (int i = 0; i < run_thread; i++) { std::vector shaped_macros = macros; // copy for multithread // determine the shape for each macro @@ -5488,11 +5476,8 @@ void HierRTLMP::hardMacroClusterMacroPlacement(Cluster* cluster) float best_cost = std::numeric_limits::max(); while (remaining_runs > 0) { std::vector sa_vector; - int run_thread - = (remaining_runs > num_threads_) ? num_threads_ : remaining_runs; - if (graphics_) { - run_thread = 1; - } + const int run_thread + = graphics_ ? 1 : std::min(remaining_runs, num_threads_); for (int i = 0; i < run_thread; i++) { // change the aspect ratio const float width = outline_width * vary_factor_list[run_id++]; From 86772c49f9bcdd9776f035d9518c83c8558b47e1 Mon Sep 17 00:00:00 2001 From: Matt Liberty Date: Mon, 20 Nov 2023 11:40:03 -0800 Subject: [PATCH 2/2] mpl2: update to get consistent results when varying thread count Check the results for early stopping at a constant interval rather than at an interval that depends on the number of threads. 
Signed-off-by: Matt Liberty --- src/mpl2/src/hier_rtlmp.cpp | 120 ++++++++++++++++++++++++++---------- 1 file changed, 86 insertions(+), 34 deletions(-) diff --git a/src/mpl2/src/hier_rtlmp.cpp b/src/mpl2/src/hier_rtlmp.cpp index 357e5f06a41..686cabca98c 100644 --- a/src/mpl2/src/hier_rtlmp.cpp +++ b/src/mpl2/src/hier_rtlmp.cpp @@ -3666,6 +3666,11 @@ void HierRTLMP::multiLevelMacroPlacement(Cluster* parent) std::vector sa_containers; // store all the SA runs to avoid memory leakage float best_cost = std::numeric_limits::max(); + // To give consistency across threads we check the solutions + // at a fixed interval independent of how many threads we are using. + const int check_interval = 10; + int begin_check = 0; + int end_check = std::min(check_interval, remaining_runs); debugPrint(logger_, MPL, "hierarchical_macro_placement", @@ -3772,20 +3777,29 @@ void HierRTLMP::multiLevelMacroPlacement(Cluster* parent) th.join(); } } + remaining_runs -= run_thread; // add macro tilings for (auto& sa : sa_vector) { - sa_containers.push_back(sa); // add SA to containers - if (sa->isValid() && sa->getNormCost() < best_cost) { - best_cost = sa->getNormCost(); - best_sa = sa; + sa_containers.push_back(sa); + } + while (sa_containers.size() >= end_check) { + while (begin_check < end_check) { + auto& sa = sa_containers[begin_check]; + if (sa->isValid() && sa->getNormCost() < best_cost) { + best_cost = sa->getNormCost(); + best_sa = sa; + } + ++begin_check; } + // add early stop mechanism + if (best_sa) { + break; + } + end_check = begin_check + std::min(check_interval, remaining_runs); } - sa_vector.clear(); - // add early stop mechanism - if (best_sa != nullptr) { + if (best_sa) { break; } - remaining_runs -= run_thread; } debugPrint(logger_, MPL, @@ -3900,6 +3914,8 @@ void HierRTLMP::multiLevelMacroPlacement(Cluster* parent) best_sa = nullptr; sa_containers.clear(); best_cost = std::numeric_limits::max(); + begin_check = 0; + end_check = std::min(check_interval, remaining_runs); 
debugPrint(logger_, MPL, "hierarchical_macro_placement", @@ -4007,20 +4023,29 @@ void HierRTLMP::multiLevelMacroPlacement(Cluster* parent) th.join(); } } + remaining_runs -= run_thread; // add macro tilings for (auto& sa : sa_vector) { - sa_containers.push_back(sa); // add SA to containers - if (sa->isValid() && sa->getNormCost() < best_cost) { - best_cost = sa->getNormCost(); - best_sa = sa; + sa_containers.push_back(sa); + } + while (sa_containers.size() >= end_check) { + while (begin_check < end_check) { + auto& sa = sa_containers[begin_check]; + if (sa->isValid() && sa->getNormCost() < best_cost) { + best_cost = sa->getNormCost(); + best_sa = sa; + } + ++begin_check; + } + // add early stop mechanism + if (best_sa) { + break; } + end_check = begin_check + std::min(check_interval, remaining_runs); } - sa_vector.clear(); - // add early stop mechanism - if (best_sa != nullptr) { + if (best_sa) { break; } - remaining_runs -= run_thread; } debugPrint(logger_, MPL, @@ -4444,8 +4469,12 @@ void HierRTLMP::multiLevelMacroPlacementWithoutBusPlanning(Cluster* parent) int remaining_runs = target_util_list.size(); int run_id = 0; SACoreSoftMacro* best_sa = nullptr; - std::vector - sa_containers; // store all the SA runs to avoid memory leakage + std::vector sa_containers; + // To give consistency across threads we check the solutions + // at a fixed interval independent of how many threads we are using. 
+ const int check_interval = 10; + int begin_check = 0; + int end_check = std::min(check_interval, remaining_runs); float best_cost = std::numeric_limits::max(); debugPrint(logger_, MPL, @@ -4553,20 +4582,29 @@ void HierRTLMP::multiLevelMacroPlacementWithoutBusPlanning(Cluster* parent) th.join(); } } + remaining_runs -= run_thread; // add macro tilings for (auto& sa : sa_vector) { - sa_containers.push_back(sa); // add SA to containers - if (sa->isValid() && sa->getNormCost() < best_cost) { - best_cost = sa->getNormCost(); - best_sa = sa; + sa_containers.push_back(sa); + } + while (sa_containers.size() >= end_check) { + while (begin_check < end_check) { + auto& sa = sa_containers[begin_check]; + if (sa->isValid() && sa->getNormCost() < best_cost) { + best_cost = sa->getNormCost(); + best_sa = sa; + } + ++begin_check; + } + // add early stop mechanism + if (best_sa) { + break; } + end_check = begin_check + std::min(check_interval, remaining_runs); } - sa_vector.clear(); - // add early stop mechanism - if (best_sa != nullptr) { + if (best_sa) { break; } - remaining_runs -= run_thread; } debugPrint(logger_, MPL, @@ -4946,6 +4984,11 @@ void HierRTLMP::enhancedMacroPlacement(Cluster* parent) std::vector sa_containers; // store all the SA runs to avoid memory leakage float best_cost = std::numeric_limits::max(); + // To give consistency across threads we check the solutions + // at a fixed interval independent of how many threads we are using. 
+ const int check_interval = 10; + int begin_check = 0; + int end_check = std::min(check_interval, remaining_runs); debugPrint(logger_, MPL, "hierarchical_macro_placement", @@ -5049,20 +5092,29 @@ void HierRTLMP::enhancedMacroPlacement(Cluster* parent) th.join(); } } + remaining_runs -= run_thread; // add macro tilings for (auto& sa : sa_vector) { - sa_containers.push_back(sa); // add SA to containers - if (sa->isValid() && sa->getNormCost() < best_cost) { - best_cost = sa->getNormCost(); - best_sa = sa; + sa_containers.push_back(sa); + } + while (sa_containers.size() >= end_check) { + while (begin_check < end_check) { + auto& sa = sa_containers[begin_check]; + if (sa->isValid() && sa->getNormCost() < best_cost) { + best_cost = sa->getNormCost(); + best_sa = sa; + } + ++begin_check; } + // add early stop mechanism + if (best_sa) { + break; + } + end_check = begin_check + std::min(check_interval, remaining_runs); } - sa_vector.clear(); - // add early stop mechanism - if (best_sa != nullptr) { + if (best_sa) { break; } - remaining_runs -= run_thread; } debugPrint(logger_, MPL,