From 076843cab17f2fe49f2364271174c10e7f560ac4 Mon Sep 17 00:00:00 2001 From: AlexandreSinger Date: Mon, 26 Aug 2024 14:24:20 -0400 Subject: [PATCH 1/3] [ClusterLegalizer] Cluster Legalizer API Isolated the cluster legalization logic out of the packer's clustering algorithm in such a way that it can be used externally to the clusterer with no issue. Once the ClusterLegalizer API is constructed, it will allocate all of the memory that it needs for legalization; it can then be used to legalize clusters; and when it is destroyed it will remove all state created for clustering. The cluster legalizer API maintains the legalized clusters internally as opposed to using the ClusterBlockId as an ID to the clusters. This allowed me to separate the ClusteredNetlist from the packing algorithm entirely. Importantly, this helps separate all legalization logic from directly modifying the ClusteredNetlist generation code. I did my best to remove as much global state as I could from the API to make it as self-contained as possible. This change greatly cleaned up the packer code and removed many of its global variables (including the ClusterLegalizer itself, which is now a local variable!). This change also removed the re_clustering API since it would need to be heavily modified to include this change; and since this feature was not being tested, it would be challenging to upgrade to use this new cluster legalization API. 
--- vpr/src/base/SetupGrid.h | 2 + vpr/src/base/vpr_api.cpp | 18 +- vpr/src/base/vpr_constraints_writer.cpp | 16 +- vpr/src/base/vpr_constraints_writer.h | 16 +- vpr/src/base/vpr_context.h | 80 +- vpr/src/base/vpr_types.h | 9 - vpr/src/pack/cluster.cpp | 184 ++- vpr/src/pack/cluster.h | 16 +- vpr/src/pack/cluster_legalizer.cpp | 1756 ++++++++++++++++++++ vpr/src/pack/cluster_legalizer.h | 474 ++++++ vpr/src/pack/cluster_util.cpp | 1943 +++-------------------- vpr/src/pack/cluster_util.h | 255 +-- vpr/src/pack/constraints_report.cpp | 16 +- vpr/src/pack/constraints_report.h | 6 +- vpr/src/pack/noc_aware_cluster_util.cpp | 38 +- vpr/src/pack/noc_aware_cluster_util.h | 13 +- vpr/src/pack/output_clustering.cpp | 267 ++-- vpr/src/pack/output_clustering.h | 20 +- vpr/src/pack/pack.cpp | 107 +- vpr/src/pack/pack.h | 8 +- vpr/src/pack/re_cluster.cpp | 276 ---- vpr/src/pack/re_cluster.h | 60 - vpr/src/pack/re_cluster_util.cpp | 764 --------- vpr/src/pack/re_cluster_util.h | 212 --- vpr/src/place/place_constraints.cpp | 6 +- vpr/src/route/route_common.cpp | 1 - vpr/src/util/vpr_utils.cpp | 29 +- vpr/src/util/vpr_utils.h | 4 +- vpr/test/test_connection_router.cpp | 3 - vpr/test/test_post_verilog.cpp | 4 - vpr/test/test_vpr.cpp | 6 - 31 files changed, 3005 insertions(+), 3604 deletions(-) create mode 100644 vpr/src/pack/cluster_legalizer.cpp create mode 100644 vpr/src/pack/cluster_legalizer.h delete mode 100644 vpr/src/pack/re_cluster.cpp delete mode 100644 vpr/src/pack/re_cluster.h delete mode 100644 vpr/src/pack/re_cluster_util.cpp delete mode 100644 vpr/src/pack/re_cluster_util.h diff --git a/vpr/src/base/SetupGrid.h b/vpr/src/base/SetupGrid.h index da9b2631c94..b65a9eb23da 100644 --- a/vpr/src/base/SetupGrid.h +++ b/vpr/src/base/SetupGrid.h @@ -12,6 +12,8 @@ #include #include "physical_types.h" +class DeviceGrid; + ///@brief Find the device satisfying the specified minimum resources /// minimum_instance_counts and target_device_utilization are not required when specifying 
a fixed layout DeviceGrid create_device_grid(const std::string& layout_name, diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index 2bc4dd2a5f9..4794724f234 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -15,6 +15,7 @@ #include #include +#include "vpr_context.h" #include "vtr_assert.h" #include "vtr_math.h" #include "vtr_log.h" @@ -359,9 +360,7 @@ void vpr_init_with_options(const t_options* options, t_vpr_setup* vpr_setup, t_a fflush(stdout); - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); auto& device_ctx = g_vpr_ctx.mutable_device(); - helper_ctx.lb_type_rr_graphs = vpr_setup->PackerRRGraph; device_ctx.pad_loc_type = vpr_setup->PlacerOpts.pad_loc_type; } @@ -613,12 +612,13 @@ bool vpr_pack_flow(t_vpr_setup& vpr_setup, const t_arch& arch) { //Load a previous packing from the .net file vpr_load_packing(vpr_setup, arch); - //Load cluster_constraints data structure here since loading pack file - load_cluster_constraints(); } } + // Load cluster_constraints data structure here since loading pack file + load_cluster_constraints(); + /* Sanity check the resulting netlist */ check_netlist(packer_opts.pack_verbosity); @@ -696,6 +696,7 @@ void vpr_load_packing(t_vpr_setup& vpr_setup, const t_arch& arch) { "Must have valid .net filename to load packing"); auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); /* Ensure we have a clean start with void net remapping information */ cluster_ctx.post_routing_clb_pin_nets.clear(); @@ -706,8 +707,15 @@ void vpr_load_packing(t_vpr_setup& vpr_setup, const t_arch& arch) { vpr_setup.FileNameOpts.verify_file_digests, vpr_setup.PackerOpts.pack_verbosity); + /* Load the mapping between clusters and their atoms */ + cluster_ctx.atoms_lookup.resize(cluster_ctx.clb_nlist.blocks().size()); + for (AtomBlockId atom_blk_id : atom_ctx.nlist.blocks()) { + ClusterBlockId atom_cluster_blk_id = atom_ctx.lookup.atom_clb(atom_blk_id); + 
cluster_ctx.atoms_lookup[atom_cluster_blk_id].insert(atom_blk_id); + } + process_constant_nets(g_vpr_ctx.mutable_atom().nlist, - g_vpr_ctx.atom().lookup, + atom_ctx.lookup, cluster_ctx.clb_nlist, vpr_setup.constant_net_method, vpr_setup.PackerOpts.pack_verbosity); diff --git a/vpr/src/base/vpr_constraints_writer.cpp b/vpr/src/base/vpr_constraints_writer.cpp index 0de68549aec..29842028754 100644 --- a/vpr/src/base/vpr_constraints_writer.cpp +++ b/vpr/src/base/vpr_constraints_writer.cpp @@ -7,17 +7,15 @@ #include "vpr_constraints_serializer.h" #include "vpr_constraints_uxsdcxx.h" -#include "vtr_time.h" +#include "vpr_context.h" #include "globals.h" #include "pugixml.hpp" -#include "pugixml_util.hpp" -#include "clustered_netlist_utils.h" #include +#include #include "vpr_constraints_writer.h" #include "region.h" -#include "re_cluster_util.h" /** * @brief Create a partition with the given name and a single region. @@ -30,7 +28,6 @@ static Partition create_partition(const std::string& part_name, const Region& re void write_vpr_floorplan_constraints(const char* file_name, int expand, bool subtile, int horizontal_partitions, int vertical_partitions) { VprConstraints constraints; - if (horizontal_partitions != 0 && vertical_partitions != 0) { setup_vpr_floorplan_constraints_cutpoints(constraints, horizontal_partitions, vertical_partitions); } else { @@ -83,8 +80,7 @@ void setup_vpr_floorplan_constraints_one_loc(VprConstraints& constraints, int ex part.set_part_region(pr); constraints.mutable_place_constraints().add_partition(part); - const std::unordered_set& atoms = cluster_to_atoms(blk_id); - + const std::unordered_set& atoms = cluster_ctx.atoms_lookup[blk_id]; for (AtomBlockId atom_id : atoms) { constraints.mutable_place_constraints().add_constrained_atom(atom_id, partid); } @@ -92,7 +88,9 @@ void setup_vpr_floorplan_constraints_one_loc(VprConstraints& constraints, int ex } } -void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, int 
horizontal_cutpoints, int vertical_cutpoints) { +void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, + int horizontal_cutpoints, + int vertical_cutpoints) { auto& cluster_ctx = g_vpr_ctx.clustering(); auto& block_locs = g_vpr_ctx.placement().block_locs(); auto& device_ctx = g_vpr_ctx.device(); @@ -158,7 +156,7 @@ void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, int * appropriate region accordingly */ for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { - const std::unordered_set& atoms = cluster_to_atoms(blk_id); + const std::unordered_set& atoms = cluster_ctx.atoms_lookup[blk_id]; int x = block_locs[blk_id].loc.x; int y = block_locs[blk_id].loc.y; int width = device_ctx.grid.width(); diff --git a/vpr/src/base/vpr_constraints_writer.h b/vpr/src/base/vpr_constraints_writer.h index 25dd7fc08ce..ddfcd259b43 100644 --- a/vpr/src/base/vpr_constraints_writer.h +++ b/vpr/src/base/vpr_constraints_writer.h @@ -25,6 +25,8 @@ #ifndef VPR_SRC_BASE_VPR_CONSTRAINTS_WRITER_H_ #define VPR_SRC_BASE_VPR_CONSTRAINTS_WRITER_H_ +class VprConstraints; + /** * @brief Write out floorplan constraints to an XML file based on current placement * @@ -35,7 +37,11 @@ * @param subtile Specifies whether to write out the constraint regions with or without * subtile values. */ -void write_vpr_floorplan_constraints(const char* file_name, int expand, bool subtile, int horizontal_partitions, int vertical_partitions); +void write_vpr_floorplan_constraints(const char* file_name, + int expand, + bool subtile, + int horizontal_partitions, + int vertical_partitions); /** * @brief Populates VprConstraints by creating a partition for each clustered block. @@ -50,7 +56,9 @@ void write_vpr_floorplan_constraints(const char* file_name, int expand, bool sub * @param subtile Specifies whether to write out the constraint regions with or without * subtile values. 
*/ -void setup_vpr_floorplan_constraints_one_loc(VprConstraints& constraints, int expand, bool subtile); +void setup_vpr_floorplan_constraints_one_loc(VprConstraints& constraints, + int expand, + bool subtile); /** * @brief Populates VprConstraints by dividing the grid into multiple partitions. @@ -62,6 +70,8 @@ void setup_vpr_floorplan_constraints_one_loc(VprConstraints& constraints, int ex * @param horizontal_cutpoints The number of horizontal cut-lines. * @param vertical_cutpoints The number of vertical cut_lines. */ -void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, int horizontal_cutpoints, int vertical_cutpoints); +void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, + int horizontal_cutpoints, + int vertical_cutpoints); #endif /* VPR_SRC_BASE_VPR_CONSTRAINTS_WRITER_H_ */ diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 7ff7205024a..f69c58e94ab 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -10,6 +10,7 @@ #include "vtr_ndmatrix.h" #include "vtr_optional.h" #include "vtr_vector.h" +#include "vtr_vector_map.h" #include "atom_netlist.h" #include "clustered_netlist.h" #include "rr_graph_view.h" @@ -78,12 +79,6 @@ struct AtomContext : public Context { /// @brief Mappings to/from the Atom Netlist to physically described .blif models AtomLookup lookup; - - /// @brief Prepacker object which performs prepacking and stores the pack - /// molecules. Has a method to get the pack molecule of an AtomBlock. - /// TODO: This is mainly only used in the clusterer. It can probably be - /// removed from the AtomContext entirely. 
- Prepacker prepacker; }; /** @@ -286,69 +281,23 @@ struct ClusteringContext : public Context { * CLB Netlist ********************************************************************/ - ///@brief New netlist class derived from Netlist + /// @brief New netlist class derived from Netlist ClusteredNetlist clb_nlist; - /* Database for nets of each clb block pin after routing stage - * - post_routing_clb_pin_nets: - * mapping of pb_type pins to clustered net ids - * - pre_routing_net_pin_mapping: - * a copy of mapping for current pb_route index to previous pb_route index - * Record the previous pin mapping for finding the correct pin index during timing analysis - */ + /// @brief Database for nets of each clb block pin after routing stage. + /// - post_routing_clb_pin_nets: + /// mapping of pb_type pins to clustered net ids. + /// - pre_routing_net_pin_mapping: + /// a copy of mapping for current pb_route index to previous pb_route index + /// Record the previous pin mapping for finding the correct pin index during + /// timing analysis. std::map> post_routing_clb_pin_nets; std::map> pre_routing_net_pin_mapping; -}; - -/** - * @brief State relating to helper data structure using in the clustering stage - * - * This should contain helper data structures that are useful in the clustering/packing stage. - * They are encapsulated here as they are useful in clustering and reclustering algorithms that may be used - * in packing or placement stages. - */ -struct ClusteringHelperContext : public Context { - // A map used to save the number of used instances from each logical block type. 
- std::map num_used_type_instances; - - // Stats keeper for placement information during packing/clustering - t_cluster_placement_stats* cluster_placement_stats; - - // total number of models in the architecture - int num_models; - - int max_cluster_size; - t_pb_graph_node** primitives_list; - bool enable_pin_feasibility_filter; - int feasible_block_array_size; - - // total number of CLBs - int total_clb_num; - - // A vector of routing resource nodes within each of logic cluster_ctx.blocks types [0 .. num_logical_block_type-1] - std::vector* lb_type_rr_graphs; - - // the utilization of external input/output pins during packing (between 0 and 1) - t_ext_pin_util_targets target_external_pin_util; - - // During clustering, a block is related to un-clustered primitives with nets. - // This relation has three types: low fanout, high fanout, and transitive - // high_fanout_thresholds stores the threshold for nets to a block type to be considered high fanout - t_pack_high_fanout_thresholds high_fanout_thresholds; - - // A vector of unordered_sets of AtomBlockIds that are inside each clustered block [0 .. num_clustered_blocks-1] - // unordered_set for faster insertion/deletion during the iterative improvement process of packing + /// @brief A vector of unordered_sets of AtomBlockIds that are inside each + /// clustered block [0 .. num_clustered_blocks-1] + /// This is populated when the packing is loaded. vtr::vector> atoms_lookup; - - /** Stores the NoC group ID of each atom block. 
Atom blocks that belong - * to different NoC groups can't be clustered with each other into the - * same clustered block.*/ - vtr::vector atom_noc_grp_id; - - ~ClusteringHelperContext() { - delete[] primitives_list; - } }; /** @@ -728,9 +677,6 @@ class VprContext : public Context { const ClusteringContext& clustering() const { return clustering_; } ClusteringContext& mutable_clustering() { return clustering_; } - const ClusteringHelperContext& cl_helper() const { return helper_; } - ClusteringHelperContext& mutable_cl_helper() { return helper_; } - const PlacementContext& placement() const { return placement_; } PlacementContext& mutable_placement() { return placement_; } @@ -760,8 +706,6 @@ class VprContext : public Context { PowerContext power_; ClusteringContext clustering_; - ClusteringHelperContext helper_; - PlacementContext placement_; RoutingContext routing_; FloorplanningContext constraints_; diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index cf5f1062f96..b63a2f7d501 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -168,15 +168,6 @@ enum class e_cluster_seed { BLEND2 }; -enum class e_block_pack_status { - BLK_PASSED, - BLK_FAILED_FEASIBLE, - BLK_FAILED_ROUTE, - BLK_FAILED_FLOORPLANNING, - BLK_FAILED_NOC_GROUP, - BLK_STATUS_UNDEFINED -}; - struct t_ext_pin_util { t_ext_pin_util() = default; t_ext_pin_util(float in, float out) diff --git a/vpr/src/pack/cluster.cpp b/vpr/src/pack/cluster.cpp index a5ee38b8d0c..607e4b530f3 100644 --- a/vpr/src/pack/cluster.cpp +++ b/vpr/src/pack/cluster.cpp @@ -44,11 +44,10 @@ #include "PreClusterDelayCalculator.h" #include "atom_netlist.h" -#include "cluster_router.h" +#include "cluster_legalizer.h" #include "cluster_util.h" #include "constraints_report.h" #include "globals.h" -#include "pack_types.h" #include "prepack.h" #include "timing_info.h" #include "vpr_types.h" @@ -70,13 +69,14 @@ std::map do_clustering(const t_packer_opts& pa const t_analysis_opts& analysis_opts, const 
t_arch* arch, Prepacker& prepacker, + ClusterLegalizer& cluster_legalizer, const std::unordered_set& is_clock, const std::unordered_set& is_global, bool allow_unrelated_clustering, bool balance_block_type_utilization, - std::vector* lb_type_rr_graphs, AttractionInfo& attraction_groups, bool& floorplan_regions_overfull, + const t_pack_high_fanout_thresholds& high_fanout_thresholds, t_clustering_data& clustering_data) { /* Does the actual work of clustering multiple netlist blocks * * into clusters. */ @@ -102,7 +102,7 @@ std::map do_clustering(const t_packer_opts& pa t_cluster_progress_stats cluster_stats; //int num_molecules, num_molecules_processed, mols_since_last_print, blocks_since_last_analysis, - int num_blocks_hill_added, max_pb_depth, detailed_routing_stage; + int num_blocks_hill_added; const int verbosity = packer_opts.pack_verbosity; @@ -116,17 +116,11 @@ std::map do_clustering(const t_packer_opts& pa enum e_block_pack_status block_pack_status; - t_cluster_placement_stats* cur_cluster_placement_stats_ptr; - t_lb_router_data* router_data = nullptr; + t_cluster_placement_stats* cur_cluster_placement_stats_ptr = nullptr; t_pack_molecule *istart, *next_molecule, *prev_molecule; auto& atom_ctx = g_vpr_ctx.atom(); auto& device_ctx = g_vpr_ctx.mutable_device(); - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - - helper_ctx.enable_pin_feasibility_filter = packer_opts.enable_pin_feasibility_filter; - helper_ctx.feasible_block_array_size = packer_opts.feasible_block_array_size; std::shared_ptr clustering_delay_calc; std::shared_ptr timing_info; @@ -141,30 +135,25 @@ std::map do_clustering(const t_packer_opts& pa // Index 2 holds the number of LEs that are used for registers only. 
std::vector le_count(3, 0); - helper_ctx.total_clb_num = 0; + int total_clb_num = 0; /* TODO: This is memory inefficient, fix if causes problems */ /* Store stats on nets used by packed block, useful for determining transitively connected blocks * (eg. [A1, A2, ..]->[B1, B2, ..]->C implies cluster [A1, A2, ...] and C have a weak link) */ - vtr::vector> clb_inter_blk_nets(atom_ctx.nlist.blocks().size()); + vtr::vector> clb_inter_blk_nets(atom_ctx.nlist.blocks().size()); istart = nullptr; - /* determine bound on cluster size and primitive input size */ - helper_ctx.max_cluster_size = 0; - max_pb_depth = 0; - const t_molecule_stats max_molecule_stats = prepacker.calc_max_molecule_stats(atom_ctx.nlist); prepacker.mark_all_molecules_valid(); cluster_stats.num_molecules = prepacker.get_num_molecules(); - get_max_cluster_size_and_pb_depth(helper_ctx.max_cluster_size, max_pb_depth); - if (packer_opts.hill_climbing_flag) { - clustering_data.hill_climbing_inputs_avail = new int[helper_ctx.max_cluster_size + 1]; - for (int i = 0; i < helper_ctx.max_cluster_size + 1; i++) + size_t max_cluster_size = cluster_legalizer.get_max_cluster_size(); + clustering_data.hill_climbing_inputs_avail = new int[max_cluster_size + 1]; + for (size_t i = 0; i < max_cluster_size + 1; i++) clustering_data.hill_climbing_inputs_avail[i] = 0; } else { clustering_data.hill_climbing_inputs_avail = nullptr; /* if used, die hard */ @@ -173,8 +162,9 @@ std::map do_clustering(const t_packer_opts& pa #if 0 check_for_duplicate_inputs (); #endif + alloc_and_init_clustering(max_molecule_stats, - &(helper_ctx.cluster_placement_stats), &(helper_ctx.primitives_list), prepacker, + prepacker, clustering_data, net_output_feeds_driving_block_input, unclustered_list_head_size, cluster_stats.num_molecules); @@ -187,9 +177,6 @@ std::map do_clustering(const t_packer_opts& pa cluster_stats.blocks_since_last_analysis = 0; num_blocks_hill_added = 0; - VTR_ASSERT(helper_ctx.max_cluster_size < MAX_SHORT); - /* Limit maximum 
number of elements for each cluster */ - //Default criticalities set to zero (e.g. if not timing driven) vtr::vector atom_criticality(atom_ctx.nlist.blocks().size(), 0.); @@ -199,11 +186,17 @@ std::map do_clustering(const t_packer_opts& pa } // Assign gain scores to atoms and sort them based on the scores. - auto seed_atoms = initialize_seed_atoms(packer_opts.cluster_seed_type, max_molecule_stats, atom_criticality); + auto seed_atoms = initialize_seed_atoms(packer_opts.cluster_seed_type, + max_molecule_stats, + prepacker, + atom_criticality); /* index of next most timing critical block */ int seed_index = 0; - istart = get_highest_gain_seed_molecule(seed_index, seed_atoms); + istart = get_highest_gain_seed_molecule(seed_index, + seed_atoms, + prepacker, + cluster_legalizer); print_pack_status_header(); @@ -214,61 +207,58 @@ std::map do_clustering(const t_packer_opts& pa while (istart != nullptr) { bool is_cluster_legal = false; int saved_seed_index = seed_index; - for (detailed_routing_stage = (int)E_DETAILED_ROUTE_AT_END_ONLY; !is_cluster_legal && detailed_routing_stage != (int)E_DETAILED_ROUTE_INVALID; detailed_routing_stage++) { - // Use the total number created clusters so far as the ID for the new cluster - ClusterBlockId clb_index(helper_ctx.total_clb_num); - - VTR_LOGV(verbosity > 2, "Complex block %d:\n", helper_ctx.total_clb_num); - - /*Used to store cluster's PartitionRegion as primitives are added to it. - * Since some of the primitives might fail legality, this structure temporarily - * stores PartitionRegion information while the cluster is packed*/ - PartitionRegion temp_cluster_pr; - /* - * Stores the cluster's NoC group ID as more primitives are added to it. - * This is used to check if a candidate primitive is in the same NoC group - * as the atom blocks that have already been added to the primitive. 
- */ - NocGroupId temp_cluster_noc_grp_id = NocGroupId::INVALID(); - - start_new_cluster(helper_ctx.cluster_placement_stats, helper_ctx.primitives_list, - clb_index, istart, + // The basic algorithm: + // 1) Try to put all the molecules in that you can without doing the + // full intra-lb route. Then do full legalization at the end. + // 2) If the legalization at the end fails, try again, but this time + // do full legalization for each molecule added to the cluster. + const ClusterLegalizationStrategy legalization_strategies[] = {ClusterLegalizationStrategy::SKIP_INTRA_LB_ROUTE, + ClusterLegalizationStrategy::FULL}; + for (const ClusterLegalizationStrategy strategy : legalization_strategies) { + // If the cluster is legal, no need to try a stronger cluster legalizer + // mode. + if (is_cluster_legal) + break; + // Set the legalization strategy of the cluster legalizer. + cluster_legalizer.set_legalization_strategy(strategy); + + LegalizationClusterId legalization_cluster_id; + + VTR_LOGV(verbosity > 2, "Complex block %d:\n", total_clb_num); + + start_new_cluster(cluster_legalizer, + legalization_cluster_id, + istart, num_used_type_instances, packer_opts.target_device_utilization, - helper_ctx.num_models, helper_ctx.max_cluster_size, arch, packer_opts.device_layout, - lb_type_rr_graphs, &router_data, - detailed_routing_stage, &cluster_ctx.clb_nlist, primitive_candidate_block_types, verbosity, - packer_opts.enable_pin_feasibility_filter, - balance_block_type_utilization, - packer_opts.feasible_block_array_size, - temp_cluster_pr, - temp_cluster_noc_grp_id); + balance_block_type_utilization); //initial molecule in cluster has been processed cluster_stats.num_molecules_processed++; cluster_stats.mols_since_last_print++; - print_pack_status(helper_ctx.total_clb_num, + print_pack_status(total_clb_num, cluster_stats.num_molecules, cluster_stats.num_molecules_processed, cluster_stats.mols_since_last_print, device_ctx.grid.width(), device_ctx.grid.height(), - 
attraction_groups); + attraction_groups, + cluster_legalizer); VTR_LOGV(verbosity > 2, - "Complex block %d: '%s' (%s) ", helper_ctx.total_clb_num, - cluster_ctx.clb_nlist.block_name(clb_index).c_str(), - cluster_ctx.clb_nlist.block_type(clb_index)->name); + "Complex block %d: '%s' (%s) ", total_clb_num, + cluster_legalizer.get_cluster_pb(legalization_cluster_id)->name, + cluster_legalizer.get_cluster_type(legalization_cluster_id)->name); VTR_LOGV(verbosity > 2, "."); //Progress dot for seed-block fflush(stdout); - t_ext_pin_util target_ext_pin_util = helper_ctx.target_external_pin_util.get_pin_util(cluster_ctx.clb_nlist.block_type(clb_index)->name); - int high_fanout_threshold = helper_ctx.high_fanout_thresholds.get_threshold(cluster_ctx.clb_nlist.block_type(clb_index)->name); - update_cluster_stats(istart, clb_index, + int high_fanout_threshold = high_fanout_thresholds.get_threshold(cluster_legalizer.get_cluster_type(legalization_cluster_id)->name); + update_cluster_stats(istart, + cluster_legalizer, is_clock, //Set of clock nets is_global, //Set of global nets (currently all clocks) packer_opts.global_clocks, @@ -278,16 +268,16 @@ std::map do_clustering(const t_packer_opts& pa *timing_info, attraction_groups, net_output_feeds_driving_block_input); - helper_ctx.total_clb_num++; + total_clb_num++; if (packer_opts.timing_driven) { cluster_stats.blocks_since_last_analysis++; /*it doesn't make sense to do a timing analysis here since there* *is only one atom block clustered it would not change anything */ } - cur_cluster_placement_stats_ptr = &(helper_ctx.cluster_placement_stats[cluster_ctx.clb_nlist.block_type(clb_index)->index]); + cur_cluster_placement_stats_ptr = cluster_legalizer.get_cluster_placement_stats(legalization_cluster_id); cluster_stats.num_unrelated_clustering_attempts = 0; - next_molecule = get_molecule_for_cluster(cluster_ctx.clb_nlist.block_pb(clb_index), + next_molecule = 
get_molecule_for_cluster(cluster_legalizer.get_cluster_pb(legalization_cluster_id), attraction_groups, allow_unrelated_clustering, packer_opts.prioritize_transitive_connectivity, @@ -295,8 +285,10 @@ std::map do_clustering(const t_packer_opts& pa packer_opts.feasible_block_array_size, &cluster_stats.num_unrelated_clustering_attempts, cur_cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, clb_inter_blk_nets, - clb_index, + legalization_cluster_id, verbosity, clustering_data.unclustered_list_head, unclustered_list_head_size, @@ -322,18 +314,16 @@ std::map do_clustering(const t_packer_opts& pa while (next_molecule != nullptr && num_repeated_molecules < max_num_repeated_molecules) { prev_molecule = next_molecule; - try_fill_cluster(packer_opts, + try_fill_cluster(cluster_legalizer, + prepacker, + packer_opts, cur_cluster_placement_stats_ptr, prev_molecule, next_molecule, num_repeated_molecules, - helper_ctx.primitives_list, cluster_stats, - helper_ctx.total_clb_num, - helper_ctx.num_models, - helper_ctx.max_cluster_size, - clb_index, - detailed_routing_stage, + total_clb_num, + legalization_cluster_id, attraction_groups, clb_inter_blk_nets, allow_unrelated_clustering, @@ -341,10 +331,6 @@ std::map do_clustering(const t_packer_opts& pa is_clock, is_global, timing_info, - router_data, - target_ext_pin_util, - temp_cluster_pr, - temp_cluster_noc_grp_id, block_pack_status, clustering_data.unclustered_list_head, unclustered_list_head_size, @@ -352,16 +338,41 @@ std::map do_clustering(const t_packer_opts& pa primitive_candidate_block_types); } - is_cluster_legal = check_cluster_legality(verbosity, detailed_routing_stage, router_data); + if (strategy == ClusterLegalizationStrategy::FULL) { + // If the legalizer fully legalized for every molecule added, + // the cluster should be legal. 
+ is_cluster_legal = true; + } else { + // If the legalizer did not check everything for every molecule, + // need to check that the full cluster is legal (need to perform + // intra-lb routing). + is_cluster_legal = cluster_legalizer.check_cluster_legality(legalization_cluster_id); + } if (is_cluster_legal) { - istart = save_cluster_routing_and_pick_new_seed(packer_opts, helper_ctx.total_clb_num, seed_atoms, num_blocks_hill_added, clustering_data.intra_lb_routing, seed_index, cluster_stats, router_data); - store_cluster_info_and_free(packer_opts, clb_index, logic_block_type, le_pb_type, le_count, clb_inter_blk_nets); + // Pick new seed. + istart = get_highest_gain_seed_molecule(seed_index, + seed_atoms, + prepacker, + cluster_legalizer); + // Update cluster stats. + if (packer_opts.timing_driven && num_blocks_hill_added > 0) + cluster_stats.blocks_since_last_analysis += num_blocks_hill_added; + + store_cluster_info_and_free(packer_opts, legalization_cluster_id, logic_block_type, le_pb_type, le_count, cluster_legalizer, clb_inter_blk_nets); + // Since the cluster will no longer be added to beyond this point, + // clean the cluster of any data not strictly necessary for + // creating the clustered netlist. + cluster_legalizer.clean_cluster(legalization_cluster_id); } else { - free_data_and_requeue_used_mols_if_illegal(clb_index, saved_seed_index, num_used_type_instances, helper_ctx.total_clb_num, seed_index); + // If the cluster is not legal, requeue used mols. + num_used_type_instances[cluster_legalizer.get_cluster_type(legalization_cluster_id)]--; + total_clb_num--; + seed_index = saved_seed_index; + // Destroy the illegal cluster. 
+ cluster_legalizer.destroy_cluster(legalization_cluster_id); + cluster_legalizer.compress(); } - free_router_data(router_data); - router_data = nullptr; } } @@ -371,7 +382,12 @@ std::map do_clustering(const t_packer_opts& pa } //check_floorplan_regions(floorplan_regions_overfull); - floorplan_regions_overfull = floorplan_constraints_regions_overfull(); + floorplan_regions_overfull = floorplan_constraints_regions_overfull(cluster_legalizer); + + // Ensure that we have kept track of the number of clusters correctly. + // TODO: The total_clb_num variable could probably just be replaced by + // clusters().size(). + VTR_ASSERT(cluster_legalizer.clusters().size() == (size_t)total_clb_num); return num_used_type_instances; } diff --git a/vpr/src/pack/cluster.h b/vpr/src/pack/cluster.h index 76b2315ceae..a10d7ccf21a 100644 --- a/vpr/src/pack/cluster.h +++ b/vpr/src/pack/cluster.h @@ -1,32 +1,32 @@ #ifndef CLUSTER_H #define CLUSTER_H -#include #include -#include +#include #include "physical_types.h" #include "vpr_types.h" -#include "atom_netlist_fwd.h" -#include "attraction_groups.h" -#include "cluster_util.h" +class AtomNetid; +class AttractionInfo; +class ClusterLegalizer; +class ClusteredNetlist; class Prepacker; +struct t_clustering_data; std::map do_clustering(const t_packer_opts& packer_opts, const t_analysis_opts& analysis_opts, const t_arch* arch, Prepacker& prepacker, + ClusterLegalizer& cluster_legalizer, const std::unordered_set& is_clock, const std::unordered_set& is_global, bool allow_unrelated_clustering, bool balance_block_type_utilization, - std::vector* lb_type_rr_graphs, AttractionInfo& attraction_groups, bool& floorplan_regions_overfull, + const t_pack_high_fanout_thresholds& high_fanout_thresholds, t_clustering_data& clustering_data); -int get_cluster_of_block(int blkidx); - void print_pb_type_count(const ClusteredNetlist& clb_nlist); #endif diff --git a/vpr/src/pack/cluster_legalizer.cpp b/vpr/src/pack/cluster_legalizer.cpp new file mode 100644 index 
00000000000..4cd82799073 --- /dev/null +++ b/vpr/src/pack/cluster_legalizer.cpp @@ -0,0 +1,1756 @@ +/** + * @file + * @author Alex Singer + * @date September 2024 + * @brief The implementation of the Cluster Legalizer class. + * + * Most of the code in this file was original part of cluster_util.cpp and was + * highly integrated with the clusterer in VPR. All code that was used for + * legalizing the clusters was moved into this file and all the functionality + * was moved into the ClusterLegalizer class. + */ + +#include "cluster_legalizer.h" +#include +#include +#include +#include +#include "atom_lookup.h" +#include "atom_netlist.h" +#include "cluster_placement.h" +#include "cluster_router.h" +#include "cluster_util.h" +#include "globals.h" +#include "logic_types.h" +#include "netlist_utils.h" +#include "noc_aware_cluster_util.h" +#include "noc_data_types.h" +#include "pack_types.h" +#include "partition.h" +#include "partition_region.h" +#include "physical_types.h" +#include "prepack.h" +#include "user_place_constraints.h" +#include "vpr_context.h" +#include "vpr_types.h" +#include "vpr_utils.h" +#include "vtr_assert.h" +#include "vtr_vector.h" +#include "vtr_vector_map.h" + +/* + * @brief Gets the max cluster size that any logical block can have. + */ +static size_t calc_max_cluster_size(const std::vector& logical_block_types) { + size_t max_cluster_size = 0; + for (const t_logical_block_type& blk_type : logical_block_types) { + if (is_empty_type(&blk_type)) + continue; + int cur_cluster_size = get_max_primitives_in_pb_type(blk_type.pb_type); + max_cluster_size = std::max(max_cluster_size, cur_cluster_size); + } + return max_cluster_size; +} + +/* + * @brief Allocates the stats stored within the pb of a cluster. + * + * Used to store information used during clustering. + */ +static void alloc_and_load_pb_stats(t_pb* pb, const int feasible_block_array_size) { + /* Call this routine when starting to fill up a new cluster. It resets * + * the gain vector, etc. 
*/ + + pb->pb_stats = new t_pb_stats; + + /* If statement below is for speed. If nets are reasonably low-fanout, * + * only a relatively small number of blocks will be marked, and updating * + * only those atom block structures will be fastest. If almost all blocks * + * have been touched it should be faster to just run through them all * + * in order (less addressing and better cache locality). */ + pb->pb_stats->input_pins_used = std::vector>(pb->pb_graph_node->num_input_pin_class); + pb->pb_stats->output_pins_used = std::vector>(pb->pb_graph_node->num_output_pin_class); + pb->pb_stats->lookahead_input_pins_used = std::vector>(pb->pb_graph_node->num_input_pin_class); + pb->pb_stats->lookahead_output_pins_used = std::vector>(pb->pb_graph_node->num_output_pin_class); + pb->pb_stats->num_feasible_blocks = NOT_VALID; + pb->pb_stats->feasible_blocks = new t_pack_molecule*[feasible_block_array_size]; + + for (int i = 0; i < feasible_block_array_size; i++) + pb->pb_stats->feasible_blocks[i] = nullptr; + + pb->pb_stats->tie_break_high_fanout_net = AtomNetId::INVALID(); + + pb->pb_stats->pulled_from_atom_groups = 0; + pb->pb_stats->num_att_group_atoms_used = 0; + + pb->pb_stats->gain.clear(); + pb->pb_stats->timinggain.clear(); + pb->pb_stats->connectiongain.clear(); + pb->pb_stats->sharinggain.clear(); + pb->pb_stats->hillgain.clear(); + pb->pb_stats->transitive_fanout_candidates.clear(); + + pb->pb_stats->num_pins_of_net_in_pb.clear(); + + pb->pb_stats->num_child_blocks_in_pb = 0; + + pb->pb_stats->explore_transitive_fanout = true; +} + +/* + * @brief Check the atom blocks of a cluster pb. Used in the verify method. 
+ */ +/* TODO: May want to check that all atom blocks are actually reached */ +static void check_cluster_atom_blocks(t_pb* pb, std::unordered_set& blocks_checked) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + const t_pb_type* pb_type = pb->pb_graph_node->pb_type; + if (pb_type->num_modes == 0) { + /* primitive */ + AtomBlockId blk_id = atom_ctx.lookup.pb_atom(pb); + if (blk_id) { + if (blocks_checked.count(blk_id)) { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "pb %s contains atom block %s but atom block is already contained in another pb.\n", + pb->name, atom_ctx.nlist.block_name(blk_id).c_str()); + } + blocks_checked.insert(blk_id); + if (pb != atom_ctx.lookup.atom_pb(blk_id)) { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "pb %s contains atom block %s but atom block does not link to pb.\n", + pb->name, atom_ctx.nlist.block_name(blk_id).c_str()); + } + } + } else { + /* this is a container pb, all container pbs must contain children */ + bool has_child = false; + for (int i = 0; i < pb_type->modes[pb->mode].num_pb_type_children; i++) { + for (int j = 0; j < pb_type->modes[pb->mode].pb_type_children[i].num_pb; j++) { + if (pb->child_pbs[i] != nullptr) { + if (pb->child_pbs[i][j].name != nullptr) { + has_child = true; + check_cluster_atom_blocks(&pb->child_pbs[i][j], blocks_checked); + } + } + } + } + VTR_ASSERT(has_child); + } +} + +/// @brief Recursively frees the pb stats of the given pb, without freeing the +/// pb itself. +static void free_pb_stats_recursive(t_pb* pb) { + /* Releases all the memory used by clustering data structures. 
*/ + if (pb) { + if (pb->pb_graph_node != nullptr) { + if (!pb->pb_graph_node->is_primitive()) { + for (int i = 0; i < pb->pb_graph_node->pb_type->modes[pb->mode].num_pb_type_children; i++) { + for (int j = 0; j < pb->pb_graph_node->pb_type->modes[pb->mode].pb_type_children[i].num_pb; j++) { + if (pb->child_pbs && pb->child_pbs[i]) { + free_pb_stats_recursive(&pb->child_pbs[i][j]); + } + } + } + } + } + free_pb_stats(pb); + } +} + +/* Record the failure of the molecule in this cluster in the current pb stats. + * If a molecule fails repeatedly, it's gain will be penalized if packing with + * attraction groups on. */ +static void record_molecule_failure(t_pack_molecule* molecule, t_pb* pb) { + //Only have to record the failure for the first atom in the molecule. + //The convention when checking if a molecule has failed to pack in the cluster + //is to check whether the first atoms has been recorded as having failed + + auto got = pb->pb_stats->atom_failures.find(molecule->atom_block_ids[0]); + if (got == pb->pb_stats->atom_failures.end()) { + pb->pb_stats->atom_failures.insert({molecule->atom_block_ids[0], 1}); + } else { + got->second++; + } +} + +/** + * @brief Checks whether an atom block can be added to a clustered block + * without violating floorplanning constraints. It also updates the + * clustered block's floorplanning region by taking the intersection of + * its current region and the floorplanning region of the given atom block. + * + * @param atom_blk_id A unique ID for the candidate atom block to + * be added to the growing cluster. + * @param cluster_pr The floorplanning regions of the clustered + * block. This function may update the given + * region. + * @param constraints The set of user-given place constraints. + * @param log_verbosity Controls the detail level of log information + * printed by this function. + * @param cluster_pr_needs_update Indicates whether the floorplanning region + * of the clustered block have updated. 
+ * + * @return True if adding the given atom block to the clustered block does not + * violated any floorplanning constraints. + */ +static bool check_cluster_floorplanning(AtomBlockId atom_blk_id, + PartitionRegion& cluster_pr, + const UserPlaceConstraints& constraints, + int log_verbosity, + bool& cluster_pr_needs_update) { + // Get the partition ID of the atom. + PartitionId part_id = constraints.get_atom_partition(atom_blk_id); + // If the partition ID is invalid, then it can be put in the cluster + // regardless of what the cluster's PartitionRegion is since it is not + // constrained. + if (!part_id.is_valid()) { + VTR_LOGV(log_verbosity > 3, + "\t\t\t Intersect: Atom block %d has no floorplanning constraints\n", + atom_blk_id); + cluster_pr_needs_update = false; + return true; + } + + // Get the Atom and Cluster Partition Regions + const PartitionRegion& atom_pr = constraints.get_partition_pr(part_id); + + // If the Cluster's PartitionRegion is empty, then this atom's PR becomes + // the Cluster's new PartitionRegion. + if (cluster_pr.empty()) { + VTR_LOGV(log_verbosity > 3, + "\t\t\t Intersect: Atom block %d has floorplanning constraints\n", + atom_blk_id); + cluster_pr = atom_pr; + cluster_pr_needs_update = true; + return true; + } + + // The Cluster's new PartitionRegion is the intersection of the Cluster's + // original PartitionRegion and the atom's PartitionRegion. + update_cluster_part_reg(cluster_pr, atom_pr); + + // If the intersection is empty, then the atom cannot be placed in this + // Cluster due to floorplanning constraints. + if (cluster_pr.empty()) { + VTR_LOGV(log_verbosity > 3, + "\t\t\t Intersect: Atom block %d failed floorplanning check for cluster\n", + atom_blk_id); + cluster_pr_needs_update = false; + return false; + } + + // If the Cluster's new PartitionRegion is non-empty, then this atom passes + // the floorplanning constraints and the cluster's PartitionRegion should be + // updated. 
+ cluster_pr_needs_update = true; + VTR_LOGV(log_verbosity > 3, + "\t\t\t Intersect: Atom block %d passed cluster, cluster PR was updated with intersection result \n", + atom_blk_id); + return true; +} + +/** + * @brief Checks if an atom block can be added to a clustered block without + * violating NoC group constraints. For passing this check, either both + * clustered and atom blocks must belong to the same NoC group, or at + * least one of them should not belong to any NoC group. If the atom block + * is associated with a NoC group while the clustered block does not + * belong to any NoC groups, the NoC group ID of the atom block is assigned + * to the clustered block when the atom is added to it. + * + * @param atom_blk_id A unique ID for the candidate atom block to be + * added to the growing cluster. + * @param cluster_noc_grp_id The NoC group ID of the clustered block. This + * function may update this ID. + * @param atom_noc_grp_ids A mapping from atoms to NoC group IDs. + * @param log_verbosity Controls the detail level of log information + * printed by this function. + * + * @return True if adding the atom block the cluster does not violate NoC group + * constraints. + */ +static bool check_cluster_noc_group(AtomBlockId atom_blk_id, + NocGroupId& cluster_noc_grp_id, + const vtr::vector& atom_noc_grp_ids, + int log_verbosity) { + const NocGroupId atom_noc_grp_id = atom_noc_grp_ids.empty() ? NocGroupId::INVALID() : atom_noc_grp_ids[atom_blk_id]; + + if (!cluster_noc_grp_id.is_valid()) { + // If the cluster does not have a NoC group, assign the atom's NoC group + // to the cluster. + VTR_LOGV(log_verbosity > 3, + "\t\t\t NoC Group: Atom block %d passed cluster, cluster's NoC group was updated with the atom's group %d\n", + atom_blk_id, (size_t)atom_noc_grp_id); + cluster_noc_grp_id = atom_noc_grp_id; + return true; + } + + if (cluster_noc_grp_id == atom_noc_grp_id) { + // If the cluster has the same NoC group ID as the atom, they are + // compatible. 
+ VTR_LOGV(log_verbosity > 3, + "\t\t\t NoC Group: Atom block %d passed cluster, cluster's NoC group was compatible with the atom's group %d\n", + atom_blk_id, (size_t)atom_noc_grp_id); + return true; + } + + // If the cluster belongs to a different NoC group than the atom's group, + // they are incompatible. + VTR_LOGV(log_verbosity > 3, + "\t\t\t NoC Group: Atom block %d failed NoC group check for cluster. Cluster's NoC group: %d, atom's NoC group: %d\n", + atom_blk_id, (size_t)cluster_noc_grp_id, (size_t)atom_noc_grp_id); + return false; +} + +/** + * This function takes the root block of a chain molecule and a proposed + * placement primitive for this block. The function then checks if this + * chain root block has a placement constraint (such as being driven from + * outside the cluster) and returns the status of the placement accordingly. + */ +static enum e_block_pack_status check_chain_root_placement_feasibility(const t_pb_graph_node* pb_graph_node, + const t_pack_molecule* molecule, + const AtomBlockId blk_id) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + enum e_block_pack_status block_pack_status = e_block_pack_status::BLK_PASSED; + + bool is_long_chain = molecule->chain_info->is_long_chain; + + const auto& chain_root_pins = molecule->pack_pattern->chain_root_pins; + + t_model_ports* root_port = chain_root_pins[0][0]->port->model_port; + AtomNetId chain_net_id; + auto port_id = atom_ctx.nlist.find_atom_port(blk_id, root_port); + + if (port_id) { + chain_net_id = atom_ctx.nlist.port_net(port_id, chain_root_pins[0][0]->pin_number); + } + + // if this block is part of a long chain or it is driven by a cluster + // input pin we need to check the placement legality of this block + // Depending on the logic synthesis even small chains that can fit within one + // cluster might need to start at the top of the cluster as their input can be + // driven by a global gnd or vdd. 
Therefore even if this is not a long chain + // but its input pin is driven by a net, the placement legality is checked. + if (is_long_chain || chain_net_id) { + auto chain_id = molecule->chain_info->chain_id; + // if this chain has a chain id assigned to it (implies is_long_chain too) + if (chain_id != -1) { + // the chosen primitive should be a valid starting point for the chain + // long chains should only be placed at the top of the chain tieOff = 0 + if (pb_graph_node != chain_root_pins[chain_id][0]->parent_node) { + block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; + } + // the chain doesn't have an assigned chain_id yet + } else { + block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; + for (const auto& chain : chain_root_pins) { + for (auto tieOff : chain) { + // check if this chosen primitive is one of the possible + // starting points for this chain. + if (pb_graph_node == tieOff->parent_node) { + // this location matches with the one of the dedicated chain + // input from outside logic block, therefore it is feasible + block_pack_status = e_block_pack_status::BLK_PASSED; + break; + } + // long chains should only be placed at the top of the chain tieOff = 0 + if (is_long_chain) break; + } + } + } + } + + return block_pack_status; +} + +/* + * @brief Check that the two atom blocks blk_id and sibling_blk_id (which should + * both be memory slices) are feasible, in the sence that they have + * precicely the same net connections (with the exception of nets in data + * port classes). + * + * Note that this routine does not check pin feasibility against the cur_pb_type; so + * primitive_type_feasible() should also be called on blk_id before concluding it is feasible. 
+ */ +static bool primitive_memory_sibling_feasible(const AtomBlockId blk_id, const t_pb_type* cur_pb_type, const AtomBlockId sibling_blk_id) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + VTR_ASSERT(cur_pb_type->class_type == MEMORY_CLASS); + + //First, identify the 'data' ports by looking at the cur_pb_type + std::unordered_set data_ports; + for (int iport = 0; iport < cur_pb_type->num_ports; ++iport) { + const char* port_class = cur_pb_type->ports[iport].port_class; + if (port_class && strstr(port_class, "data") == port_class) { + //The port_class starts with "data", so it is a data port + + //Record the port + data_ports.insert(cur_pb_type->ports[iport].model_port); + } + } + + //Now verify that all nets (except those connected to data ports) are equivalent + //between blk_id and sibling_blk_id + + //Since the atom netlist stores only in-use ports, we iterate over the model to ensure + //all ports are compared + const t_model* model = cur_pb_type->model; + for (t_model_ports* port : {model->inputs, model->outputs}) { + for (; port; port = port->next) { + if (data_ports.count(port)) { + //Don't check data ports + continue; + } + + //Note: VPR doesn't support multi-driven nets, so all outputs + //should be data ports, otherwise the siblings will both be + //driving the output net + + //Get the ports from each primitive + auto blk_port_id = atom_ctx.nlist.find_atom_port(blk_id, port); + auto sib_port_id = atom_ctx.nlist.find_atom_port(sibling_blk_id, port); + + //Check that all nets (including unconnected nets) match + for (int ipin = 0; ipin < port->size; ++ipin) { + //The nets are initialized as invalid (i.e. 
disconnected) + AtomNetId blk_net_id; + AtomNetId sib_net_id; + + //We can get the actual net provided the port exists + // + //Note that if the port did not exist, the net is left + //as invalid/disconneced + if (blk_port_id) { + blk_net_id = atom_ctx.nlist.port_net(blk_port_id, ipin); + } + if (sib_port_id) { + sib_net_id = atom_ctx.nlist.port_net(sib_port_id, ipin); + } + + //The sibling and block must have the same (possibly disconnected) + //net on this pin + if (blk_net_id != sib_net_id) { + //Nets do not match, not feasible + return false; + } + } + } + } + + return true; +} + +/* + * @brief Check if the given atom is feasible in the given pb. + */ +static bool primitive_feasible(const AtomBlockId blk_id, t_pb* cur_pb) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + const t_pb_type* cur_pb_type = cur_pb->pb_graph_node->pb_type; + + VTR_ASSERT(cur_pb_type->num_modes == 0); /* primitive */ + + AtomBlockId cur_pb_blk_id = atom_ctx.lookup.pb_atom(cur_pb); + if (cur_pb_blk_id && cur_pb_blk_id != blk_id) { + /* This pb already has a different logical block */ + return false; + } + + if (cur_pb_type->class_type == MEMORY_CLASS) { + /* Memory class has additional feasibility requirements: + * - all siblings must share all nets, including open nets, with the exception of data nets */ + + /* find sibling if one exists */ + AtomBlockId sibling_memory_blk_id = find_memory_sibling(cur_pb); + + if (sibling_memory_blk_id) { + //There is a sibling, see if the current block is feasible with it + bool sibling_feasible = primitive_memory_sibling_feasible(blk_id, cur_pb_type, sibling_memory_blk_id); + if (!sibling_feasible) { + return false; + } + } + } + + //Generic feasibility check + return primitive_type_feasible(blk_id, cur_pb_type); +} + +/** + * Try place atom block into current primitive location + */ +static enum e_block_pack_status +try_place_atom_block_rec(const t_pb_graph_node* pb_graph_node, + const AtomBlockId blk_id, + t_pb* cb, + t_pb** parent, + const 
int max_models, + const int max_cluster_size, + const LegalizationClusterId cluster_id, + vtr::vector_map& atom_cluster, + const t_cluster_placement_stats* cluster_placement_stats_ptr, + const t_pack_molecule* molecule, + t_lb_router_data* router_data, + int verbosity, + const int feasible_block_array_size) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + AtomContext& mutable_atom_ctx = g_vpr_ctx.mutable_atom(); + + e_block_pack_status block_pack_status = e_block_pack_status::BLK_PASSED; + + /* Discover parent */ + t_pb* parent_pb = nullptr; + if (pb_graph_node->parent_pb_graph_node != cb->pb_graph_node) { + t_pb* my_parent = nullptr; + block_pack_status = try_place_atom_block_rec(pb_graph_node->parent_pb_graph_node, blk_id, cb, + &my_parent, max_models, max_cluster_size, cluster_id, + atom_cluster, + cluster_placement_stats_ptr, molecule, router_data, + verbosity, feasible_block_array_size); + parent_pb = my_parent; + } else { + parent_pb = cb; + } + + /* Create siblings if siblings are not allocated */ + if (parent_pb->child_pbs == nullptr) { + VTR_ASSERT(parent_pb->name == nullptr); + parent_pb->name = vtr::strdup(atom_ctx.nlist.block_name(blk_id).c_str()); + parent_pb->mode = pb_graph_node->pb_type->parent_mode->index; + set_reset_pb_modes(router_data, parent_pb, true); + const t_mode* mode = &parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode]; + parent_pb->child_pbs = new t_pb*[mode->num_pb_type_children]; + + for (int i = 0; i < mode->num_pb_type_children; i++) { + parent_pb->child_pbs[i] = new t_pb[mode->pb_type_children[i].num_pb]; + + for (int j = 0; j < mode->pb_type_children[i].num_pb; j++) { + parent_pb->child_pbs[i][j].parent_pb = parent_pb; + parent_pb->child_pbs[i][j].pb_graph_node = &(parent_pb->pb_graph_node->child_pb_graph_nodes[parent_pb->mode][i][j]); + } + } + } else { + /* if this is not the first child of this parent, must match existing parent mode */ + if (parent_pb->mode != pb_graph_node->pb_type->parent_mode->index) { + return 
e_block_pack_status::BLK_FAILED_FEASIBLE; + } + } + + const t_mode* mode = &parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode]; + int i; + for (i = 0; i < mode->num_pb_type_children; i++) { + if (pb_graph_node->pb_type == &mode->pb_type_children[i]) { + break; + } + } + VTR_ASSERT(i < mode->num_pb_type_children); + t_pb* pb = &parent_pb->child_pbs[i][pb_graph_node->placement_index]; + *parent = pb; /* this pb is parent of it's child that called this function */ + VTR_ASSERT(pb->pb_graph_node == pb_graph_node); + if (pb->pb_stats == nullptr) { + alloc_and_load_pb_stats(pb, feasible_block_array_size); + } + const t_pb_type* pb_type = pb_graph_node->pb_type; + + /* Any pb_type under an mode, which is disabled for packing, should not be considerd for mapping + * Early exit to flag failure + */ + if (true == pb_type->parent_mode->disable_packing) { + return e_block_pack_status::BLK_FAILED_FEASIBLE; + } + + bool is_primitive = (pb_type->num_modes == 0); + + if (is_primitive) { + VTR_ASSERT(!atom_ctx.lookup.pb_atom(pb) + && atom_ctx.lookup.atom_pb(blk_id) == nullptr + && atom_cluster[blk_id] == LegalizationClusterId::INVALID()); + /* try pack to location */ + VTR_ASSERT(pb->name == nullptr); + pb->name = vtr::strdup(atom_ctx.nlist.block_name(blk_id).c_str()); + + //Update the atom netlist mappings + atom_cluster[blk_id] = cluster_id; + // NOTE: This pb is different from the pb of the cluster. It is the pb + // of the actual primitive. + // TODO: It would be a good idea to remove the use of this global + // variables to prevent external users from modifying this by + // mistake. 
+ mutable_atom_ctx.lookup.set_atom_pb(blk_id, pb); + + add_atom_as_target(router_data, blk_id); + if (!primitive_feasible(blk_id, pb)) { + /* failed location feasibility check, revert pack */ + block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; + } + + // if this block passed and is part of a chained molecule + if (block_pack_status == e_block_pack_status::BLK_PASSED && molecule->is_chain()) { + auto molecule_root_block = molecule->atom_block_ids[molecule->root]; + // if this is the root block of the chain molecule check its placmeent feasibility + if (blk_id == molecule_root_block) { + block_pack_status = check_chain_root_placement_feasibility(pb_graph_node, molecule, blk_id); + } + } + + VTR_LOGV(verbosity > 4 && block_pack_status == e_block_pack_status::BLK_PASSED, + "\t\t\tPlaced atom '%s' (%s) at %s\n", + atom_ctx.nlist.block_name(blk_id).c_str(), + atom_ctx.nlist.block_model(blk_id)->name, + pb->hierarchical_type_name().c_str()); + } + + if (block_pack_status != e_block_pack_status::BLK_PASSED) { + free(pb->name); + pb->name = nullptr; + } + return block_pack_status; +} + +/* Resets nets used at different pin classes for determining pin feasibility */ +static void reset_lookahead_pins_used(t_pb* cur_pb) { + const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; + if (cur_pb->pb_stats == nullptr) { + return; /* No pins used, no need to continue */ + } + + if (pb_type->num_modes > 0 && cur_pb->name != nullptr) { + for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { + cur_pb->pb_stats->lookahead_input_pins_used[i].clear(); + } + + for (int i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { + cur_pb->pb_stats->lookahead_output_pins_used[i].clear(); + } + + if (cur_pb->child_pbs != nullptr) { + for (int i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { + if (cur_pb->child_pbs[i] != nullptr) { + for (int j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { + 
reset_lookahead_pins_used(&cur_pb->child_pbs[i][j]); + } + } + } + } + } +} + +/* + * @brief Checks if the sinks of the given net are reachable from the driver + * pb gpin. + */ +static int net_sinks_reachable_in_cluster(const t_pb_graph_pin* driver_pb_gpin, const int depth, const AtomNetId net_id) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + //Record the sink pb graph pins we are looking for + std::unordered_set sink_pb_gpins; + for (const AtomPinId pin_id : atom_ctx.nlist.net_sinks(net_id)) { + const t_pb_graph_pin* sink_pb_gpin = find_pb_graph_pin(atom_ctx.nlist, atom_ctx.lookup, pin_id); + VTR_ASSERT(sink_pb_gpin); + + sink_pb_gpins.insert(sink_pb_gpin); + } + + //Count how many sink pins are reachable + size_t num_reachable_sinks = 0; + for (int i_prim_pin = 0; i_prim_pin < driver_pb_gpin->num_connectable_primitive_input_pins[depth]; ++i_prim_pin) { + const t_pb_graph_pin* reachable_pb_gpin = driver_pb_gpin->list_of_connectable_input_pin_ptrs[depth][i_prim_pin]; + + if (sink_pb_gpins.count(reachable_pb_gpin)) { + ++num_reachable_sinks; + if (num_reachable_sinks == atom_ctx.nlist.net_sinks(net_id).size()) { + return true; + } + } + } + + return false; +} + +/** + * Returns the pb_graph_pin of the atom pin defined by the driver_pin_id in the driver_pb + */ +static t_pb_graph_pin* get_driver_pb_graph_pin(const t_pb* driver_pb, const AtomPinId driver_pin_id) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + const auto driver_pb_type = driver_pb->pb_graph_node->pb_type; + int output_port = 0; + // find the port of the pin driving the net as well as the port model + auto driver_port_id = atom_ctx.nlist.pin_port(driver_pin_id); + auto driver_model_port = atom_ctx.nlist.port_model(driver_port_id); + // find the port id of the port containing the driving pin in the driver_pb_type + for (int i = 0; i < driver_pb_type->num_ports; i++) { + auto& prim_port = driver_pb_type->ports[i]; + if (prim_port.type == OUT_PORT) { + if (prim_port.model_port == 
driver_model_port) { + // get the output pb_graph_pin driving this input net + return &(driver_pb->pb_graph_node->output_pins[output_port][atom_ctx.nlist.pin_port_bit(driver_pin_id)]); + } + output_port++; + } + } + // the pin should be found + VTR_ASSERT(false); + return nullptr; +} + +/** + * Given a pin and its assigned net, mark all pin classes that are affected. + * Check if connecting this pin to it's driver pin or to all sink pins will + * require leaving a pb_block starting from the parent pb_block of the + * primitive till the root block (depth = 0). If leaving a pb_block is + * required add this net to the pin class (to increment the number of used + * pins from this class) that should be used to leave the pb_block. + */ +static void compute_and_mark_lookahead_pins_used_for_pin(const t_pb_graph_pin* pb_graph_pin, + const t_pb* primitive_pb, + const AtomNetId net_id, + const vtr::vector_map& atom_cluster) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + // starting from the parent pb of the input primitive go up in the hierarchy till the root block + for (auto cur_pb = primitive_pb->parent_pb; cur_pb; cur_pb = cur_pb->parent_pb) { + const auto depth = cur_pb->pb_graph_node->pb_type->depth; + const auto pin_class = pb_graph_pin->parent_pin_class[depth]; + VTR_ASSERT(pin_class != OPEN); + + const auto driver_blk_id = atom_ctx.nlist.net_driver_block(net_id); + + // if this primitive pin is an input pin + if (pb_graph_pin->port->type == IN_PORT) { + /* find location of net driver if exist in clb, NULL otherwise */ + // find the driver of the input net connected to the pin being studied + const auto driver_pin_id = atom_ctx.nlist.net_driver(net_id); + // find the id of the atom occupying the input primitive_pb + const auto prim_blk_id = atom_ctx.lookup.pb_atom(primitive_pb); + // find the pb block occupied by the driving atom + const auto driver_pb = atom_ctx.lookup.atom_pb(driver_blk_id); + // pb_graph_pin driving net_id in the driver pb block + 
t_pb_graph_pin* output_pb_graph_pin = nullptr; + // if the driver block is in the same clb as the input primitive block + LegalizationClusterId driver_cluster_id = atom_cluster[driver_blk_id]; + LegalizationClusterId prim_cluster_id = atom_cluster[prim_blk_id]; + if (driver_cluster_id == prim_cluster_id) { + // get pb_graph_pin driving the given net + output_pb_graph_pin = get_driver_pb_graph_pin(driver_pb, driver_pin_id); + } + + bool is_reachable = false; + + // if the driver pin is within the cluster + if (output_pb_graph_pin) { + // find if the driver pin can reach the input pin of the primitive or not + const t_pb* check_pb = driver_pb; + while (check_pb && check_pb != cur_pb) { + check_pb = check_pb->parent_pb; + } + if (check_pb) { + for (int i = 0; i < output_pb_graph_pin->num_connectable_primitive_input_pins[depth]; i++) { + if (pb_graph_pin == output_pb_graph_pin->list_of_connectable_input_pin_ptrs[depth][i]) { + is_reachable = true; + break; + } + } + } + } + + // Must use an input pin to connect the driver to the input pin of the given primitive, either the + // driver atom is not contained in the cluster or is contained but cannot reach the primitive pin + if (!is_reachable) { + // add net to lookahead_input_pins_used if not already added + auto it = std::find(cur_pb->pb_stats->lookahead_input_pins_used[pin_class].begin(), + cur_pb->pb_stats->lookahead_input_pins_used[pin_class].end(), net_id); + if (it == cur_pb->pb_stats->lookahead_input_pins_used[pin_class].end()) { + cur_pb->pb_stats->lookahead_input_pins_used[pin_class].push_back(net_id); + } + } + } else { + VTR_ASSERT(pb_graph_pin->port->type == OUT_PORT); + /* + * Determine if this net (which is driven from within this cluster) leaves this cluster + * (and hence uses an output pin). 
+ */ + + bool net_exits_cluster = true; + int num_net_sinks = static_cast(atom_ctx.nlist.net_sinks(net_id).size()); + + if (pb_graph_pin->num_connectable_primitive_input_pins[depth] >= num_net_sinks) { + //It is possible the net is completely absorbed in the cluster, + //since this pin could (potentially) drive all the net's sinks + + /* Important: This runtime penalty looks a lot scarier than it really is. + * For high fan-out nets, I at most look at the number of pins within the + * cluster which limits runtime. + * + * DO NOT REMOVE THIS INITIAL FILTER WITHOUT CAREFUL ANALYSIS ON RUNTIME!!! + * + * Key Observation: + * For LUT-based designs it is impossible for the average fanout to exceed + * the number of LUT inputs so it's usually around 4-5 (pigeon-hole argument, + * if the average fanout is greater than the number of LUT inputs, where do + * the extra connections go? Therefore, average fanout must be capped to a + * small constant where the constant is equal to the number of LUT inputs). + * The real danger to runtime is when the number of sinks of a net gets doubled + */ + + //Check if all the net sinks are, in fact, inside this cluster + bool all_sinks_in_cur_cluster = true; + LegalizationClusterId driver_cluster = atom_cluster[driver_blk_id]; + for (auto pin_id : atom_ctx.nlist.net_sinks(net_id)) { + auto sink_blk_id = atom_ctx.nlist.pin_block(pin_id); + if (atom_cluster[sink_blk_id] != driver_cluster) { + all_sinks_in_cur_cluster = false; + break; + } + } + + if (all_sinks_in_cur_cluster) { + //All the sinks are part of this cluster, so the net may be fully absorbed. + // + //Verify this, by counting the number of net sinks reachable from the driver pin. 
+ //If the count equals the number of net sinks then the net is fully absorbed and + //the net does not exit the cluster + /* TODO: I should cache the absorbed outputs, once net is absorbed, + * net is forever absorbed, no point in rechecking every time */ + if (net_sinks_reachable_in_cluster(pb_graph_pin, depth, net_id)) { + //All the sinks are reachable inside the cluster + net_exits_cluster = false; + } + } + } + + if (net_exits_cluster) { + /* This output must exit this cluster */ + cur_pb->pb_stats->lookahead_output_pins_used[pin_class].push_back(net_id); + } + } + } +} + + +/* Determine if pins of speculatively packed pb are legal */ +static void compute_and_mark_lookahead_pins_used(const AtomBlockId blk_id, + const vtr::vector_map& atom_cluster) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + const t_pb* cur_pb = atom_ctx.lookup.atom_pb(blk_id); + VTR_ASSERT(cur_pb != nullptr); + + /* Walk through inputs, outputs, and clocks marking pins off of the same class */ + for (auto pin_id : atom_ctx.nlist.block_pins(blk_id)) { + auto net_id = atom_ctx.nlist.pin_net(pin_id); + + const t_pb_graph_pin* pb_graph_pin = find_pb_graph_pin(atom_ctx.nlist, atom_ctx.lookup, pin_id); + compute_and_mark_lookahead_pins_used_for_pin(pb_graph_pin, cur_pb, net_id, atom_cluster); + } +} + +/* Determine if speculatively packed cur_pb is pin feasible + * Runtime is actually not that bad for this. It's worst case O(k^2) where k is the + * number of pb_graph pins. Can use hash tables or make incremental if becomes an issue. 
+ */ +static void try_update_lookahead_pins_used(t_pb* cur_pb, + const vtr::vector_map& atom_cluster) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + // run recursively till a leaf (primitive) pb block is reached + const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; + if (pb_type->num_modes > 0 && cur_pb->name != nullptr) { + if (cur_pb->child_pbs != nullptr) { + for (int i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { + if (cur_pb->child_pbs[i] != nullptr) { + for (int j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { + try_update_lookahead_pins_used(&cur_pb->child_pbs[i][j], atom_cluster); + } + } + } + } + } else { + // find if this child (primitive) pb block has an atom mapped to it, + // if yes compute and mark lookahead pins used for that pb block + AtomBlockId blk_id = atom_ctx.lookup.pb_atom(cur_pb); + if (pb_type->blif_model != nullptr && blk_id) { + compute_and_mark_lookahead_pins_used(blk_id, atom_cluster); + } + } +} + +/* Check if the number of available inputs/outputs for a pin class is sufficient for speculatively packed blocks */ +static bool check_lookahead_pins_used(t_pb* cur_pb, t_ext_pin_util max_external_pin_util) { + const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; + + if (pb_type->num_modes > 0 && cur_pb->name) { + for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { + size_t class_size = cur_pb->pb_graph_node->input_pin_class_size[i]; + + if (cur_pb->is_root()) { + // Scale the class size by the maximum external pin utilization factor + // Use ceil to avoid classes of size 1 from being scaled to zero + class_size = std::ceil(max_external_pin_util.input_pin_util * class_size); + // if the number of pins already used is larger than class size, then the number of + // cluster inputs already used should be our constraint. Why is this needed? 
This is + // needed since when packing the seed block the maximum external pin utilization is + // used as 1.0 allowing molecules that are using up to all the cluster inputs to be + // packed legally. Therefore, if the seed block is already using more inputs than + // the allowed maximum utilization, this should become the new maximum pin utilization. + class_size = std::max(class_size, cur_pb->pb_stats->input_pins_used[i].size()); + } + + if (cur_pb->pb_stats->lookahead_input_pins_used[i].size() > class_size) { + return false; + } + } + + for (int i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { + size_t class_size = cur_pb->pb_graph_node->output_pin_class_size[i]; + if (cur_pb->is_root()) { + // Scale the class size by the maximum external pin utilization factor + // Use ceil to avoid classes of size 1 from being scaled to zero + class_size = std::ceil(max_external_pin_util.output_pin_util * class_size); + // if the number of pins already used is larger than class size, then the number of + // cluster outputs already used should be our constraint. Why is this needed? This is + // needed since when packing the seed block the maximum external pin utilization is + // used as 1.0 allowing molecules that are using up to all the cluster outputs to be + // packed legally. Therefore, if the seed block is already using more outputs than + // the allowed maximum utilization, this should become the new maximum pin utilization. 
+ class_size = std::max(class_size, cur_pb->pb_stats->output_pins_used[i].size()); + } + + if (cur_pb->pb_stats->lookahead_output_pins_used[i].size() > class_size) { + return false; + } + } + + if (cur_pb->child_pbs) { + for (int i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { + if (cur_pb->child_pbs[i]) { + for (int j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { + if (!check_lookahead_pins_used(&cur_pb->child_pbs[i][j], max_external_pin_util)) + return false; + } + } + } + } + } + + return true; +} + +/** + * This function takes a chain molecule, and the pb_graph_node that is chosen + * for packing the molecule's root block. Using the given root_primitive, this + * function will identify which chain id this molecule is being mapped to and + * will update the chain id value inside the chain info data structure of this + * molecule + * + * Precondition (asserted): chain_molecule is a long chain whose chain id has + * not been assigned yet (chain_id == -1). + */ +static void update_molecule_chain_info(t_pack_molecule* chain_molecule, const t_pb_graph_node* root_primitive) { + VTR_ASSERT(chain_molecule->chain_info->chain_id == -1 && chain_molecule->chain_info->is_long_chain); + + /* Bind by const reference: the root pins are only read below, so copying the nested container on every call is unnecessary. */ + const auto& chain_root_pins = chain_molecule->pack_pattern->chain_root_pins; + + // long chains should only be placed at the beginning of the chain + // Since for long chains the molecule size is already equal to the + // total number of adders in the cluster. Therefore, it should + // always be placed at the very first adder in this cluster. 
+ for (size_t chainId = 0; chainId < chain_root_pins.size(); chainId++) { + if (chain_root_pins[chainId][0]->parent_node == root_primitive) { + chain_molecule->chain_info->chain_id = chainId; + chain_molecule->chain_info->first_packed_molecule = chain_molecule; + return; + } + } + + /* No chain root pin has root_primitive as its parent node; this is treated as a fatal inconsistency. */ + VTR_ASSERT(false); +} + +/* Revert trial atom block blk_id and free up memory space accordingly + * + * blk_id: the atom whose trial placement is undone. + * router_data: handed to set_reset_pb_modes when an emptied ancestor pb is reset. + * prepacker: handed to revalid_molecules before each pb is freed. + * atom_cluster: atom-to-cluster lookup; the entry of blk_id is reset to INVALID. + */ +static void revert_place_atom_block(const AtomBlockId blk_id, + t_lb_router_data* router_data, + const Prepacker& prepacker, + vtr::vector_map& atom_cluster) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + AtomContext& mutable_atom_ctx = g_vpr_ctx.mutable_atom(); + + //We cast away const here since we may free the pb, and it is + //being removed from the active mapping. + // + //In general most code works fine accessing const t_pb*, + //which is why we store them as such in atom_ctx.lookup + t_pb* pb = const_cast(atom_ctx.lookup.atom_pb(blk_id)); + + if (pb != nullptr) { + /* When freeing molecules, the current block might already have been freed by a prior revert. + * When this happens, no need to do anything beyond basic bookkeeping at the atom block + */ + + t_pb* next = pb->parent_pb; + revalid_molecules(pb, prepacker); + free_pb(pb); + pb = next; + + while (pb != nullptr) { + /* If this pb was created only for the purpose of holding the new molecule, remove it. + * Must check if cluster is already freed (which can be the case) + */ + next = pb->parent_pb; + + if (pb->child_pbs != nullptr && pb->pb_stats != nullptr + && pb->pb_stats->num_child_blocks_in_pb == 0) { + set_reset_pb_modes(router_data, pb, false); + if (next != nullptr) { + /* Only pbs that have a parent are freed here. If the walk reaches the top-level pb + * (next == nullptr), then placing the initial seed molecule failed; the actual complex + * block itself is not freed, as the seed needs to find another placement */ + revalid_molecules(pb, prepacker); + free_pb(pb); + } + } + pb = next; + } + } + + //Update the atom netlist mapping + atom_cluster[blk_id] = 
LegalizationClusterId::INVALID(); + mutable_atom_ctx.lookup.set_atom_pb(blk_id, nullptr); +} + +/* Speculation successful, commit input/output pins used */ +static void commit_lookahead_pins_used(t_pb* cur_pb) { + const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; + + if (pb_type->num_modes > 0 && cur_pb->name) { + for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { + VTR_ASSERT(cur_pb->pb_stats->lookahead_input_pins_used[i].size() <= (unsigned int)cur_pb->pb_graph_node->input_pin_class_size[i]); + for (size_t j = 0; j < cur_pb->pb_stats->lookahead_input_pins_used[i].size(); j++) { + VTR_ASSERT(cur_pb->pb_stats->lookahead_input_pins_used[i][j]); + cur_pb->pb_stats->input_pins_used[i].insert({j, cur_pb->pb_stats->lookahead_input_pins_used[i][j]}); + } + } + + for (int i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { + VTR_ASSERT(cur_pb->pb_stats->lookahead_output_pins_used[i].size() <= (unsigned int)cur_pb->pb_graph_node->output_pin_class_size[i]); + for (size_t j = 0; j < cur_pb->pb_stats->lookahead_output_pins_used[i].size(); j++) { + VTR_ASSERT(cur_pb->pb_stats->lookahead_output_pins_used[i][j]); + cur_pb->pb_stats->output_pins_used[i].insert({j, cur_pb->pb_stats->lookahead_output_pins_used[i][j]}); + } + } + + if (cur_pb->child_pbs) { + for (int i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { + if (cur_pb->child_pbs[i]) { + for (int j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { + commit_lookahead_pins_used(&cur_pb->child_pbs[i][j]); + } + } + } + } + } +} + +/** + * Cleans up a pb after unsuccessful molecule packing + * + * Recursively frees pbs from a t_pb tree. The given root pb itself is not + * deleted. + * + * If a pb object has its children allocated then before freeing them the + * function checks if there is no atom that corresponds to any of them. The + * check is performed only for leaf (primitive) pbs. The function recurses for + * non-primitive pbs. 
+ * + * The cleaning itself includes deleting all child pbs, resetting mode of the + * pb and also freeing its name. This prepares the pb for another round of + * molecule packing tryout. + * + * Returns true when no primitive below the given pb has a name set (any + * allocated children of this pb have then been freed); returns false, with + * the children of this pb left in place, otherwise. + */ +static bool cleanup_pb(t_pb* pb) { + bool can_free = true; + + /* Recursively check if there are any children with already assigned atoms */ + if (pb->child_pbs != nullptr) { + const t_mode* mode = &pb->pb_graph_node->pb_type->modes[pb->mode]; + VTR_ASSERT(mode != nullptr); + + /* Check each child pb type of the current mode */ + for (int i = 0; i < mode->num_pb_type_children; ++i) { + /* Check each instance of this child pb type */ + if (pb->child_pbs[i] != nullptr) { + for (int j = 0; j < mode->pb_type_children[i].num_pb; ++j) { + t_pb* pb_child = &pb->child_pbs[i][j]; + t_pb_type* pb_type = pb_child->pb_graph_node->pb_type; + + /* Primitive: a non-null name marks it as occupied */ + if (pb_type->num_modes == 0) { + if (pb_child->name != nullptr) { + can_free = false; + } + } + + /* Non-primitive, recurse */ + else { + if (!cleanup_pb(pb_child)) { + can_free = false; + } + } + } + } + } + + /* Nothing below is occupied: free the child arrays and reset this pb */ + if (can_free) { + for (int i = 0; i < mode->num_pb_type_children; ++i) { + if (pb->child_pbs[i] != nullptr) { + delete[] pb->child_pbs[i]; + } + } + + delete[] pb->child_pbs; + pb->child_pbs = nullptr; + pb->mode = 0; + + if (pb->name) { + free(pb->name); + pb->name = nullptr; + } + } + } + + return can_free; +} + +e_block_pack_status ClusterLegalizer::try_pack_molecule(t_pack_molecule* molecule, + LegalizationCluster& cluster, + LegalizationClusterId cluster_id, + const t_ext_pin_util& max_external_pin_util) { + // Try to pack the molecule into a cluster with this pb type. + + // Safety debugs. + VTR_ASSERT_DEBUG(molecule != nullptr); + VTR_ASSERT_DEBUG(cluster.pb != nullptr); + VTR_ASSERT_DEBUG(cluster.type != nullptr); + + // TODO: Remove these global accesses. 
+ // AtomContext used for: + // - printing verbose statements + // - Looking up the primitive pb + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + // FloorplanningContext used for: + // - Checking if the atom can be placed in the cluster for floorplanning + // constraints. + const FloorplanningContext& floorplanning_ctx = g_vpr_ctx.floorplanning(); + if (log_verbosity_ > 3) { + AtomBlockId root_atom = molecule->atom_block_ids[molecule->root]; + VTR_LOG("\t\tTry pack molecule: '%s' (%s)", + atom_ctx.nlist.block_name(root_atom).c_str(), + atom_ctx.nlist.block_model(root_atom)->name); + VTR_LOGV(molecule->pack_pattern, + " molecule_type %s molecule_size %zu", + molecule->pack_pattern->name, + molecule->atom_block_ids.size()); + VTR_LOG("\n"); + } + + // if this cluster has a molecule placed in it that is part of a long chain + // (a chain that consists of more than one molecule), don't allow more long chain + // molecules to be placed in this cluster. To avoid possibly creating cluster level + // blocks that have incompatible placement constraints or form very long placement + // macros that limit placement flexibility. + t_cluster_placement_stats* cluster_placement_stats_ptr = &(cluster_placement_stats_[cluster.type->index]); + if (cluster_placement_stats_ptr->has_long_chain && molecule->is_chain() && molecule->chain_info->is_long_chain) { + VTR_LOGV(log_verbosity_ > 4, "\t\t\tFAILED Placement Feasibility Filter: Only one long chain per cluster is allowed\n"); + //Record the failure of this molecule in the current pb stats + record_molecule_failure(molecule, cluster.pb); + // Free the allocated data. + return e_block_pack_status::BLK_FAILED_FEASIBLE; + } + + // Check if every atom in the molecule is legal in the cluster from a + // floorplanning perspective + bool cluster_pr_update_check = false; + PartitionRegion new_cluster_pr = cluster.pr; + // TODO: This can be made more efficient by pre-computing the intersection + // of all the atoms' PRs in the molecule. 
+ int molecule_size = get_array_size_of_molecule(molecule); + for (int i_mol = 0; i_mol < molecule_size; i_mol++) { + // Try to intersect with atom PartitionRegion if atom exists + AtomBlockId atom_blk_id = molecule->atom_block_ids[i_mol]; + if (atom_blk_id) { + bool cluster_pr_needs_update = false; + bool block_pack_floorplan_status = check_cluster_floorplanning(atom_blk_id, + new_cluster_pr, + floorplanning_ctx.constraints, + log_verbosity_, + cluster_pr_needs_update); + if (!block_pack_floorplan_status) { + // Record the failure of this molecule in the current pb stats + record_molecule_failure(molecule, cluster.pb); + return e_block_pack_status::BLK_FAILED_FLOORPLANNING; + } + + if (cluster_pr_needs_update) { + cluster_pr_update_check = true; + } + } + } + + // Check if all atoms in the molecule can be added to the cluster without + // NoC group conflicts + NocGroupId new_cluster_noc_grp_id = cluster.noc_grp_id; + for (int i_mol = 0; i_mol < molecule_size; i_mol++) { + AtomBlockId atom_blk_id = molecule->atom_block_ids[i_mol]; + if (atom_blk_id) { + bool block_pack_noc_grp_status = check_cluster_noc_group(atom_blk_id, + new_cluster_noc_grp_id, + atom_noc_grp_id_, + log_verbosity_); + if (!block_pack_noc_grp_status) { + // Record the failure of this molecule in the current pb stats + record_molecule_failure(molecule, cluster.pb); + return e_block_pack_status::BLK_FAILED_NOC_GROUP; + } + } + } + + std::vector primitives_list(max_molecule_size_, nullptr); + e_block_pack_status block_pack_status = e_block_pack_status::BLK_STATUS_UNDEFINED; + while (block_pack_status != e_block_pack_status::BLK_PASSED) { + if (!get_next_primitive_list(cluster_placement_stats_ptr, + molecule, + primitives_list.data())) { + VTR_LOGV(log_verbosity_ > 3, "\t\tFAILED No candidate primitives available\n"); + block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; + break; /* no more candidate primitives available, this molecule will not pack, return fail */ + } + + block_pack_status 
= e_block_pack_status::BLK_PASSED; + int failed_location = 0; + for (int i_mol = 0; i_mol < molecule_size && block_pack_status == e_block_pack_status::BLK_PASSED; i_mol++) { + VTR_ASSERT((primitives_list[i_mol] == nullptr) == (!molecule->atom_block_ids[i_mol])); + failed_location = i_mol + 1; + AtomBlockId atom_blk_id = molecule->atom_block_ids[i_mol]; + if (!atom_blk_id.is_valid()) + continue; + // NOTE: This parent variable is only used in the recursion of this + // function. + t_pb* parent = nullptr; + block_pack_status = try_place_atom_block_rec(primitives_list[i_mol], + atom_blk_id, + cluster.pb, + &parent, + num_models_, + max_cluster_size_, + cluster_id, + atom_cluster_, + cluster_placement_stats_ptr, + molecule, + cluster.router_data, + log_verbosity_, + feasible_block_array_size_); + } + + if (enable_pin_feasibility_filter_ && block_pack_status == e_block_pack_status::BLK_PASSED) { + // Check if pin usage is feasible for the current packing assignment + reset_lookahead_pins_used(cluster.pb); + try_update_lookahead_pins_used(cluster.pb, atom_cluster_); + if (!check_lookahead_pins_used(cluster.pb, max_external_pin_util)) { + VTR_LOGV(log_verbosity_ > 4, "\t\t\tFAILED Pin Feasibility Filter\n"); + block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; + } else { + VTR_LOGV(log_verbosity_ > 3, "\t\t\tPin Feasibility: Passed pin feasibility filter\n"); + } + } + + if (block_pack_status == e_block_pack_status::BLK_PASSED) { + /* + * during the clustering step of `do_clustering`, `detailed_routing_stage` is incremented at each iteration until it a cluster + * is correctly generated or `detailed_routing_stage` assumes an invalid value (E_DETAILED_ROUTE_INVALID). + * depending on its value we have different behaviors: + * - E_DETAILED_ROUTE_AT_END_ONLY: Skip routing if heuristic is to route at the end of packing complex block. + * - E_DETAILED_ROUTE_FOR_EACH_ATOM: Try to route if heuristic is to route for every atom. 
If the clusterer arrives at this stage, + * it means that more checks have to be performed as the previous stage failed to generate a new cluster. + * + * mode_status is a data structure containing the status of the mode selection. Its members are: + * - bool is_mode_conflict + * - bool try_expand_all_modes + * - bool expand_all_modes + * + * is_mode_conflict affects this stage. Its value determines whether the cluster failed to pack after a mode conflict issue. + * It holds a flag that is used to verify whether try_intra_lb_route ended in a mode conflict issue. + * + * Until is_mode_conflict is set to FALSE by try_intra_lb_route, the loop re-iterates. If all the available modes are exhausted, + * an error will be thrown during the mode conflict checks (this is to prevent infinite loops). + * + * If the value is TRUE the cluster has to be re-routed, and its internal pb_graph_nodes will have more restricted choices + * regarding the mode that has to be selected. + * + * is_mode_conflict is initially set to TRUE, and, unless a mode conflict is found, it is set to false in `try_intra_lb_route`. + * + * try_expand_all_modes is set if the node expansion failed to find a valid routing path. The clusterer tries to find another route + * by using all the modes during node expansion. + * + * expand_all_modes is used to enable the expansion of all the nodes using all the possible modes. 
+ */ + t_mode_selection_status mode_status; + bool is_routed = false; + bool do_detailed_routing_stage = (cluster_legalization_strategy_ == ClusterLegalizationStrategy::FULL); + if (do_detailed_routing_stage) { + do { + reset_intra_lb_route(cluster.router_data); + is_routed = try_intra_lb_route(cluster.router_data, log_verbosity_, &mode_status); + } while (do_detailed_routing_stage && mode_status.is_mode_issue()); + } + + if (do_detailed_routing_stage && !is_routed) { + /* Cannot pack */ + VTR_LOGV(log_verbosity_ > 4, "\t\t\tFAILED Detailed Routing Legality\n"); + block_pack_status = e_block_pack_status::BLK_FAILED_ROUTE; + } else { + /* Pack successful, commit + * TODO: SW Engineering note - may want to update cluster stats here too instead of doing it outside + */ + VTR_ASSERT(block_pack_status == e_block_pack_status::BLK_PASSED); + if (molecule->is_chain()) { + /* Chained molecules often take up lots of area and are important, + * if a chain is packed in, want to rename logic block to match chain name */ + AtomBlockId chain_root_blk_id = molecule->atom_block_ids[molecule->pack_pattern->root_block->block_id]; + t_pb* cur_pb = atom_ctx.lookup.atom_pb(chain_root_blk_id)->parent_pb; + while (cur_pb != nullptr) { + free(cur_pb->name); + cur_pb->name = vtr::strdup(atom_ctx.nlist.block_name(chain_root_blk_id).c_str()); + cur_pb = cur_pb->parent_pb; + } + // if this molecule is part of a chain, mark the cluster as having a long chain + // molecule. Also check if it's the first molecule in the chain to be packed. 
+ // If so, update the chain id for this chain of molecules to make sure all + // molecules will be packed to the same chain id and can reach each other using + // the chain direct links between clusters + if (molecule->chain_info->is_long_chain) { + cluster_placement_stats_ptr->has_long_chain = true; + if (molecule->chain_info->chain_id == -1) { + update_molecule_chain_info(molecule, primitives_list[molecule->root]); + } + } + } + + //update cluster PartitionRegion if atom with floorplanning constraints was added + if (cluster_pr_update_check) { + cluster.pr = new_cluster_pr; + VTR_LOGV(log_verbosity_ > 2, "\nUpdated PartitionRegion of cluster\n"); + } + + // Update the cluster's NoC group ID. This is cheap, so it does + // not need a guard like the one used for the PR update above. + cluster.noc_grp_id = new_cluster_noc_grp_id; + + // Insert the molecule into the cluster for bookkeeping. + cluster.molecules.insert(molecule); + + for (int i = 0; i < molecule_size; i++) { + AtomBlockId atom_blk_id = molecule->atom_block_ids[i]; + if (!atom_blk_id.is_valid()) + continue; + + /* invalidate the molecule that the prepacker associates with this atom block */ + t_pack_molecule* cur_molecule = prepacker_.get_atom_molecule(atom_blk_id); + // TODO: This should really be named better. Something like + // "is_clustered". and then it should be set to true. + // Right now, valid implies "not clustered" which is + // confusing. + cur_molecule->valid = false; + + commit_primitive(cluster_placement_stats_ptr, primitives_list[i]); + + atom_cluster_[atom_blk_id] = cluster_id; + } + + // Update the lookahead pins used. 
+ commit_lookahead_pins_used(cluster.pb); + } + } + + if (block_pack_status != e_block_pack_status::BLK_PASSED) { + /* Pack unsuccessful, undo inserting molecule into cluster */ + for (int i = 0; i < failed_location; i++) { + AtomBlockId atom_blk_id = molecule->atom_block_ids[i]; + if (atom_blk_id) { + remove_atom_from_target(cluster.router_data, atom_blk_id); + } + } + for (int i = 0; i < failed_location; i++) { + AtomBlockId atom_blk_id = molecule->atom_block_ids[i]; + if (atom_blk_id) { + revert_place_atom_block(atom_blk_id, cluster.router_data, prepacker_, atom_cluster_); + } + } + + // Record the failure of this molecule in the current pb stats + record_molecule_failure(molecule, cluster.pb); + + /* Packing failed, but a part of the pb tree is still allocated and pbs have their modes set. + * Before trying to pack next molecule the unused pbs need to be freed and, the most important, + * their modes reset. This task is performed by the cleanup_pb() function below. */ + cleanup_pb(cluster.pb); + } else { + VTR_LOGV(log_verbosity_ > 3, "\t\tPASSED pack molecule\n"); + } + } + return block_pack_status; +} + +std::tuple +ClusterLegalizer::start_new_cluster(t_pack_molecule* molecule, + t_logical_block_type_ptr cluster_type, + int cluster_mode) { + // Safety asserts to ensure the API is being called with valid arguments. + VTR_ASSERT_DEBUG(molecule != nullptr); + VTR_ASSERT_DEBUG(cluster_type != nullptr); + VTR_ASSERT_DEBUG(cluster_mode < cluster_type->pb_graph_head->pb_type->num_modes); + // Ensure that the molecule has not already been placed. + VTR_ASSERT_SAFE(molecule_cluster_.find(molecule) == molecule_cluster_.end() || + !molecule_cluster_[molecule].is_valid()); + // Safety asserts to ensure that the API was initialized properly. + VTR_ASSERT_DEBUG(cluster_placement_stats_ != nullptr && + lb_type_rr_graphs_ != nullptr); + + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; + + // Create the physical block for this cluster based on the type. 
+ t_pb* cluster_pb = new t_pb; + cluster_pb->pb_graph_node = cluster_type->pb_graph_head; + alloc_and_load_pb_stats(cluster_pb, feasible_block_array_size_); + cluster_pb->parent_pb = nullptr; + cluster_pb->mode = cluster_mode; + + // Allocate and load the LB router data + t_lb_router_data* router_data = alloc_and_load_router_data(&lb_type_rr_graphs_[cluster_type->index], + cluster_type); + + // Reset the cluster placement stats + t_cluster_placement_stats* cluster_placement_stats_ptr = &(cluster_placement_stats_[cluster_type->index]); + reset_cluster_placement_stats(cluster_placement_stats_ptr); + set_mode_cluster_placement_stats(cluster_pb->pb_graph_node, cluster_pb->mode); + + // Create the new cluster + LegalizationCluster new_cluster; + new_cluster.pb = cluster_pb; + new_cluster.router_data = router_data; + new_cluster.pr = PartitionRegion(); + new_cluster.noc_grp_id = NocGroupId::INVALID(); + new_cluster.type = cluster_type; + + // Try to pack the molecule into the new_cluster. + // When starting a new cluster, we set the external pin utilization to full + // (meaning all cluster pins are allowed to be used). + const t_ext_pin_util FULL_EXTERNAL_PIN_UTIL(1., 1.); + LegalizationClusterId new_cluster_id = LegalizationClusterId(legalization_cluster_ids_.size()); + e_block_pack_status pack_status = try_pack_molecule(molecule, + new_cluster, + new_cluster_id, + FULL_EXTERNAL_PIN_UTIL); + + if (pack_status == e_block_pack_status::BLK_PASSED) { + // Give the new cluster pb a name. The current convention is to name the + // cluster after the root atom of the first molecule packed into it. + AtomBlockId root_atom = molecule->atom_block_ids[molecule->root]; + const std::string& root_atom_name = atom_nlist.block_name(root_atom); + if (new_cluster.pb->name != nullptr) + free(new_cluster.pb->name); + new_cluster.pb->name = vtr::strdup(root_atom_name.c_str()); + // Move the cluster into the vector of clusters and ids. 
+ legalization_cluster_ids_.push_back(new_cluster_id); + legalization_clusters_.push_back(std::move(new_cluster)); + // Update the molecule to cluster map. + molecule_cluster_[molecule] = new_cluster_id; + } else { + // Delete the new_cluster. + free_pb(new_cluster.pb); + delete new_cluster.pb; + free_router_data(new_cluster.router_data); + new_cluster_id = LegalizationClusterId::INVALID(); + } + + return {pack_status, new_cluster_id}; +} + +e_block_pack_status ClusterLegalizer::add_mol_to_cluster(t_pack_molecule* molecule, + LegalizationClusterId cluster_id) { + // Safety asserts to make sure the inputs are valid. + VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + VTR_ASSERT(legalization_cluster_ids_[cluster_id].is_valid() && "Cannot add to a destroyed cluster"); + // Ensure that the molecule has not already been placed. + VTR_ASSERT(molecule_cluster_.find(molecule) == molecule_cluster_.end() || + !molecule_cluster_[molecule].is_valid()); + // Safety asserts to ensure that the API was initialized properly. + VTR_ASSERT_DEBUG(cluster_placement_stats_ != nullptr && + lb_type_rr_graphs_ != nullptr); + + // Get the cluster. + LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + VTR_ASSERT(cluster.router_data != nullptr && "Cannot add molecule to cleaned cluster!"); + // Set the target_external_pin_util. + t_ext_pin_util target_ext_pin_util = target_external_pin_util_.get_pin_util(cluster.type->name); + // Try to pack the molecule into the cluster. + e_block_pack_status pack_status = try_pack_molecule(molecule, + cluster, + cluster_id, + target_ext_pin_util); + + // If the packing was successful, set the molecules' cluster to this one. + if (pack_status == e_block_pack_status::BLK_PASSED) + molecule_cluster_[molecule] = cluster_id; + + return pack_status; +} + +void ClusterLegalizer::destroy_cluster(LegalizationClusterId cluster_id) { + // Safety asserts to make sure the inputs are valid. 
+ VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + VTR_ASSERT(legalization_cluster_ids_[cluster_id].is_valid() && "Cannot destroy an already destroyed cluster"); + // Get the cluster. + LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + // Remove all molecules from the cluster. + for (t_pack_molecule* mol : cluster.molecules) { + VTR_ASSERT_SAFE(molecule_cluster_.find(mol) != molecule_cluster_.end() && + molecule_cluster_[mol] == cluster_id); + molecule_cluster_[mol] = LegalizationClusterId::INVALID(); + // The overall clustering algorithm uses this valid flag to indicate + // that a molecule has not been packed (clustered) yet. Since we are + // destroying a cluster, all of its molecules are now no longer clustered + // so they are all validated. + mol->valid = true; + // Revert the placement of all blocks in the molecule. + int molecule_size = get_array_size_of_molecule(mol); + for (int i = 0; i < molecule_size; i++) { + AtomBlockId atom_blk_id = mol->atom_block_ids[i]; + if (atom_blk_id) { + revert_place_atom_block(atom_blk_id, cluster.router_data, prepacker_, atom_cluster_); + } + } + } + cluster.molecules.clear(); + // Free the rest of the cluster data. + // Casting things to nullptr for safety just in case someone is trying to use it. + free_pb(cluster.pb); + delete cluster.pb; + cluster.pb = nullptr; + free_router_data(cluster.router_data); + cluster.router_data = nullptr; + cluster.pr = PartitionRegion(); + + // Mark the cluster as invalid. + legalization_cluster_ids_[cluster_id] = LegalizationClusterId::INVALID(); +} + +void ClusterLegalizer::compress() { + // Create a map from the old ids to the new (compressed) one. + vtr::vector_map cluster_id_map; + cluster_id_map = compress_ids(legalization_cluster_ids_); + // Update all cluster values. 
+ legalization_cluster_ids_ = clean_and_reorder_ids(cluster_id_map); + legalization_clusters_ = clean_and_reorder_values(legalization_clusters_, cluster_id_map); + // Update the reverse lookups. + for (auto& it : molecule_cluster_) { + if (!it.second.is_valid()) + continue; + molecule_cluster_[it.first] = cluster_id_map[it.second]; + } + for (size_t i = 0; i < atom_cluster_.size(); i++) { + AtomBlockId atom_blk_id = AtomBlockId(i); + LegalizationClusterId old_cluster_id = atom_cluster_[atom_blk_id]; + if (!old_cluster_id.is_valid()) + continue; + atom_cluster_[atom_blk_id] = cluster_id_map[old_cluster_id]; + } + // Shrink everything to fit + legalization_cluster_ids_.shrink_to_fit(); + legalization_clusters_.shrink_to_fit(); + atom_cluster_.shrink_to_fit(); +} + +void ClusterLegalizer::clean_cluster(LegalizationClusterId cluster_id) { + // Safety asserts to make sure the inputs are valid. + VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + // Get the cluster. + LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + VTR_ASSERT(cluster.router_data != nullptr && "Should not clean an already cleaned cluster!"); + // Free the pb stats. + free_pb_stats_recursive(cluster.pb); + // Load the pb_route so we can free the cluster router data. + // The pb_route is used when creating a netlist from the legalized clusters. + std::vector* saved_lb_nets = cluster.router_data->saved_lb_nets; + t_pb_graph_node* pb_graph_node = cluster.pb->pb_graph_node; + cluster.pb->pb_route = alloc_and_load_pb_route(saved_lb_nets, pb_graph_node); + // Free the router data. + free_router_data(cluster.router_data); + cluster.router_data = nullptr; +} + +// TODO: This is fine for the current implementation of the legalizer. But if +// more complex strategies are added, this will need to be updated to +// check more than just routing (such as PR and NoC groups). 
+bool ClusterLegalizer::check_cluster_legality(LegalizationClusterId cluster_id) { + // Safety asserts to make sure the inputs are valid. + VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + // To check if a cluster is fully legal, try to perform an intra logic block + // route on the cluster. If it succeeds, the cluster is fully legal. + t_mode_selection_status mode_status; + LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + // Cleaning or destroying a cluster frees its router data and sets it to + // nullptr, so such a cluster can no longer be routed. Fail loudly here + // instead of handing a null pointer to the router. + VTR_ASSERT(cluster.router_data != nullptr && "Cannot check the legality of a cleaned or destroyed cluster!"); + return try_intra_lb_route(cluster.router_data, log_verbosity_, &mode_status); +} + +ClusterLegalizer::ClusterLegalizer(const AtomNetlist& atom_netlist, + const Prepacker& prepacker, + const std::vector& logical_block_types, + std::vector* lb_type_rr_graphs, + size_t num_models, + const std::vector& target_external_pin_util_str, + const t_pack_high_fanout_thresholds& high_fanout_thresholds, + ClusterLegalizationStrategy cluster_legalization_strategy, + bool enable_pin_feasibility_filter, + int feasible_block_array_size, + int log_verbosity) : prepacker_(prepacker) { + // Verify that the inputs are valid. + VTR_ASSERT_SAFE(lb_type_rr_graphs != nullptr); + + // Resize the atom_cluster lookup to make the accesses much cheaper. + atom_cluster_.resize(atom_netlist.blocks().size(), LegalizationClusterId::INVALID()); + // Allocate the cluster_placement_stats + cluster_placement_stats_ = alloc_and_load_cluster_placement_stats(); + // Pre-compute the max size of any molecule. + max_molecule_size_ = prepacker.get_max_molecule_size(); + // Calculate the max cluster size + // - Limit maximum number of elements for each cluster to MAX_SHORT + max_cluster_size_ = calc_max_cluster_size(logical_block_types); + VTR_ASSERT(max_cluster_size_ < MAX_SHORT); + // Get a reference to the rr graphs. + lb_type_rr_graphs_ = lb_type_rr_graphs; + // Get the number of models in the architecture. + num_models_ = num_models; + // Find all NoC router atoms. 
+ std::vector noc_atoms = find_noc_router_atoms(atom_netlist); + update_noc_reachability_partitions(noc_atoms, + atom_netlist, + high_fanout_thresholds, + atom_noc_grp_id_); + // Copy the options passed by the user + cluster_legalization_strategy_ = cluster_legalization_strategy; + enable_pin_feasibility_filter_ = enable_pin_feasibility_filter; + feasible_block_array_size_ = feasible_block_array_size; + log_verbosity_ = log_verbosity; + // Get the target external pin utilization + // NOTE: This is really silly, but this can potentially fail. If it does + // it is important that everything is allocated. If not, when it fails + // it will call the reset method when only parts of the class are + // allocated which may cause havoc... + target_external_pin_util_ = t_ext_pin_util_targets(target_external_pin_util_str); +} + +void ClusterLegalizer::reset() { + // Destroy all of the clusters and compress. + for (LegalizationClusterId cluster_id : legalization_cluster_ids_) { + if (!cluster_id.is_valid()) + continue; + destroy_cluster(cluster_id); + } + compress(); + // Reset the molecule_cluster map + molecule_cluster_.clear(); + // Reset the cluster placement stats. 
+ free_cluster_placement_stats(cluster_placement_stats_); + cluster_placement_stats_ = alloc_and_load_cluster_placement_stats(); +} + +void ClusterLegalizer::verify() { + std::unordered_set atoms_checked; + auto& atom_ctx = g_vpr_ctx.atom(); + + if (clusters().size() == 0) { + VTR_LOG_WARN("Packing produced no clustered blocks"); + } + + /* + * Check that each atom block connects to one physical primitive and that the primitive links up to the parent clb + */ + for (auto blk_id : atom_ctx.nlist.blocks()) { + //Each atom should be part of a pb + const t_pb* atom_pb = atom_ctx.lookup.atom_pb(blk_id); + if (!atom_pb) { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "Atom block %s is not mapped to a pb\n", + atom_ctx.nlist.block_name(blk_id).c_str()); + } + + //Check the reverse mapping is consistent + if (atom_ctx.lookup.pb_atom(atom_pb) != blk_id) { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "pb %s does not contain atom block %s but atom block %s maps to pb.\n", + atom_pb->name, + atom_ctx.nlist.block_name(blk_id).c_str(), + atom_ctx.nlist.block_name(blk_id).c_str()); + } + + VTR_ASSERT(atom_ctx.nlist.block_name(blk_id) == atom_pb->name); + + const t_pb* cur_pb = atom_pb; + while (cur_pb->parent_pb) { + cur_pb = cur_pb->parent_pb; + VTR_ASSERT(cur_pb->name); + } + + LegalizationClusterId cluster_id = get_atom_cluster(blk_id); + if (cluster_id == LegalizationClusterId::INVALID()) { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "Atom %s is not mapped to a CLB\n", + atom_ctx.nlist.block_name(blk_id).c_str()); + } + + if (cur_pb != get_cluster_pb(cluster_id)) { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "CLB %s does not match CLB contained by pb %s.\n", + cur_pb->name, atom_pb->name); + } + } + + /* Check that I do not have spurious links in children pbs */ + for (LegalizationClusterId cluster_id : clusters()) { + if (!cluster_id.is_valid()) + continue; + check_cluster_atom_blocks(get_cluster_pb(cluster_id), + atoms_checked); + } + + for (auto blk_id : atom_ctx.nlist.blocks()) { + if 
(!atoms_checked.count(blk_id)) { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "Atom block %s not found in any cluster.\n", + atom_ctx.nlist.block_name(blk_id).c_str()); + } + } +} + +void ClusterLegalizer::finalize() { + for (LegalizationClusterId cluster_id : legalization_cluster_ids_) { + if (!cluster_id.is_valid()) + continue; + // If the cluster has not already been cleaned, clean it. This will + // generate the pb_route necessary for generating a clustered netlist. + const LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + if (cluster.router_data != nullptr) + clean_cluster(cluster_id); + } +} + +ClusterLegalizer::~ClusterLegalizer() { + // Destroy all clusters (no need to compress). + for (LegalizationClusterId cluster_id : legalization_cluster_ids_) { + if (!cluster_id.is_valid()) + continue; + destroy_cluster(cluster_id); + } + // Free the cluster_placement_stats + free_cluster_placement_stats(cluster_placement_stats_); +} + diff --git a/vpr/src/pack/cluster_legalizer.h b/vpr/src/pack/cluster_legalizer.h new file mode 100644 index 00000000000..ed1c35b857c --- /dev/null +++ b/vpr/src/pack/cluster_legalizer.h @@ -0,0 +1,474 @@ +#pragma once + +#include +#include +#include "atom_netlist_fwd.h" +#include "noc_data_types.h" +#include "partition_region.h" +#include "vpr_types.h" +#include "vtr_range.h" +#include "vtr_strong_id.h" +#include "vtr_vector.h" +#include "vtr_vector_map.h" + +class Prepacker; +class t_pb_graph_node; +struct t_lb_router_data; + +// A special ID to identify the legalization clusters. This is separate from the +// ClusterBlockId since this legalizer is not necessarily tied to the Clustered +// netlist, but is used as a sub-routine to it. +struct legalization_cluster_id_tag; +typedef vtr::StrongId LegalizationClusterId; + +/// @brief The different legalization strategies the cluster legalizer can perform. +/// +/// Allows the user of the API to select how thorough the legalizer should be +/// when adding molecules into clusters. 
+enum class ClusterLegalizationStrategy { + FULL, // Run the full legalizer (including intra-lb routing) + SKIP_INTRA_LB_ROUTE // Do all legality checks except intra-lb routing +}; + +/// @brief The status of the cluster legalization. +enum class e_block_pack_status { + BLK_PASSED, // Passed legalization. + BLK_FAILED_FEASIBLE, // Failed due to block not feasibly being able to go in the cluster. + BLK_FAILED_ROUTE, // Failed due to intra-lb routing failure. + BLK_FAILED_FLOORPLANNING, // Failed due to not being compatible with the cluster's current PartitionRegion. + BLK_FAILED_NOC_GROUP, // Failed due to not being compatible with the cluster's NoC group. + BLK_STATUS_UNDEFINED // Undefined status. Something went wrong. +}; + +/* + * @brief A struct containing information about the cluster. + * + * This contains necessary information for legalizing a cluster. + */ +struct LegalizationCluster { + /// @brief A list of the molecules in the cluster. By design, a cluster will + /// only contain molecules which have been previously legalized into + /// the cluster using a legalization strategy. + std::set molecules; + + /// @brief The logical block of this cluster. + /// TODO: We should be more careful with how this is allocated. Instead of + /// pointers, we really should use IDs and store them in a standard + /// container. + t_pb* pb; + + /// @brief The logical block type this cluster represents. + t_logical_block_type_ptr type; + + /// @brief The partition region of legal positions this cluster can be placed. + /// Used to detect if a molecule can physically be placed in a cluster. + /// It is derived from the partition region constraints on the atoms + /// in the cluster (not fundamental but good for performance). + PartitionRegion pr; + + /// @brief The NoC group that this cluster is a part of. Is used to check if + /// a candidate primitive is in the same NoC group as the atom blocks + /// that have already been added to the primitive. 
This can be helpful + /// for optimization. + NocGroupId noc_grp_id; + + /// @brief The router data of the intra lb router used for this cluster. + /// Contains information about the atoms in the cluster and how they + /// can be routed within. + t_lb_router_data* router_data; +}; + +/* + * @brief A manager class which manages the legalization of clusters. As clusters + * are created, this class will legalize for each molecule added. It also + * provides methods which are helpful for clustering. + * + * Usage: + * The ClusterLegalizer class maintains the clusters within itself since the + * legalization of a cluster depends on the molecules which have already been + * inserted into the clusters prior. + * + * The class provides different legalization strategies the user may use to + * legalize: + * 1) SKIP_INTRA_LB_ROUTE + * 2) FULL + * + * 1) SKIP_INTRA_LB_ROUTE Legalization Strategy Example: + * This strategy will not fully route the internal connections of the clusters + * until the user requests it. An example of how to use this strategy would + * look something like this. Note, this example is simplified and the result + * of the packings should be checked and handled. + * + * ClusterLegalizer legalizer(...); + * + * std::tie(status, new_cluster_id) = legalizer.start_new_cluster(seed_mol, + * cluster_type, + * mode); + * for mol in molecules_to_add: + * // Cheaper additions, but may pack a molecule that wouldn't route. + * status = legalizer.add_mol_to_cluster(mol, new_cluster_id); + * if (status != e_block_pack_status::BLK_PASSED) + * break; + * + * // Do the expensive check once all molecules are in. + * if (!legalizer.check_cluster_legality(new_cluster_id)) + * // Destroy the illegal cluster. + * legalizer.destroy_cluster(new_cluster_id); + * legalizer.compress(); + * // Handle how to try again (maybe use FULL strategy). 
+ * + * 2) FULL Legalization Strategy Example: + * This strategy will fully route the internal connections of the clusters for + * each molecule added. This is much more expensive to run; however, it will ensure + * that the cluster is fully legalized while it is being created. An example + * of how to use this strategy would look something like this: + * + * ClusterLegalizer legalizer(...); + * + * std::tie(status, new_cluster_id) = legalizer.start_new_cluster(seed_mol, + * cluster_type, + * mode); + * for mol in molecules_to_add: + * // Do the expensive check for each molecule added. + * status = legalizer.add_mol_to_cluster(mol, new_cluster_id); + * if (status != e_block_pack_status::BLK_PASSED) + * break; + * + * // new_cluster_id now contains a fully legalized cluster. + */ +class ClusterLegalizer { +public: + // Iterator for the legalization cluster IDs + typedef typename vtr::vector_map::const_iterator cluster_iterator; + + // Range for the legalization cluster IDs + typedef typename vtr::Range cluster_range; + +private: + + /* + * @brief Helper method that tries to pack the given molecule into a cluster. + * + * This method runs all the legality checks specified by the legalization + * strategy. If the molecule can be packed into the cluster, it will insert + * it into the cluster. + * + * @param molecule The molecule to insert into the cluster. + * @param cluster The cluster to try to insert the molecule into. + * @param cluster_id The ID of the cluster. + * @param max_external_pin_util The max external pin utilization for a + * cluster of this type. + */ + e_block_pack_status try_pack_molecule(t_pack_molecule* molecule, + LegalizationCluster& cluster, + LegalizationClusterId cluster_id, + const t_ext_pin_util& max_external_pin_util); + +public: + + // Explicitly deleted default constructor. Need to use other constructor to + // initialize state correctly. + ClusterLegalizer() = delete; + + /* + * @brief Initialize the ClusterLegalizer class. 
+ * + * Allocates internal state. + */ + ClusterLegalizer(const AtomNetlist& atom_netlist, + const Prepacker& prepacker, + const std::vector& logical_block_types, + std::vector* lb_type_rr_graphs, + size_t num_models, + const std::vector& target_external_pin_util_str, + const t_pack_high_fanout_thresholds& high_fanout_thresholds, + ClusterLegalizationStrategy cluster_legalization_strategy, + bool enable_pin_feasibility_filter, + int feasible_block_array_size, + int log_verbosity); + + // This class allocates and deallocates memory within. This class should not + // be copied or moved to prevent it from double freeing / losing pointers. + ClusterLegalizer(const ClusterLegalizer&) = delete; + ClusterLegalizer& operator=(const ClusterLegalizer&) = delete; + + /* + * @brief Start a new legalization cluster with the given molecule. + * + * @param molecule The seed molecule used to start the new cluster. + * @param cluster_type The type of the cluster to start. + * @param cluster_mode The mode of the new cluster for the given type. + * + * @return A tuple of the status of the packing and the ID of the new + * cluster. If the new cluster could not be created, the pack + * status gives the reason and the ID is invalid. + */ + std::tuple + start_new_cluster(t_pack_molecule* molecule, + t_logical_block_type_ptr cluster_type, + int cluster_mode); + + /* + * @brief Add an unclustered molecule to the given legalization cluster. + * + * If the addition was unsuccessful, the molecule will remain unclustered. + * + * @param molecule The molecule to add to the cluster. + * @param cluster_id The ID of the cluster to add the molecule to. + * + * @return The status of the pack (if the addition was successful and + * if not why). + */ + e_block_pack_status add_mol_to_cluster(t_pack_molecule* molecule, + LegalizationClusterId cluster_id); + + /* + * @brief Destroy the given cluster. 
+ * + * This unclusters all molecules in the cluster so they can be re-clustered + * into different clusters. Should call the compress() method after destroying + * one or more clusters. + * + * @param cluster_id The ID of the cluster to destroy. + */ + void destroy_cluster(LegalizationClusterId cluster_id); + + /* + * @brief Compress the internal storage of clusters. Should be called after + * a cluster is destroyed. + * + * Similar to the Netlist compress method. Will invalidate all Legalization + * Cluster IDs. + * + * This method can be quite expensive, so it is a good idea to batch many + * cluster destructions and then compress at the end. + */ + void compress(); + + /* + * @brief A list of all cluster IDs in the legalizer. + * + * If the legalizer has been compressed (or no clusters have been destroyed) + * then all cluster IDs in this list will be valid and represent a non-empty + * legalization cluster. + */ + cluster_range clusters() const { + return vtr::make_range(legalization_cluster_ids_.begin(), legalization_cluster_ids_.end()); + } + + /* + * @brief Check that the given cluster is fully legal. + * + * This method runs an intra_lb_route on the given cluster. This ignores + * the cluster legalization strategy set by the user. This method will not + * correct the problematic molecules; it will only return true if the + * cluster is legal and false if it is not. + * + * @param cluster_id The ID of the cluster to check. + * + * @return True if the cluster is legal, false otherwise. + */ + bool check_cluster_legality(LegalizationClusterId cluster_id); + + /* + * @brief Cleans the cluster of unnecessary data, reducing the memory footprint. + * + * After this function is called, no more molecules can be added to the + * cluster. This method will ensure that the cluster has enough information + * to generate a clustered netlist from the legalized clusters. 
+ * + * Specifically, this frees the pb stats (which is used by the clusterer + * to compute the gain) and the router data of the cluster. + * + * TODO: The pb stats should really not be calculated or stored in the + * cluster legalizer. + * + * @param cluster_id The ID of the cluster to clean. + */ + void clean_cluster(LegalizationClusterId cluster_id); + + /* + * @brief Verify that all atoms have been clustered into a cluster. + * + * This will not verify if all the clusters are fully legal. + */ + void verify(); + + /* + * @brief Finalize the clustering. + * + * Before generating a Clustered Netlist, each cluster needs to allocate and + * load a pb_route. This method will generate a pb_route for each cluster + * and store it into the clusters' pb. + */ + void finalize(); + + /* + * @brief Resets the legalizer to its initial state. + * + * Destroys all clusters and resets the cluster placement stats. + */ + void reset(); + + /// @brief Gets the top-level pb of the given cluster. + inline t_pb* get_cluster_pb(LegalizationClusterId cluster_id) const { + VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + const LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + return cluster.pb; + } + + /// @brief Gets the logical block type of the given cluster. + inline t_logical_block_type_ptr get_cluster_type(LegalizationClusterId cluster_id) const { + VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + const LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + return cluster.type; + } + + /// @brief Gets the current partition region (the intersection of all + /// contained atoms) of the given cluster. 
+ inline const PartitionRegion& get_cluster_pr(LegalizationClusterId cluster_id) const { + VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + const LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + return cluster.pr; + } + + /// @brief Gets the ID of the cluster that contains the given atom block. + inline LegalizationClusterId get_atom_cluster(AtomBlockId blk_id) const { + VTR_ASSERT_SAFE(blk_id.is_valid() && (size_t)blk_id < atom_cluster_.size()); + return atom_cluster_[blk_id]; + } + + /// @brief Gets the cluster placement stats of the given cluster. + inline t_cluster_placement_stats* get_cluster_placement_stats(LegalizationClusterId cluster_id) const { + VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + return &(cluster_placement_stats_[get_cluster_type(cluster_id)->index]); + } + + /// @brief Returns true if the given atom block has been packed into a + /// cluster, false otherwise. + inline bool is_atom_clustered(AtomBlockId blk_id) const { + // Simply, if the atom is not in an invalid cluster, it has been clustered. + return get_atom_cluster(blk_id) != LegalizationClusterId::INVALID(); + } + + /// @brief Returns a reference to the target_external_pin_util object. This + /// allows the user to modify the external pin utilization if needed. + inline t_ext_pin_util_targets& get_target_external_pin_util() { + return target_external_pin_util_; + } + + /// @brief Gets the max size a cluster could physically be. + inline size_t get_max_cluster_size() const { + return max_cluster_size_; + } + + /* + * @brief Set the legalization strategy of the cluster legalizer. + * + * This allows the strategy of the cluster legalizer to change based on the + * needs of the user. For example, one can set the legalizer to use a more + * relaxed strategy to insert a batch of molecules in cheaply, saving the + * full legalization for the end (using check_cluster_legality). 
+ * + * @param strategy The strategy to set the cluster legalizer to. + */ + inline void set_legalization_strategy(ClusterLegalizationStrategy strategy) { + cluster_legalization_strategy_ = strategy; + } + + /* + * @brief Set how verbose the log messages should be for the cluster legalizer. + * + * This allows the user to set the verbosity at different points for easier + * usability. + * + * Set the verbosity to 4 to see most of the log messages on how the + * molecules move through the legalizer. + * Set the verbosity to 5 to see all the log messages in the legalizer. + * + * @param verbosity The value to set the verbosity to. + */ + inline void set_log_verbosity(int verbosity) { + log_verbosity_ = verbosity; + } + + /// @brief Destructor of the class. Frees allocated data. + ~ClusterLegalizer(); + +private: + /// @brief A vector of the legalization cluster IDs. If any of them are + /// invalid, then that means that the cluster has been destroyed. + vtr::vector_map legalization_cluster_ids_; + + /// @brief Lookup table for which cluster each molecule is in. + std::unordered_map molecule_cluster_; + + /// @brief List of all legalization clusters. + vtr::vector_map legalization_clusters_; + + /// @brief A lookup-table for which cluster the given atom is packed into. + vtr::vector_map atom_cluster_; + + /// @brief Stores the NoC group ID of each atom block. Atom blocks that + /// belong to different NoC groups can't be clustered with each other + /// into the same clustered block. + vtr::vector atom_noc_grp_id_; + + /// @brief Stats keeper for placement information during packing/clustering. + /// TODO: This should be a vector. + t_cluster_placement_stats* cluster_placement_stats_ = nullptr; + + /// @brief The utilization of external input/output pins during packing + /// (between 0 and 1). + t_ext_pin_util_targets target_external_pin_util_; + + /// @brief The max size of any molecule. 
This is used to allocate a dynamic + /// array within the legalizer, and in its current form this is a bit + /// expensive to calculate from the prepacker. + size_t max_molecule_size_; + + /// @brief The max size a cluster could physically be. This is used to + /// allocate dynamic arrays. + size_t max_cluster_size_; + + /// @brief A vector of routing resource nodes within each logical block type + /// [0 .. num_logical_block_types-1] + /// TODO: This really should not be a pointer to a vector... I think this is + /// meant to be a vector of vectors... + std::vector* lb_type_rr_graphs_ = nullptr; + + /// @brief The total number of models (user + library) in the architecture. + /// Used to allocate space in dynamic data structures. + size_t num_models_; + + /// @brief The current legalization strategy of the cluster legalizer. + ClusterLegalizationStrategy cluster_legalization_strategy_; + + /// @brief Controls whether the pin counting feasibility filter is used + /// during clustering. When enabled the clustering engine counts the + /// number of available pins in groups/classes of mutually connected + /// pins within a cluster. These counts are used to quickly filter + /// out candidate primitives/atoms/molecules for which the cluster + /// has insufficient pins to route (without performing a full + /// routing). This reduces packing run-time. This matches the packer + /// option of the same name. + bool enable_pin_feasibility_filter_; + + /// @brief The max size of the priority queue for candidates that pass the + /// early filter legality test but not the more detailed routing + /// filter. This matches the packer option of the same name. + int feasible_block_array_size_; + + /// @brief Used to set the verbosity of log messages in the legalizer. Used + /// for debugging. When log_verbosity > 3, the legalizer will print + /// messages when a molecule is successful during legalization. 
When + /// log_verbosity is > 4, the legalizer will print when a molecule + /// fails a legality check. This parameter is also passed into the + /// intra-lb router. + int log_verbosity_; + + /// @brief The prepacker object that stores the molecules which will be + /// legalized into clusters. + const Prepacker& prepacker_; +}; + diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index 8fd0bcfa56f..3ae20f169c8 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -1,10 +1,11 @@ #include "cluster_util.h" #include +#include #include "PreClusterTimingGraphResolver.h" #include "PreClusterDelayCalculator.h" #include "atom_netlist.h" -#include "cluster_router.h" +#include "cluster_legalizer.h" #include "cluster_placement.h" #include "concrete_timing_info.h" #include "output_clustering.h" @@ -19,48 +20,8 @@ /* Global variables in clustering */ /**********************************/ -/* TODO: May want to check that all atom blocks are actually reached */ -static void check_cluster_atom_blocks(t_pb* pb, std::unordered_set& blocks_checked) { - int i, j; - const t_pb_type* pb_type; - bool has_child = false; - auto& atom_ctx = g_vpr_ctx.atom(); - - pb_type = pb->pb_graph_node->pb_type; - if (pb_type->num_modes == 0) { - /* primitive */ - auto blk_id = atom_ctx.lookup.pb_atom(pb); - if (blk_id) { - if (blocks_checked.count(blk_id)) { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "pb %s contains atom block %s but atom block is already contained in another pb.\n", - pb->name, atom_ctx.nlist.block_name(blk_id).c_str()); - } - blocks_checked.insert(blk_id); - if (pb != atom_ctx.lookup.atom_pb(blk_id)) { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "pb %s contains atom block %s but atom block does not link to pb.\n", - pb->name, atom_ctx.nlist.block_name(blk_id).c_str()); - } - } - } else { - /* this is a container pb, all container pbs must contain children */ - for (i = 0; i < pb_type->modes[pb->mode].num_pb_type_children; i++) { - for (j = 0; j < 
pb_type->modes[pb->mode].pb_type_children[i].num_pb; j++) { - if (pb->child_pbs[i] != nullptr) { - if (pb->child_pbs[i][j].name != nullptr) { - has_child = true; - check_cluster_atom_blocks(&pb->child_pbs[i][j], blocks_checked); - } - } - } - } - VTR_ASSERT(has_child); - } -} - /*Print the contents of each cluster to an echo file*/ -static void echo_clusters(char* filename) { +static void echo_clusters(char* filename, const ClusterLegalizer& cluster_legalizer) { FILE* fp; fp = vtr::fopen(filename, "w"); @@ -70,22 +31,21 @@ static void echo_clusters(char* filename) { fprintf(fp, "\n"); auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - std::map> cluster_atoms; + std::map> cluster_atoms; - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - cluster_atoms.insert({blk_id, std::vector()}); + for (LegalizationClusterId cluster_id : cluster_legalizer.clusters()) { + cluster_atoms.insert({cluster_id, std::vector()}); } for (auto atom_blk_id : atom_ctx.nlist.blocks()) { - ClusterBlockId clb_index = atom_ctx.lookup.atom_clb(atom_blk_id); + LegalizationClusterId cluster_id = cluster_legalizer.get_atom_cluster(atom_blk_id); - cluster_atoms[clb_index].push_back(atom_blk_id); + cluster_atoms[cluster_id].push_back(atom_blk_id); } for (auto& cluster_atom : cluster_atoms) { - const std::string& cluster_name = cluster_ctx.clb_nlist.block_name(cluster_atom.first); + const std::string& cluster_name = cluster_legalizer.get_cluster_pb(cluster_atom.first)->name; fprintf(fp, "Cluster %s Id: %zu \n", cluster_name.c_str(), size_t(cluster_atom.first)); fprintf(fp, "\tAtoms in cluster: \n"); @@ -98,12 +58,11 @@ static void echo_clusters(char* filename) { } fprintf(fp, "\nCluster Floorplanning Constraints:\n"); - const auto& floorplanning_ctx = g_vpr_ctx.floorplanning(); - for (ClusterBlockId clb_id : cluster_ctx.clb_nlist.blocks()) { - const std::vector& regions = floorplanning_ctx.cluster_constraints[clb_id].get_regions(); + for (LegalizationClusterId 
cluster_id : cluster_legalizer.clusters()) { + const std::vector& regions = cluster_legalizer.get_cluster_pr(cluster_id).get_regions(); if (!regions.empty()) { - fprintf(fp, "\nRegions in Cluster %zu:\n", size_t(clb_id)); + fprintf(fp, "\nRegions in Cluster %zu:\n", size_t(cluster_id)); for (const auto& region : regions) { print_region(fp, region); } @@ -113,73 +72,6 @@ static void echo_clusters(char* filename) { fclose(fp); } -/* TODO: Add more error checking! */ -void check_clustering() { - std::unordered_set atoms_checked; - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - - if (cluster_ctx.clb_nlist.blocks().size() == 0) { - VTR_LOG_WARN("Packing produced no clustered blocks"); - } - - /* - * Check that each atom block connects to one physical primitive and that the primitive links up to the parent clb - */ - for (auto blk_id : atom_ctx.nlist.blocks()) { - //Each atom should be part of a pb - const t_pb* atom_pb = atom_ctx.lookup.atom_pb(blk_id); - if (!atom_pb) { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "Atom block %s is not mapped to a pb\n", - atom_ctx.nlist.block_name(blk_id).c_str()); - } - - //Check the reverse mapping is consistent - if (atom_ctx.lookup.pb_atom(atom_pb) != blk_id) { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "pb %s does not contain atom block %s but atom block %s maps to pb.\n", - atom_pb->name, - atom_ctx.nlist.block_name(blk_id).c_str(), - atom_ctx.nlist.block_name(blk_id).c_str()); - } - - VTR_ASSERT(atom_ctx.nlist.block_name(blk_id) == atom_pb->name); - - const t_pb* cur_pb = atom_pb; - while (cur_pb->parent_pb) { - cur_pb = cur_pb->parent_pb; - VTR_ASSERT(cur_pb->name); - } - - ClusterBlockId clb_index = atom_ctx.lookup.atom_clb(blk_id); - if (clb_index == ClusterBlockId::INVALID()) { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "Atom %s is not mapped to a CLB\n", - atom_ctx.nlist.block_name(blk_id).c_str()); - } - - if (cur_pb != cluster_ctx.clb_nlist.block_pb(clb_index)) { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "CLB %s 
does not match CLB contained by pb %s.\n", - cur_pb->name, atom_pb->name); - } - } - - /* Check that I do not have spurious links in children pbs */ - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - check_cluster_atom_blocks(cluster_ctx.clb_nlist.block_pb(blk_id), atoms_checked); - } - - for (auto blk_id : atom_ctx.nlist.blocks()) { - if (!atoms_checked.count(blk_id)) { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "Atom block %s not found in any cluster.\n", - atom_ctx.nlist.block_name(blk_id).c_str()); - } - } -} - //calculate the initial timing at the start of packing stage void calc_init_packing_timing(const t_packer_opts& packer_opts, const t_analysis_opts& analysis_opts, @@ -187,7 +79,7 @@ void calc_init_packing_timing(const t_packer_opts& packer_opts, std::shared_ptr& clustering_delay_calc, std::shared_ptr& timing_info, vtr::vector& atom_criticality) { - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); /* * Initialize the timing analyzer @@ -236,88 +128,32 @@ void calc_init_packing_timing(const t_packer_opts& packer_opts, //Free the clustering data structures void free_clustering_data(const t_packer_opts& packer_opts, t_clustering_data& clustering_data) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) - free_intra_lb_nets(clustering_data.intra_lb_routing[blk_id]); - - clustering_data.intra_lb_routing.clear(); if (packer_opts.hill_climbing_flag) delete[] clustering_data.hill_climbing_inputs_avail; - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) - cluster_ctx.clb_nlist.remove_block(blk_id); - - cluster_ctx.clb_nlist = ClusteredNetlist(); - delete[] clustering_data.unclustered_list_head; delete[] clustering_data.memory_pool; } //check the clustering and output it -void check_and_output_clustering(const t_packer_opts& packer_opts, +void check_and_output_clustering(ClusterLegalizer& cluster_legalizer, + const t_packer_opts& packer_opts, const std::unordered_set& 
is_clock, - const t_arch* arch, - const int& num_clb, - const vtr::vector*>& intra_lb_routing) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - VTR_ASSERT(num_clb == (int)cluster_ctx.clb_nlist.blocks().size()); - check_clustering(); + const t_arch* arch) { + cluster_legalizer.verify(); if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_CLUSTERS)) { - echo_clusters(getEchoFileName(E_ECHO_CLUSTERS)); - } - - output_clustering(intra_lb_routing, packer_opts.global_clocks, is_clock, arch->architecture_id, packer_opts.output_file.c_str(), false); - - VTR_ASSERT(cluster_ctx.clb_nlist.blocks().size() == intra_lb_routing.size()); -} - -void get_max_cluster_size_and_pb_depth(int& max_cluster_size, - int& max_pb_depth) { - auto& device_ctx = g_vpr_ctx.mutable_device(); - int cur_cluster_size, cur_pb_depth; - - for (const auto& type : device_ctx.logical_block_types) { - if (is_empty_type(&type)) - continue; - - cur_cluster_size = get_max_primitives_in_pb_type(type.pb_type); - cur_pb_depth = get_max_depth_of_pb_type(type.pb_type); - if (cur_cluster_size > max_cluster_size) { - max_cluster_size = cur_cluster_size; - } - if (cur_pb_depth > max_pb_depth) { - max_pb_depth = cur_pb_depth; - } + echo_clusters(getEchoFileName(E_ECHO_CLUSTERS), cluster_legalizer); } -} -bool check_cluster_legality(const int& verbosity, - const int& detailed_routing_stage, - t_lb_router_data* router_data) { - bool is_cluster_legal; - - if (detailed_routing_stage == (int)E_DETAILED_ROUTE_AT_END_ONLY) { - /* is_mode_conflict does not affect this stage. It is needed when trying to route the packed clusters. - * - * It holds a flag that is used to verify whether try_intra_lb_route ended in a mode conflict issue. 
- * If the value is TRUE the cluster has to be repacked, and its internal pb_graph_nodes will have more restrict choices - * for what regards the mode that has to be selected - */ - t_mode_selection_status mode_status; - is_cluster_legal = try_intra_lb_route(router_data, verbosity, &mode_status); - if (is_cluster_legal) { - VTR_LOGV(verbosity > 2, "\tPassed route at end.\n"); - } else { - VTR_LOGV(verbosity > 0, "Failed route at end, repack cluster trying detailed routing at each stage.\n"); - } - } else { - is_cluster_legal = true; - } - return is_cluster_legal; + output_clustering(&cluster_legalizer, + packer_opts.global_clocks, + is_clock, + arch->architecture_id, + packer_opts.output_file.c_str(), + false, /*skip_clustering*/ + true /*from_legalizer*/); } /*print the header for the clustering progress table*/ @@ -335,7 +171,8 @@ void print_pack_status(int num_clb, int& mols_since_last_print, int device_width, int device_height, - AttractionInfo& attraction_groups) { + AttractionInfo& attraction_groups, + const ClusterLegalizer& cluster_legalizer) { //Print a packing update each time another 4% of molecules have been packed. const float print_frequency = 0.04; @@ -361,7 +198,7 @@ void print_pack_status(int num_clb, fflush(stdout); mols_since_last_print = 0; if (attraction_groups.num_attraction_groups() > 0) { - rebuild_attraction_groups(attraction_groups); + rebuild_attraction_groups(attraction_groups, cluster_legalizer); } } } @@ -371,8 +208,8 @@ void print_pack_status(int num_clb, * are still available for new clusters (i.e. remove the atoms that have already * been packed from the attraction group). 
*/ -void rebuild_attraction_groups(AttractionInfo& attraction_groups) { - auto& atom_ctx = g_vpr_ctx.atom(); +void rebuild_attraction_groups(AttractionInfo& attraction_groups, + const ClusterLegalizer& cluster_legalizer) { for (int igroup = 0; igroup < attraction_groups.num_attraction_groups(); igroup++) { AttractGroupId group_id(igroup); @@ -380,8 +217,7 @@ void rebuild_attraction_groups(AttractionInfo& attraction_groups) { AttractionGroup new_att_group_info; for (AtomBlockId atom : group.group_atoms) { - //If the ClusterBlockId is anything other than invalid, the atom has been packed already - if (atom_ctx.lookup.atom_clb(atom) == ClusterBlockId::INVALID()) { + if (!cluster_legalizer.is_atom_clustered(atom)) { new_att_group_info.group_atoms.push_back(atom); } } @@ -392,7 +228,7 @@ void rebuild_attraction_groups(AttractionInfo& attraction_groups) { /* Determine if atom block is in pb */ bool is_atom_blk_in_pb(const AtomBlockId blk_id, const t_pb* pb) { - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); const t_pb* cur_pb = atom_ctx.lookup.atom_pb(blk_id); while (cur_pb) { @@ -502,8 +338,6 @@ void add_molecule_to_pb_stats_candidates(t_pack_molecule* molecule, /*****************************************/ void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, - t_cluster_placement_stats** cluster_placement_stats, - t_pb_graph_node*** primitives_list, const Prepacker& prepacker, t_clustering_data& clustering_data, std::unordered_map& net_output_feeds_driving_block_input, @@ -560,147 +394,10 @@ void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, } } } - - /* alloc and load cluster placement info */ - *cluster_placement_stats = alloc_and_load_cluster_placement_stats(); - - /* alloc array that will store primitives that a molecule gets placed to, - * primitive_list is referenced by index, for example a atom block in index 2 of a molecule matches to a primitive in index 2 in primitive_list - * 
this array must be the size of the biggest molecule - */ - size_t max_molecule_size = prepacker.get_max_molecule_size(); - *primitives_list = new t_pb_graph_node*[max_molecule_size]; - for (size_t i = 0; i < max_molecule_size; i++) - (*primitives_list)[i] = nullptr; } /*****************************************/ -void free_pb_stats_recursive(t_pb* pb) { - int i, j; - /* Releases all the memory used by clustering data structures. */ - if (pb) { - if (pb->pb_graph_node != nullptr) { - if (!pb->pb_graph_node->is_primitive()) { - for (i = 0; i < pb->pb_graph_node->pb_type->modes[pb->mode].num_pb_type_children; i++) { - for (j = 0; j < pb->pb_graph_node->pb_type->modes[pb->mode].pb_type_children[i].num_pb; j++) { - if (pb->child_pbs && pb->child_pbs[i]) { - free_pb_stats_recursive(&pb->child_pbs[i][j]); - } - } - } - } - } - free_pb_stats(pb); - } -} - -bool primitive_feasible(const AtomBlockId blk_id, t_pb* cur_pb) { - const t_pb_type* cur_pb_type = cur_pb->pb_graph_node->pb_type; - - VTR_ASSERT(cur_pb_type->num_modes == 0); /* primitive */ - - auto& atom_ctx = g_vpr_ctx.atom(); - AtomBlockId cur_pb_blk_id = atom_ctx.lookup.pb_atom(cur_pb); - if (cur_pb_blk_id && cur_pb_blk_id != blk_id) { - /* This pb already has a different logical block */ - return false; - } - if (cur_pb_type->class_type == MEMORY_CLASS) { - /* Memory class has additional feasibility requirements: - * - all siblings must share all nets, including open nets, with the exception of data nets */ - - /* find sibling if one exists */ - AtomBlockId sibling_memory_blk_id = find_memory_sibling(cur_pb); - - if (sibling_memory_blk_id) { - //There is a sibling, see if the current block is feasible with it - bool sibling_feasible = primitive_memory_sibling_feasible(blk_id, cur_pb_type, sibling_memory_blk_id); - if (!sibling_feasible) { - return false; - } - } - } - - //Generic feasibility check - return primitive_type_feasible(blk_id, cur_pb_type); -} - -bool primitive_memory_sibling_feasible(const AtomBlockId 
blk_id, const t_pb_type* cur_pb_type, const AtomBlockId sibling_blk_id) { - /* Check that the two atom blocks blk_id and sibling_blk_id (which should both be memory slices) - * are feasible, in the sence that they have precicely the same net connections (with the - * exception of nets in data port classes). - * - * Note that this routine does not check pin feasibility against the cur_pb_type; so - * primitive_type_feasible() should also be called on blk_id before concluding it is feasible. - */ - auto& atom_ctx = g_vpr_ctx.atom(); - VTR_ASSERT(cur_pb_type->class_type == MEMORY_CLASS); - - //First, identify the 'data' ports by looking at the cur_pb_type - std::unordered_set data_ports; - for (int iport = 0; iport < cur_pb_type->num_ports; ++iport) { - const char* port_class = cur_pb_type->ports[iport].port_class; - if (port_class && strstr(port_class, "data") == port_class) { - //The port_class starts with "data", so it is a data port - - //Record the port - data_ports.insert(cur_pb_type->ports[iport].model_port); - } - } - - //Now verify that all nets (except those connected to data ports) are equivalent - //between blk_id and sibling_blk_id - - //Since the atom netlist stores only in-use ports, we iterate over the model to ensure - //all ports are compared - const t_model* model = cur_pb_type->model; - for (t_model_ports* port : {model->inputs, model->outputs}) { - for (; port; port = port->next) { - if (data_ports.count(port)) { - //Don't check data ports - continue; - } - - //Note: VPR doesn't support multi-driven nets, so all outputs - //should be data ports, otherwise the siblings will both be - //driving the output net - - //Get the ports from each primitive - auto blk_port_id = atom_ctx.nlist.find_atom_port(blk_id, port); - auto sib_port_id = atom_ctx.nlist.find_atom_port(sibling_blk_id, port); - - //Check that all nets (including unconnected nets) match - for (int ipin = 0; ipin < port->size; ++ipin) { - //The nets are initialized as invalid (i.e. 
disconnected) - AtomNetId blk_net_id; - AtomNetId sib_net_id; - - //We can get the actual net provided the port exists - // - //Note that if the port did not exist, the net is left - //as invalid/disconneced - if (blk_port_id) { - blk_net_id = atom_ctx.nlist.port_net(blk_port_id, ipin); - } - if (sib_port_id) { - sib_net_id = atom_ctx.nlist.port_net(sib_port_id, ipin); - } - - //The sibling and block must have the same (possibly disconnected) - //net on this pin - if (blk_net_id != sib_net_id) { - //Nets do not match, not feasible - return false; - } - } - } - } - - return true; -} - -/*****************************************/ t_pack_molecule* get_molecule_by_num_ext_inputs(const int ext_inps, const enum e_removal_policy remove_flag, t_cluster_placement_stats* cluster_placement_stats_ptr, @@ -708,8 +405,7 @@ t_pack_molecule* get_molecule_by_num_ext_inputs(const int ext_inps, /* This routine returns an atom block which has not been clustered, has * * no connection to the current cluster, satisfies the cluster * * clock constraints, is a valid subblock inside the cluster, does not exceed the cluster subblock units available, - * and has ext_inps external inputs. If * - * there is no such atom block it returns ClusterBlockId::INVALID(). Remove_flag * + * and has ext_inps external inputs. Remove_flag * * controls whether or not blocks that have already been clustered * * are removed from the unclustered_list data structures. NB: * * to get a atom block regardless of clock constraints just set clocks_ * @@ -764,7 +460,7 @@ t_pack_molecule* get_free_molecule_with_most_ext_inputs_for_cluster(t_pb* cur_pb * blocks which are unconnected from the current cluster). It returns * * the atom block with the largest number of used inputs that satisfies the * * clocking and number of inputs constraints. If no suitable atom block is * - * found, the routine returns ClusterBlockId::INVALID(). + * found, the routine returns nullptr. 
* TODO: Analyze if this function is useful in more detail, also, should probably not include clock in input count */ @@ -790,684 +486,30 @@ t_pack_molecule* get_free_molecule_with_most_ext_inputs_for_cluster(t_pb* cur_pb } /*****************************************/ -void alloc_and_load_pb_stats(t_pb* pb, const int feasible_block_array_size) { - /* Call this routine when starting to fill up a new cluster. It resets * - * the gain vector, etc. */ - - pb->pb_stats = new t_pb_stats; - - /* If statement below is for speed. If nets are reasonably low-fanout, * - * only a relatively small number of blocks will be marked, and updating * - * only those atom block structures will be fastest. If almost all blocks * - * have been touched it should be faster to just run through them all * - * in order (less addressing and better cache locality). */ - pb->pb_stats->input_pins_used = std::vector>(pb->pb_graph_node->num_input_pin_class); - pb->pb_stats->output_pins_used = std::vector>(pb->pb_graph_node->num_output_pin_class); - pb->pb_stats->lookahead_input_pins_used = std::vector>(pb->pb_graph_node->num_input_pin_class); - pb->pb_stats->lookahead_output_pins_used = std::vector>(pb->pb_graph_node->num_output_pin_class); - pb->pb_stats->num_feasible_blocks = NOT_VALID; - pb->pb_stats->feasible_blocks = new t_pack_molecule*[feasible_block_array_size]; - - for (int i = 0; i < feasible_block_array_size; i++) - pb->pb_stats->feasible_blocks[i] = nullptr; - - pb->pb_stats->tie_break_high_fanout_net = AtomNetId::INVALID(); - - pb->pb_stats->pulled_from_atom_groups = 0; - pb->pb_stats->num_att_group_atoms_used = 0; - - pb->pb_stats->gain.clear(); - pb->pb_stats->timinggain.clear(); - pb->pb_stats->connectiongain.clear(); - pb->pb_stats->sharinggain.clear(); - pb->pb_stats->hillgain.clear(); - pb->pb_stats->transitive_fanout_candidates.clear(); - - pb->pb_stats->num_pins_of_net_in_pb.clear(); - - pb->pb_stats->num_child_blocks_in_pb = 0; - - pb->pb_stats->explore_transitive_fanout = true; 
-} -/*****************************************/ - -/** - * Cleans up a pb after unsuccessful molecule packing - * - * Recursively frees pbs from a t_pb tree. The given root pb itself is not - * deleted. - * - * If a pb object has its children allocated then before freeing them the - * function checks if there is no atom that corresponds to any of them. The - * check is performed only for leaf (primitive) pbs. The function recurses for - * non-primitive pbs. - * - * The cleaning itself includes deleting all child pbs, resetting mode of the - * pb and also freeing its name. This prepares the pb for another round of - * molecule packing tryout. - */ -bool cleanup_pb(t_pb* pb) { - bool can_free = true; - - /* Recursively check if there are any children with already assigned atoms */ - if (pb->child_pbs != nullptr) { - const t_mode* mode = &pb->pb_graph_node->pb_type->modes[pb->mode]; - VTR_ASSERT(mode != nullptr); - - /* Check each mode */ - for (int i = 0; i < mode->num_pb_type_children; ++i) { - /* Check each child */ - if (pb->child_pbs[i] != nullptr) { - for (int j = 0; j < mode->pb_type_children[i].num_pb; ++j) { - t_pb* pb_child = &pb->child_pbs[i][j]; - t_pb_type* pb_type = pb_child->pb_graph_node->pb_type; - - /* Primitive, check occupancy */ - if (pb_type->num_modes == 0) { - if (pb_child->name != nullptr) { - can_free = false; - } - } - - /* Non-primitive, recurse */ - else { - if (!cleanup_pb(pb_child)) { - can_free = false; - } - } - } - } - } - - /* Free if can */ - if (can_free) { - for (int i = 0; i < mode->num_pb_type_children; ++i) { - if (pb->child_pbs[i] != nullptr) { - delete[] pb->child_pbs[i]; - } - } - - delete[] pb->child_pbs; - pb->child_pbs = nullptr; - pb->mode = 0; - - if (pb->name) { - free(pb->name); - pb->name = nullptr; - } - } - } - - return can_free; -} - -/** - * Performs legality checks to see whether the selected molecule can be - * packed into the current cluster. 
The legality checks are related to - * floorplanning, pin feasibility, and routing (if detailed route - * checking is enabled). The routine returns BLK_PASSED if the molecule - * can be packed in the cluster. If the block passes, the routine commits - * it to the current cluster and updates the appropriate data structures. - * Otherwise, it returns the appropriate failed pack status based on which - * legality check the molecule failed. - */ -e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_placement_stats_ptr, - t_pack_molecule* molecule, - t_pb_graph_node** primitives_list, - t_pb* pb, - int max_models, - int max_cluster_size, - ClusterBlockId clb_index, - int detailed_routing_stage, - t_lb_router_data* router_data, - int verbosity, - bool enable_pin_feasibility_filter, - int feasible_block_array_size, - t_ext_pin_util max_external_pin_util, - PartitionRegion& temp_cluster_pr, - NocGroupId& temp_noc_grp_id, - int force_site) { - t_pb* parent; - t_pb* cur_pb; - - const auto& atom_ctx = g_vpr_ctx.atom(); - auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); - parent = nullptr; - - const int molecule_size = get_array_size_of_molecule(molecule); - - if (verbosity > 3) { - AtomBlockId root_atom = molecule->atom_block_ids[molecule->root]; - VTR_LOG("\t\tTry pack molecule: '%s' (%s)", - atom_ctx.nlist.block_name(root_atom).c_str(), - atom_ctx.nlist.block_model(root_atom)->name); - VTR_LOGV(molecule->pack_pattern, - " molecule_type %s molecule_size %zu", - molecule->pack_pattern->name, - molecule->atom_block_ids.size()); - VTR_LOG("\n"); - } - - // if this cluster has a molecule placed in it that is part of a long chain - // (a chain that consists of more than one molecule), don't allow more long chain - // molecules to be placed in this cluster. To avoid possibly creating cluster level - // blocks that have incompatible placement constraints or form very long placement - // macros that limit placement flexibility. 
- if (cluster_placement_stats_ptr->has_long_chain && molecule->is_chain() && molecule->chain_info->is_long_chain) { - VTR_LOGV(verbosity > 4, "\t\t\tFAILED Placement Feasibility Filter: Only one long chain per cluster is allowed\n"); - //Record the failure of this molecule in the current pb stats - record_molecule_failure(molecule, pb); - return e_block_pack_status::BLK_FAILED_FEASIBLE; - } - - bool cluster_pr_update_check = false; - - //check if every atom in the molecule is legal in the cluster from a floorplanning perspective - for (int i_mol = 0; i_mol < molecule_size; i_mol++) { - //try to intersect with atom PartitionRegion if atom exists - if (molecule->atom_block_ids[i_mol]) { - bool cluster_pr_needs_update = false; - bool block_pack_floorplan_status = atom_cluster_floorplanning_check(molecule->atom_block_ids[i_mol], - clb_index, verbosity, - temp_cluster_pr, - cluster_pr_needs_update); - - if (!block_pack_floorplan_status) { - //Record the failure of this molecule in the current pb stats - record_molecule_failure(molecule, pb); - return e_block_pack_status::BLK_FAILED_FLOORPLANNING; - } - - if (cluster_pr_needs_update) { - cluster_pr_update_check = true; - } - } - } - - // check if all atoms in the molecule can be added to the cluster without NoC group conflicts - for (int i_mol = 0; i_mol < molecule_size; i_mol++) { - if (molecule->atom_block_ids[i_mol]) { - bool block_pack_noc_grp_status = atom_cluster_noc_group_check(molecule->atom_block_ids[i_mol], - clb_index, verbosity, - temp_noc_grp_id); - - if (!block_pack_noc_grp_status) { - //Record the failure of this molecule in the current pb stats - record_molecule_failure(molecule, pb); - return e_block_pack_status::BLK_FAILED_NOC_GROUP; - } - } - } - - e_block_pack_status block_pack_status = e_block_pack_status::BLK_STATUS_UNDEFINED; - - while (block_pack_status != e_block_pack_status::BLK_PASSED) { - if (get_next_primitive_list(cluster_placement_stats_ptr, molecule, - primitives_list, force_site)) { - 
block_pack_status = e_block_pack_status::BLK_PASSED; - - int failed_location = 0; - - for (int i_mol = 0; i_mol < molecule_size && block_pack_status == e_block_pack_status::BLK_PASSED; i_mol++) { - VTR_ASSERT((primitives_list[i_mol] == nullptr) == (!molecule->atom_block_ids[i_mol])); - failed_location = i_mol + 1; - // try place atom block if it exists - if (molecule->atom_block_ids[i_mol]) { - block_pack_status = try_place_atom_block_rec(primitives_list[i_mol], - molecule->atom_block_ids[i_mol], pb, &parent, - max_models, max_cluster_size, clb_index, - cluster_placement_stats_ptr, molecule, router_data, - verbosity, feasible_block_array_size); - } - } - - if (enable_pin_feasibility_filter && block_pack_status == e_block_pack_status::BLK_PASSED) { - /* Check if pin usage is feasible for the current packing assignment */ - reset_lookahead_pins_used(pb); - try_update_lookahead_pins_used(pb); - if (!check_lookahead_pins_used(pb, max_external_pin_util)) { - VTR_LOGV(verbosity > 4, "\t\t\tFAILED Pin Feasibility Filter\n"); - block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; - } - } - - if (block_pack_status == e_block_pack_status::BLK_PASSED) { - /* - * during the clustering step of `do_clustering`, `detailed_routing_stage` is incremented at each iteration until it a cluster - * is correctly generated or `detailed_routing_stage` assumes an invalid value (E_DETAILED_ROUTE_INVALID). - * depending on its value we have different behaviors: - * - E_DETAILED_ROUTE_AT_END_ONLY: Skip routing if heuristic is to route at the end of packing complex block. - * - E_DETAILED_ROUTE_FOR_EACH_ATOM: Try to route if heuristic is to route for every atom. If the clusterer arrives at this stage, - * it means that more checks have to be performed as the previous stage failed to generate a new cluster. - * - * mode_status is a data structure containing the status of the mode selection. 
Its members are: - * - bool is_mode_conflict - * - bool try_expand_all_modes - * - bool expand_all_modes - * - * is_mode_conflict affects this stage. Its value determines whether the cluster failed to pack after a mode conflict issue. - * It holds a flag that is used to verify whether try_intra_lb_route ended in a mode conflict issue. - * - * Until is_mode_conflict is set to FALSE by try_intra_lb_route, the loop re-iterates. If all the available modes are exhausted - * an error will be thrown during mode conflicts checks (this to prevent infinite loops). - * - * If the value is TRUE the cluster has to be re-routed, and its internal pb_graph_nodes will have more restrict choices - * for what regards the mode that has to be selected. - * - * is_mode_conflict is initially set to TRUE, and, unless a mode conflict is found, it is set to false in `try_intra_lb_route`. - * - * try_expand_all_modes is set if the node expansion failed to find a valid routing path. The clusterer tries to find another route - * by using all the modes during node expansion. - * - * expand_all_modes is used to enable the expansion of all the nodes using all the possible modes. 
- */ - t_mode_selection_status mode_status; - bool is_routed = false; - bool do_detailed_routing_stage = detailed_routing_stage == (int)E_DETAILED_ROUTE_FOR_EACH_ATOM; - if (do_detailed_routing_stage) { - do { - reset_intra_lb_route(router_data); - is_routed = try_intra_lb_route(router_data, verbosity, &mode_status); - } while (do_detailed_routing_stage && mode_status.is_mode_issue()); - } - - if (do_detailed_routing_stage && !is_routed) { - /* Cannot pack */ - VTR_LOGV(verbosity > 4, "\t\t\tFAILED Detailed Routing Legality\n"); - block_pack_status = e_block_pack_status::BLK_FAILED_ROUTE; - } else { - /* Pack successful, commit - * TODO: SW Engineering note - may want to update cluster stats here too instead of doing it outside - */ - VTR_ASSERT(block_pack_status == e_block_pack_status::BLK_PASSED); - if (molecule->is_chain()) { - /* Chained molecules often take up lots of area and are important, - * if a chain is packed in, want to rename logic block to match chain name */ - AtomBlockId chain_root_blk_id = molecule->atom_block_ids[molecule->pack_pattern->root_block->block_id]; - cur_pb = atom_ctx.lookup.atom_pb(chain_root_blk_id)->parent_pb; - while (cur_pb != nullptr) { - free(cur_pb->name); - cur_pb->name = vtr::strdup(atom_ctx.nlist.block_name(chain_root_blk_id).c_str()); - cur_pb = cur_pb->parent_pb; - } - // if this molecule is part of a chain, mark the cluster as having a long chain - // molecule. Also check if it's the first molecule in the chain to be packed. 
- // If so, update the chain id for this chain of molecules to make sure all - // molecules will be packed to the same chain id and can reach each other using - // the chain direct links between clusters - if (molecule->chain_info->is_long_chain) { - cluster_placement_stats_ptr->has_long_chain = true; - if (molecule->chain_info->chain_id == -1) { - update_molecule_chain_info(molecule, primitives_list[molecule->root]); - } - } - } - - //update cluster PartitionRegion if atom with floorplanning constraints was added - if (cluster_pr_update_check) { - floorplanning_ctx.cluster_constraints[clb_index] = temp_cluster_pr; - VTR_LOGV(verbosity > 2, "\nUpdated PartitionRegion of cluster %d\n", clb_index); - } - - for (int i = 0; i < molecule_size; i++) { - if (molecule->atom_block_ids[i]) { - /* invalidate all molecules that share atom block with current molecule */ - t_pack_molecule* cur_molecule = atom_ctx.prepacker.get_atom_molecule(molecule->atom_block_ids[i]); - cur_molecule->valid = false; - - commit_primitive(cluster_placement_stats_ptr, primitives_list[i]); - } - } - } - } - - if (block_pack_status != e_block_pack_status::BLK_PASSED) { - for (int i = 0; i < failed_location; i++) { - if (molecule->atom_block_ids[i]) { - remove_atom_from_target(router_data, molecule->atom_block_ids[i]); - } - } - for (int i = 0; i < failed_location; i++) { - if (molecule->atom_block_ids[i]) { - revert_place_atom_block(molecule->atom_block_ids[i], router_data); - } - } - - //Record the failure of this molecule in the current pb stats - record_molecule_failure(molecule, pb); - - /* Packing failed, but a part of the pb tree is still allocated and pbs have their modes set. - * Before trying to pack next molecule the unused pbs need to be freed and, the most important, - * their modes reset. This task is performed by the cleanup_pb() function below. 
*/ - cleanup_pb(pb); - - } else { - VTR_LOGV(verbosity > 3, "\t\tPASSED pack molecule\n"); - } - } else { - VTR_LOGV(verbosity > 3, "\t\tFAILED No candidate primitives available\n"); - block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; - break; /* no more candidate primitives available, this molecule will not pack, return fail */ - } - } - return block_pack_status; -} - -/* Record the failure of the molecule in this cluster in the current pb stats. - * If a molecule fails repeatedly, it's gain will be penalized if packing with - * attraction groups on. */ -void record_molecule_failure(t_pack_molecule* molecule, t_pb* pb) { - //Only have to record the failure for the first atom in the molecule. - //The convention when checking if a molecule has failed to pack in the cluster - //is to check whether the first atoms has been recorded as having failed - - auto got = pb->pb_stats->atom_failures.find(molecule->atom_block_ids[0]); - if (got == pb->pb_stats->atom_failures.end()) { - pb->pb_stats->atom_failures.insert({molecule->atom_block_ids[0], 1}); - } else { - got->second++; - } -} - -/** - * Try place atom block into current primitive location - */ - -enum e_block_pack_status try_place_atom_block_rec(const t_pb_graph_node* pb_graph_node, - const AtomBlockId blk_id, - t_pb* cb, - t_pb** parent, - const int max_models, - const int max_cluster_size, - const ClusterBlockId clb_index, - const t_cluster_placement_stats* cluster_placement_stats_ptr, - const t_pack_molecule* molecule, - t_lb_router_data* router_data, - int verbosity, - const int feasible_block_array_size) { - int i, j; - bool is_primitive; - enum e_block_pack_status block_pack_status; - - t_pb* my_parent; - t_pb *pb, *parent_pb; - const t_pb_type* pb_type; - - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - - my_parent = nullptr; - - block_pack_status = e_block_pack_status::BLK_PASSED; - - /* Discover parent */ - if (pb_graph_node->parent_pb_graph_node != cb->pb_graph_node) { - block_pack_status = 
try_place_atom_block_rec(pb_graph_node->parent_pb_graph_node, blk_id, cb, - &my_parent, max_models, max_cluster_size, clb_index, - cluster_placement_stats_ptr, molecule, router_data, - verbosity, feasible_block_array_size); - parent_pb = my_parent; - } else { - parent_pb = cb; - } - - /* Create siblings if siblings are not allocated */ - if (parent_pb->child_pbs == nullptr) { - atom_ctx.lookup.set_atom_pb(AtomBlockId::INVALID(), parent_pb); - - VTR_ASSERT(parent_pb->name == nullptr); - parent_pb->name = vtr::strdup(atom_ctx.nlist.block_name(blk_id).c_str()); - parent_pb->mode = pb_graph_node->pb_type->parent_mode->index; - set_reset_pb_modes(router_data, parent_pb, true); - const t_mode* mode = &parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode]; - parent_pb->child_pbs = new t_pb*[mode->num_pb_type_children]; - - for (i = 0; i < mode->num_pb_type_children; i++) { - parent_pb->child_pbs[i] = new t_pb[mode->pb_type_children[i].num_pb]; - - for (j = 0; j < mode->pb_type_children[i].num_pb; j++) { - parent_pb->child_pbs[i][j].parent_pb = parent_pb; - - atom_ctx.lookup.set_atom_pb(AtomBlockId::INVALID(), &parent_pb->child_pbs[i][j]); - - parent_pb->child_pbs[i][j].pb_graph_node = &(parent_pb->pb_graph_node->child_pb_graph_nodes[parent_pb->mode][i][j]); - } - } - } else { - /* if this is not the first child of this parent, must match existing parent mode */ - if (parent_pb->mode != pb_graph_node->pb_type->parent_mode->index) { - return e_block_pack_status::BLK_FAILED_FEASIBLE; - } - } - - const t_mode* mode = &parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode]; - for (i = 0; i < mode->num_pb_type_children; i++) { - if (pb_graph_node->pb_type == &mode->pb_type_children[i]) { - break; - } - } - VTR_ASSERT(i < mode->num_pb_type_children); - pb = &parent_pb->child_pbs[i][pb_graph_node->placement_index]; - *parent = pb; /* this pb is parent of it's child that called this function */ - VTR_ASSERT(pb->pb_graph_node == pb_graph_node); - if (pb->pb_stats == nullptr) 
{ - alloc_and_load_pb_stats(pb, feasible_block_array_size); - } - pb_type = pb_graph_node->pb_type; - - /* Any pb_type under an mode, which is disabled for packing, should not be considerd for mapping - * Early exit to flag failure - */ - if (true == pb_type->parent_mode->disable_packing) { - return e_block_pack_status::BLK_FAILED_FEASIBLE; - } - is_primitive = (pb_type->num_modes == 0); - - if (is_primitive) { - VTR_ASSERT(!atom_ctx.lookup.pb_atom(pb) - && atom_ctx.lookup.atom_pb(blk_id) == nullptr - && atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()); - /* try pack to location */ - VTR_ASSERT(pb->name == nullptr); - pb->name = vtr::strdup(atom_ctx.nlist.block_name(blk_id).c_str()); - - //Update the atom netlist mappings - atom_ctx.lookup.set_atom_clb(blk_id, clb_index); - atom_ctx.lookup.set_atom_pb(blk_id, pb); - - add_atom_as_target(router_data, blk_id); - if (!primitive_feasible(blk_id, pb)) { - /* failed location feasibility check, revert pack */ - block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; - } - - // if this block passed and is part of a chained molecule - if (block_pack_status == e_block_pack_status::BLK_PASSED && molecule->is_chain()) { - auto molecule_root_block = molecule->atom_block_ids[molecule->root]; - // if this is the root block of the chain molecule check its placmeent feasibility - if (blk_id == molecule_root_block) { - block_pack_status = check_chain_root_placement_feasibility(pb_graph_node, molecule, blk_id); - } - } - - VTR_LOGV(verbosity > 4 && block_pack_status == e_block_pack_status::BLK_PASSED, - "\t\t\tPlaced atom '%s' (%s) at %s\n", - atom_ctx.nlist.block_name(blk_id).c_str(), - atom_ctx.nlist.block_model(blk_id)->name, - pb->hierarchical_type_name().c_str()); - } - - if (block_pack_status != e_block_pack_status::BLK_PASSED) { - free(pb->name); - pb->name = nullptr; - } - return block_pack_status; -} - -/* - * Checks if the atom and cluster have compatible floorplanning constraints - * If the atom and 
cluster both have non-empty PartitionRegions, and the intersection - * of the PartitionRegions is empty, the atom cannot be packed in the cluster. - */ -bool atom_cluster_floorplanning_check(AtomBlockId blk_id, - ClusterBlockId clb_index, - int verbosity, - PartitionRegion& temp_cluster_pr, - bool& cluster_pr_needs_update) { - auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); - - /*check if the atom can go in the cluster by checking if the atom and cluster have intersecting PartitionRegions*/ - - //get partition that atom belongs to - PartitionId partid; - partid = floorplanning_ctx.constraints.get_atom_partition(blk_id); - - //if the atom does not belong to a partition, it can be put in the cluster - //regardless of what the cluster's PartitionRegion is because it has no constraints - if (partid == PartitionId::INVALID()) { - VTR_LOGV(verbosity > 3, - "\t\t\t Intersect: Atom block %d has no floorplanning constraints, passed for cluster %d \n", - blk_id, clb_index); - cluster_pr_needs_update = false; - return true; - } else { - //get pr of that partition - const PartitionRegion& atom_pr = floorplanning_ctx.constraints.get_partition_pr(partid); - - //intersect it with the pr of the current cluster - PartitionRegion cluster_pr = floorplanning_ctx.cluster_constraints[clb_index]; - - if (cluster_pr.empty()) { - temp_cluster_pr = atom_pr; - cluster_pr_needs_update = true; - VTR_LOGV(verbosity > 3, - "\t\t\t Intersect: Atom block %d has floorplanning constraints, passed cluster %d which has empty PR\n", - blk_id, clb_index); - return true; - } else { - //update cluster_pr with the intersection of the cluster's PartitionRegion - //and the atom's PartitionRegion - update_cluster_part_reg(cluster_pr, atom_pr); - } - - // At this point, cluster_pr is the intersection of atom_pr and the clusters current pr - if (cluster_pr.empty()) { - VTR_LOGV(verbosity > 3, - "\t\t\t Intersect: Atom block %d failed floorplanning check for cluster %d \n", - blk_id, clb_index); - 
cluster_pr_needs_update = false; - return false; - } else { - //update the cluster's PartitionRegion with the intersecting PartitionRegion - temp_cluster_pr = cluster_pr; - cluster_pr_needs_update = true; - VTR_LOGV(verbosity > 3, - "\t\t\t Intersect: Atom block %d passed cluster %d, cluster PR was updated with intersection result \n", - blk_id, clb_index); - return true; - } - } -} - -bool atom_cluster_noc_group_check(AtomBlockId blk_id, - ClusterBlockId clb_index, - int verbosity, - NocGroupId& temp_cluster_noc_grp_id) { - const auto& atom_noc_grp_ids = g_vpr_ctx.cl_helper().atom_noc_grp_id; - const NocGroupId atom_noc_grp_id = atom_noc_grp_ids.empty() ? NocGroupId::INVALID() : atom_noc_grp_ids[blk_id]; - - if (temp_cluster_noc_grp_id == NocGroupId::INVALID()) { - // the cluster does not have a NoC group - // assign the atom's NoC group to cluster - VTR_LOGV(verbosity > 3, - "\t\t\t NoC Group: Atom block %d passed cluster %d, cluster's NoC group was updated with the atom's group %d\n", - blk_id, clb_index, (size_t)atom_noc_grp_id); - temp_cluster_noc_grp_id = atom_noc_grp_id; - return true; - } else if (temp_cluster_noc_grp_id == atom_noc_grp_id) { - // the cluster has the same NoC group ID as the atom, - // so they are compatible - VTR_LOGV(verbosity > 3, - "\t\t\t NoC Group: Atom block %d passed cluster %d, cluster's NoC group was compatible with the atom's group %d\n", - blk_id, clb_index, (size_t)atom_noc_grp_id); - return true; - } else { - // the cluster belongs to a different NoC group than the atom's group, - // so they are incompatible - VTR_LOGV(verbosity > 3, - "\t\t\t NoC Group: Atom block %d failed NoC group check for cluster %d. 
Cluster's NoC group: %d, atom's NoC group: %d\n", - blk_id, clb_index, (size_t)temp_cluster_noc_grp_id, size_t(atom_noc_grp_id)); - return false; - } -} - -/* Revert trial atom block iblock and free up memory space accordingly - */ -void revert_place_atom_block(const AtomBlockId blk_id, t_lb_router_data* router_data) { - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - - //We cast away const here since we may free the pb, and it is - //being removed from the active mapping. - // - //In general most code works fine accessing cosnt t_pb*, - //which is why we store them as such in atom_ctx.lookup - t_pb* pb = const_cast(atom_ctx.lookup.atom_pb(blk_id)); - - if (pb != nullptr) { - /* When freeing molecules, the current block might already have been freed by a prior revert - * When this happens, no need to do anything beyond basic book keeping at the atom block - */ - - t_pb* next = pb->parent_pb; - revalid_molecules(pb); - free_pb(pb); - pb = next; - - while (pb != nullptr) { - /* If this is pb is created only for the purposes of holding new molecule, remove it - * Must check if cluster is already freed (which can be the case) - */ - next = pb->parent_pb; - - if (pb->child_pbs != nullptr && pb->pb_stats != nullptr - && pb->pb_stats->num_child_blocks_in_pb == 0) { - set_reset_pb_modes(router_data, pb, false); - if (next != nullptr) { - /* If the code gets here, then that means that placing the initial seed molecule - * failed, don't free the actual complex block itself as the seed needs to find - * another placement */ - revalid_molecules(pb); - free_pb(pb); - } - } - pb = next; - } - } - - //Update the atom netlist mapping - atom_ctx.lookup.set_atom_clb(blk_id, ClusterBlockId::INVALID()); - atom_ctx.lookup.set_atom_pb(blk_id, nullptr); -} - -void update_connection_gain_values(const AtomNetId net_id, const AtomBlockId clustered_blk_id, t_pb* cur_pb, enum e_net_relation_to_clustered_block net_relation_to_clustered_block) { +void update_connection_gain_values(const AtomNetId 
net_id, + const AtomBlockId clustered_blk_id, + t_pb* cur_pb, + const ClusterLegalizer& cluster_legalizer, + enum e_net_relation_to_clustered_block net_relation_to_clustered_block) { /*This function is called when the connectiongain values on the net net_id* *require updating. */ + const AtomContext& atom_ctx = g_vpr_ctx.atom(); int num_internal_connections, num_open_connections, num_stuck_connections; num_internal_connections = num_open_connections = num_stuck_connections = 0; - auto& atom_ctx = g_vpr_ctx.atom(); - ClusterBlockId clb_index = atom_ctx.lookup.atom_clb(clustered_blk_id); + LegalizationClusterId legalization_cluster_id = cluster_legalizer.get_atom_cluster(clustered_blk_id); /* may wish to speed things up by ignoring clock nets since they are high fanout */ for (auto pin_id : atom_ctx.nlist.net_pins(net_id)) { auto blk_id = atom_ctx.nlist.pin_block(pin_id); - if (atom_ctx.lookup.atom_clb(blk_id) == clb_index + if (cluster_legalizer.get_atom_cluster(blk_id) == legalization_cluster_id && is_atom_blk_in_pb(blk_id, atom_ctx.lookup.atom_pb(clustered_blk_id))) { num_internal_connections++; - } else if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + } else if (!cluster_legalizer.is_atom_clustered(blk_id)) { num_open_connections++; } else { num_stuck_connections++; @@ -1479,7 +521,7 @@ void update_connection_gain_values(const AtomNetId net_id, const AtomBlockId clu auto blk_id = atom_ctx.nlist.pin_block(pin_id); VTR_ASSERT(blk_id); - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + if (!cluster_legalizer.is_atom_clustered(blk_id)) { /* TODO: Gain function accurate only if net has one connection to block, * TODO: Should we handle case where net has multi-connection to block? 
* Gain computation is only off by a bit in this case */ @@ -1502,7 +544,7 @@ void update_connection_gain_values(const AtomNetId net_id, const AtomBlockId clu auto driver_pin_id = atom_ctx.nlist.net_driver(net_id); auto blk_id = atom_ctx.nlist.pin_block(driver_pin_id); - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + if (!cluster_legalizer.is_atom_clustered(blk_id)) { if (cur_pb->pb_stats->connectiongain.count(blk_id) == 0) { cur_pb->pb_stats->connectiongain[blk_id] = 0; } @@ -1514,53 +556,33 @@ void update_connection_gain_values(const AtomNetId net_id, const AtomBlockId clu } } -void try_fill_cluster(const t_packer_opts& packer_opts, +void try_fill_cluster(ClusterLegalizer& cluster_legalizer, + const Prepacker& prepacker, + const t_packer_opts& packer_opts, t_cluster_placement_stats* cur_cluster_placement_stats_ptr, t_pack_molecule*& prev_molecule, t_pack_molecule*& next_molecule, int& num_same_molecules, - t_pb_graph_node** primitives_list, t_cluster_progress_stats& cluster_stats, int num_clb, - const int num_models, - const int max_cluster_size, - const ClusterBlockId clb_index, - const int detailed_routing_stage, + const LegalizationClusterId legalization_cluster_id, AttractionInfo& attraction_groups, - vtr::vector>& clb_inter_blk_nets, + vtr::vector>& clb_inter_blk_nets, bool allow_unrelated_clustering, const int& high_fanout_threshold, const std::unordered_set& is_clock, const std::unordered_set& is_global, const std::shared_ptr& timing_info, - t_lb_router_data* router_data, - t_ext_pin_util target_ext_pin_util, - PartitionRegion& temp_cluster_pr, - NocGroupId& temp_noc_grp_id, e_block_pack_status& block_pack_status, t_molecule_link* unclustered_list_head, const int& unclustered_list_head_size, std::unordered_map& net_output_feeds_driving_block_input, std::map>& primitive_candidate_block_types) { - auto& atom_ctx = g_vpr_ctx.atom(); - auto& device_ctx = g_vpr_ctx.mutable_device(); - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - 
- block_pack_status = try_pack_molecule(cur_cluster_placement_stats_ptr, - next_molecule, - primitives_list, - cluster_ctx.clb_nlist.block_pb(clb_index), - num_models, - max_cluster_size, - clb_index, - detailed_routing_stage, - router_data, - packer_opts.pack_verbosity, - packer_opts.enable_pin_feasibility_filter, - packer_opts.feasible_block_array_size, - target_ext_pin_util, - temp_cluster_pr, - temp_noc_grp_id); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + const DeviceContext& device_ctx = g_vpr_ctx.device(); + + block_pack_status = cluster_legalizer.add_mol_to_cluster(next_molecule, + legalization_cluster_id); auto blk_id = next_molecule->atom_block_ids[next_molecule->root]; VTR_ASSERT(blk_id); @@ -1588,7 +610,7 @@ void try_fill_cluster(const t_packer_opts& packer_opts, } } - next_molecule = get_molecule_for_cluster(cluster_ctx.clb_nlist.block_pb(clb_index), + next_molecule = get_molecule_for_cluster(cluster_legalizer.get_cluster_pb(legalization_cluster_id), attraction_groups, allow_unrelated_clustering, packer_opts.prioritize_transitive_connectivity, @@ -1596,8 +618,11 @@ void try_fill_cluster(const t_packer_opts& packer_opts, packer_opts.feasible_block_array_size, &cluster_stats.num_unrelated_clustering_attempts, cur_cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, clb_inter_blk_nets, - clb_index, packer_opts.pack_verbosity, + legalization_cluster_id, + packer_opts.pack_verbosity, unclustered_list_head, unclustered_list_head_size, primitive_candidate_block_types); @@ -1625,9 +650,11 @@ void try_fill_cluster(const t_packer_opts& packer_opts, cluster_stats.mols_since_last_print, device_ctx.grid.width(), device_ctx.grid.height(), - attraction_groups); + attraction_groups, + cluster_legalizer); - update_cluster_stats(next_molecule, clb_index, + update_cluster_stats(next_molecule, + cluster_legalizer, is_clock, //Set of all clocks is_global, //Set of all global signals (currently clocks) packer_opts.global_clocks, packer_opts.alpha, 
packer_opts.beta, packer_opts.timing_driven, @@ -1641,7 +668,7 @@ void try_fill_cluster(const t_packer_opts& packer_opts, if (packer_opts.timing_driven) { cluster_stats.blocks_since_last_analysis++; /* historically, timing slacks were recomputed after X number of blocks were packed, but this doesn't significantly alter results so I (jluu) did not port the code */ } - next_molecule = get_molecule_for_cluster(cluster_ctx.clb_nlist.block_pb(clb_index), + next_molecule = get_molecule_for_cluster(cluster_legalizer.get_cluster_pb(legalization_cluster_id), attraction_groups, allow_unrelated_clustering, packer_opts.prioritize_transitive_connectivity, @@ -1649,8 +676,10 @@ void try_fill_cluster(const t_packer_opts& packer_opts, packer_opts.feasible_block_array_size, &cluster_stats.num_unrelated_clustering_attempts, cur_cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, clb_inter_blk_nets, - clb_index, + legalization_cluster_id, packer_opts.pack_verbosity, unclustered_list_head, unclustered_list_head_size, @@ -1661,78 +690,37 @@ void try_fill_cluster(const t_packer_opts& packer_opts, } } -t_pack_molecule* save_cluster_routing_and_pick_new_seed(const t_packer_opts& packer_opts, - const int& num_clb, - const std::vector& seed_atoms, - const int& num_blocks_hill_added, - vtr::vector*>& intra_lb_routing, - int& seedindex, - t_cluster_progress_stats& cluster_stats, - t_lb_router_data* router_data) { - t_pack_molecule* next_seed = nullptr; - - intra_lb_routing.push_back(router_data->saved_lb_nets); - VTR_ASSERT((int)intra_lb_routing.size() == num_clb); - router_data->saved_lb_nets = nullptr; - - //Pick a new seed - next_seed = get_highest_gain_seed_molecule(seedindex, seed_atoms); - - if (packer_opts.timing_driven) { - if (num_blocks_hill_added > 0) { - cluster_stats.blocks_since_last_analysis += num_blocks_hill_added; - } - } - return next_seed; -} - void store_cluster_info_and_free(const t_packer_opts& packer_opts, - const ClusterBlockId& clb_index, + const 
LegalizationClusterId legalization_cluster_id, const t_logical_block_type_ptr logic_block_type, const t_pb_type* le_pb_type, std::vector& le_count, - vtr::vector>& clb_inter_blk_nets) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - auto& atom_ctx = g_vpr_ctx.atom(); + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); /* store info that will be used later in packing from pb_stats and free the rest */ - t_pb_stats* pb_stats = cluster_ctx.clb_nlist.block_pb(clb_index)->pb_stats; + t_pb* cur_pb = cluster_legalizer.get_cluster_pb(legalization_cluster_id); + t_pb_stats* pb_stats = cur_pb->pb_stats; for (const AtomNetId mnet_id : pb_stats->marked_nets) { int external_terminals = atom_ctx.nlist.net_pins(mnet_id).size() - pb_stats->num_pins_of_net_in_pb[mnet_id]; /* Check if external terminals of net is within the fanout limit and that there exists external terminals */ if (external_terminals < packer_opts.transitive_fanout_threshold && external_terminals > 0) { - clb_inter_blk_nets[clb_index].push_back(mnet_id); + clb_inter_blk_nets[legalization_cluster_id].push_back(mnet_id); } } - auto cur_pb = cluster_ctx.clb_nlist.block_pb(clb_index); // update the data structure holding the LE counts update_le_count(cur_pb, logic_block_type, le_pb_type, le_count); //print clustering progress incrementally //print_pack_status(num_clb, num_molecules, num_molecules_processed, mols_since_last_print, device_ctx.grid.width(), device_ctx.grid.height()); - free_pb_stats_recursive(cur_pb); -} - -/* Free up data structures and requeue used molecules */ -void free_data_and_requeue_used_mols_if_illegal(const ClusterBlockId& clb_index, - const int& savedseedindex, - std::map& num_used_type_instances, - int& num_clb, - int& seedindex) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - num_used_type_instances[cluster_ctx.clb_nlist.block_type(clb_index)]--; - 
revalid_molecules(cluster_ctx.clb_nlist.block_pb(clb_index)); - cluster_ctx.clb_nlist.remove_block(clb_index); - cluster_ctx.clb_nlist.compress(); - num_clb--; - seedindex = savedseedindex; } /*****************************************/ void update_timing_gain_values(const AtomNetId net_id, t_pb* cur_pb, + const ClusterLegalizer& cluster_legalizer, enum e_net_relation_to_clustered_block net_relation_to_clustered_block, const SetupTimingInfo& timing_info, const std::unordered_set& is_global, @@ -1741,7 +729,7 @@ void update_timing_gain_values(const AtomNetId net_id, *net_id requires updating. */ float timinggain; - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); /* Check if this atom net lists its driving atom block twice. If so, avoid * * double counting this atom block by skipping the first (driving) pin. */ @@ -1753,7 +741,7 @@ void update_timing_gain_values(const AtomNetId net_id, && !is_global.count(net_id)) { for (auto pin_id : pins) { auto blk_id = atom_ctx.nlist.pin_block(pin_id); - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + if (!cluster_legalizer.is_atom_clustered(blk_id)) { timinggain = timing_info.setup_pin_criticality(pin_id); if (cur_pb->pb_stats->timinggain.count(blk_id) == 0) { @@ -1772,7 +760,7 @@ void update_timing_gain_values(const AtomNetId net_id, auto driver_pin = atom_ctx.nlist.net_driver(net_id); auto new_blk_id = atom_ctx.nlist.pin_block(driver_pin); - if (atom_ctx.lookup.atom_clb(new_blk_id) == ClusterBlockId::INVALID()) { + if (!cluster_legalizer.is_atom_clustered(new_blk_id)) { for (auto pin_id : atom_ctx.nlist.net_sinks(net_id)) { timinggain = timing_info.setup_pin_criticality(pin_id); @@ -1790,6 +778,7 @@ void update_timing_gain_values(const AtomNetId net_id, void mark_and_update_partial_gain(const AtomNetId net_id, enum e_gain_update gain_flag, const AtomBlockId clustered_blk_id, + const ClusterLegalizer& cluster_legalizer, bool timing_driven, bool connection_driven, enum 
e_net_relation_to_clustered_block net_relation_to_clustered_block, @@ -1805,7 +794,7 @@ void mark_and_update_partial_gain(const AtomNetId net_id, * cluster. The timinggain is the criticality of the most critical* * atom net between this atom block and an atom block in the cluster. */ - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); t_pb* cur_pb = atom_ctx.lookup.atom_pb(clustered_blk_id)->parent_pb; cur_pb = get_top_level_pb(cur_pb); @@ -1845,7 +834,7 @@ void mark_and_update_partial_gain(const AtomNetId net_id, if (cur_pb->pb_stats->num_pins_of_net_in_pb.count(net_id) == 0) { for (auto pin_id : pins) { auto blk_id = atom_ctx.nlist.pin_block(pin_id); - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + if (!cluster_legalizer.is_atom_clustered(blk_id)) { if (cur_pb->pb_stats->sharinggain.count(blk_id) == 0) { cur_pb->pb_stats->marked_blocks.push_back(blk_id); cur_pb->pb_stats->sharinggain[blk_id] = 1; @@ -1860,11 +849,12 @@ void mark_and_update_partial_gain(const AtomNetId net_id, if (connection_driven) { update_connection_gain_values(net_id, clustered_blk_id, cur_pb, + cluster_legalizer, net_relation_to_clustered_block); } if (timing_driven) { - update_timing_gain_values(net_id, cur_pb, + update_timing_gain_values(net_id, cur_pb, cluster_legalizer, net_relation_to_clustered_block, timing_info, is_global, @@ -1882,7 +872,7 @@ void update_total_gain(float alpha, float beta, bool timing_driven, bool connect /*Updates the total gain array to reflect the desired tradeoff between* *input sharing (sharinggain) and path_length minimization (timinggain) *input each time a new molecule is added to the cluster.*/ - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); t_pb* cur_pb = pb; cur_pb = get_top_level_pb(cur_pb); @@ -1938,7 +928,7 @@ void update_total_gain(float alpha, float beta, bool timing_driven, bool connect /*****************************************/ void 
update_cluster_stats(const t_pack_molecule* molecule, - const ClusterBlockId clb_index, + const ClusterLegalizer& cluster_legalizer, const std::unordered_set& is_clock, const std::unordered_set& is_global, const bool global_clocks, @@ -1959,7 +949,7 @@ void update_cluster_stats(const t_pack_molecule* molecule, int iblock; t_pb *cur_pb, *cb; - auto& atom_ctx = g_vpr_ctx.mutable_atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); molecule_size = get_array_size_of_molecule(molecule); cb = nullptr; @@ -1969,9 +959,6 @@ void update_cluster_stats(const t_pack_molecule* molecule, continue; } - //Update atom netlist mapping - atom_ctx.lookup.set_atom_clb(blk_id, clb_index); - const t_pb* atom_pb = atom_ctx.lookup.atom_pb(blk_id); VTR_ASSERT(atom_pb); @@ -2000,7 +987,7 @@ void update_cluster_stats(const t_pack_molecule* molecule, for (auto pin_id : atom_ctx.nlist.block_output_pins(blk_id)) { auto net_id = atom_ctx.nlist.pin_net(pin_id); if (!is_clock.count(net_id) || !global_clocks) { - mark_and_update_partial_gain(net_id, GAIN, blk_id, + mark_and_update_partial_gain(net_id, GAIN, blk_id, cluster_legalizer, timing_driven, connection_driven, OUTPUT, timing_info, @@ -2008,7 +995,7 @@ void update_cluster_stats(const t_pack_molecule* molecule, high_fanout_net_threshold, net_output_feeds_driving_block_input); } else { - mark_and_update_partial_gain(net_id, NO_GAIN, blk_id, + mark_and_update_partial_gain(net_id, NO_GAIN, blk_id, cluster_legalizer, timing_driven, connection_driven, OUTPUT, timing_info, @@ -2021,7 +1008,7 @@ void update_cluster_stats(const t_pack_molecule* molecule, /* Next Inputs */ for (auto pin_id : atom_ctx.nlist.block_input_pins(blk_id)) { auto net_id = atom_ctx.nlist.pin_net(pin_id); - mark_and_update_partial_gain(net_id, GAIN, blk_id, + mark_and_update_partial_gain(net_id, GAIN, blk_id, cluster_legalizer, timing_driven, connection_driven, INPUT, timing_info, @@ -2034,14 +1021,14 @@ void update_cluster_stats(const t_pack_molecule* molecule, for (auto 
pin_id : atom_ctx.nlist.block_clock_pins(blk_id)) { auto net_id = atom_ctx.nlist.pin_net(pin_id); if (global_clocks) { - mark_and_update_partial_gain(net_id, NO_GAIN, blk_id, + mark_and_update_partial_gain(net_id, NO_GAIN, blk_id, cluster_legalizer, timing_driven, connection_driven, INPUT, timing_info, is_global, high_fanout_net_threshold, net_output_feeds_driving_block_input); } else { - mark_and_update_partial_gain(net_id, GAIN, blk_id, + mark_and_update_partial_gain(net_id, GAIN, blk_id, cluster_legalizer, timing_driven, connection_driven, INPUT, timing_info, is_global, @@ -2052,8 +1039,6 @@ void update_cluster_stats(const t_pack_molecule* molecule, update_total_gain(alpha, beta, timing_driven, connection_driven, atom_pb->parent_pb, attraction_groups); - - commit_lookahead_pins_used(cb); } // if this molecule came from the transitive fanout candidates remove it @@ -2063,38 +1048,23 @@ void update_cluster_stats(const t_pack_molecule* molecule, } } -void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, - t_pb_graph_node** primitives_list, - ClusterBlockId clb_index, +void start_new_cluster(ClusterLegalizer& cluster_legalizer, + LegalizationClusterId& legalization_cluster_id, t_pack_molecule* molecule, std::map& num_used_type_instances, const float target_device_utilization, - const int num_models, - const int max_cluster_size, const t_arch* arch, const std::string& device_layout_name, - std::vector* lb_type_rr_graphs, - t_lb_router_data** router_data, - const int detailed_routing_stage, - ClusteredNetlist* clb_nlist, const std::map>& primitive_candidate_block_types, int verbosity, - bool enable_pin_feasibility_filter, - bool balance_block_type_utilization, - const int feasible_block_array_size, - PartitionRegion& temp_cluster_pr, - NocGroupId& temp_noc_grp_id) { + bool balance_block_type_utilization) { /* Given a starting seed block, start_new_cluster determines the next cluster type to use * It expands the FPGA if it cannot find a legal 
cluster for the atom block */ - auto& atom_ctx = g_vpr_ctx.atom(); - auto& device_ctx = g_vpr_ctx.mutable_device(); - auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); - - /*Cluster's PartitionRegion is empty initially, meaning it has no floorplanning constraints*/ - PartitionRegion empty_pr; - floorplanning_ctx.cluster_constraints.push_back(empty_pr); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + DeviceContext& mutable_device_ctx = g_vpr_ctx.mutable_device(); + const DeviceContext& device_ctx = g_vpr_ctx.mutable_device(); /* Allocate a dummy initial cluster and load a atom block as a seed and check if it is legal */ AtomBlockId root_atom = molecule->atom_block_ids[molecule->root]; @@ -2136,57 +1106,24 @@ void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, //Try packing into each candidate type bool success = false; + t_logical_block_type_ptr block_type; + LegalizationClusterId new_cluster_id; for (auto type : candidate_types) { - t_pb* pb = new t_pb; - pb->pb_graph_node = type->pb_graph_head; - alloc_and_load_pb_stats(pb, feasible_block_array_size); - pb->parent_pb = nullptr; - - *router_data = alloc_and_load_router_data(&lb_type_rr_graphs[type->index], type); - //Try packing into each mode e_block_pack_status pack_result = e_block_pack_status::BLK_STATUS_UNDEFINED; for (int j = 0; j < type->pb_graph_head->pb_type->num_modes && !success; j++) { - pb->mode = j; - - reset_cluster_placement_stats(&cluster_placement_stats[type->index]); - set_mode_cluster_placement_stats(pb->pb_graph_node, j); - - //Note that since we are starting a new cluster, we use FULL_EXTERNAL_PIN_UTIL, - //which allows all cluster pins to be used. This ensures that if we have a large - //molecule which would otherwise exceed the external pin utilization targets it - //can use the full set of cluster pins when selected as the seed block -- ensuring - //it is still implementable. 
- pack_result = try_pack_molecule(&cluster_placement_stats[type->index], - molecule, primitives_list, pb, - num_models, max_cluster_size, clb_index, - detailed_routing_stage, *router_data, - verbosity, - enable_pin_feasibility_filter, - feasible_block_array_size, - FULL_EXTERNAL_PIN_UTIL, - temp_cluster_pr, - temp_noc_grp_id); - + std::tie(pack_result, new_cluster_id) = cluster_legalizer.start_new_cluster(molecule, type, j); success = (pack_result == e_block_pack_status::BLK_PASSED); } if (success) { VTR_LOGV(verbosity > 2, "\tPASSED_SEED: Block Type %s\n", type->name); - //Once clustering succeeds, add it to the clb netlist - if (pb->name != nullptr) { - free(pb->name); - } - pb->name = vtr::strdup(root_atom_name.c_str()); - clb_index = clb_nlist->create_block(root_atom_name.c_str(), pb, type); + // If clustering succeeds return the new_cluster_id and type. + legalization_cluster_id = new_cluster_id; + block_type = type; break; } else { VTR_LOGV(verbosity > 2, "\tFAILED_SEED: Block Type %s\n", type->name); - //Free failed clustering and try again - free_router_data(*router_data); - free_pb(pb); - delete pb; - *router_data = nullptr; } } @@ -2209,7 +1146,6 @@ void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, VTR_ASSERT(success); //Successfully create cluster - auto block_type = clb_nlist->block_type(clb_index); num_used_type_instances[block_type]++; /* Expand FPGA size if needed */ @@ -2220,7 +1156,7 @@ void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, } if (num_used_type_instances[block_type] > num_instances) { - device_ctx.grid = create_device_grid(device_layout_name, arch->grid_layouts, num_used_type_instances, target_device_utilization); + mutable_device_ctx.grid = create_device_grid(device_layout_name, arch->grid_layouts, num_used_type_instances, target_device_utilization); } } @@ -2236,8 +1172,10 @@ t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, AttractionInfo& attraction_groups, const enum 
e_gain_type gain_mode, t_cluster_placement_stats* cluster_placement_stats_ptr, - vtr::vector>& clb_inter_blk_nets, - const ClusterBlockId cluster_index, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets, + const LegalizationClusterId legalization_cluster_id, bool prioritize_transitive_connectivity, int transitive_fanout_threshold, const int feasible_block_array_size, @@ -2254,37 +1192,72 @@ t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, // 1. Find unpacked molecules based on criticality and strong connectedness (connected by low fanout nets) with current cluster if (cur_pb->pb_stats->num_feasible_blocks == NOT_VALID) { - add_cluster_molecule_candidates_by_connectivity_and_timing(cur_pb, cluster_placement_stats_ptr, feasible_block_array_size, attraction_groups); + add_cluster_molecule_candidates_by_connectivity_and_timing(cur_pb, + cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, + feasible_block_array_size, + attraction_groups); } if (prioritize_transitive_connectivity) { // 2. Find unpacked molecules based on transitive connections (eg. 2 hops away) with current cluster if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->explore_transitive_fanout) { - add_cluster_molecule_candidates_by_transitive_connectivity(cur_pb, cluster_placement_stats_ptr, clb_inter_blk_nets, - cluster_index, transitive_fanout_threshold, feasible_block_array_size, attraction_groups); + add_cluster_molecule_candidates_by_transitive_connectivity(cur_pb, + cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, + clb_inter_blk_nets, + legalization_cluster_id, + transitive_fanout_threshold, + feasible_block_array_size, + attraction_groups); } // 3. 
Find unpacked molecules based on weak connectedness (connected by high fanout nets) with current cluster if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->tie_break_high_fanout_net) { - add_cluster_molecule_candidates_by_highfanout_connectivity(cur_pb, cluster_placement_stats_ptr, feasible_block_array_size, attraction_groups); + add_cluster_molecule_candidates_by_highfanout_connectivity(cur_pb, + cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, + feasible_block_array_size, + attraction_groups); } } else { //Reverse order // 3. Find unpacked molecules based on weak connectedness (connected by high fanout nets) with current cluster if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->tie_break_high_fanout_net) { - add_cluster_molecule_candidates_by_highfanout_connectivity(cur_pb, cluster_placement_stats_ptr, feasible_block_array_size, attraction_groups); + add_cluster_molecule_candidates_by_highfanout_connectivity(cur_pb, + cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, + feasible_block_array_size, + attraction_groups); } // 2. Find unpacked molecules based on transitive connections (eg. 2 hops away) with current cluster if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->explore_transitive_fanout) { - add_cluster_molecule_candidates_by_transitive_connectivity(cur_pb, cluster_placement_stats_ptr, clb_inter_blk_nets, - cluster_index, transitive_fanout_threshold, feasible_block_array_size, attraction_groups); + add_cluster_molecule_candidates_by_transitive_connectivity(cur_pb, + cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, + clb_inter_blk_nets, + legalization_cluster_id, + transitive_fanout_threshold, + feasible_block_array_size, + attraction_groups); } } // 4. 
Find unpacked molecules based on attraction group of the current cluster (if the cluster has an attraction group) if (cur_pb->pb_stats->num_feasible_blocks == 0) { - add_cluster_molecule_candidates_by_attraction_group(cur_pb, cluster_placement_stats_ptr, attraction_groups, - feasible_block_array_size, cluster_index, primitive_candidate_block_types); + add_cluster_molecule_candidates_by_attraction_group(cur_pb, + cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, + attraction_groups, + feasible_block_array_size, + legalization_cluster_id, + primitive_candidate_block_types); } /* Grab highest gain molecule */ t_pack_molecule* molecule = nullptr; @@ -2302,6 +1275,8 @@ t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, /* Add molecules with strong connectedness to the current cluster to the list of feasible blocks. */ void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, const int feasible_block_array_size, AttractionInfo& attraction_groups) { VTR_ASSERT(cur_pb->pb_stats->num_feasible_blocks == NOT_VALID); @@ -2309,13 +1284,11 @@ void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur_pb, cur_pb->pb_stats->num_feasible_blocks = 0; cur_pb->pb_stats->explore_transitive_fanout = true; /* If no legal molecules found, enable exploration of molecules two hops away */ - auto& atom_ctx = g_vpr_ctx.atom(); - for (AtomBlockId blk_id : cur_pb->pb_stats->marked_blocks) { - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { - t_pack_molecule* molecule = atom_ctx.prepacker.get_atom_molecule(blk_id); + if (!cluster_legalizer.is_atom_clustered(blk_id)) { + t_pack_molecule* molecule = prepacker.get_atom_molecule(blk_id); if (molecule->valid) { - bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); + bool success = 
check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr, cluster_legalizer); if (success) { add_molecule_to_pb_stats_candidates(molecule, cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); @@ -2328,29 +1301,31 @@ void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur_pb, /* Add molecules based on weak connectedness (connected by high fanout nets) with current cluster */ void add_cluster_molecule_candidates_by_highfanout_connectivity(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, const int feasible_block_array_size, AttractionInfo& attraction_groups) { /* Because the packer ignores high fanout nets when marking what blocks * to consider, use one of the ignored high fanout net to fill up lightly * related blocks */ + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; + reset_tried_but_unused_cluster_placements(cluster_placement_stats_ptr); AtomNetId net_id = cur_pb->pb_stats->tie_break_high_fanout_net; - auto& atom_ctx = g_vpr_ctx.atom(); - int count = 0; - for (auto pin_id : atom_ctx.nlist.net_pins(net_id)) { + for (auto pin_id : atom_nlist.net_pins(net_id)) { if (count >= AAPACK_MAX_HIGH_FANOUT_EXPLORE) { break; } - AtomBlockId blk_id = atom_ctx.nlist.pin_block(pin_id); + AtomBlockId blk_id = atom_nlist.pin_block(pin_id); - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { - t_pack_molecule* molecule = atom_ctx.prepacker.get_atom_molecule(blk_id); + if (!cluster_legalizer.is_atom_clustered(blk_id)) { + t_pack_molecule* molecule = prepacker.get_atom_molecule(blk_id); if (molecule->valid) { - bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); + bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr, cluster_legalizer); if (success) { add_molecule_to_pb_stats_candidates(molecule, 
cur_pb->pb_stats->gain, cur_pb, std::min(feasible_block_array_size, AAPACK_MAX_HIGH_FANOUT_EXPLORE), attraction_groups); @@ -2372,14 +1347,15 @@ void add_cluster_molecule_candidates_by_highfanout_connectivity(t_pb* cur_pb, */ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, AttractionInfo& attraction_groups, const int feasible_block_array_size, - ClusterBlockId clb_index, + LegalizationClusterId legalization_cluster_id, std::map>& primitive_candidate_block_types) { - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; - auto cluster_type = cluster_ctx.clb_nlist.block_type(clb_index); + auto cluster_type = cluster_legalizer.get_cluster_type(legalization_cluster_id); /* * For each cluster, we want to explore the attraction group molecules as potential @@ -2405,13 +1381,13 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, AttractionGroup& group = attraction_groups.get_attraction_group_info(grp_id); std::vector available_atoms; for (AtomBlockId atom_id : group.group_atoms) { - const auto& atom_model = atom_ctx.nlist.block_model(atom_id); + const auto& atom_model = atom_nlist.block_model(atom_id); auto itr = primitive_candidate_block_types.find(atom_model); VTR_ASSERT(itr != primitive_candidate_block_types.end()); std::vector& candidate_types = itr->second; //Only consider molecules that are unpacked and of the correct type - if (atom_ctx.lookup.atom_clb(atom_id) == ClusterBlockId::INVALID() + if (!cluster_legalizer.is_atom_clustered(atom_id) && std::find(candidate_types.begin(), candidate_types.end(), cluster_type) != candidate_types.end()) { available_atoms.push_back(atom_id); } @@ -2426,17 +1402,17 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, if (num_available_atoms < 500) { 
//for (AtomBlockId atom_id : group.group_atoms) { for (AtomBlockId atom_id : available_atoms) { - const auto& atom_model = atom_ctx.nlist.block_model(atom_id); + const auto& atom_model = atom_nlist.block_model(atom_id); auto itr = primitive_candidate_block_types.find(atom_model); VTR_ASSERT(itr != primitive_candidate_block_types.end()); std::vector& candidate_types = itr->second; //Only consider molecules that are unpacked and of the correct type - if (atom_ctx.lookup.atom_clb(atom_id) == ClusterBlockId::INVALID() + if (!cluster_legalizer.is_atom_clustered(atom_id) && std::find(candidate_types.begin(), candidate_types.end(), cluster_type) != candidate_types.end()) { - t_pack_molecule* molecule = atom_ctx.prepacker.get_atom_molecule(atom_id); + t_pack_molecule* molecule = prepacker.get_atom_molecule(atom_id); if (molecule->valid) { - bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); + bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr, cluster_legalizer); if (success) { add_molecule_to_pb_stats_candidates(molecule, cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); @@ -2458,17 +1434,19 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, //AtomBlockId blk_id = group.group_atoms[selected_atom]; AtomBlockId blk_id = available_atoms[selected_atom]; - const auto& atom_model = atom_ctx.nlist.block_model(blk_id); + const auto& atom_model = atom_nlist.block_model(blk_id); auto itr = primitive_candidate_block_types.find(atom_model); VTR_ASSERT(itr != primitive_candidate_block_types.end()); std::vector& candidate_types = itr->second; //Only consider molecules that are unpacked and of the correct type - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID() + if (!cluster_legalizer.is_atom_clustered(blk_id) && std::find(candidate_types.begin(), candidate_types.end(), cluster_type) != candidate_types.end()) { - t_pack_molecule* 
molecule = atom_ctx.prepacker.get_atom_molecule(blk_id); + t_pack_molecule* molecule = prepacker.get_atom_molecule(blk_id); if (molecule->valid) { - bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); + bool success = check_free_primitives_for_molecule_atoms(molecule, + cluster_placement_stats_ptr, + cluster_legalizer); if (success) { add_molecule_to_pb_stats_candidates(molecule, cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); @@ -2481,8 +1459,10 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, /* Add molecules based on transitive connections (eg. 2 hops away) with current cluster*/ void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, - vtr::vector>& clb_inter_blk_nets, - const ClusterBlockId cluster_index, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets, + const LegalizationClusterId legalization_cluster_id, int transitive_fanout_threshold, const int feasible_block_array_size, AttractionInfo& attraction_groups) { @@ -2490,15 +1470,19 @@ void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur_pb, cur_pb->pb_stats->explore_transitive_fanout = false; /* First time finding transitive fanout candidates therefore alloc and load them */ - load_transitive_fanout_candidates(cluster_index, + load_transitive_fanout_candidates(legalization_cluster_id, cur_pb->pb_stats, + prepacker, + cluster_legalizer, clb_inter_blk_nets, transitive_fanout_threshold); /* Only consider candidates that pass a very simple legality check */ for (const auto& transitive_candidate : cur_pb->pb_stats->transitive_fanout_candidates) { t_pack_molecule* molecule = transitive_candidate.second; if (molecule->valid) { - bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); + bool success = 
check_free_primitives_for_molecule_atoms(molecule, + cluster_placement_stats_ptr, + cluster_legalizer); if (success) { add_molecule_to_pb_stats_candidates(molecule, cur_pb->pb_stats->gain, cur_pb, std::min(feasible_block_array_size, AAPACK_MAX_TRANSITIVE_EXPLORE), attraction_groups); @@ -2508,13 +1492,14 @@ void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur_pb, } /*Check whether a free primitive exists for each atom block in the molecule*/ -bool check_free_primitives_for_molecule_atoms(t_pack_molecule* molecule, t_cluster_placement_stats* cluster_placement_stats_ptr) { - auto& atom_ctx = g_vpr_ctx.atom(); +bool check_free_primitives_for_molecule_atoms(t_pack_molecule* molecule, + t_cluster_placement_stats* cluster_placement_stats_ptr, + const ClusterLegalizer& cluster_legalizer) { bool success = true; for (int i_atom = 0; i_atom < get_array_size_of_molecule(molecule); i_atom++) { if (molecule->atom_block_ids[i_atom]) { - VTR_ASSERT(atom_ctx.lookup.atom_clb(molecule->atom_block_ids[i_atom]) == ClusterBlockId::INVALID()); + VTR_ASSERT(!cluster_legalizer.is_atom_clustered(molecule->atom_block_ids[i_atom])); auto blk_id2 = molecule->atom_block_ids[i_atom]; if (!exists_free_primitive_for_atom_block(cluster_placement_stats_ptr, blk_id2)) { /* TODO (Jason Luu): debating whether to check if placement exists for molecule @@ -2537,15 +1522,17 @@ t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, const int feasible_block_array_size, int* num_unrelated_clustering_attempts, t_cluster_placement_stats* cluster_placement_stats_ptr, - vtr::vector>& clb_inter_blk_nets, - ClusterBlockId cluster_index, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets, + LegalizationClusterId legalization_cluster_id, int verbosity, t_molecule_link* unclustered_list_head, const int& unclustered_list_head_size, std::map>& primitive_candidate_block_types) { /* Finds the block with the greatest gain that satisfies the 
* input, clock and capacity constraints of a cluster that are - * passed in. If no suitable block is found it returns ClusterBlockId::INVALID(). + * passed in. If no suitable block is found it returns nullptr. */ VTR_ASSERT(cur_pb->is_root()); @@ -2553,8 +1540,9 @@ t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, /* If cannot pack into primitive, try packing into cluster */ auto best_molecule = get_highest_gain_molecule(cur_pb, attraction_groups, - NOT_HILL_CLIMBING, cluster_placement_stats_ptr, clb_inter_blk_nets, - cluster_index, prioritize_transitive_connectivity, + NOT_HILL_CLIMBING, cluster_placement_stats_ptr, + prepacker, cluster_legalizer, clb_inter_blk_nets, + legalization_cluster_id, prioritize_transitive_connectivity, transitive_fanout_threshold, feasible_block_array_size, primitive_candidate_block_types); /* If no blocks have any gain to the current cluster, the code above * @@ -2652,14 +1640,15 @@ t_molecule_stats calc_molecule_stats(const t_pack_molecule* molecule, const Atom std::vector initialize_seed_atoms(const e_cluster_seed seed_type, const t_molecule_stats& max_molecule_stats, + const Prepacker& prepacker, const vtr::vector& atom_criticality) { - const AtomContext& atom_ctx = g_vpr_ctx.atom(); + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; //Put all atoms in seed list - std::vector seed_atoms(atom_ctx.nlist.blocks().begin(), atom_ctx.nlist.blocks().end()); + std::vector seed_atoms(atom_nlist.blocks().begin(), atom_nlist.blocks().end()); //Initially all gains are zero - vtr::vector atom_gains(atom_ctx.nlist.blocks().size(), 0.); + vtr::vector atom_gains(atom_nlist.blocks().size(), 0.); if (seed_type == e_cluster_seed::TIMING) { VTR_ASSERT(atom_gains.size() == atom_criticality.size()); @@ -2669,21 +1658,21 @@ std::vector initialize_seed_atoms(const e_cluster_seed seed_type, } else if (seed_type == e_cluster_seed::MAX_INPUTS) { //By number of used molecule input pins - for (auto blk : atom_ctx.nlist.blocks()) { - const 
t_pack_molecule* blk_mol = atom_ctx.prepacker.get_atom_molecule(blk); - const t_molecule_stats molecule_stats = calc_molecule_stats(blk_mol, atom_ctx.nlist); + for (auto blk : atom_nlist.blocks()) { + const t_pack_molecule* blk_mol = prepacker.get_atom_molecule(blk); + const t_molecule_stats molecule_stats = calc_molecule_stats(blk_mol, atom_nlist); atom_gains[blk] = molecule_stats.num_used_ext_inputs; } } else if (seed_type == e_cluster_seed::BLEND) { //By blended gain (criticality and inputs used) - for (auto blk : atom_ctx.nlist.blocks()) { + for (auto blk : atom_nlist.blocks()) { /* Score seed gain of each block as a weighted sum of timing criticality, * number of tightly coupled blocks connected to it, and number of external inputs */ float seed_blend_fac = 0.5; - const t_pack_molecule* blk_mol = atom_ctx.prepacker.get_atom_molecule(blk); - const t_molecule_stats molecule_stats = calc_molecule_stats(blk_mol, atom_ctx.nlist); + const t_pack_molecule* blk_mol = prepacker.get_atom_molecule(blk); + const t_molecule_stats molecule_stats = calc_molecule_stats(blk_mol, atom_nlist); VTR_ASSERT(max_molecule_stats.num_used_ext_inputs > 0); float blend_gain = (seed_blend_fac * atom_criticality[blk] @@ -2695,9 +1684,9 @@ std::vector initialize_seed_atoms(const e_cluster_seed seed_type, } else if (seed_type == e_cluster_seed::MAX_PINS || seed_type == e_cluster_seed::MAX_INPUT_PINS) { //By pins per molecule (i.e. 
available pins on primitives, not pins in use) - for (auto blk : atom_ctx.nlist.blocks()) { - const t_pack_molecule* mol = atom_ctx.prepacker.get_atom_molecule(blk); - const t_molecule_stats molecule_stats = calc_molecule_stats(mol, atom_ctx.nlist); + for (auto blk : atom_nlist.blocks()) { + const t_pack_molecule* mol = prepacker.get_atom_molecule(blk); + const t_molecule_stats molecule_stats = calc_molecule_stats(mol, atom_nlist); int molecule_pins = 0; if (seed_type == e_cluster_seed::MAX_PINS) { @@ -2713,9 +1702,9 @@ std::vector initialize_seed_atoms(const e_cluster_seed seed_type, } } else if (seed_type == e_cluster_seed::BLEND2) { - for (auto blk : atom_ctx.nlist.blocks()) { - const t_pack_molecule* mol = atom_ctx.prepacker.get_atom_molecule(blk); - const t_molecule_stats molecule_stats = calc_molecule_stats(mol, atom_ctx.nlist); + for (auto blk : atom_nlist.blocks()) { + const t_pack_molecule* mol = prepacker.get_atom_molecule(blk); + const t_molecule_stats molecule_stats = calc_molecule_stats(mol, atom_nlist); float pin_ratio = vtr::safe_ratio(molecule_stats.num_pins, max_molecule_stats.num_pins); float input_pin_ratio = vtr::safe_ratio(molecule_stats.num_input_pins, max_molecule_stats.num_input_pins); @@ -2773,17 +1762,18 @@ std::vector initialize_seed_atoms(const e_cluster_seed seed_type, return seed_atoms; } -t_pack_molecule* get_highest_gain_seed_molecule(int& seed_index, const std::vector& seed_atoms) { - auto& atom_ctx = g_vpr_ctx.atom(); - +t_pack_molecule* get_highest_gain_seed_molecule(int& seed_index, + const std::vector& seed_atoms, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer) { while (seed_index < static_cast(seed_atoms.size())) { AtomBlockId blk_id = seed_atoms[seed_index++]; // Check if the atom has already been assigned to a cluster - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + if (!cluster_legalizer.is_atom_clustered(blk_id)) { t_pack_molecule* best = nullptr; - t_pack_molecule* 
molecule = atom_ctx.prepacker.get_atom_molecule(blk_id); + t_pack_molecule* molecule = prepacker.get_atom_molecule(blk_id); if (molecule->valid) { if (best == nullptr || (best->base_gain) < (molecule->base_gain)) { best = molecule; @@ -2808,7 +1798,7 @@ float get_molecule_gain(t_pack_molecule* molecule, std::map& float gain; int i; int num_introduced_inputs_of_indirectly_related_block; - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); gain = 0; float attraction_group_penalty = 0.1; @@ -2861,357 +1851,6 @@ float get_molecule_gain(t_pack_molecule* molecule, std::map& return gain; } -/* Determine if speculatively packed cur_pb is pin feasible - * Runtime is actually not that bad for this. It's worst case O(k^2) where k is the - * number of pb_graph pins. Can use hash tables or make incremental if becomes an issue. - */ -void try_update_lookahead_pins_used(t_pb* cur_pb) { - int i, j; - const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; - - // run recursively till a leaf (primitive) pb block is reached - if (pb_type->num_modes > 0 && cur_pb->name != nullptr) { - if (cur_pb->child_pbs != nullptr) { - for (i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { - if (cur_pb->child_pbs[i] != nullptr) { - for (j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { - try_update_lookahead_pins_used(&cur_pb->child_pbs[i][j]); - } - } - } - } - } else { - // find if this child (primitive) pb block has an atom mapped to it, - // if yes compute and mark lookahead pins used for that pb block - auto& atom_ctx = g_vpr_ctx.atom(); - AtomBlockId blk_id = atom_ctx.lookup.pb_atom(cur_pb); - if (pb_type->blif_model != nullptr && blk_id) { - compute_and_mark_lookahead_pins_used(blk_id); - } - } -} - -/* Resets nets used at different pin classes for determining pin feasibility */ -void reset_lookahead_pins_used(t_pb* cur_pb) { - int i, j; - const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; - if 
(cur_pb->pb_stats == nullptr) { - return; /* No pins used, no need to continue */ - } - - if (pb_type->num_modes > 0 && cur_pb->name != nullptr) { - for (i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { - cur_pb->pb_stats->lookahead_input_pins_used[i].clear(); - } - - for (i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { - cur_pb->pb_stats->lookahead_output_pins_used[i].clear(); - } - - if (cur_pb->child_pbs != nullptr) { - for (i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { - if (cur_pb->child_pbs[i] != nullptr) { - for (j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { - reset_lookahead_pins_used(&cur_pb->child_pbs[i][j]); - } - } - } - } - } -} - -/* Determine if pins of speculatively packed pb are legal */ -void compute_and_mark_lookahead_pins_used(const AtomBlockId blk_id) { - auto& atom_ctx = g_vpr_ctx.atom(); - - const t_pb* cur_pb = atom_ctx.lookup.atom_pb(blk_id); - VTR_ASSERT(cur_pb != nullptr); - - /* Walk through inputs, outputs, and clocks marking pins off of the same class */ - for (auto pin_id : atom_ctx.nlist.block_pins(blk_id)) { - auto net_id = atom_ctx.nlist.pin_net(pin_id); - - const t_pb_graph_pin* pb_graph_pin = find_pb_graph_pin(atom_ctx.nlist, atom_ctx.lookup, pin_id); - compute_and_mark_lookahead_pins_used_for_pin(pb_graph_pin, cur_pb, net_id); - } -} - -/** - * Given a pin and its assigned net, mark all pin classes that are affected. - * Check if connecting this pin to it's driver pin or to all sink pins will - * require leaving a pb_block starting from the parent pb_block of the - * primitive till the root block (depth = 0). If leaving a pb_block is - * required add this net to the pin class (to increment the number of used - * pins from this class) that should be used to leave the pb_block. 
- */ -void compute_and_mark_lookahead_pins_used_for_pin(const t_pb_graph_pin* pb_graph_pin, const t_pb* primitive_pb, const AtomNetId net_id) { - auto& atom_ctx = g_vpr_ctx.atom(); - - // starting from the parent pb of the input primitive go up in the hierarchy till the root block - for (auto cur_pb = primitive_pb->parent_pb; cur_pb; cur_pb = cur_pb->parent_pb) { - const auto depth = cur_pb->pb_graph_node->pb_type->depth; - const auto pin_class = pb_graph_pin->parent_pin_class[depth]; - VTR_ASSERT(pin_class != OPEN); - - const auto driver_blk_id = atom_ctx.nlist.net_driver_block(net_id); - - // if this primitive pin is an input pin - if (pb_graph_pin->port->type == IN_PORT) { - /* find location of net driver if exist in clb, NULL otherwise */ - // find the driver of the input net connected to the pin being studied - const auto driver_pin_id = atom_ctx.nlist.net_driver(net_id); - // find the id of the atom occupying the input primitive_pb - const auto prim_blk_id = atom_ctx.lookup.pb_atom(primitive_pb); - // find the pb block occupied by the driving atom - const auto driver_pb = atom_ctx.lookup.atom_pb(driver_blk_id); - // pb_graph_pin driving net_id in the driver pb block - t_pb_graph_pin* output_pb_graph_pin = nullptr; - // if the driver block is in the same clb as the input primitive block - if (atom_ctx.lookup.atom_clb(driver_blk_id) == atom_ctx.lookup.atom_clb(prim_blk_id)) { - // get pb_graph_pin driving the given net - output_pb_graph_pin = get_driver_pb_graph_pin(driver_pb, driver_pin_id); - } - - bool is_reachable = false; - - // if the driver pin is within the cluster - if (output_pb_graph_pin) { - // find if the driver pin can reach the input pin of the primitive or not - const t_pb* check_pb = driver_pb; - while (check_pb && check_pb != cur_pb) { - check_pb = check_pb->parent_pb; - } - if (check_pb) { - for (int i = 0; i < output_pb_graph_pin->num_connectable_primitive_input_pins[depth]; i++) { - if (pb_graph_pin == 
output_pb_graph_pin->list_of_connectable_input_pin_ptrs[depth][i]) { - is_reachable = true; - break; - } - } - } - } - - // Must use an input pin to connect the driver to the input pin of the given primitive, either the - // driver atom is not contained in the cluster or is contained but cannot reach the primitive pin - if (!is_reachable) { - // add net to lookahead_input_pins_used if not already added - auto it = std::find(cur_pb->pb_stats->lookahead_input_pins_used[pin_class].begin(), - cur_pb->pb_stats->lookahead_input_pins_used[pin_class].end(), net_id); - if (it == cur_pb->pb_stats->lookahead_input_pins_used[pin_class].end()) { - cur_pb->pb_stats->lookahead_input_pins_used[pin_class].push_back(net_id); - } - } - } else { - VTR_ASSERT(pb_graph_pin->port->type == OUT_PORT); - /* - * Determine if this net (which is driven from within this cluster) leaves this cluster - * (and hence uses an output pin). - */ - - bool net_exits_cluster = true; - int num_net_sinks = static_cast(atom_ctx.nlist.net_sinks(net_id).size()); - - if (pb_graph_pin->num_connectable_primitive_input_pins[depth] >= num_net_sinks) { - //It is possible the net is completely absorbed in the cluster, - //since this pin could (potentially) drive all the net's sinks - - /* Important: This runtime penalty looks a lot scarier than it really is. - * For high fan-out nets, I at most look at the number of pins within the - * cluster which limits runtime. - * - * DO NOT REMOVE THIS INITIAL FILTER WITHOUT CAREFUL ANALYSIS ON RUNTIME!!! - * - * Key Observation: - * For LUT-based designs it is impossible for the average fanout to exceed - * the number of LUT inputs so it's usually around 4-5 (pigeon-hole argument, - * if the average fanout is greater than the number of LUT inputs, where do - * the extra connections go? Therefore, average fanout must be capped to a - * small constant where the constant is equal to the number of LUT inputs). 
- * The real danger to runtime is when the number of sinks of a net gets doubled - */ - - //Check if all the net sinks are, in fact, inside this cluster - bool all_sinks_in_cur_cluster = true; - ClusterBlockId driver_clb = atom_ctx.lookup.atom_clb(driver_blk_id); - for (auto pin_id : atom_ctx.nlist.net_sinks(net_id)) { - auto sink_blk_id = atom_ctx.nlist.pin_block(pin_id); - if (atom_ctx.lookup.atom_clb(sink_blk_id) != driver_clb) { - all_sinks_in_cur_cluster = false; - break; - } - } - - if (all_sinks_in_cur_cluster) { - //All the sinks are part of this cluster, so the net may be fully absorbed. - // - //Verify this, by counting the number of net sinks reachable from the driver pin. - //If the count equals the number of net sinks then the net is fully absorbed and - //the net does not exit the cluster - /* TODO: I should cache the absorbed outputs, once net is absorbed, - * net is forever absorbed, no point in rechecking every time */ - if (net_sinks_reachable_in_cluster(pb_graph_pin, depth, net_id)) { - //All the sinks are reachable inside the cluster - net_exits_cluster = false; - } - } - } - - if (net_exits_cluster) { - /* This output must exit this cluster */ - cur_pb->pb_stats->lookahead_output_pins_used[pin_class].push_back(net_id); - } - } - } -} - -int net_sinks_reachable_in_cluster(const t_pb_graph_pin* driver_pb_gpin, const int depth, const AtomNetId net_id) { - size_t num_reachable_sinks = 0; - auto& atom_ctx = g_vpr_ctx.atom(); - - //Record the sink pb graph pins we are looking for - std::unordered_set sink_pb_gpins; - for (const AtomPinId pin_id : atom_ctx.nlist.net_sinks(net_id)) { - const t_pb_graph_pin* sink_pb_gpin = find_pb_graph_pin(atom_ctx.nlist, atom_ctx.lookup, pin_id); - VTR_ASSERT(sink_pb_gpin); - - sink_pb_gpins.insert(sink_pb_gpin); - } - - //Count how many sink pins are reachable - for (int i_prim_pin = 0; i_prim_pin < driver_pb_gpin->num_connectable_primitive_input_pins[depth]; ++i_prim_pin) { - const t_pb_graph_pin* reachable_pb_gpin 
= driver_pb_gpin->list_of_connectable_input_pin_ptrs[depth][i_prim_pin]; - - if (sink_pb_gpins.count(reachable_pb_gpin)) { - ++num_reachable_sinks; - if (num_reachable_sinks == atom_ctx.nlist.net_sinks(net_id).size()) { - return true; - } - } - } - - return false; -} - -/** - * Returns the pb_graph_pin of the atom pin defined by the driver_pin_id in the driver_pb - */ -t_pb_graph_pin* get_driver_pb_graph_pin(const t_pb* driver_pb, const AtomPinId driver_pin_id) { - auto& atom_ctx = g_vpr_ctx.atom(); - const auto driver_pb_type = driver_pb->pb_graph_node->pb_type; - int output_port = 0; - // find the port of the pin driving the net as well as the port model - auto driver_port_id = atom_ctx.nlist.pin_port(driver_pin_id); - auto driver_model_port = atom_ctx.nlist.port_model(driver_port_id); - // find the port id of the port containing the driving pin in the driver_pb_type - for (int i = 0; i < driver_pb_type->num_ports; i++) { - auto& prim_port = driver_pb_type->ports[i]; - if (prim_port.type == OUT_PORT) { - if (prim_port.model_port == driver_model_port) { - // get the output pb_graph_pin driving this input net - return &(driver_pb->pb_graph_node->output_pins[output_port][atom_ctx.nlist.pin_port_bit(driver_pin_id)]); - } - output_port++; - } - } - // the pin should be found - VTR_ASSERT(false); - return nullptr; -} - -/* Check if the number of available inputs/outputs for a pin class is sufficient for speculatively packed blocks */ -bool check_lookahead_pins_used(t_pb* cur_pb, t_ext_pin_util max_external_pin_util) { - const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; - - if (pb_type->num_modes > 0 && cur_pb->name) { - for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { - size_t class_size = cur_pb->pb_graph_node->input_pin_class_size[i]; - - if (cur_pb->is_root()) { - // Scale the class size by the maximum external pin utilization factor - // Use ceil to avoid classes of size 1 from being scaled to zero - class_size = 
std::ceil(max_external_pin_util.input_pin_util * class_size); - // if the number of pins already used is larger than class size, then the number of - // cluster inputs already used should be our constraint. Why is this needed? This is - // needed since when packing the seed block the maximum external pin utilization is - // used as 1.0 allowing molecules that are using up to all the cluster inputs to be - // packed legally. Therefore, if the seed block is already using more inputs than - // the allowed maximum utilization, this should become the new maximum pin utilization. - class_size = std::max(class_size, cur_pb->pb_stats->input_pins_used[i].size()); - } - - if (cur_pb->pb_stats->lookahead_input_pins_used[i].size() > class_size) { - return false; - } - } - - for (int i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { - size_t class_size = cur_pb->pb_graph_node->output_pin_class_size[i]; - if (cur_pb->is_root()) { - // Scale the class size by the maximum external pin utilization factor - // Use ceil to avoid classes of size 1 from being scaled to zero - class_size = std::ceil(max_external_pin_util.output_pin_util * class_size); - // if the number of pins already used is larger than class size, then the number of - // cluster outputs already used should be our constraint. Why is this needed? This is - // needed since when packing the seed block the maximum external pin utilization is - // used as 1.0 allowing molecules that are using up to all the cluster inputs to be - // packed legally. Therefore, if the seed block is already using more inputs than - // the allowed maximum utilization, this should become the new maximum pin utilization. 
- class_size = std::max(class_size, cur_pb->pb_stats->output_pins_used[i].size()); - } - - if (cur_pb->pb_stats->lookahead_output_pins_used[i].size() > class_size) { - return false; - } - } - - if (cur_pb->child_pbs) { - for (int i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { - if (cur_pb->child_pbs[i]) { - for (int j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { - if (!check_lookahead_pins_used(&cur_pb->child_pbs[i][j], max_external_pin_util)) - return false; - } - } - } - } - } - - return true; -} - -/* Speculation successful, commit input/output pins used */ -void commit_lookahead_pins_used(t_pb* cur_pb) { - const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; - - if (pb_type->num_modes > 0 && cur_pb->name) { - for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { - VTR_ASSERT(cur_pb->pb_stats->lookahead_input_pins_used[i].size() <= (unsigned int)cur_pb->pb_graph_node->input_pin_class_size[i]); - for (size_t j = 0; j < cur_pb->pb_stats->lookahead_input_pins_used[i].size(); j++) { - VTR_ASSERT(cur_pb->pb_stats->lookahead_input_pins_used[i][j]); - cur_pb->pb_stats->input_pins_used[i].insert({j, cur_pb->pb_stats->lookahead_input_pins_used[i][j]}); - } - } - - for (int i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { - VTR_ASSERT(cur_pb->pb_stats->lookahead_output_pins_used[i].size() <= (unsigned int)cur_pb->pb_graph_node->output_pin_class_size[i]); - for (size_t j = 0; j < cur_pb->pb_stats->lookahead_output_pins_used[i].size(); j++) { - VTR_ASSERT(cur_pb->pb_stats->lookahead_output_pins_used[i][j]); - cur_pb->pb_stats->output_pins_used[i].insert({j, cur_pb->pb_stats->lookahead_output_pins_used[i][j]}); - } - } - - if (cur_pb->child_pbs) { - for (int i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { - if (cur_pb->child_pbs[i]) { - for (int j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { - 
commit_lookahead_pins_used(&cur_pb->child_pbs[i][j]); - } - } - } - } - } -} - /** * Score unclustered atoms that are two hops away from current cluster * For example, consider a cluster that has a FF feeding an adder in another @@ -3219,30 +1858,32 @@ void commit_lookahead_pins_used(t_pb* cur_pb) { * this function should find other FFs that are feeding other inputs of this adder * since they are two hops away from the FF packed in this cluster */ -void load_transitive_fanout_candidates(ClusterBlockId clb_index, +void load_transitive_fanout_candidates(LegalizationClusterId legalization_cluster_id, t_pb_stats* pb_stats, - vtr::vector>& clb_inter_blk_nets, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets, int transitive_fanout_threshold) { - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; // iterate over all the nets that have pins in this cluster for (const auto net_id : pb_stats->marked_nets) { // only consider small nets to constrain runtime - if (int(atom_ctx.nlist.net_pins(net_id).size()) < transitive_fanout_threshold + 1) { + if (int(atom_nlist.net_pins(net_id).size()) < transitive_fanout_threshold + 1) { // iterate over all the pins of the net - for (const auto pin_id : atom_ctx.nlist.net_pins(net_id)) { - AtomBlockId atom_blk_id = atom_ctx.nlist.pin_block(pin_id); + for (const auto pin_id : atom_nlist.net_pins(net_id)) { + AtomBlockId atom_blk_id = atom_nlist.pin_block(pin_id); // get the transitive cluster - ClusterBlockId tclb = atom_ctx.lookup.atom_clb(atom_blk_id); + LegalizationClusterId tclb = cluster_legalizer.get_atom_cluster(atom_blk_id); // if the block connected to this pin is packed in another cluster - if (tclb != clb_index && tclb != ClusterBlockId::INVALID()) { + if (tclb != legalization_cluster_id && tclb != LegalizationClusterId::INVALID()) { // explore transitive nets from already packed cluster for (AtomNetId tnet : 
clb_inter_blk_nets[tclb]) { // iterate over all the pins of the net - for (AtomPinId tpin : atom_ctx.nlist.net_pins(tnet)) { - auto blk_id = atom_ctx.nlist.pin_block(tpin); + for (AtomPinId tpin : atom_nlist.net_pins(tnet)) { + auto blk_id = atom_nlist.pin_block(tpin); // This transitive atom is not packed, score and add - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + if (!cluster_legalizer.is_atom_clustered(blk_id)) { auto& transitive_fanout_candidates = pb_stats->transitive_fanout_candidates; if (pb_stats->gain.count(blk_id) == 0) { @@ -3250,7 +1891,7 @@ void load_transitive_fanout_candidates(ClusterBlockId clb_index, } else { pb_stats->gain[blk_id] += 0.001; } - t_pack_molecule* molecule = atom_ctx.prepacker.get_atom_molecule(blk_id); + t_pack_molecule* molecule = prepacker.get_atom_molecule(blk_id); if (molecule->valid) { transitive_fanout_candidates.insert({molecule->atom_block_ids[molecule->root], molecule}); } @@ -3265,9 +1906,8 @@ void load_transitive_fanout_candidates(ClusterBlockId clb_index, std::map> identify_primitive_candidate_block_types() { std::map> model_candidates; - auto& atom_ctx = g_vpr_ctx.atom(); - auto& atom_nlist = atom_ctx.nlist; - auto& device_ctx = g_vpr_ctx.device(); + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; + const DeviceContext& device_ctx = g_vpr_ctx.device(); std::set unique_models; // Find all logic models used in the netlist @@ -3295,7 +1935,7 @@ std::map> identify_primiti void print_seed_gains(const char* fname, const std::vector& seed_atoms, const vtr::vector& atom_gain, const vtr::vector& atom_criticality) { FILE* fp = vtr::fopen(fname, "w"); - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); //For prett formatting determine the maximum name length int max_name_len = strlen("atom_block_name"); @@ -3324,95 +1964,6 @@ void print_seed_gains(const char* fname, const std::vector& seed_at fclose(fp); } -/** - * This function takes a chain molecule, and the 
pb_graph_node that is chosen - * for packing the molecule's root block. Using the given root_primitive, this - * function will identify which chain id this molecule is being mapped to and - * will update the chain id value inside the chain info data structure of this - * molecule - */ -void update_molecule_chain_info(t_pack_molecule* chain_molecule, const t_pb_graph_node* root_primitive) { - VTR_ASSERT(chain_molecule->chain_info->chain_id == -1 && chain_molecule->chain_info->is_long_chain); - - auto chain_root_pins = chain_molecule->pack_pattern->chain_root_pins; - - // long chains should only be placed at the beginning of the chain - // Since for long chains the molecule size is already equal to the - // total number of adders in the cluster. Therefore, it should - // always be placed at the very first adder in this cluster. - for (size_t chainId = 0; chainId < chain_root_pins.size(); chainId++) { - if (chain_root_pins[chainId][0]->parent_node == root_primitive) { - chain_molecule->chain_info->chain_id = chainId; - chain_molecule->chain_info->first_packed_molecule = chain_molecule; - return; - } - } - - VTR_ASSERT(false); -} - -/** - * This function takes the root block of a chain molecule and a proposed - * placement primitive for this block. The function then checks if this - * chain root block has a placement constraint (such as being driven from - * outside the cluster) and returns the status of the placement accordingly. 
- */ -enum e_block_pack_status check_chain_root_placement_feasibility(const t_pb_graph_node* pb_graph_node, - const t_pack_molecule* molecule, - const AtomBlockId blk_id) { - enum e_block_pack_status block_pack_status = e_block_pack_status::BLK_PASSED; - auto& atom_ctx = g_vpr_ctx.atom(); - - bool is_long_chain = molecule->chain_info->is_long_chain; - - const auto& chain_root_pins = molecule->pack_pattern->chain_root_pins; - - t_model_ports* root_port = chain_root_pins[0][0]->port->model_port; - AtomNetId chain_net_id; - auto port_id = atom_ctx.nlist.find_atom_port(blk_id, root_port); - - if (port_id) { - chain_net_id = atom_ctx.nlist.port_net(port_id, chain_root_pins[0][0]->pin_number); - } - - // if this block is part of a long chain or it is driven by a cluster - // input pin we need to check the placement legality of this block - // Depending on the logic synthesis even small chains that can fit within one - // cluster might need to start at the top of the cluster as their input can be - // driven by a global gnd or vdd. Therefore even if this is not a long chain - // but its input pin is driven by a net, the placement legality is checked. - if (is_long_chain || chain_net_id) { - auto chain_id = molecule->chain_info->chain_id; - // if this chain has a chain id assigned to it (implies is_long_chain too) - if (chain_id != -1) { - // the chosen primitive should be a valid starting point for the chain - // long chains should only be placed at the top of the chain tieOff = 0 - if (pb_graph_node != chain_root_pins[chain_id][0]->parent_node) { - block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; - } - // the chain doesn't have an assigned chain_id yet - } else { - block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; - for (const auto& chain : chain_root_pins) { - for (auto tieOff : chain) { - // check if this chosen primitive is one of the possible - // starting points for this chain. 
- if (pb_graph_node == tieOff->parent_node) { - // this location matches with the one of the dedicated chain - // input from outside logic block, therefore it is feasible - block_pack_status = e_block_pack_status::BLK_PASSED; - break; - } - // long chains should only be placed at the top of the chain tieOff = 0 - if (is_long_chain) break; - } - } - } - } - - return block_pack_status; -} - /** * This function update the pb_type_count data structure by incrementing * the number of used pb_types in the given packed cluster t_pb @@ -3609,7 +2160,7 @@ t_pb* get_top_level_pb(t_pb* pb) { } void init_clb_atoms_lookup(vtr::vector>& atoms_lookup) { - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); auto& cluster_ctx = g_vpr_ctx.clustering(); atoms_lookup.resize(cluster_ctx.clb_nlist.blocks().size()); diff --git a/vpr/src/pack/cluster_util.h b/vpr/src/pack/cluster_util.h index 4f190645ff0..74afdefa9fb 100644 --- a/vpr/src/pack/cluster_util.h +++ b/vpr/src/pack/cluster_util.h @@ -2,6 +2,7 @@ #define CLUSTER_UTIL_H #include +#include "cluster_legalizer.h" #include "pack_types.h" #include "vtr_vector.h" @@ -20,9 +21,6 @@ class t_pack_molecule; constexpr int AAPACK_MAX_HIGH_FANOUT_EXPLORE = 10; /* For high-fanout nets that are ignored, consider a maximum of this many sinks, must be less than packer_opts.feasible_block_array_size */ constexpr int AAPACK_MAX_TRANSITIVE_EXPLORE = 40; /* When investigating transitive fanout connections in packing, consider a maximum of this many molecules, must be less than packer_opts.feasible_block_array_size */ -//Constant allowing all cluster pins to be used -const t_ext_pin_util FULL_EXTERNAL_PIN_UTIL(1., 1.); - enum e_gain_update { GAIN, NO_GAIN @@ -45,12 +43,6 @@ enum e_net_relation_to_clustered_block { OUTPUT }; -enum e_detailed_routing_stages { - E_DETAILED_ROUTE_AT_END_ONLY = 0, - E_DETAILED_ROUTE_FOR_EACH_ATOM, - E_DETAILED_ROUTE_INVALID -}; - /* Linked list structure. Stores one integer (iblk). 
*/ struct t_molecule_link { t_pack_molecule* moleculeptr; @@ -79,7 +71,6 @@ struct t_cluster_progress_stats { /* Useful data structures for creating or modifying clusters */ struct t_clustering_data { - vtr::vector*> intra_lb_routing; int* hill_climbing_inputs_avail; /* Keeps a linked list of the unclustered blocks to speed up looking for * @@ -106,8 +97,6 @@ struct t_clustering_data { /* Clustering helper functions */ /***********************************/ -void check_clustering(); - //calculate the initial timing at the start of packing stage void calc_init_packing_timing(const t_packer_opts& packer_opts, const t_analysis_opts& analysis_opts, @@ -121,18 +110,10 @@ void free_clustering_data(const t_packer_opts& packer_opts, t_clustering_data& clustering_data); //check clustering legality and output it -void check_and_output_clustering(const t_packer_opts& packer_opts, +void check_and_output_clustering(ClusterLegalizer& cluster_legalizer, + const t_packer_opts& packer_opts, const std::unordered_set& is_clock, - const t_arch* arch, - const int& num_clb, - const vtr::vector*>& intra_lb_routing); - -void get_max_cluster_size_and_pb_depth(int& max_cluster_size, - int& max_pb_depth); - -bool check_cluster_legality(const int& verbosity, - const int& detailed_routing_stage, - t_lb_router_data* router_data); + const t_arch* arch); bool is_atom_blk_in_pb(const AtomBlockId blk_id, const t_pb* pb); @@ -146,34 +127,12 @@ void remove_molecule_from_pb_stats_candidates(t_pack_molecule* molecule, t_pb* pb); void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, - t_cluster_placement_stats** cluster_placement_stats, - t_pb_graph_node*** primitives_list, const Prepacker& prepacker, t_clustering_data& clustering_data, std::unordered_map& net_output_feeds_driving_block_input, int& unclustered_list_head_size, int num_molecules); -void free_pb_stats_recursive(t_pb* pb); - -void try_update_lookahead_pins_used(t_pb* cur_pb); - -void reset_lookahead_pins_used(t_pb* 
cur_pb); - -void compute_and_mark_lookahead_pins_used(const AtomBlockId blk_id); - -void compute_and_mark_lookahead_pins_used_for_pin(const t_pb_graph_pin* pb_graph_pin, - const t_pb* primitive_pb, - const AtomNetId net_id); - -void commit_lookahead_pins_used(t_pb* cur_pb); - -bool check_lookahead_pins_used(t_pb* cur_pb, t_ext_pin_util max_external_pin_util); - -bool primitive_feasible(const AtomBlockId blk_id, t_pb* cur_pb); - -bool primitive_memory_sibling_feasible(const AtomBlockId blk_id, const t_pb_type* cur_pb_type, const AtomBlockId sibling_memory_blk); - t_pack_molecule* get_molecule_by_num_ext_inputs(const int ext_inps, const enum e_removal_policy remove_flag, t_cluster_placement_stats* cluster_placement_stats_ptr, @@ -192,142 +151,52 @@ void print_pack_status(int num_clb, int& mols_since_last_print, int device_width, int device_height, - AttractionInfo& attraction_groups); - -void rebuild_attraction_groups(AttractionInfo& attraction_groups); - -void record_molecule_failure(t_pack_molecule* molecule, t_pb* pb); - -e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_placement_stats_ptr, - t_pack_molecule* molecule, - t_pb_graph_node** primitives_list, - t_pb* pb, - int max_models, - int max_cluster_size, - ClusterBlockId clb_index, - int detailed_routing_stage, - t_lb_router_data* router_data, - int verbosity, - bool enable_pin_feasibility_filter, - int feasible_block_array_size, - t_ext_pin_util max_external_pin_util, - PartitionRegion& temp_cluster_pr, - NocGroupId& temp_noc_grp_id, - int force_site = -1); - -void try_fill_cluster(const t_packer_opts& packer_opts, + AttractionInfo& attraction_groups, + const ClusterLegalizer& cluster_legalizer); + +void rebuild_attraction_groups(AttractionInfo& attraction_groups, + const ClusterLegalizer& cluster_legalizer); + +void try_fill_cluster(ClusterLegalizer& cluster_legalizer, + const Prepacker& prepacker, + const t_packer_opts& packer_opts, t_cluster_placement_stats* 
cur_cluster_placement_stats_ptr, t_pack_molecule*& prev_molecule, t_pack_molecule*& next_molecule, int& num_same_molecules, - t_pb_graph_node** primitives_list, t_cluster_progress_stats& cluster_stats, int num_clb, - const int num_models, - const int max_cluster_size, - const ClusterBlockId clb_index, - const int detailed_routing_stage, + const LegalizationClusterId legalization_cluster_id, AttractionInfo& attraction_groups, - vtr::vector>& clb_inter_blk_nets, + vtr::vector>& clb_inter_blk_nets, bool allow_unrelated_clustering, const int& high_fanout_threshold, const std::unordered_set& is_clock, const std::unordered_set& is_global, const std::shared_ptr& timing_info, - t_lb_router_data* router_data, - t_ext_pin_util target_ext_pin_util, - PartitionRegion& temp_cluster_pr, - NocGroupId& temp_noc_grp_id, e_block_pack_status& block_pack_status, t_molecule_link* unclustered_list_head, const int& unclustered_list_head_size, std::unordered_map& net_output_feeds_driving_block_input, std::map>& primitive_candidate_block_types); -t_pack_molecule* save_cluster_routing_and_pick_new_seed(const t_packer_opts& packer_opts, - const int& num_clb, - const std::vector& seed_atoms, - const int& num_blocks_hill_added, - vtr::vector*>& intra_lb_routing, - int& seedindex, - t_cluster_progress_stats& cluster_stats, - t_lb_router_data* router_data); - void store_cluster_info_and_free(const t_packer_opts& packer_opts, - const ClusterBlockId& clb_index, + const LegalizationClusterId clb_index, const t_logical_block_type_ptr logic_block_type, const t_pb_type* le_pb_type, std::vector& le_count, - vtr::vector>& clb_inter_blk_nets); - -void free_data_and_requeue_used_mols_if_illegal(const ClusterBlockId& clb_index, - const int& savedseedindex, - std::map& num_used_type_instances, - int& num_clb, - int& seedindex); - -enum e_block_pack_status try_place_atom_block_rec(const t_pb_graph_node* pb_graph_node, - const AtomBlockId blk_id, - t_pb* cb, - t_pb** parent, - const int max_models, - const 
int max_cluster_size, - const ClusterBlockId clb_index, - const t_cluster_placement_stats* cluster_placement_stats_ptr, - const t_pack_molecule* molecule, - t_lb_router_data* router_data, - int verbosity, - const int feasible_block_array_size); - - -/** - * @brief Checks whether an atom block can be added to a clustered block - * without violating floorplanning constraints. It also updates the - * clustered block's floorplanning region by taking the intersection of - * its current region and the floorplanning region of the given atom block. - * - * @param blk_id A unique ID for the candidate atom block to be added to the growing cluster. - * @param clb_index A unique ID for the clustered block that the atom block wants to be added to. - * @param verbosity Controls the detail level of log information printed by this function. - * @param temp_cluster_pr The floorplanning regions of the clustered block. This function may - * update the given region. - * @param cluster_pr_needs_update Indicates whether the floorplanning region of the clustered block - * have updated. - * @return True if adding the given atom block to the clustered block does not violated any - * floorplanning constraints. - */ -bool atom_cluster_floorplanning_check(AtomBlockId blk_id, - ClusterBlockId clb_index, - int verbosity, - PartitionRegion& temp_cluster_pr, - bool& cluster_pr_needs_update); -/** - * @brief Checks if an atom block can be added to a clustered block without - * violating NoC group constraints. For passing this check, either both clustered - * and atom blocks must belong to the same NoC group, or at least one of them should - * not belong to any NoC group. If the atom block is associated with a NoC group while - * the clustered block does not belong to any NoC groups, the NoC group ID of the atom block - * is assigned to the clustered block when the atom is added to it. - * block - * - * @param blk_id A unique ID for the candidate atom block to be added to the growing cluster. 
- * @param clb_index A unique ID for the clustered block that the atom block wants to be added to. - * @param verbosity Controls the detail level of log information printed by this function. - * @param temp_cluster_noc_grp_id The NoC group ID of the clustered block. This function may update - * this ID. - * @return True if adding the atom block the cluster does not violate NoC group constraints. - */ -bool atom_cluster_noc_group_check(AtomBlockId blk_id, - ClusterBlockId clb_index, - int verbosity, - NocGroupId& temp_cluster_noc_grp_id); + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets); -void revert_place_atom_block(const AtomBlockId blk_id, t_lb_router_data* router_data); - -void update_connection_gain_values(const AtomNetId net_id, const AtomBlockId clustered_blk_id, t_pb* cur_pb, enum e_net_relation_to_clustered_block net_relation_to_clustered_block); +void update_connection_gain_values(const AtomNetId net_id, + const AtomBlockId clustered_blk_id, + t_pb* cur_pb, + const ClusterLegalizer& cluster_legalizer, + enum e_net_relation_to_clustered_block net_relation_to_clustered_block); void update_timing_gain_values(const AtomNetId net_id, t_pb* cur_pb, + const ClusterLegalizer& cluster_legalizer, enum e_net_relation_to_clustered_block net_relation_to_clustered_block, const SetupTimingInfo& timing_info, const std::unordered_set& is_global, @@ -336,6 +205,7 @@ void update_timing_gain_values(const AtomNetId net_id, void mark_and_update_partial_gain(const AtomNetId net_id, enum e_gain_update gain_flag, const AtomBlockId clustered_blk_id, + const ClusterLegalizer& cluster_legalizer, bool timing_driven, bool connection_driven, enum e_net_relation_to_clustered_block net_relation_to_clustered_block, @@ -347,7 +217,7 @@ void mark_and_update_partial_gain(const AtomNetId net_id, void update_total_gain(float alpha, float beta, bool timing_driven, bool connection_driven, t_pb* pb, AttractionInfo& attraction_groups); void update_cluster_stats(const 
t_pack_molecule* molecule, - const ClusterBlockId clb_index, + const ClusterLegalizer& cluster_legalizer, const std::unordered_set& is_clock, const std::unordered_set& is_global, const bool global_clocks, @@ -360,34 +230,25 @@ void update_cluster_stats(const t_pack_molecule* molecule, AttractionInfo& attraction_groups, std::unordered_map& net_output_feeds_driving_block_input); -void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, - t_pb_graph_node** primitives_list, - ClusterBlockId clb_index, +void start_new_cluster(ClusterLegalizer& cluster_legalizer, + LegalizationClusterId& legalization_cluster_id, t_pack_molecule* molecule, std::map& num_used_type_instances, const float target_device_utilization, - const int num_models, - const int max_cluster_size, const t_arch* arch, const std::string& device_layout_name, - std::vector* lb_type_rr_graphs, - t_lb_router_data** router_data, - const int detailed_routing_stage, - ClusteredNetlist* clb_nlist, const std::map>& primitive_candidate_block_types, int verbosity, - bool enable_pin_feasibility_filter, - bool balance_block_type_utilization, - const int feasible_block_array_size, - PartitionRegion& temp_cluster_pr, - NocGroupId& temp_noc_grp_id); + bool balance_block_type_utilization); t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, AttractionInfo& attraction_groups, const enum e_gain_type gain_mode, t_cluster_placement_stats* cluster_placement_stats_ptr, - vtr::vector>& clb_inter_blk_nets, - const ClusterBlockId cluster_index, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets, + const LegalizationClusterId cluster_index, bool prioritize_transitive_connectivity, int transitive_fanout_threshold, const int feasible_block_array_size, @@ -395,30 +256,40 @@ t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur_pb, t_cluster_placement_stats* 
cluster_placement_stats_ptr, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, const int feasible_block_array_size, AttractionInfo& attraction_groups); void add_cluster_molecule_candidates_by_highfanout_connectivity(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, const int feasible_block_array_size, AttractionInfo& attraction_groups); void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, AttractionInfo& attraction_groups, const int feasible_block_array_size, - ClusterBlockId clb_index, + LegalizationClusterId clb_index, std::map>& primitive_candidate_block_types); void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, - vtr::vector>& clb_inter_blk_nets, - const ClusterBlockId cluster_index, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets, + const LegalizationClusterId cluster_index, int transitive_fanout_threshold, const int feasible_block_array_size, AttractionInfo& attraction_groups); -bool check_free_primitives_for_molecule_atoms(t_pack_molecule* molecule, t_cluster_placement_stats* cluster_placement_stats_ptr); +bool check_free_primitives_for_molecule_atoms(t_pack_molecule* molecule, + t_cluster_placement_stats* cluster_placement_stats_ptr, + const ClusterLegalizer& cluster_legalizer); t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, AttractionInfo& attraction_groups, @@ -428,8 +299,10 @@ t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, const int feasible_block_array_size, int* num_unrelated_clustering_attempts, t_cluster_placement_stats* cluster_placement_stats_ptr, - vtr::vector>& clb_inter_blk_nets, - ClusterBlockId cluster_index, + const 
Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets, + LegalizationClusterId cluster_index, int verbosity, t_molecule_link* unclustered_list_head, const int& unclustered_list_head_size, @@ -439,31 +312,27 @@ t_molecule_stats calc_molecule_stats(const t_pack_molecule* molecule, const Atom std::vector initialize_seed_atoms(const e_cluster_seed seed_type, const t_molecule_stats& max_molecule_stats, + const Prepacker& prepacker, const vtr::vector& atom_criticality); -t_pack_molecule* get_highest_gain_seed_molecule(int& seed_index, const std::vector& seed_atoms); +t_pack_molecule* get_highest_gain_seed_molecule(int& seed_index, + const std::vector& seed_atoms, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer); float get_molecule_gain(t_pack_molecule* molecule, std::map& blk_gain, AttractGroupId cluster_attraction_group_id, AttractionInfo& attraction_groups, int num_molecule_failures); -int net_sinks_reachable_in_cluster(const t_pb_graph_pin* driver_pb_gpin, const int depth, const AtomNetId net_id); - void print_seed_gains(const char* fname, const std::vector& seed_atoms, const vtr::vector& atom_gain, const vtr::vector& atom_criticality); -void load_transitive_fanout_candidates(ClusterBlockId cluster_index, +void load_transitive_fanout_candidates(LegalizationClusterId cluster_index, t_pb_stats* pb_stats, - vtr::vector>& clb_inter_blk_nets, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets, int transitive_fanout_threshold); std::map> identify_primitive_candidate_block_types(); -void update_molecule_chain_info(t_pack_molecule* chain_molecule, const t_pb_graph_node* root_primitive); - -enum e_block_pack_status check_chain_root_placement_feasibility(const t_pb_graph_node* pb_graph_node, - const t_pack_molecule* molecule, - const AtomBlockId blk_id); - -t_pb_graph_pin* get_driver_pb_graph_pin(const t_pb* driver_pb, const AtomPinId 
driver_pin_id); - size_t update_pb_type_count(const t_pb* pb, std::map& pb_type_count, size_t depth); void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_type, const t_pb_type* le_pb_type, std::vector& le_count); @@ -480,9 +349,5 @@ void print_le_count(std::vector& le_count, const t_pb_type* le_pb_type); t_pb* get_top_level_pb(t_pb* pb); -bool cleanup_pb(t_pb* pb); - -void alloc_and_load_pb_stats(t_pb* pb, const int feasible_block_array_size); - void init_clb_atoms_lookup(vtr::vector>& atoms_lookup); #endif diff --git a/vpr/src/pack/constraints_report.cpp b/vpr/src/pack/constraints_report.cpp index 5c53744fd5e..6b671331c74 100644 --- a/vpr/src/pack/constraints_report.cpp +++ b/vpr/src/pack/constraints_report.cpp @@ -1,9 +1,11 @@ #include "constraints_report.h" +#include "cluster_legalizer.h" +#include "globals.h" +#include "grid_tile_lookup.h" -bool floorplan_constraints_regions_overfull() { +bool floorplan_constraints_regions_overfull(const ClusterLegalizer& cluster_legalizer) { GridTileLookup grid_tiles; - auto& cluster_ctx = g_vpr_ctx.clustering(); auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); auto& device_ctx = g_vpr_ctx.device(); @@ -12,15 +14,13 @@ bool floorplan_constraints_regions_overfull() { // keep record of how many blocks of each type are assigned to each PartitionRegion std::unordered_map> pr_count_info; - for (const ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { - if (!is_cluster_constrained(blk_id)) { + for (LegalizationClusterId cluster_id : cluster_legalizer.clusters()) { + const PartitionRegion& pr = cluster_legalizer.get_cluster_pr(cluster_id); + if (pr.empty()) continue; - } - t_logical_block_type_ptr bt = cluster_ctx.clb_nlist.block_type(blk_id); - const PartitionRegion& pr = floorplanning_ctx.cluster_constraints[blk_id]; + t_logical_block_type_ptr bt = cluster_legalizer.get_cluster_type(cluster_id); auto got = pr_count_info.find(pr); - if (got == pr_count_info.end()) { std::vector 
block_type_counts(block_types.size(), 0); block_type_counts[bt->index]++; diff --git a/vpr/src/pack/constraints_report.h b/vpr/src/pack/constraints_report.h index 46af3fa83db..c10d1183238 100644 --- a/vpr/src/pack/constraints_report.h +++ b/vpr/src/pack/constraints_report.h @@ -5,9 +5,7 @@ #ifndef VPR_SRC_PACK_CONSTRAINTS_REPORT_H_ #define VPR_SRC_PACK_CONSTRAINTS_REPORT_H_ -#include "globals.h" -#include "grid_tile_lookup.h" -#include "place_constraints.h" +class ClusterLegalizer; /** * @brief Check if any constraint partition regions are overfull, @@ -27,6 +25,6 @@ * * @return True if there is at least one overfull partition. */ -bool floorplan_constraints_regions_overfull(); +bool floorplan_constraints_regions_overfull(const ClusterLegalizer& cluster_legalizer); #endif /* VPR_SRC_PACK_CONSTRAINTS_REPORT_H_ */ diff --git a/vpr/src/pack/noc_aware_cluster_util.cpp b/vpr/src/pack/noc_aware_cluster_util.cpp index 51319175c12..87f981605de 100644 --- a/vpr/src/pack/noc_aware_cluster_util.cpp +++ b/vpr/src/pack/noc_aware_cluster_util.cpp @@ -1,12 +1,12 @@ #include "noc_aware_cluster_util.h" +#include "atom_netlist.h" #include "globals.h" +#include "vpr_types.h" #include -std::vector find_noc_router_atoms() { - const auto& atom_ctx = g_vpr_ctx.atom(); - +std::vector find_noc_router_atoms(const AtomNetlist& atom_netlist) { // NoC router atoms are expected to have a specific blif model const std::string noc_router_blif_model_name = "noc_router_adapter_block"; @@ -14,8 +14,8 @@ std::vector find_noc_router_atoms() { std::vector noc_router_atoms; // iterate over all atoms and find those whose blif model matches - for (auto atom_id : atom_ctx.nlist.blocks()) { - const t_model* model = atom_ctx.nlist.block_model(atom_id); + for (auto atom_id : atom_netlist.blocks()) { + const t_model* model = atom_netlist.block_model(atom_id); if (noc_router_blif_model_name == model->name) { noc_router_atoms.push_back(atom_id); } @@ -24,10 +24,10 @@ std::vector find_noc_router_atoms() { return 
noc_router_atoms; } -void update_noc_reachability_partitions(const std::vector& noc_atoms) { - const auto& atom_ctx = g_vpr_ctx.atom(); - auto& cl_helper_ctx = g_vpr_ctx.mutable_cl_helper(); - const auto& high_fanout_thresholds = g_vpr_ctx.cl_helper().high_fanout_thresholds; +void update_noc_reachability_partitions(const std::vector& noc_atoms, + const AtomNetlist& atom_netlist, + const t_pack_high_fanout_thresholds& high_fanout_thresholds, + vtr::vector& atom_noc_grp_id) { const auto& grid = g_vpr_ctx.device().grid; t_logical_block_type_ptr logic_block_type = infer_logic_block_type(grid); @@ -35,11 +35,11 @@ void update_noc_reachability_partitions(const std::vector& noc_atom const size_t high_fanout_threshold = high_fanout_thresholds.get_threshold(logical_block_name); // get the total number of atoms - const size_t n_atoms = atom_ctx.nlist.blocks().size(); + const size_t n_atoms = atom_netlist.blocks().size(); vtr::vector atom_visited(n_atoms, false); - cl_helper_ctx.atom_noc_grp_id.resize(n_atoms, NocGroupId::INVALID()); + atom_noc_grp_id.resize(n_atoms, NocGroupId::INVALID()); int noc_grp_id_cnt = 0; @@ -68,24 +68,24 @@ void update_noc_reachability_partitions(const std::vector& noc_atom AtomBlockId current_atom = q.front(); q.pop(); - cl_helper_ctx.atom_noc_grp_id[current_atom] = noc_grp_id; + atom_noc_grp_id[current_atom] = noc_grp_id; - for(auto pin : atom_ctx.nlist.block_pins(current_atom)) { - AtomNetId net_id = atom_ctx.nlist.pin_net(pin); - size_t net_fanout = atom_ctx.nlist.net_sinks(net_id).size(); + for(auto pin : atom_netlist.block_pins(current_atom)) { + AtomNetId net_id = atom_netlist.pin_net(pin); + size_t net_fanout = atom_netlist.net_sinks(net_id).size(); if (net_fanout >= high_fanout_threshold) { continue; } - AtomBlockId driver_atom_id = atom_ctx.nlist.net_driver_block(net_id); + AtomBlockId driver_atom_id = atom_netlist.net_driver_block(net_id); if (!atom_visited[driver_atom_id]) { q.push(driver_atom_id); atom_visited[driver_atom_id] = true; } 
- for (auto sink_pin : atom_ctx.nlist.net_sinks(net_id)) { - AtomBlockId sink_atom_id = atom_ctx.nlist.pin_block(sink_pin); + for (auto sink_pin : atom_netlist.net_sinks(net_id)) { + AtomBlockId sink_atom_id = atom_netlist.pin_block(sink_pin); if (!atom_visited[sink_atom_id]) { q.push(sink_atom_id); atom_visited[sink_atom_id] = true; @@ -96,4 +96,4 @@ void update_noc_reachability_partitions(const std::vector& noc_atom } } -} \ No newline at end of file +} diff --git a/vpr/src/pack/noc_aware_cluster_util.h b/vpr/src/pack/noc_aware_cluster_util.h index abeb8d8ba95..6f930a21944 100644 --- a/vpr/src/pack/noc_aware_cluster_util.h +++ b/vpr/src/pack/noc_aware_cluster_util.h @@ -17,8 +17,12 @@ */ #include +#include "noc_data_types.h" +#include "vtr_vector.h" -#include "vpr_types.h" +class AtomNetlist; +class AtomBlockId; +class t_pack_high_fanout_thresholds; /** * @brief Iterates over all atom blocks and check whether @@ -26,7 +30,7 @@ * * @return The atom block IDs of the NoC router blocks in the netlist. */ -std::vector find_noc_router_atoms(); +std::vector find_noc_router_atoms(const AtomNetlist& atom_netlist); /** @@ -37,6 +41,9 @@ std::vector find_noc_router_atoms(); * * @param noc_atoms The atom block IDs of the NoC router blocks in the netlist. 
*/ -void update_noc_reachability_partitions(const std::vector& noc_atoms); +void update_noc_reachability_partitions(const std::vector& noc_atoms, + const AtomNetlist& atom_netlist, + const t_pack_high_fanout_thresholds& high_fanout_threshold, + vtr::vector& atom_noc_grp_id); #endif diff --git a/vpr/src/pack/output_clustering.cpp b/vpr/src/pack/output_clustering.cpp index cee87ad51a1..c659837c5fb 100644 --- a/vpr/src/pack/output_clustering.cpp +++ b/vpr/src/pack/output_clustering.cpp @@ -8,10 +8,13 @@ #include #include +#include "cluster_legalizer.h" +#include "clustered_netlist.h" +#include "physical_types.h" +#include "prepack.h" +#include "vpr_context.h" #include "vtr_assert.h" #include "vtr_log.h" -#include "vtr_digest.h" -#include "vtr_memory.h" #include "vpr_types.h" #include "vpr_error.h" @@ -20,11 +23,8 @@ #include "globals.h" #include "atom_netlist.h" -#include "pack_types.h" -#include "cluster_router.h" #include "pb_type_graph.h" #include "output_clustering.h" -#include "read_xml_arch_file.h" #include "vpr_utils.h" #include "pack.h" @@ -36,49 +36,84 @@ static void print_clustering_stats(char* block_name, int num_block_type, float n /**************** Subroutine definitions ************************************/ -/* Prints out one cluster (clb). Both the external pins and the * - * internal connections are printed out. 
*/ -static void print_stats() { - int ipin; - unsigned int itype; - int total_nets_absorbed; - std::unordered_map nets_absorbed; - - int *num_clb_types, *num_clb_inputs_used, *num_clb_outputs_used; - - auto& device_ctx = g_vpr_ctx.device(); - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - - num_clb_types = num_clb_inputs_used = num_clb_outputs_used = nullptr; - - num_clb_types = new int[device_ctx.logical_block_types.size()]; - num_clb_inputs_used = new int[device_ctx.logical_block_types.size()]; - num_clb_outputs_used = new int[device_ctx.logical_block_types.size()]; - - for (int i = 0; i < (int)device_ctx.logical_block_types.size(); i++) { - num_clb_types[i] = 0; - num_clb_inputs_used[i] = 0; - num_clb_outputs_used[i] = 0; +static void count_clb_inputs_and_outputs_from_pb_route(const t_pb* pb, + t_logical_block_type_ptr logical_block, + int ipin, + e_pin_type pin_type, + std::unordered_map& nets_absorbed, + int num_clb_inputs_used[], + int num_clb_outputs_used[]) { + VTR_ASSERT_DEBUG(!pb->pb_route.empty()); + int pb_graph_pin_id = get_pb_graph_node_pin_from_pb_graph_node(pb->pb_graph_node, ipin)->pin_count_in_cluster; + + if (pb->pb_route.count(pb_graph_pin_id)) { + //Pin used + AtomNetId atom_net_id = pb->pb_route[pb_graph_pin_id].atom_net_id; + if (atom_net_id) { + nets_absorbed[atom_net_id] = false; + if (pin_type == RECEIVER) { + num_clb_inputs_used[logical_block->index]++; + } else if (pin_type == DRIVER) { + num_clb_outputs_used[logical_block->index]++; + } + } } +} - for (auto net_id : atom_ctx.nlist.nets()) { - nets_absorbed[net_id] = true; +static void count_stats_from_legalizer(const ClusterLegalizer& cluster_legalizer, + std::unordered_map& nets_absorbed, + int num_clb_types[], + int num_clb_inputs_used[], + int num_clb_outputs_used[]) { + for (LegalizationClusterId cluster_id : cluster_legalizer.clusters()) { + t_logical_block_type_ptr logical_block = cluster_legalizer.get_cluster_type(cluster_id); + 
t_physical_tile_type_ptr physical_tile = pick_physical_type(logical_block); + for (int ipin = 0; ipin < logical_block->pb_type->num_pins; ipin++) { + int physical_pin = get_physical_pin(physical_tile, logical_block, ipin); + e_pin_type pin_type = get_pin_type_from_pin_physical_num(physical_tile, physical_pin); + + const t_pb* pb = cluster_legalizer.get_cluster_pb(cluster_id); + if (pb->pb_route.empty()) + continue; + count_clb_inputs_and_outputs_from_pb_route(pb, + logical_block, + ipin, + pin_type, + nets_absorbed, + num_clb_inputs_used, + num_clb_outputs_used); + } + num_clb_types[logical_block->index]++; } - /* Counters used only for statistics purposes. */ +} - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - auto logical_block = cluster_ctx.clb_nlist.block_type(blk_id); - auto physical_tile = pick_physical_type(logical_block); - for (ipin = 0; ipin < logical_block->pb_type->num_pins; ipin++) { +static void count_stats_from_netlist(std::unordered_map& nets_absorbed, + int num_clb_types[], + int num_clb_inputs_used[], + int num_clb_outputs_used[]) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + const ClusteredNetlist& clb_nlist = g_vpr_ctx.clustering().clb_nlist; + + for (ClusterBlockId blk_id : clb_nlist.blocks()) { + t_logical_block_type_ptr logical_block = clb_nlist.block_type(blk_id); + t_physical_tile_type_ptr physical_tile = pick_physical_type(logical_block); + for (int ipin = 0; ipin < logical_block->pb_type->num_pins; ipin++) { int physical_pin = get_physical_pin(physical_tile, logical_block, ipin); - auto pin_type = get_pin_type_from_pin_physical_num(physical_tile, physical_pin); - - if (cluster_ctx.clb_nlist.block_pb(blk_id)->pb_route.empty()) { - ClusterNetId clb_net_id = cluster_ctx.clb_nlist.block_net(blk_id, ipin); + e_pin_type pin_type = get_pin_type_from_pin_physical_num(physical_tile, physical_pin); + + if (!clb_nlist.block_pb(blk_id)->pb_route.empty()) { + count_clb_inputs_and_outputs_from_pb_route(clb_nlist.block_pb(blk_id), + 
logical_block, + ipin, + pin_type, + nets_absorbed, + num_clb_inputs_used, + num_clb_outputs_used); + } else { + ClusterNetId clb_net_id = clb_nlist.block_net(blk_id, ipin); if (clb_net_id != ClusterNetId::INVALID()) { - auto net_id = atom_ctx.lookup.atom_net(clb_net_id); + AtomNetId net_id = atom_ctx.lookup.atom_net(clb_net_id); VTR_ASSERT(net_id); nets_absorbed[net_id] = false; @@ -88,30 +123,45 @@ static void print_stats() { num_clb_outputs_used[logical_block->index]++; } } - } else { - int pb_graph_pin_id = get_pb_graph_node_pin_from_block_pin(blk_id, ipin)->pin_count_in_cluster; - - const t_pb* pb = cluster_ctx.clb_nlist.block_pb(blk_id); - if (pb->pb_route.count(pb_graph_pin_id)) { - //Pin used - auto atom_net_id = pb->pb_route[pb_graph_pin_id].atom_net_id; - if (atom_net_id) { - nets_absorbed[atom_net_id] = false; - if (pin_type == RECEIVER) { - num_clb_inputs_used[logical_block->index]++; - } else if (pin_type == DRIVER) { - num_clb_outputs_used[logical_block->index]++; - } - } - } } } num_clb_types[logical_block->index]++; } +} + +/* Prints statistics about the clustering: the number of blocks of * + * each logical block type and the number of absorbed atom nets. */ +static void print_stats(const ClusterLegalizer* cluster_legalizer_ptr, bool from_legalizer) { + const DeviceContext& device_ctx = g_vpr_ctx.device(); + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; + + int* num_clb_types = new int[device_ctx.logical_block_types.size()]; + int* num_clb_inputs_used = new int[device_ctx.logical_block_types.size()]; + int* num_clb_outputs_used = new int[device_ctx.logical_block_types.size()]; + + for (size_t i = 0; i < device_ctx.logical_block_types.size(); i++) { + num_clb_types[i] = 0; + num_clb_inputs_used[i] = 0; + num_clb_outputs_used[i] = 0; + } + + std::unordered_map nets_absorbed; + for (AtomNetId net_id : atom_nlist.nets()) { + nets_absorbed[net_id] = true; + } + + /* Counters used only for statistics purposes. 
*/ + if (from_legalizer) { + VTR_ASSERT(cluster_legalizer_ptr != nullptr); + count_stats_from_legalizer(*cluster_legalizer_ptr, nets_absorbed, num_clb_types, num_clb_inputs_used, num_clb_outputs_used); + } else { + VTR_ASSERT(cluster_legalizer_ptr == nullptr); + count_stats_from_netlist(nets_absorbed, num_clb_types, num_clb_inputs_used, num_clb_outputs_used); + } print_clustering_stats_header(); - for (itype = 0; itype < device_ctx.logical_block_types.size(); itype++) { + for (unsigned int itype = 0; itype < device_ctx.logical_block_types.size(); itype++) { if (num_clb_types[itype] == 0) { print_clustering_stats(device_ctx.logical_block_types[itype].name, num_clb_types[itype], 0.0, 0.0); } else { @@ -121,14 +171,14 @@ static void print_stats() { } } - total_nets_absorbed = 0; - for (auto net_id : atom_ctx.nlist.nets()) { + int total_nets_absorbed = 0; + for (AtomNetId net_id : atom_nlist.nets()) { if (nets_absorbed[net_id] == true) { total_nets_absorbed++; } } VTR_LOG("Absorbed logical nets %d out of %d nets, %d nets not absorbed.\n", - total_nets_absorbed, (int)atom_ctx.nlist.nets().size(), (int)atom_ctx.nlist.nets().size() - total_nets_absorbed); + total_nets_absorbed, (int)atom_nlist.nets().size(), (int)atom_nlist.nets().size() - total_nets_absorbed); delete[] num_clb_types; delete[] num_clb_inputs_used; delete[] num_clb_outputs_used; @@ -162,12 +212,12 @@ static const char* clustering_xml_net_text(AtomNetId net_id) { /* This routine prints out the atom_ctx.nlist net name (or open). * net_num is the index of the atom_ctx.nlist net to be printed */ + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; if (!net_id) { return "open"; } else { - auto& atom_ctx = g_vpr_ctx.atom(); - return atom_ctx.nlist.net_name(net_id).c_str(); + return atom_nlist.net_name(net_id).c_str(); } } @@ -218,7 +268,7 @@ static std::string clustering_xml_interconnect_text(t_logical_block_type_ptr typ * cannot simply be marked open as that would lose the routing information. 
Instead, a block must be * output that reflects the routing resources used. This function handles both cases. */ -static void clustering_xml_open_block(pugi::xml_node parent_node, t_logical_block_type_ptr type, const IntraLbPbPinLookup& pb_graph_pin_lookup_from_index_by_type, t_pb_graph_node* pb_graph_node, int pb_index, bool is_used, const t_pb_routes& pb_route) { +static void clustering_xml_open_block(pugi::xml_node& parent_node, t_logical_block_type_ptr type, const IntraLbPbPinLookup& pb_graph_pin_lookup_from_index_by_type, t_pb_graph_node* pb_graph_node, int pb_index, bool is_used, const t_pb_routes& pb_route) { int i, j, k, m; const t_pb_type *pb_type, *child_pb_type; t_mode* mode = nullptr; @@ -362,7 +412,7 @@ static void clustering_xml_open_block(pugi::xml_node parent_node, t_logical_bloc } /* outputs a block that is used (i.e. has configuration) and all of its child blocks */ -static void clustering_xml_block(pugi::xml_node parent_node, t_logical_block_type_ptr type, const IntraLbPbPinLookup& pb_graph_pin_lookup_from_index_by_type, t_pb* pb, int pb_index, const t_pb_routes& pb_route) { +static void clustering_xml_block(pugi::xml_node& parent_node, t_logical_block_type_ptr type, const IntraLbPbPinLookup& pb_graph_pin_lookup_from_index_by_type, t_pb* pb, int pb_index, const t_pb_routes& pb_route) { int i, j, k, m; const t_pb_type *pb_type, *child_pb_type; t_pb_graph_node* pb_graph_node; @@ -559,20 +609,42 @@ static void clustering_xml_block(pugi::xml_node parent_node, t_logical_block_typ } } +static void clustering_xml_blocks_from_legalizer(pugi::xml_node& block_node, + const IntraLbPbPinLookup& pb_graph_pin_lookup_from_index_by_type, + ClusterLegalizer& cluster_legalizer) { + // Finalize the cluster legalization by ensuring that each cluster pb has + // its pb_route calculated. 
+ cluster_legalizer.finalize(); + for (LegalizationClusterId cluster_id : cluster_legalizer.clusters()) { + clustering_xml_block(block_node, + cluster_legalizer.get_cluster_type(cluster_id), + pb_graph_pin_lookup_from_index_by_type, + cluster_legalizer.get_cluster_pb(cluster_id), + size_t(cluster_id), + cluster_legalizer.get_cluster_pb(cluster_id)->pb_route); + } +} + +static void clustering_xml_blocks_from_netlist(pugi::xml_node& block_node, + const IntraLbPbPinLookup& pb_graph_pin_lookup_from_index_by_type) { + const ClusteredNetlist& clb_nlist = g_vpr_ctx.clustering().clb_nlist; + for (auto blk_id : clb_nlist.blocks()) { + /* TODO: Must do check that total CLB pins match top-level pb pins, perhaps check this earlier? */ + clustering_xml_block(block_node, + clb_nlist.block_type(blk_id), + pb_graph_pin_lookup_from_index_by_type, + clb_nlist.block_pb(blk_id), + size_t(blk_id), + clb_nlist.block_pb(blk_id)->pb_route); + } +} + /* This routine dumps out the output netlist in a format suitable for * * input to vpr. This routine also dumps out the internal structure of * * the cluster, in essentially a graph based format. 
*/ -void output_clustering(const vtr::vector*>& intra_lb_routing, bool global_clocks, const std::unordered_set& is_clock, const std::string& architecture_id, const char* out_fname, bool skip_clustering) { - auto& device_ctx = g_vpr_ctx.device(); - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - if (!intra_lb_routing.empty()) { - VTR_ASSERT(intra_lb_routing.size() == cluster_ctx.clb_nlist.blocks().size()); - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - cluster_ctx.clb_nlist.block_pb(blk_id)->pb_route = alloc_and_load_pb_route(intra_lb_routing[blk_id], cluster_ctx.clb_nlist.block_pb(blk_id)->pb_graph_node); - } - } +void output_clustering(ClusterLegalizer* cluster_legalizer_ptr, bool global_clocks, const std::unordered_set& is_clock, const std::string& architecture_id, const char* out_fname, bool skip_clustering, bool from_legalizer) { + const DeviceContext& device_ctx = g_vpr_ctx.device(); + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; IntraLbPbPinLookup pb_graph_pin_lookup_from_index_by_type(device_ctx.logical_block_types); @@ -582,26 +654,26 @@ void output_clustering(const vtr::vector inputs; std::vector outputs; - for (auto blk_id : atom_ctx.nlist.blocks()) { - auto type = atom_ctx.nlist.block_type(blk_id); + for (auto blk_id : atom_nlist.blocks()) { + auto type = atom_nlist.block_type(blk_id); switch (type) { case AtomBlockType::INPAD: if (skip_clustering) { VTR_ASSERT(0); } - inputs.push_back(atom_ctx.nlist.block_name(blk_id)); + inputs.push_back(atom_nlist.block_name(blk_id)); break; case AtomBlockType::OUTPAD: if (skip_clustering) { VTR_ASSERT(0); } - outputs.push_back(atom_ctx.nlist.block_name(blk_id)); + outputs.push_back(atom_nlist.block_name(blk_id)); break; case AtomBlockType::BLOCK: @@ -612,7 +684,7 @@ void output_clustering(const vtr::vector clocks; - for (auto net_id : atom_ctx.nlist.nets()) { + for (auto net_id : atom_nlist.nets()) { if (is_clock.count(net_id)) { - 
clocks.push_back(atom_ctx.nlist.net_name(net_id)); + clocks.push_back(atom_nlist.net_name(net_id)); } } @@ -631,25 +703,22 @@ void output_clustering(const vtr::vectorpb_route); + if (from_legalizer) { + VTR_ASSERT(cluster_legalizer_ptr != nullptr); + clustering_xml_blocks_from_legalizer(block_node, pb_graph_pin_lookup_from_index_by_type, *cluster_legalizer_ptr); + } else { + VTR_ASSERT(cluster_legalizer_ptr == nullptr); + clustering_xml_blocks_from_netlist(block_node, pb_graph_pin_lookup_from_index_by_type); } } out_xml.save_file(out_fname); - print_stats(); - - if (!intra_lb_routing.empty()) { - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - cluster_ctx.clb_nlist.block_pb(blk_id)->pb_route.clear(); - } - } + print_stats(cluster_legalizer_ptr, from_legalizer); } /******************************************************************** - * An useful API to output packing results to a XML file + * A useful API to output packing results to an XML file * This function is a wrapper for the function output_clustering() * but remove all the requirements on input data structures that * have to be built with other APIs @@ -660,13 +729,15 @@ void output_clustering(const vtr::vector*> intra_lb_routing_placeholder; std::unordered_set is_clock = alloc_and_load_is_clock(); - output_clustering(intra_lb_routing_placeholder, + // Since the cluster legalizer is not being used to output the clustering + // (from_legalizer is false), passing in nullptr. 
+ output_clustering(nullptr, global_clocks, is_clock, architecture_id, out_fname, - false); + false, /*skip_clustering*/ + false /*from_legalizer*/); } diff --git a/vpr/src/pack/output_clustering.h b/vpr/src/pack/output_clustering.h index 509690e4934..df8993b957d 100644 --- a/vpr/src/pack/output_clustering.h +++ b/vpr/src/pack/output_clustering.h @@ -1,12 +1,22 @@ #ifndef OUTPUT_CLUSTERING_H #define OUTPUT_CLUSTERING_H -#include + #include -#include "vpr_types.h" -#include "pack_types.h" +#include + +class AtomNetId; +class ClusterLegalizer; -void output_clustering(const vtr::vector*>& intra_lb_routing, bool global_clocks, const std::unordered_set& is_clock, const std::string& architecture_id, const char* out_fname, bool skip_clustering); +void output_clustering(ClusterLegalizer* cluster_legalizer_ptr, + bool global_clocks, + const std::unordered_set& is_clock, + const std::string& architecture_id, + const char* out_fname, + bool skip_clustering, + bool from_legalizer); -void write_packing_results_to_xml(const bool& global_clocks, const std::string& architecture_id, const char* out_fname); +void write_packing_results_to_xml(const bool& global_clocks, + const std::string& architecture_id, + const char* out_fname); #endif diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index e8c68ea2244..1335590f53d 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -1,22 +1,17 @@ #include +#include "SetupGrid.h" +#include "cluster.h" +#include "cluster_legalizer.h" +#include "cluster_util.h" +#include "globals.h" +#include "pack.h" +#include "prepack.h" #include "vpr_context.h" -#include "vtr_assert.h" -#include "vtr_log.h" - #include "vpr_error.h" #include "vpr_types.h" - -#include "globals.h" -#include "prepack.h" -#include "pack_types.h" -#include "pack.h" -#include "cluster.h" -#include "SetupGrid.h" -#include "noc_aware_cluster_util.h" - -/* #define DUMP_PB_GRAPH 1 */ -/* #define DUMP_BLIF_INPUT 1 */ +#include "vtr_assert.h" +#include "vtr_log.h" 
static bool try_size_device_grid(const t_arch& arch, const std::map& num_type_instances, @@ -38,9 +33,7 @@ bool try_pack(t_packer_opts* packer_opts, const t_model* library_models, float interc_delay, std::vector* lb_type_rr_graphs) { - AtomContext& atom_mutable_ctx = g_vpr_ctx.mutable_atom(); const AtomContext& atom_ctx = g_vpr_ctx.atom(); - ClusteringHelperContext& helper_ctx = g_vpr_ctx.mutable_cl_helper(); const DeviceContext& device_ctx = g_vpr_ctx.device(); std::unordered_set is_clock, is_global; @@ -48,8 +41,7 @@ bool try_pack(t_packer_opts* packer_opts, VTR_LOG("Begin packing '%s'.\n", packer_opts->circuit_file_name.c_str()); /* determine number of models in the architecture */ - helper_ctx.num_models = count_models(user_models); - helper_ctx.num_models += count_models(library_models); + size_t num_models = count_models(user_models) + count_models(library_models); is_clock = alloc_and_load_is_clock(); is_global.insert(is_clock.begin(), is_clock.end()); @@ -71,8 +63,11 @@ bool try_pack(t_packer_opts* packer_opts, atom_ctx.nlist.blocks().size(), atom_ctx.nlist.nets().size(), num_p_inputs, num_p_outputs); // Run the prepacker, packing the atoms into molecules. + // The Prepacker object performs prepacking and stores the pack molecules. + // As long as the molecules are used, this object must persist. VTR_LOG("Begin prepacking.\n"); - atom_mutable_ctx.prepacker.init(atom_ctx.nlist, device_ctx.logical_block_types); + Prepacker prepacker; + prepacker.init(atom_ctx.nlist, device_ctx.logical_block_types); /* We keep attraction groups off in the first iteration, and * only turn on in later iterations if some floorplan regions turn out to be overfull. 
@@ -86,11 +81,11 @@ bool try_pack(t_packer_opts* packer_opts, VTR_LOG("Using inter-cluster delay: %g\n", packer_opts->inter_cluster_net_delay); } - helper_ctx.target_external_pin_util = t_ext_pin_util_targets(packer_opts->target_external_pin_util); - helper_ctx.high_fanout_thresholds = t_pack_high_fanout_thresholds(packer_opts->high_fanout_threshold); - - VTR_LOG("Packing with pin utilization targets: %s\n", helper_ctx.target_external_pin_util.to_string().c_str()); - VTR_LOG("Packing with high fanout thresholds: %s\n", helper_ctx.high_fanout_thresholds.to_string().c_str()); + // During clustering, a block is related to un-clustered primitives with nets. + // This relation has three types: low fanout, high fanout, and transitive. + // high_fanout_thresholds stores the threshold for nets to a block type to + // be considered high fanout. + t_pack_high_fanout_thresholds high_fanout_thresholds(packer_opts->high_fanout_threshold); bool allow_unrelated_clustering = false; if (packer_opts->allow_unrelated_clustering == e_unrelated_clustering::ON) { @@ -109,30 +104,46 @@ bool try_pack(t_packer_opts* packer_opts, int pack_iteration = 1; bool floorplan_regions_overfull = false; - // find all NoC router atoms - auto noc_atoms = find_noc_router_atoms(); - update_noc_reachability_partitions(noc_atoms); + // Initialize the cluster legalizer. 
+ ClusterLegalizer cluster_legalizer(atom_ctx.nlist, + prepacker, + device_ctx.logical_block_types, + lb_type_rr_graphs, + num_models, + packer_opts->target_external_pin_util, + high_fanout_thresholds, + ClusterLegalizationStrategy::SKIP_INTRA_LB_ROUTE, + packer_opts->enable_pin_feasibility_filter, + packer_opts->feasible_block_array_size, + packer_opts->pack_verbosity); + + VTR_LOG("Packing with pin utilization targets: %s\n", cluster_legalizer.get_target_external_pin_util().to_string().c_str()); + VTR_LOG("Packing with high fanout thresholds: %s\n", high_fanout_thresholds.to_string().c_str()); while (true) { free_clustering_data(*packer_opts, clustering_data); + //Cluster the netlist - helper_ctx.num_used_type_instances = do_clustering( - *packer_opts, - *analysis_opts, - arch, - atom_mutable_ctx.prepacker, - is_clock, - is_global, - allow_unrelated_clustering, - balance_block_type_util, - lb_type_rr_graphs, - attraction_groups, - floorplan_regions_overfull, - clustering_data); + // num_used_type_instances: A map used to save the number of used + // instances from each logical block type. + std::map num_used_type_instances; + num_used_type_instances = do_clustering(*packer_opts, + *analysis_opts, + arch, + prepacker, + cluster_legalizer, + is_clock, + is_global, + allow_unrelated_clustering, + balance_block_type_util, + attraction_groups, + floorplan_regions_overfull, + high_fanout_thresholds, + clustering_data); //Try to size/find a device - bool fits_on_device = try_size_device_grid(*arch, helper_ctx.num_used_type_instances, packer_opts->target_device_utilization, packer_opts->device_layout); + bool fits_on_device = try_size_device_grid(*arch, num_used_type_instances, packer_opts->target_device_utilization, packer_opts->device_layout); /* We use this bool to determine the cause for the clustering not being dense enough. 
If the clustering * is not dense enough and there are floorplan constraints, it is presumed that the constraints are the cause @@ -187,7 +198,9 @@ bool try_pack(t_packer_opts* packer_opts, VTR_LOG("Pack iteration is %d\n", pack_iteration); attraction_groups.set_att_group_pulls(4); t_ext_pin_util pin_util(1.0, 1.0); - helper_ctx.target_external_pin_util.set_block_pin_util("clb", pin_util); + // TODO: This line assumes the logic block name is "clb" which + // may not be the case. This may need to be investigated. + cluster_legalizer.get_target_external_pin_util().set_block_pin_util("clb", pin_util); } } else { //Unable to pack densely enough: Give Up @@ -201,8 +214,8 @@ bool try_pack(t_packer_opts* packer_opts, std::string resource_reqs; std::string resource_avail; auto& grid = g_vpr_ctx.device().grid; - for (auto iter = helper_ctx.num_used_type_instances.begin(); iter != helper_ctx.num_used_type_instances.end(); ++iter) { - if (iter != helper_ctx.num_used_type_instances.begin()) { + for (auto iter = num_used_type_instances.begin(); iter != num_used_type_instances.end(); ++iter) { + if (iter != num_used_type_instances.begin()) { resource_reqs += ", "; resource_avail += ", "; } @@ -230,8 +243,8 @@ bool try_pack(t_packer_opts* packer_opts, g_vpr_ctx.mutable_floorplanning().cluster_constraints.clear(); //attraction_groups.reset_attraction_groups(); - free_cluster_placement_stats(helper_ctx.cluster_placement_stats); - delete[] helper_ctx.primitives_list; + // Reset the cluster legalizer for re-clustering. 
+ cluster_legalizer.reset(); ++pack_iteration; } @@ -252,7 +265,7 @@ bool try_pack(t_packer_opts* packer_opts, /******************** End **************************/ //check clustering and output it - check_and_output_clustering(*packer_opts, is_clock, arch, helper_ctx.total_clb_num, clustering_data.intra_lb_routing); + check_and_output_clustering(cluster_legalizer, *packer_opts, is_clock, arch); // Free Data Structures free_clustering_data(*packer_opts, clustering_data); diff --git a/vpr/src/pack/pack.h b/vpr/src/pack/pack.h index 0115d2c859a..842feb0aacd 100644 --- a/vpr/src/pack/pack.h +++ b/vpr/src/pack/pack.h @@ -1,11 +1,15 @@ #ifndef PACK_H #define PACK_H -#include #include -#include "vpr_types.h" +#include class AtomNetId; +struct t_analysis_opts; +struct t_arch; +struct t_lb_type_rr_node; +struct t_model; +struct t_packer_opts; bool try_pack(t_packer_opts* packer_opts, const t_analysis_opts* analysis_opts, diff --git a/vpr/src/pack/re_cluster.cpp b/vpr/src/pack/re_cluster.cpp deleted file mode 100644 index a1b48d0e083..00000000000 --- a/vpr/src/pack/re_cluster.cpp +++ /dev/null @@ -1,276 +0,0 @@ -#include "re_cluster.h" -#include "re_cluster_util.h" -#include "initial_placement.h" -#include "cluster_placement.h" -#include "cluster_router.h" - -bool move_mol_to_new_cluster(t_pack_molecule* molecule, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data) { - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - auto& device_ctx = g_vpr_ctx.device(); - - bool is_removed, is_created; - ClusterBlockId old_clb = atom_to_cluster(molecule->atom_block_ids[molecule->root]); - int molecule_size = get_array_size_of_molecule(molecule); - - NocGroupId temp_noc_grp_id = NocGroupId::INVALID(); - PartitionRegion temp_cluster_pr; - t_lb_router_data* old_router_data = nullptr; - t_lb_router_data* router_data = nullptr; - - //Check that there is a place for a new cluster of the same type - 
t_logical_block_type_ptr block_type = cluster_ctx.clb_nlist.block_type(old_clb); - int block_mode = cluster_ctx.clb_nlist.block_pb(old_clb)->mode; - - unsigned int num_instances = 0; - for (auto equivalent_tile : block_type->equivalent_tiles) { - num_instances += device_ctx.grid.num_instances(equivalent_tile, -1); - } - - if (helper_ctx.num_used_type_instances[block_type] == num_instances) { - VTR_LOGV(verbosity > 4, "The utilization of block_type %s is 100%. No space for new clusters\n", block_type->name); - VTR_LOGV(verbosity > 4, "Atom %d move aborted\n", molecule->atom_block_ids[molecule->root]); - return false; - } - - //remove the molecule from its current cluster - std::unordered_set& old_clb_atoms = cluster_to_mutable_atoms(old_clb); - if (old_clb_atoms.size() == 1) { - VTR_LOGV(verbosity > 4, "Atom: %zu move failed. This is the last atom in its cluster.\n"); - return false; - } - remove_mol_from_cluster(molecule, molecule_size, old_clb, old_clb_atoms, false, old_router_data); - - //check old cluster legality after removing the molecule - is_removed = is_cluster_legal(old_router_data); - - //if the cluster is legal, commit the molecule removal. Otherwise, abort the move - if (is_removed) { - commit_mol_removal(molecule, molecule_size, old_clb, during_packing, old_router_data, clustering_data); - } else { - VTR_LOGV(verbosity > 4, "Atom: %zu move failed. Can't remove it from the old cluster\n", molecule->atom_block_ids[molecule->root]); - return false; - } - - //Create new cluster of the same type and mode. 
- ClusterBlockId new_clb(helper_ctx.total_clb_num); - is_created = start_new_cluster_for_mol(molecule, - block_type, - block_mode, - helper_ctx.feasible_block_array_size, - helper_ctx.enable_pin_feasibility_filter, - new_clb, - during_packing, - verbosity, - clustering_data, - &router_data, - temp_cluster_pr, - temp_noc_grp_id); - - //Commit or revert the move - if (is_created) { - commit_mol_move(old_clb, new_clb, during_packing, true); - VTR_LOGV(verbosity > 4, "Atom:%zu is moved to a new cluster\n", molecule->atom_block_ids[molecule->root]); - } else { - revert_mol_move(old_clb, molecule, old_router_data, during_packing, clustering_data); - VTR_LOGV(verbosity > 4, "Atom:%zu move failed. Can't start a new cluster of the same type and mode\n", molecule->atom_block_ids[molecule->root]); - } - - free_router_data(old_router_data); - old_router_data = nullptr; - - //If the move is done after packing not during it, some fixes need to be done on the - //clustered netlist - if (is_created && !during_packing) { - fix_clustered_netlist(molecule, molecule_size, old_clb, new_clb); - } - - return (is_created); -} - -bool move_mol_to_existing_cluster(t_pack_molecule* molecule, - const ClusterBlockId& new_clb, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data) { - //define local variables - bool is_removed, is_added; - AtomBlockId root_atom_id = molecule->atom_block_ids[molecule->root]; - int molecule_size = get_array_size_of_molecule(molecule); - t_lb_router_data* old_router_data = nullptr; - std::unordered_set& new_clb_atoms = cluster_to_mutable_atoms(new_clb); - ClusterBlockId old_clb = atom_to_cluster(root_atom_id); - - //check old and new clusters compatibility - bool is_compatible = check_type_and_mode_compatibility(old_clb, new_clb, verbosity); - if (!is_compatible) - return false; - - //remove the molecule from its current cluster - std::unordered_set& old_clb_atoms = cluster_to_mutable_atoms(old_clb); - if (old_clb_atoms.size() == 1) { - 
VTR_LOGV(verbosity > 4, "Atom: %zu move failed. This is the last atom in its cluster.\n"); - return false; - } - remove_mol_from_cluster(molecule, molecule_size, old_clb, old_clb_atoms, false, old_router_data); - - //check old cluster legality after removing the molecule - is_removed = is_cluster_legal(old_router_data); - - //if the cluster is legal, commit the molecule removal. Otherwise, abort the move - if (is_removed) { - commit_mol_removal(molecule, molecule_size, old_clb, during_packing, old_router_data, clustering_data); - } else { - VTR_LOGV(verbosity > 4, "Atom: %zu move failed. Can't remove it from the old cluster\n", root_atom_id); - return false; - } - - //Add the atom to the new cluster - t_lb_router_data* new_router_data = nullptr; - is_added = pack_mol_in_existing_cluster(molecule, molecule_size, new_clb, new_clb_atoms, during_packing, clustering_data, new_router_data); - - //Commit or revert the move - if (is_added) { - commit_mol_move(old_clb, new_clb, during_packing, false); - VTR_LOGV(verbosity > 4, "Atom:%zu is moved to a new cluster\n", molecule->atom_block_ids[molecule->root]); - } else { - revert_mol_move(old_clb, molecule, old_router_data, during_packing, clustering_data); - VTR_LOGV(verbosity > 4, "Atom:%zu move failed. 
Can't start a new cluster of the same type and mode\n", molecule->atom_block_ids[molecule->root]); - } - - free_router_data(old_router_data); - old_router_data = nullptr; - - //If the move is done after packing not during it, some fixes need to be done on the - //clustered netlist - if (is_added && !during_packing) { - fix_clustered_netlist(molecule, molecule_size, old_clb, new_clb); - } - - return (is_added); -} - -#if 1 -bool swap_two_molecules(t_pack_molecule* molecule_1, - t_pack_molecule* molecule_2, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - //define local variables - PartitionRegion temp_cluster_pr_1, temp_cluster_pr_2; - - bool mol_1_success, mol_2_success; - - AtomBlockId root_1_atom_id = molecule_1->atom_block_ids[molecule_1->root]; - AtomBlockId root_2_atom_id = molecule_2->atom_block_ids[molecule_2->root]; - - int molecule_1_size = get_array_size_of_molecule(molecule_1); - int molecule_2_size = get_array_size_of_molecule(molecule_2); - - //Check that the 2 clusters are the same type - ClusterBlockId clb_1 = atom_to_cluster(root_1_atom_id); - ClusterBlockId clb_2 = atom_to_cluster(root_2_atom_id); - - if (clb_1 == clb_2) { - VTR_LOGV(verbosity > 4, "Swap failed. Both atoms are already in the same cluster.\n"); - return false; - } - //Check that the old and new clusters are of the same type - bool is_compitable = check_type_and_mode_compatibility(clb_1, clb_2, verbosity); - if (!is_compitable) - return false; - - t_lb_router_data* old_1_router_data = nullptr; - t_lb_router_data* old_2_router_data = nullptr; - - //save the atoms of the 2 clusters - std::unordered_set& clb_1_atoms = cluster_to_mutable_atoms(clb_1); - std::unordered_set& clb_2_atoms = cluster_to_mutable_atoms(clb_2); - - if (clb_1_atoms.size() == 1 || clb_2_atoms.size() == 1) { - VTR_LOGV(verbosity > 4, "Atom: %zu, %zu swap failed. 
This is the last atom in its cluster.\n", - molecule_1->atom_block_ids[molecule_1->root], - molecule_2->atom_block_ids[molecule_2->root]); - return false; - } - - t_pb* clb_pb_1 = cluster_ctx.clb_nlist.block_pb(clb_1); - std::string clb_pb_1_name = static_cast(clb_pb_1->name); - t_pb* clb_pb_2 = cluster_ctx.clb_nlist.block_pb(clb_2); - std::string clb_pb_2_name = static_cast(clb_pb_2->name); - - //remove the molecule from its current cluster - remove_mol_from_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, false, old_1_router_data); - commit_mol_removal(molecule_1, molecule_1_size, clb_1, during_packing, old_1_router_data, clustering_data); - - remove_mol_from_cluster(molecule_2, molecule_2_size, clb_2, clb_2_atoms, false, old_2_router_data); - commit_mol_removal(molecule_2, molecule_2_size, clb_2, during_packing, old_2_router_data, clustering_data); - - //Add the atom to the new cluster - mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_2, clb_2_atoms, during_packing, clustering_data, old_2_router_data); - if (!mol_1_success) { - mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, during_packing, clustering_data, old_1_router_data); - mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_2, clb_2_atoms, during_packing, clustering_data, old_2_router_data); - - VTR_ASSERT(mol_1_success && mol_2_success); - free_router_data(old_1_router_data); - free_router_data(old_2_router_data); - old_1_router_data = nullptr; - old_2_router_data = nullptr; - - free(clb_pb_1->name); - cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); - free(clb_pb_2->name); - cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); - - return false; - } - - mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_1, clb_1_atoms, during_packing, clustering_data, old_1_router_data); - if (!mol_2_success) { - 
remove_mol_from_cluster(molecule_1, molecule_1_size, clb_2, clb_2_atoms, true, old_2_router_data); - commit_mol_removal(molecule_1, molecule_1_size, clb_2, during_packing, old_2_router_data, clustering_data); - mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, during_packing, clustering_data, old_1_router_data); - mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_2, clb_2_atoms, during_packing, clustering_data, old_2_router_data); - - VTR_ASSERT(mol_1_success && mol_2_success); - free_router_data(old_1_router_data); - free_router_data(old_2_router_data); - old_1_router_data = nullptr; - old_2_router_data = nullptr; - - free(clb_pb_1->name); - cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); - free(clb_pb_2->name); - cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); - - return false; - } - - //commit the move if succeeded or revert if failed - VTR_ASSERT(mol_1_success && mol_2_success); - - //If the move is done after packing not during it, some fixes need to be done on the clustered netlist - if (!during_packing) { - fix_clustered_netlist(molecule_1, molecule_1_size, clb_1, clb_2); - fix_clustered_netlist(molecule_2, molecule_2_size, clb_2, clb_1); - } - - free_router_data(old_1_router_data); - free_router_data(old_2_router_data); - old_1_router_data = nullptr; - old_2_router_data = nullptr; - - free(clb_pb_1->name); - cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); - free(clb_pb_2->name); - cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); - - return true; -} -#endif diff --git a/vpr/src/pack/re_cluster.h b/vpr/src/pack/re_cluster.h deleted file mode 100644 index 5ca2489aac4..00000000000 --- a/vpr/src/pack/re_cluster.h +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef RE_CLUSTER_H -#define RE_CLUSTER_H -/** - * @file This file includes an API function that updates clustering 
after its done - * - * To optimize the clustering decisions, this file provides an API that can open up already - * packed clusters and change them. The functions in this API can be used in 2 locations: - * - During packing after the clusterer is done - * - During placement after the initial placement is done - * - */ - -#include "pack_types.h" -#include "clustered_netlist_utils.h" -#include "cluster_util.h" - -/** - * @brief This function moves a molecule out of its cluster and creates a new cluster for it - * - * This function can be called from 2 spots in the vpr flow. - * - First, during packing to optimize the initial clustered netlist - * (during_packing variable should be true.) - * - Second, during placement (during_packing variable should be false). In this case, the clustered - * netlist is updated. - */ -bool move_mol_to_new_cluster(t_pack_molecule* molecule, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data); - -/** - * @brief This function moves a molecule out of its cluster to another cluster that already exists. - * - * This function can be called from 2 spots in the vpr flow. - * - First, during packing to optimize the initial clustered netlist - * (during_packing variable should be true.) - * - Second, during placement (during_packing variable should be false). In this case, the clustered - * netlist is updated. - */ -bool move_mol_to_existing_cluster(t_pack_molecule* molecule, - const ClusterBlockId& new_clb, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data); - -/** - * @brief This function swap two molecules between two different clusters. - * - * This function can be called from 2 spots in the vpr flow. - * - First, during packing to optimize the initial clustered netlist - * (during_packing variable should be true.) - * - Second, during placement (during_packing variable should be false). In this case, the clustered - * netlist is updated. 
- */ -bool swap_two_molecules(t_pack_molecule* molecule_1, - t_pack_molecule* molecule_2, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data); -#endif diff --git a/vpr/src/pack/re_cluster_util.cpp b/vpr/src/pack/re_cluster_util.cpp deleted file mode 100644 index 46eb04955a5..00000000000 --- a/vpr/src/pack/re_cluster_util.cpp +++ /dev/null @@ -1,764 +0,0 @@ -#include "re_cluster_util.h" -#include "clustered_netlist_utils.h" -#include "cluster_util.h" -#include "cluster_router.h" -#include "cluster_placement.h" -#include "place_macro.h" -#include "initial_placement.h" -#include "read_netlist.h" - -// The name suffix of the new block (if exists) -// This suffix is useful in preventing duplicate high-level cluster block names -const char* name_suffix = "_m"; - -/******************* Static Functions ********************/ -static void load_atom_index_for_pb_pin(t_pb_routes& pb_route, int ipin); -static void load_internal_to_block_net_nums(const t_logical_block_type_ptr type, t_pb_routes& pb_route); -static void fix_atom_pin_mapping(const AtomBlockId blk); - -static void fix_cluster_pins_after_moving(const ClusterBlockId clb_index); - -static std::pair check_net_absorption(AtomNetId atom_net_id, - ClusterBlockId new_clb, - ClusterBlockId old_clb, - ClusterPinId& cluster_pin_id); - -static void fix_cluster_port_after_moving(const ClusterBlockId clb_index); - -static void fix_cluster_net_after_moving(const t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId old_clb, - ClusterBlockId new_clb); - -static void rebuild_cluster_placement_stats(ClusterBlockId clb_index, - const std::unordered_set& clb_atoms); - -static void update_cluster_pb_stats(const t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId clb_index, - bool is_added); - -/***************** API functions ***********************/ -ClusterBlockId atom_to_cluster(AtomBlockId atom) { - auto& atom_ctx = g_vpr_ctx.atom(); - return (atom_ctx.lookup.atom_clb(atom)); -} - 
-const std::unordered_set& cluster_to_atoms(ClusterBlockId cluster) { - const auto& atoms = cluster_to_mutable_atoms(cluster); - - return atoms; -} - -void remove_mol_from_cluster(const t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId& old_clb, - std::unordered_set& old_clb_atoms, - bool router_data_ready, - t_lb_router_data*& router_data) { - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - - for (int i_atom = 0; i_atom < molecule_size; i_atom++) { - if (molecule->atom_block_ids[i_atom]) { - auto it = old_clb_atoms.find(molecule->atom_block_ids[i_atom]); - if (it != old_clb_atoms.end()) - old_clb_atoms.erase(molecule->atom_block_ids[i_atom]); - } - } - - //re-build router_data structure for this cluster - if (!router_data_ready) - router_data = lb_load_router_data(helper_ctx.lb_type_rr_graphs, old_clb, old_clb_atoms); - - update_cluster_pb_stats(molecule, molecule_size, old_clb, false); -} - -void commit_mol_move(ClusterBlockId old_clb, - ClusterBlockId new_clb, - bool during_packing, - bool new_clb_created) { - auto& device_ctx = g_vpr_ctx.device(); - - //place the new cluster if this function called during placement (after the initial placement is done) - if (!during_packing && new_clb_created) { - int imacro; - g_vpr_ctx.mutable_placement().mutable_block_locs().resize(g_vpr_ctx.placement().block_locs().size() + 1); - get_imacro_from_iblk(&imacro, old_clb, g_vpr_ctx.placement().pl_macros); - set_imacro_for_iblk(&imacro, new_clb); - place_one_block(new_clb, device_ctx.pad_loc_type, nullptr, nullptr, g_vpr_ctx.mutable_placement().mutable_blk_loc_registry()); - } -} - -t_lb_router_data* lb_load_router_data(std::vector* lb_type_rr_graphs, - ClusterBlockId clb_index, - const std::unordered_set& clb_atoms) { - //build data structures used by intra-logic block router - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& atom_ctx = g_vpr_ctx.atom(); - auto block_type = cluster_ctx.clb_nlist.block_type(clb_index); - t_lb_router_data* router_data = 
alloc_and_load_router_data(&lb_type_rr_graphs[block_type->index], block_type); - - for (auto atom_id : clb_atoms) { - add_atom_as_target(router_data, atom_id); - const t_pb* pb = atom_ctx.lookup.atom_pb(atom_id); - while (pb) { - set_reset_pb_modes(router_data, pb, true); - pb = pb->parent_pb; - } - } - return (router_data); -} - -bool start_new_cluster_for_mol(t_pack_molecule* molecule, - const t_logical_block_type_ptr type, - const int mode, - const int feasible_block_array_size, - bool enable_pin_feasibility_filter, - ClusterBlockId clb_index, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data, - t_lb_router_data** router_data, - PartitionRegion& temp_cluster_pr, - NocGroupId& temp_cluster_noc_grp_id, - enum e_detailed_routing_stages detailed_routing_stage, - int force_site) { - auto& atom_ctx = g_vpr_ctx.atom(); - auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - /* Cluster's PartitionRegion is empty initially, meaning it has no floorplanning constraints */ - PartitionRegion empty_pr; - floorplanning_ctx.cluster_constraints.push_back(empty_pr); - - /* Allocate a dummy initial cluster and load a atom block as a seed and check if it is legal */ - AtomBlockId root_atom = molecule->atom_block_ids[molecule->root]; - const std::string& root_atom_name = atom_ctx.nlist.block_name(root_atom); - //const t_model* root_model = atom_ctx.nlist.block_model(root_atom); - - t_pb* pb = new t_pb; - pb->pb_graph_node = type->pb_graph_head; - alloc_and_load_pb_stats(pb, feasible_block_array_size); - pb->parent_pb = nullptr; - - *router_data = alloc_and_load_router_data(&(helper_ctx.lb_type_rr_graphs[type->index]), type); - - e_block_pack_status pack_result = e_block_pack_status::BLK_STATUS_UNDEFINED; - pb->mode = mode; - t_cluster_placement_stats* cluster_placement_stats = &(helper_ctx.cluster_placement_stats[type->index]); - 
reset_cluster_placement_stats(cluster_placement_stats); - set_mode_cluster_placement_stats(pb->pb_graph_node, mode); - - pack_result = try_pack_molecule(cluster_placement_stats, - molecule, - helper_ctx.primitives_list, - pb, - helper_ctx.num_models, - helper_ctx.max_cluster_size, - clb_index, - detailed_routing_stage, - *router_data, - 0, - enable_pin_feasibility_filter, - 0, - FULL_EXTERNAL_PIN_UTIL, - temp_cluster_pr, - temp_cluster_noc_grp_id, - force_site); - - // If clustering succeeds, add it to the clb netlist - if (pack_result == e_block_pack_status::BLK_PASSED) { - VTR_LOGV(verbosity > 4, "\tPASSED_SEED: Block Type %s\n", type->name); - //Once clustering succeeds, add it to the clb netlist - if (pb->name != nullptr) { - free(pb->name); - } - std::string new_name = root_atom_name + name_suffix; - pb->name = vtr::strdup(new_name.c_str()); - clb_index = cluster_ctx.clb_nlist.create_block(new_name.c_str(), pb, type); - helper_ctx.total_clb_num++; - int molecule_size = get_array_size_of_molecule(molecule); - update_cluster_pb_stats(molecule, molecule_size, clb_index, true); - - // Update the clb-->atoms lookup table - helper_ctx.atoms_lookup.resize(helper_ctx.total_clb_num); - for (int i_atom = 0; i_atom < molecule_size; ++i_atom) { - if (molecule->atom_block_ids[i_atom]) { - helper_ctx.atoms_lookup[clb_index].insert(molecule->atom_block_ids[i_atom]); - } - } - - //If you are still in packing, update the clustering data. Otherwise, update the clustered netlist. 
- if (during_packing) { - clustering_data.intra_lb_routing.push_back((*router_data)->saved_lb_nets); - (*router_data)->saved_lb_nets = nullptr; - } else { - cluster_ctx.clb_nlist.block_pb(clb_index)->pb_route = alloc_and_load_pb_route((*router_data)->saved_lb_nets, cluster_ctx.clb_nlist.block_pb(clb_index)->pb_graph_node); - } - } else { - free_pb(pb); - delete pb; - } - - //Free failed clustering - free_router_data(*router_data); - *router_data = nullptr; - - return (pack_result == e_block_pack_status::BLK_PASSED); -} - -bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId new_clb, - std::unordered_set& new_clb_atoms, - bool during_packing, - t_clustering_data& clustering_data, - t_lb_router_data*& router_data, - enum e_detailed_routing_stages detailed_routing_stage, - bool enable_pin_feasibility_filter, - int force_site) { - - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - NocGroupId temp_cluster_noc_grp_id; - PartitionRegion temp_cluster_pr; - e_block_pack_status pack_result = e_block_pack_status::BLK_STATUS_UNDEFINED; - t_ext_pin_util target_ext_pin_util = helper_ctx.target_external_pin_util.get_pin_util(cluster_ctx.clb_nlist.block_type(new_clb)->name); - t_logical_block_type_ptr block_type = cluster_ctx.clb_nlist.block_type(new_clb); - t_pb* temp_pb = cluster_ctx.clb_nlist.block_pb(new_clb); - - //re-build cluster placement stats - rebuild_cluster_placement_stats(new_clb, new_clb_atoms); - if (!check_free_primitives_for_molecule_atoms(molecule, &(helper_ctx.cluster_placement_stats[block_type->index]))) - return false; - - //re-build router_data structure for this cluster - router_data = lb_load_router_data(helper_ctx.lb_type_rr_graphs, new_clb, new_clb_atoms); - - pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[block_type->index]), - molecule, - helper_ctx.primitives_list, - temp_pb, - helper_ctx.num_models, - 
helper_ctx.max_cluster_size, - new_clb, - detailed_routing_stage, - router_data, - 0, - enable_pin_feasibility_filter, - //false, - helper_ctx.feasible_block_array_size, - target_ext_pin_util, - temp_cluster_pr, - temp_cluster_noc_grp_id, - force_site); - - // If clustering succeeds, add it to the clb netlist - if (pack_result == e_block_pack_status::BLK_PASSED) { - //If you are still in packing, update the clustering data. Otherwise, update the clustered netlist. - if (during_packing) { - free_intra_lb_nets(clustering_data.intra_lb_routing[new_clb]); - clustering_data.intra_lb_routing[new_clb] = router_data->saved_lb_nets; - router_data->saved_lb_nets = nullptr; - } else { - cluster_ctx.clb_nlist.block_pb(new_clb)->pb_route.clear(); - cluster_ctx.clb_nlist.block_pb(new_clb)->pb_route = alloc_and_load_pb_route(router_data->saved_lb_nets, cluster_ctx.clb_nlist.block_pb(new_clb)->pb_graph_node); - } - - for (int i_atom = 0; i_atom < molecule_size; i_atom++) { - if (molecule->atom_block_ids[i_atom]) { - new_clb_atoms.insert(molecule->atom_block_ids[i_atom]); - } - } - update_cluster_pb_stats(molecule, molecule_size, new_clb, true); - } - - //Free clustering router data - free_router_data(router_data); - router_data = nullptr; - - return (pack_result == e_block_pack_status::BLK_PASSED); -} - -void fix_clustered_netlist(t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId old_clb, - ClusterBlockId new_clb) { - fix_cluster_port_after_moving(new_clb); - fix_cluster_net_after_moving(molecule, molecule_size, old_clb, new_clb); -} - -void revert_mol_move(ClusterBlockId old_clb, - t_pack_molecule* molecule, - t_lb_router_data*& old_router_data, - bool during_packing, - t_clustering_data& clustering_data) { - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - NocGroupId temp_cluster_noc_grp_id_original; - PartitionRegion temp_cluster_pr_original; - e_block_pack_status pack_result = 
try_pack_molecule(&(helper_ctx.cluster_placement_stats[cluster_ctx.clb_nlist.block_type(old_clb)->index]), - molecule, - helper_ctx.primitives_list, - cluster_ctx.clb_nlist.block_pb(old_clb), - helper_ctx.num_models, - helper_ctx.max_cluster_size, - old_clb, - E_DETAILED_ROUTE_FOR_EACH_ATOM, - old_router_data, - 0, - helper_ctx.enable_pin_feasibility_filter, - helper_ctx.feasible_block_array_size, - helper_ctx.target_external_pin_util.get_pin_util(cluster_ctx.clb_nlist.block_type(old_clb)->name), - temp_cluster_pr_original, - temp_cluster_noc_grp_id_original); - - VTR_ASSERT(pack_result == e_block_pack_status::BLK_PASSED); - //If you are still in packing, update the clustering data. Otherwise, update the clustered netlist. - if (during_packing) { - free_intra_lb_nets(clustering_data.intra_lb_routing[old_clb]); - clustering_data.intra_lb_routing[old_clb] = old_router_data->saved_lb_nets; - old_router_data->saved_lb_nets = nullptr; - } else { - cluster_ctx.clb_nlist.block_pb(old_clb)->pb_route.clear(); - cluster_ctx.clb_nlist.block_pb(old_clb)->pb_route = alloc_and_load_pb_route(old_router_data->saved_lb_nets, cluster_ctx.clb_nlist.block_pb(old_clb)->pb_graph_node); - } - - free_router_data(old_router_data); - old_router_data = nullptr; -} -/*******************************************/ -/************ static functions *************/ -/*******************************************/ - -static void fix_cluster_net_after_moving(const t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId old_clb, - ClusterBlockId new_clb) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - - AtomNetId atom_net_id; - ClusterPinId cluster_pin; - - //remove all old cluster pin from their nets - ClusterNetId cur_clb_net; - for (auto& old_clb_pin : cluster_ctx.clb_nlist.block_pins(old_clb)) { - cur_clb_net = cluster_ctx.clb_nlist.pin_net(old_clb_pin); - cluster_ctx.clb_nlist.remove_net_pin(cur_clb_net, old_clb_pin); - } - - //delete 
cluster nets that are no longer used - for (int i_atom = 0; i_atom < molecule_size; i_atom++) { - if (molecule->atom_block_ids[i_atom]) { - for (auto atom_pin : atom_ctx.nlist.block_pins(molecule->atom_block_ids[i_atom])) { - atom_net_id = atom_ctx.nlist.pin_net(atom_pin); - auto [previously_absorbed, now_absorbed] = check_net_absorption(atom_net_id, new_clb, old_clb, cluster_pin); - - if (!previously_absorbed && now_absorbed) { - cur_clb_net = cluster_ctx.clb_nlist.pin_net(cluster_pin); - cluster_ctx.clb_nlist.remove_net(cur_clb_net); - } - } - } - } - - //Fix cluster pin for old and new clbs - fix_cluster_pins_after_moving(old_clb); - fix_cluster_pins_after_moving(new_clb); - - for (AtomBlockId atom_blk : cluster_to_atoms(old_clb)) - fix_atom_pin_mapping(atom_blk); - - for (AtomBlockId atom_blk : cluster_to_atoms(new_clb)) - fix_atom_pin_mapping(atom_blk); - - cluster_ctx.clb_nlist.remove_and_compress(); - load_internal_to_block_net_nums(cluster_ctx.clb_nlist.block_type(old_clb), cluster_ctx.clb_nlist.block_pb(old_clb)->pb_route); - load_internal_to_block_net_nums(cluster_ctx.clb_nlist.block_type(new_clb), cluster_ctx.clb_nlist.block_pb(new_clb)->pb_route); -} - -static void fix_cluster_port_after_moving(const ClusterBlockId clb_index) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - const t_pb* pb = cluster_ctx.clb_nlist.block_pb(clb_index); - - while (!pb->is_root()) { - pb = pb->parent_pb; - } - - size_t num_old_ports = cluster_ctx.clb_nlist.block_ports(clb_index).size(); - const t_pb_type* pb_type = pb->pb_graph_node->pb_type; - - for (size_t port = num_old_ports; port < (unsigned)pb_type->num_ports; port++) { - if (pb_type->ports[port].is_clock && pb_type->ports[port].type == IN_PORT) { - cluster_ctx.clb_nlist.create_port(clb_index, pb_type->ports[port].name, pb_type->ports[port].num_pins, PortType::CLOCK); - } else if (!pb_type->ports[port].is_clock && pb_type->ports[port].type == IN_PORT) { - cluster_ctx.clb_nlist.create_port(clb_index, 
pb_type->ports[port].name, pb_type->ports[port].num_pins, PortType::INPUT); - } else { - VTR_ASSERT(pb_type->ports[port].type == OUT_PORT); - cluster_ctx.clb_nlist.create_port(clb_index, pb_type->ports[port].name, pb_type->ports[port].num_pins, PortType::OUTPUT); - } - } - - num_old_ports = cluster_ctx.clb_nlist.block_ports(clb_index).size(); -} - -static void fix_cluster_pins_after_moving(const ClusterBlockId clb_index) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - - const t_pb* pb = cluster_ctx.clb_nlist.block_pb(clb_index); - t_pb_graph_pin* pb_graph_pin; - AtomNetId atom_net_id; - ClusterNetId clb_net_id; - - t_logical_block_type_ptr block_type = cluster_ctx.clb_nlist.block_type(clb_index); - - int num_input_ports = pb->pb_graph_node->num_input_ports; - int num_output_ports = pb->pb_graph_node->num_output_ports; - int num_clock_ports = pb->pb_graph_node->num_clock_ports; - - int iport, ipb_pin, ipin, rr_node_index; - - ipin = 0; - // iterating over input ports - for (iport = 0; iport < num_input_ports; iport++) { - ClusterPortId input_port_id = cluster_ctx.clb_nlist.find_port(clb_index, block_type->pb_type->ports[iport].name); - // iterating over physical block pins of each input port - for (ipb_pin = 0; ipb_pin < pb->pb_graph_node->num_input_pins[iport]; ipb_pin++) { - pb_graph_pin = &pb->pb_graph_node->input_pins[iport][ipb_pin]; - rr_node_index = pb_graph_pin->pin_count_in_cluster; - - VTR_ASSERT(pb_graph_pin->pin_count_in_cluster == ipin); - if (pb->pb_route.count(rr_node_index)) { - atom_net_id = pb->pb_route[rr_node_index].atom_net_id; - if (atom_net_id) { - clb_net_id = cluster_ctx.clb_nlist.create_net(atom_ctx.nlist.net_name(atom_net_id)); - atom_ctx.lookup.set_atom_clb_net(atom_net_id, clb_net_id); - ClusterPinId cur_pin_id = cluster_ctx.clb_nlist.find_pin(input_port_id, (BitIndex)ipb_pin); - if (!cur_pin_id) - cluster_ctx.clb_nlist.create_pin(input_port_id, (BitIndex)ipb_pin, clb_net_id, 
PinType::SINK, ipin); - else - cluster_ctx.clb_nlist.set_pin_net(cur_pin_id, PinType::SINK, clb_net_id); - } - cluster_ctx.clb_nlist.block_pb(clb_index)->pb_route[rr_node_index].pb_graph_pin = pb_graph_pin; - } - ipin++; - } - } - - // iterating over output ports - for (iport = 0; iport < num_output_ports; iport++) { - ClusterPortId output_port_id = cluster_ctx.clb_nlist.find_port(clb_index, block_type->pb_type->ports[num_input_ports + iport].name); - // iterating over physical block pins of each output port - for (ipb_pin = 0; ipb_pin < pb->pb_graph_node->num_output_pins[iport]; ipb_pin++) { - pb_graph_pin = &pb->pb_graph_node->output_pins[iport][ipb_pin]; - rr_node_index = pb_graph_pin->pin_count_in_cluster; - - VTR_ASSERT(pb_graph_pin->pin_count_in_cluster == ipin); - if (pb->pb_route.count(rr_node_index)) { - atom_net_id = pb->pb_route[rr_node_index].atom_net_id; - if (atom_net_id) { - clb_net_id = cluster_ctx.clb_nlist.create_net(atom_ctx.nlist.net_name(atom_net_id)); - atom_ctx.lookup.set_atom_clb_net(atom_net_id, clb_net_id); - ClusterPinId cur_pin_id = cluster_ctx.clb_nlist.find_pin(output_port_id, (BitIndex)ipb_pin); - AtomPinId atom_net_driver = atom_ctx.nlist.net_driver(atom_net_id); - bool driver_is_constant = atom_ctx.nlist.pin_is_constant(atom_net_driver); - if (!cur_pin_id) - cluster_ctx.clb_nlist.create_pin(output_port_id, (BitIndex)ipb_pin, clb_net_id, PinType::DRIVER, ipin, driver_is_constant); - else { - cluster_ctx.clb_nlist.set_pin_net(cur_pin_id, PinType::DRIVER, clb_net_id); - cluster_ctx.clb_nlist.set_pin_is_constant(cur_pin_id, driver_is_constant); - } - VTR_ASSERT(cluster_ctx.clb_nlist.net_is_constant(clb_net_id) == driver_is_constant); - } - cluster_ctx.clb_nlist.block_pb(clb_index)->pb_route[rr_node_index].pb_graph_pin = pb_graph_pin; - } - ipin++; - } - } - - // iterating over clock ports - for (iport = 0; iport < num_clock_ports; iport++) { - ClusterPortId clock_port_id = cluster_ctx.clb_nlist.find_port(clb_index, 
block_type->pb_type->ports[num_input_ports + num_output_ports + iport].name); - // iterating over physical block pins of each clock port - for (ipb_pin = 0; ipb_pin < pb->pb_graph_node->num_clock_pins[iport]; ipb_pin++) { - pb_graph_pin = &pb->pb_graph_node->clock_pins[iport][ipb_pin]; - rr_node_index = pb_graph_pin->pin_count_in_cluster; - - VTR_ASSERT(pb_graph_pin->pin_count_in_cluster == ipin); - if (pb->pb_route.count(rr_node_index)) { - atom_net_id = pb->pb_route[rr_node_index].atom_net_id; - if (atom_net_id) { - clb_net_id = cluster_ctx.clb_nlist.create_net(atom_ctx.nlist.net_name(atom_net_id)); - atom_ctx.lookup.set_atom_clb_net(atom_net_id, clb_net_id); - ClusterPinId cur_pin_id = cluster_ctx.clb_nlist.find_pin(clock_port_id, (BitIndex)ipb_pin); - if (!cur_pin_id) - cluster_ctx.clb_nlist.create_pin(clock_port_id, (BitIndex)ipb_pin, clb_net_id, PinType::SINK, ipin); - else - cluster_ctx.clb_nlist.set_pin_net(cur_pin_id, PinType::SINK, clb_net_id); - } - cluster_ctx.clb_nlist.block_pb(clb_index)->pb_route[rr_node_index].pb_graph_pin = pb_graph_pin; - } - ipin++; - } - } -} - -static std::pair check_net_absorption(const AtomNetId atom_net_id, - const ClusterBlockId new_clb, - const ClusterBlockId old_clb, - ClusterPinId& cluster_pin_id) { - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - - AtomBlockId atom_block_id; - ClusterBlockId clb_index; - - ClusterNetId clb_net_id = atom_ctx.lookup.clb_net(atom_net_id); - - bool previously_absorbed; - if (clb_net_id == ClusterNetId::INVALID()) - previously_absorbed = true; - else { - previously_absorbed = false; - for (auto& cluster_pin : cluster_ctx.clb_nlist.net_pins(clb_net_id)) { - if (cluster_pin && cluster_ctx.clb_nlist.pin_block(cluster_pin) == old_clb) { - cluster_pin_id = cluster_pin; - break; - } - } - } - - //iterate over net pins and check their cluster - bool now_absorbed = true; - for (auto& net_pin : atom_ctx.nlist.net_pins(atom_net_id)) { - atom_block_id = 
atom_ctx.nlist.pin_block(net_pin); - clb_index = atom_ctx.lookup.atom_clb(atom_block_id); - - if (clb_index != new_clb) { - now_absorbed = false; - break; - } - } - - return {previously_absorbed, now_absorbed}; -} - -static void fix_atom_pin_mapping(const AtomBlockId blk) { - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - - const t_pb* pb = atom_ctx.lookup.atom_pb(blk); - VTR_ASSERT_MSG(pb, "Atom block must have a matching PB"); - - const t_pb_graph_node* gnode = pb->pb_graph_node; - VTR_ASSERT_MSG(gnode->pb_type->model == atom_ctx.nlist.block_model(blk), - "Atom block PB must match BLIF model"); - - for (int iport = 0; iport < gnode->num_input_ports; ++iport) { - if (gnode->num_input_pins[iport] <= 0) continue; - - const AtomPortId port = atom_ctx.nlist.find_atom_port(blk, gnode->input_pins[iport][0].port->model_port); - if (!port) continue; - - for (int ipin = 0; ipin < gnode->num_input_pins[iport]; ++ipin) { - const t_pb_graph_pin* gpin = &gnode->input_pins[iport][ipin]; - VTR_ASSERT(gpin); - - set_atom_pin_mapping(cluster_ctx.clb_nlist, blk, port, gpin); - } - } - - for (int iport = 0; iport < gnode->num_output_ports; ++iport) { - if (gnode->num_output_pins[iport] <= 0) continue; - - const AtomPortId port = atom_ctx.nlist.find_atom_port(blk, gnode->output_pins[iport][0].port->model_port); - if (!port) continue; - - for (int ipin = 0; ipin < gnode->num_output_pins[iport]; ++ipin) { - const t_pb_graph_pin* gpin = &gnode->output_pins[iport][ipin]; - VTR_ASSERT(gpin); - - set_atom_pin_mapping(cluster_ctx.clb_nlist, blk, port, gpin); - } - } - - for (int iport = 0; iport < gnode->num_clock_ports; ++iport) { - if (gnode->num_clock_pins[iport] <= 0) continue; - - const AtomPortId port = atom_ctx.nlist.find_atom_port(blk, gnode->clock_pins[iport][0].port->model_port); - if (!port) continue; - - for (int ipin = 0; ipin < gnode->num_clock_pins[iport]; ++ipin) { - const t_pb_graph_pin* gpin = &gnode->clock_pins[iport][ipin]; - 
VTR_ASSERT(gpin); - - set_atom_pin_mapping(cluster_ctx.clb_nlist, blk, port, gpin); - } - } -} - -static void load_internal_to_block_net_nums(const t_logical_block_type_ptr type, t_pb_routes& pb_route) { - int num_pins = type->pb_graph_head->total_pb_pins; - - for (int ipb_pin = 0; ipb_pin < num_pins; ipb_pin++) { - if (!pb_route.count(ipb_pin)) continue; - - if (pb_route[ipb_pin].driver_pb_pin_id != OPEN) { - load_atom_index_for_pb_pin(pb_route, ipb_pin); - } - } -} - -static void load_atom_index_for_pb_pin(t_pb_routes& pb_route, int ipin) { - int driver = pb_route[ipin].driver_pb_pin_id; - - VTR_ASSERT(driver != OPEN); - //VTR_ASSERT(!pb_route[ipin].atom_net_id); - - if (!pb_route[driver].atom_net_id) { - load_atom_index_for_pb_pin(pb_route, driver); - } - - //Store the net coming from the driver - pb_route[ipin].atom_net_id = pb_route[driver].atom_net_id; - - //Store ourselves with the driver - pb_route[driver].sink_pb_pin_ids.push_back(ipin); -} - -#if 0 -static bool count_children_pbs(const t_pb* pb) { - if (pb == nullptr) - return 0; - - for (int i = 0; i < pb->get_num_child_types(); i++) { - for (int j = 0; j < pb->get_num_children_of_type(i); j++) { - if (pb->child_pbs[i] != nullptr && pb->child_pbs[i][j].name != nullptr) { - return true; - } - } - } - return false; -} -#endif - -static void rebuild_cluster_placement_stats(ClusterBlockId clb_index, - const std::unordered_set& clb_atoms) { - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& atom_ctx = g_vpr_ctx.atom(); - - t_cluster_placement_stats* cluster_placement_stats = &(helper_ctx.cluster_placement_stats[cluster_ctx.clb_nlist.block_type(clb_index)->index]); - reset_cluster_placement_stats(cluster_placement_stats); - set_mode_cluster_placement_stats(cluster_ctx.clb_nlist.block_pb(clb_index)->pb_graph_node, cluster_ctx.clb_nlist.block_pb(clb_index)->mode); - - for (AtomBlockId atom : clb_atoms) { - const t_pb* atom_pb = atom_ctx.lookup.atom_pb(atom); 
- commit_primitive(cluster_placement_stats, atom_pb->pb_graph_node); - } -} - -bool is_cluster_legal(t_lb_router_data*& router_data) { - return (check_cluster_legality(0, E_DETAILED_ROUTE_AT_END_ONLY, router_data)); -} - -void commit_mol_removal(const t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId old_clb, - bool during_packing, - t_lb_router_data*& router_data, - t_clustering_data& clustering_data) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - for (int i_atom = 0; i_atom < molecule_size; i_atom++) { - if (molecule->atom_block_ids[i_atom]) { - revert_place_atom_block(molecule->atom_block_ids[i_atom], router_data); - } - } - - cleanup_pb(cluster_ctx.clb_nlist.block_pb(old_clb)); - - //If you are still in packing, update the clustering data. Otherwise, update the clustered netlist. - if (during_packing) { - free_intra_lb_nets(clustering_data.intra_lb_routing[old_clb]); - clustering_data.intra_lb_routing[old_clb] = router_data->saved_lb_nets; - router_data->saved_lb_nets = nullptr; - } else { - cluster_ctx.clb_nlist.block_pb(old_clb)->pb_route.clear(); - cluster_ctx.clb_nlist.block_pb(old_clb)->pb_route = alloc_and_load_pb_route(router_data->saved_lb_nets, - cluster_ctx.clb_nlist.block_pb(old_clb)->pb_graph_node); - } -} - -bool check_type_and_mode_compatibility(ClusterBlockId old_clb, - ClusterBlockId new_clb, - int verbosity) { - auto& cluster_ctx = g_vpr_ctx.clustering(); - - //Check that the old and new clusters are the same type - if (cluster_ctx.clb_nlist.block_type(old_clb) != cluster_ctx.clb_nlist.block_type(new_clb)) { - VTR_LOGV(verbosity > 4, "Move aborted. New and old cluster blocks are not of the same type"); - return false; - } - - //Check that the old and new clusters are the mode - if (cluster_ctx.clb_nlist.block_pb(old_clb)->mode != cluster_ctx.clb_nlist.block_pb(new_clb)->mode) { - VTR_LOGV(verbosity > 4, "Move aborted. 
New and old cluster blocks are not of the same mode"); - return false; - } - - return true; -} - -static void update_cluster_pb_stats(const t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId clb_index, - bool is_added) { - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - t_pb* cur_pb; - - for (int iblock = 0; iblock < molecule_size; iblock++) { - auto blk_id = molecule->atom_block_ids[iblock]; - if (!blk_id) { - continue; - } - - //Update atom netlist mapping - if (is_added) { - atom_ctx.lookup.set_atom_clb(blk_id, clb_index); - } else { - atom_ctx.lookup.set_atom_clb(blk_id, ClusterBlockId::INVALID()); - } - - const t_pb* atom_pb = atom_ctx.lookup.atom_pb(blk_id); - VTR_ASSERT(atom_pb); - - cur_pb = atom_pb->parent_pb; - - while (cur_pb) { - /* reset list of feasible blocks */ - cur_pb->pb_stats->num_feasible_blocks = NOT_VALID; - if (is_added) - cur_pb->pb_stats->num_child_blocks_in_pb++; - else - cur_pb->pb_stats->num_child_blocks_in_pb--; - - cur_pb = cur_pb->parent_pb; - } - } -} - -std::unordered_set& cluster_to_mutable_atoms(ClusterBlockId cluster) { - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - - //If the lookup is not built yet, build it first - if (helper_ctx.atoms_lookup.empty()) - init_clb_atoms_lookup(helper_ctx.atoms_lookup); - - return helper_ctx.atoms_lookup[cluster]; -} \ No newline at end of file diff --git a/vpr/src/pack/re_cluster_util.h b/vpr/src/pack/re_cluster_util.h deleted file mode 100644 index bc940dca881..00000000000 --- a/vpr/src/pack/re_cluster_util.h +++ /dev/null @@ -1,212 +0,0 @@ -#ifndef RE_CLUSTER_UTIL_H -#define RE_CLUSTER_UTIL_H - -#include "clustered_netlist_fwd.h" -#include "clustered_netlist_utils.h" -#include "atom_netlist_fwd.h" -#include "globals.h" -#include "pack_types.h" -#include "cluster_util.h" -/** - * @file - * @brief This files defines some helper functions for the re-clustering API - * - * Re-clustering API is used to move atoms between clusters after the cluster is done. 
- * This can be very used in iteratively improve the packed solution after the initial clustering is done. - * It can also be used during placement to allow fine-grained moves that can move a BLE or a single FF/LUT. - * - * Note: Some of the helper functions defined here might be useful in different places in VPR. - */ - -/** - * @brief A function that returns the block ID in the clustered netlist - * from its ID in the atom netlist. - */ -ClusterBlockId atom_to_cluster(AtomBlockId atom); - -/** - * @brief A function that return a list of atoms in a cluster - * @note This function can be called only after cluster/packing is done or - * the clustered netlist is created. - * @return Atoms in the given cluster. The returned set is immutable. - */ -const std::unordered_set& cluster_to_atoms(ClusterBlockId cluster); - -/** - * @brief A function that return a list of atoms in a cluster - * @note This function can be called only after cluster/packing is done or - * the clustered netlist is created. - * @return Atoms in the given cluster. The returned set is mutable. - */ -std::unordered_set& cluster_to_mutable_atoms(ClusterBlockId cluster); - -/** - * @brief A function that loads the intra-cluster router data of one cluster - */ -t_lb_router_data* lb_load_router_data(std::vector* lb_type_rr_graphs, - ClusterBlockId clb_index, - const std::unordered_set& clb_atoms); - -/** - * @brief A function that removes a molecule from a cluster and checks legality of - * the old cluster. - * - * It returns true if the removal is done and the old cluster is legal. - * It aborts the removal and returns false if the removal will make the old cluster - * illegal. - * - * This function updates the intra-logic block router data structure (router_data) and - * remove all the atoms of the molecule from old_clb_atoms vector. - * - * @param old_clb: The original cluster of this molecule - * @param old_clb_atoms: A vector containing the list of atoms in the old cluster of the molecule. 
- * It will be updated in the function to remove the atoms of molecule from it. - * @param router_data: returns the intra logic block router data. - */ -void remove_mol_from_cluster(const t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId& old_clb, - std::unordered_set& old_clb_atoms, - bool router_data_ready, - t_lb_router_data*& router_data); - -/** - * @brief A function that starts a new cluster for one specific molecule - * - * It places the molecule in a specific type and mode that should be passed by - * the higher level routine. - * - * @param type: the cluster block type needed - * @param mode: the mode of the new cluster - * @param clb_index: the cluster block Id of the newly created cluster block - * @param during_packing: true if this function is called during packing, false if it is called during placement - * @param clustering_data: A data structure containing helper data for the clustering process - * (is updated if this function is called during packing, especially intra_lb_routing data member). - * @param router_data: returns the intra logic block router data. - * @param temp_cluster_pr: returns the partition region of the new cluster. - * @param temp_cluster_noc_grp_id returns the NoC group ID of the new cluster - * @param detailed_routing_stage: options are E_DETAILED_ROUTE_FOR_EACH_ATOM (default) and E_DETAILED_ROUTE_AT_END_ONLY. - * This argument specifies whether or not to run an intra-cluster routing-based legality - * check after adding the molecule to the cluster; default is the more conservative option. - * This argument is passed down to try_pack_mol; if E_DETAILED_ROUTE_AT_END_ONLY is passed, - * the function does not run a detailed intra-cluster routing-based legality check. - * If many molecules will be added to a cluster, this option enables use of a single - * routing check on the completed cluster (vs many incremental checks). 
- * @param force_site: optional user-specified primitive site on which to place the molecule; this is passed to - * try_pack_molecule and then to get_next_primitive_site. If a force_site argument is provided, - * the molecule is either placed on the specified site or fails to add to the cluster. - * If the force_site argument is set to its default value (-1), vpr selects an available site. - */ -bool start_new_cluster_for_mol(t_pack_molecule* molecule, - t_logical_block_type_ptr type, - int mode, - int feasible_block_array_size, - bool enable_pin_feasibility_filter, - ClusterBlockId clb_index, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data, - t_lb_router_data** router_data, - PartitionRegion& temp_cluster_pr, - NocGroupId& temp_cluster_noc_grp_id, - enum e_detailed_routing_stages detailed_routing_stage = E_DETAILED_ROUTE_FOR_EACH_ATOM, - int force_site = -1); - -/** - * @brief A function that packs a molecule into an existing cluster - * - * @param clb_index: the cluster block Id of the new cluster that we need to pack the molecule in. - * @param: clb_atoms: A vector containing the list of atoms in the new cluster block before adding the molecule. - * @param during_packing: true if this function is called during packing, false if it is called during placement - * @param is_swap: true if this function is called during swapping two molecules. False if the called during a single molecule move - * @param clustering_data: A data structure containing helper data for the clustering process - * (is updated if this function is called during packing, especially intra_lb_routing data member). - * @param router_data: returns the intra logic block router data. - * @param temp_cluster_noc_grp_id returns the NoC group ID of the new cluster - * @param detailed_routing_stage: options are E_DETAILED_ROUTE_FOR_EACH_ATOM (default) and E_DETAILED_ROUTE_AT_END_ONLY. 
- * This argument specifies whether or not to run an intra-cluster routing-based legality - * check after adding the molecule to the cluster; default is the more conservative option. - * This argument is passed down to try_pack_mol; if E_DETAILED_ROUTE_AT_END_ONLY is passed, - * the function does not run a detailed intra-cluster routing-based legality check. - * If many molecules will be added to a cluster, this option enables use of a single - * routing check on the completed cluster (vs many incremental checks). - * @param enable_pin_feasibility_filter: do a pin couting based legality check (before or in place of intra-cluster routing check). - * @param force_site: optional user-specified primitive site on which to place the molecule; this is passed to - * try_pack_molecule and then to get_next_primitive_site. If a force_site argument is provided, - * the molecule is either placed on the specified site or fails to add to the cluster. - * If the force_site argument is set to its default value (-1), vpr selects an available site. - */ -bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId new_clb, - std::unordered_set& new_clb_atoms, - bool during_packing, - t_clustering_data& clustering_data, - t_lb_router_data*& router_data, - enum e_detailed_routing_stages detailed_routing_stage = E_DETAILED_ROUTE_FOR_EACH_ATOM, - bool enable_pin_feasibility_filter = true, - int force_site = -1); - -/** - * @brief A function that fix the clustered netlist if the move is performed - * after the packing is done and clustered netlist is built - * - * If you are changing clustering after packing is done, you need to update the clustered netlist by - * deleting the newly absorbed nets and creating nets for the atom nets that become unabsorbed. It also - * fixes the cluster ports for both the old and new clusters. 
- */ -void fix_clustered_netlist(t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId old_clb, - ClusterBlockId new_clb); - -/** - * @brief A function that commits the molecule move if it is legal - * - * @params during_packing: true if this function is called during packing, false if it is called during placement - * @params new_clb_created: true if the move is creating a new cluster (e.g. move_mol_to_new_cluster) - */ -void commit_mol_move(ClusterBlockId old_clb, - ClusterBlockId new_clb, - bool during_packing, - bool new_clb_created); - -/** - * @brief A function that reverts the molecule move if it is illegal - * - * @params during_packing: true if this function is called during packing, false if it is called during placement - * @params new_clb_created: true if the move is creating a new cluster (e.g. move_mol_to_new_cluster) - * @params - */ -void revert_mol_move(ClusterBlockId old_clb, - t_pack_molecule* molecule, - t_lb_router_data*& old_router_data, - bool during_packing, - t_clustering_data& clustering_data); - -/** - * @brief A function that checks the legality of a cluster by running the intra-cluster routing - */ -bool is_cluster_legal(t_lb_router_data*& router_data); - -/** - * @brief A function that commits the molecule removal if it is legal - * - * @params during_packing: true if this function is called during packing, false if it is called during placement - * @params new_clb_created: true if the move is creating a new cluster (e.g. 
move_mol_to_new_cluster) - */ -void commit_mol_removal(const t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId old_clb, - bool during_packing, - t_lb_router_data*& router_data, - t_clustering_data& clustering_data); - -/** - * @brief A function that check that two clusters are of the same type and in the same mode of operation - */ -bool check_type_and_mode_compatibility(ClusterBlockId old_clb, - ClusterBlockId new_clb, - int verbosity); - -#endif diff --git a/vpr/src/place/place_constraints.cpp b/vpr/src/place/place_constraints.cpp index 22b15f5a04f..94af4721026 100644 --- a/vpr/src/place/place_constraints.cpp +++ b/vpr/src/place/place_constraints.cpp @@ -11,7 +11,7 @@ #include "globals.h" #include "place_constraints.h" #include "place_util.h" -#include "re_cluster_util.h" +#include "vpr_context.h" int check_placement_floorplanning(const vtr::vector_map& block_locs) { int error = 0; @@ -221,12 +221,12 @@ bool cluster_floorplanning_legal(ClusterBlockId blk_id, const t_pl_loc& loc) { void load_cluster_constraints() { auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); - auto& cluster_ctx = g_vpr_ctx.clustering(); + const ClusteringContext& cluster_ctx = g_vpr_ctx.clustering(); floorplanning_ctx.cluster_constraints.resize(cluster_ctx.clb_nlist.blocks().size()); for (auto cluster_id : cluster_ctx.clb_nlist.blocks()) { - const std::unordered_set& atoms = cluster_to_atoms(cluster_id); + const std::unordered_set& atoms = cluster_ctx.atoms_lookup[cluster_id]; PartitionRegion empty_pr; floorplanning_ctx.cluster_constraints[cluster_id] = empty_pr; diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp index b72b78cdaf1..480919ae9b6 100644 --- a/vpr/src/route/route_common.cpp +++ b/vpr/src/route/route_common.cpp @@ -7,7 +7,6 @@ #include "route_common.h" #include "route_export.h" #include "rr_graph.h" -#include "re_cluster_util.h" /* The numbering relation between the channels and clbs is: * * * diff --git 
a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp index e4ae8e996e5..98620b56009 100644 --- a/vpr/src/util/vpr_utils.cpp +++ b/vpr/src/util/vpr_utils.cpp @@ -4,6 +4,9 @@ #include #include +#include "pack_types.h" +#include "prepack.h" +#include "vpr_context.h" #include "vtr_assert.h" #include "vtr_log.h" #include "vtr_memory.h" @@ -17,7 +20,6 @@ #include "cluster_placement.h" #include "device_grid.h" #include "user_route_constraints.h" -#include "re_cluster_util.h" #include "placer_state.h" #include "grid_block.h" @@ -1124,14 +1126,11 @@ const t_pb_graph_pin* find_pb_graph_pin(const AtomNetlist& netlist, const AtomLo return get_pb_graph_node_pin_from_model_port_pin(model_port, ipin, pb_gnode); } -t_pb_graph_pin* get_pb_graph_node_pin_from_block_pin(ClusterBlockId iblock, int ipin) { +t_pb_graph_pin* get_pb_graph_node_pin_from_pb_graph_node(t_pb_graph_node* pb_graph_node, + int ipin) { int i, count; - const t_pb_type* pb_type; - t_pb_graph_node* pb_graph_node; - auto& cluster_ctx = g_vpr_ctx.clustering(); - pb_graph_node = cluster_ctx.clb_nlist.block_pb(iblock)->pb_graph_node; - pb_type = pb_graph_node->pb_type; + const t_pb_type* pb_type = pb_graph_node->pb_type; /* If this is post-placed, then the ipin may have been shuffled up by the z * num_pins, * bring it back down to 0..num_pins-1 range for easier analysis */ @@ -1169,6 +1168,13 @@ t_pb_graph_pin* get_pb_graph_node_pin_from_block_pin(ClusterBlockId iblock, int return nullptr; } +t_pb_graph_pin* get_pb_graph_node_pin_from_block_pin(ClusterBlockId iblock, int ipin) { + auto& cluster_ctx = g_vpr_ctx.clustering(); + + t_pb_graph_node* pb_graph_node = cluster_ctx.clb_nlist.block_pb(iblock)->pb_graph_node; + return get_pb_graph_node_pin_from_pb_graph_node(pb_graph_node, ipin); +} + const t_port* find_pb_graph_port(const t_pb_graph_node* pb_gnode, const std::string& port_name) { const t_pb_graph_pin* gpin = find_pb_graph_pin(pb_gnode, port_name, 0); @@ -1363,12 +1369,13 @@ std::tuple 
get_cluster_internal_class_pairs(const AtomLookup& atom_lookup, ClusterBlockId cluster_block_id) { + const ClusteringContext& cluster_ctx = g_vpr_ctx.clustering(); std::vector class_num_vec; auto [physical_tile, sub_tile, rel_cap, logical_block] = get_cluster_blk_physical_spec(cluster_block_id); class_num_vec.reserve(physical_tile->primitive_class_inf.size()); - const auto& cluster_atoms = cluster_to_atoms(cluster_block_id); + const auto& cluster_atoms = cluster_ctx.atoms_lookup[cluster_block_id]; for (AtomBlockId atom_blk_id : cluster_atoms) { auto atom_pb_graph_node = atom_lookup.atom_pb_graph_node(atom_blk_id); auto class_range = get_pb_graph_node_class_physical_range(physical_tile, @@ -1532,7 +1539,7 @@ void free_pb(t_pb* pb) { free_pb_stats(pb); } -void revalid_molecules(const t_pb* pb) { +void revalid_molecules(const t_pb* pb, const Prepacker& prepacker) { const t_pb_type* pb_type = pb->pb_graph_node->pb_type; if (pb_type->blif_model == nullptr) { @@ -1540,7 +1547,7 @@ void revalid_molecules(const t_pb* pb) { for (int i = 0; i < pb_type->modes[mode].num_pb_type_children && pb->child_pbs != nullptr; i++) { for (int j = 0; j < pb_type->modes[mode].pb_type_children[i].num_pb && pb->child_pbs[i] != nullptr; j++) { if (pb->child_pbs[i][j].name != nullptr || pb->child_pbs[i][j].child_pbs != nullptr) { - revalid_molecules(&pb->child_pbs[i][j]); + revalid_molecules(&pb->child_pbs[i][j], prepacker); } } } @@ -1556,7 +1563,7 @@ void revalid_molecules(const t_pb* pb) { atom_ctx.lookup.set_atom_clb(blk_id, ClusterBlockId::INVALID()); atom_ctx.lookup.set_atom_pb(blk_id, nullptr); - t_pack_molecule* cur_molecule = atom_ctx.prepacker.get_atom_molecule(blk_id); + t_pack_molecule* cur_molecule = prepacker.get_atom_molecule(blk_id); if (cur_molecule->valid == false) { int i; for (i = 0; i < get_array_size_of_molecule(cur_molecule); i++) { diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h index 24da4489b6b..867670517a4 100644 --- a/vpr/src/util/vpr_utils.h +++ 
b/vpr/src/util/vpr_utils.h @@ -17,6 +17,7 @@ class DeviceGrid; class PlacerState; +class Prepacker; const t_model* find_model(const t_model* models, const std::string& name, bool required = true); const t_model_ports* find_model_port(const t_model* model, const std::string& name, bool required = true); @@ -192,6 +193,7 @@ int get_max_nets_in_pb_type(const t_pb_type* pb_type); bool primitive_type_feasible(AtomBlockId blk_id, const t_pb_type* cur_pb_type); t_pb_graph_pin* get_pb_graph_node_pin_from_model_port_pin(const t_model_ports* model_port, const int model_pin, const t_pb_graph_node* pb_graph_node); const t_pb_graph_pin* find_pb_graph_pin(const AtomNetlist& netlist, const AtomLookup& netlist_lookup, const AtomPinId pin_id); +t_pb_graph_pin* get_pb_graph_node_pin_from_pb_graph_node(t_pb_graph_node* pb_graph_node, int ipin); t_pb_graph_pin* get_pb_graph_node_pin_from_block_pin(ClusterBlockId iblock, int ipin); vtr::vector alloc_and_load_pin_id_to_pb_mapping(); void free_pin_id_to_pb_mapping(vtr::vector& pin_id_to_pb_mapping); @@ -218,7 +220,7 @@ void parse_direct_pin_name(char* src_string, int line, int* start_pin_index, int void free_pb_stats(t_pb* pb); void free_pb(t_pb* pb); -void revalid_molecules(const t_pb* pb); +void revalid_molecules(const t_pb* pb, const Prepacker& prepacker); void print_switch_usage(); void print_usage_by_wire_length(); diff --git a/vpr/test/test_connection_router.cpp b/vpr/test/test_connection_router.cpp index 1bc208bf3ba..c2ac5329a26 100644 --- a/vpr/test/test_connection_router.cpp +++ b/vpr/test/test_connection_router.cpp @@ -191,9 +191,6 @@ TEST_CASE("connection_router", "[vpr]") { free_routing_structs(); vpr_free_all(arch, vpr_setup); - - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - atom_ctx.prepacker.reset(); } } // namespace diff --git a/vpr/test/test_post_verilog.cpp b/vpr/test/test_post_verilog.cpp index be4bd45f045..a8344fa79d4 100644 --- a/vpr/test/test_post_verilog.cpp +++ b/vpr/test/test_post_verilog.cpp @@ -35,10 +35,6 @@ 
void do_vpr_flow(const char* input_unc_opt, const char* output_unc_opt) { free_routing_structs(); vpr_free_all(arch, vpr_setup); - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - - atom_ctx.prepacker.reset(); - REQUIRE(flow_succeeded == true); } diff --git a/vpr/test/test_vpr.cpp b/vpr/test/test_vpr.cpp index da0b4c8b21c..0e92311b5c2 100644 --- a/vpr/test/test_vpr.cpp +++ b/vpr/test/test_vpr.cpp @@ -169,9 +169,6 @@ TEST_CASE("read_rr_graph_metadata", "[vpr]") { echo_file_name, false); vpr_free_all(arch, vpr_setup); - - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - atom_ctx.prepacker.reset(); } REQUIRE(src_inode != -1); @@ -233,9 +230,6 @@ TEST_CASE("read_rr_graph_metadata", "[vpr]") { CHECK_THAT(value->as_string().get(&arch.strings), Equals("test edge")); } vpr_free_all(arch, vpr_setup); - - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - atom_ctx.prepacker.reset(); } } // namespace From 229a80451fb3323955bf200425f237b910ff046f Mon Sep 17 00:00:00 2001 From: AlexandreSinger Date: Tue, 10 Sep 2024 10:01:48 -0400 Subject: [PATCH 2/3] [ClusterLegalizer] Moved Stat Counting Into Cluster Legalizer Found that one of the pb_stats members is required for cluster legalization; but was being calculated outside of the cluster legalizer. Moved this in to allow the cluster legalizer to be used outside of the clusterer. Also found an issue where two clusters of the same type cannot be constructed at the same time. Tried fixing it, but it produces different results (different clusters). Will raise in a separate PR, left as a fixme comment. 
--- vpr/src/pack/cluster_legalizer.cpp | 16 ++++++++++++++-- vpr/src/pack/cluster_legalizer.h | 4 ++++ vpr/src/pack/cluster_util.cpp | 1 - 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/vpr/src/pack/cluster_legalizer.cpp b/vpr/src/pack/cluster_legalizer.cpp index 4cd82799073..1ad897f6b64 100644 --- a/vpr/src/pack/cluster_legalizer.cpp +++ b/vpr/src/pack/cluster_legalizer.cpp @@ -499,6 +499,7 @@ try_place_atom_block_rec(const t_pb_graph_node* pb_graph_node, const AtomContext& atom_ctx = g_vpr_ctx.atom(); AtomContext& mutable_atom_ctx = g_vpr_ctx.mutable_atom(); + VTR_ASSERT_SAFE(cb != nullptr); e_block_pack_status block_pack_status = e_block_pack_status::BLK_PASSED; /* Discover parent */ @@ -516,6 +517,7 @@ try_place_atom_block_rec(const t_pb_graph_node* pb_graph_node, } /* Create siblings if siblings are not allocated */ + VTR_ASSERT(parent_pb != nullptr); if (parent_pb->child_pbs == nullptr) { VTR_ASSERT(parent_pb->name == nullptr); parent_pb->name = vtr::strdup(atom_ctx.nlist.block_name(blk_id).c_str()); @@ -533,8 +535,8 @@ try_place_atom_block_rec(const t_pb_graph_node* pb_graph_node, } } } else { - /* if this is not the first child of this parent, must match existing parent mode */ - if (parent_pb->mode != pb_graph_node->pb_type->parent_mode->index) { + /* if this is not the first child of this parent, must match existing parent mode */ + if (parent_pb->mode != pb_graph_node->pb_type->parent_mode->index) { return e_block_pack_status::BLK_FAILED_FEASIBLE; } } @@ -548,6 +550,7 @@ try_place_atom_block_rec(const t_pb_graph_node* pb_graph_node, } VTR_ASSERT(i < mode->num_pb_type_children); t_pb* pb = &parent_pb->child_pbs[i][pb_graph_node->placement_index]; + VTR_ASSERT_SAFE(pb != nullptr); *parent = pb; /* this pb is parent of it's child that called this function */ VTR_ASSERT(pb->pb_graph_node == pb_graph_node); if (pb->pb_stats == nullptr) { @@ -1364,6 +1367,15 @@ e_block_pack_status ClusterLegalizer::try_pack_molecule(t_pack_molecule* molecul 
commit_primitive(cluster_placement_stats_ptr, primitives_list[i]); atom_cluster_[atom_blk_id] = cluster_id; + + // Update the num child blocks in pb + const t_pb* atom_pb = atom_ctx.lookup.atom_pb(atom_blk_id); + VTR_ASSERT_SAFE(atom_pb != nullptr); + t_pb* cur_pb = atom_pb->parent_pb; + while (cur_pb != nullptr) { + cur_pb->pb_stats->num_child_blocks_in_pb++; + cur_pb = cur_pb->parent_pb; + } } // Update the lookahead pins used. diff --git a/vpr/src/pack/cluster_legalizer.h b/vpr/src/pack/cluster_legalizer.h index ed1c35b857c..744fa3c95f2 100644 --- a/vpr/src/pack/cluster_legalizer.h +++ b/vpr/src/pack/cluster_legalizer.h @@ -416,6 +416,10 @@ class ClusterLegalizer { /// @brief Stats keeper for placement information during packing/clustering. /// TODO: This should be a vector. + /// FIXME: This keeps the stats for each cluster type. This is fine within + /// the clusterer, however it yields a limitation where two clusters + /// of the same type cannot be constructed at the same time. This + /// should be stored per cluster. t_cluster_placement_stats* cluster_placement_stats_ = nullptr; /// @brief The utilization of external input/output pins during packing diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index 3ae20f169c8..0813b2e8b01 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -973,7 +973,6 @@ void update_cluster_stats(const t_pack_molecule* molecule, cb = cur_pb; } cur_pb->pb_stats->num_feasible_blocks = NOT_VALID; - cur_pb->pb_stats->num_child_blocks_in_pb++; if (atom_grp_id != AttractGroupId::INVALID()) { /* TODO: Allow clusters to have more than one attraction group. */ From ba88f64a39866bb45ce80cf9d99c95f54dcec668 Mon Sep 17 00:00:00 2001 From: AlexandreSinger Date: Sun, 15 Sep 2024 23:41:46 -0400 Subject: [PATCH 3/3] [ClusterLegalizer] Updated Based on PR Comments Added more documentation. Cleaned up one set which should have been a vector. 
--- vpr/src/base/vpr_api.cpp | 9 +- vpr/src/pack/cluster_legalizer.cpp | 87 +++++++------ vpr/src/pack/cluster_legalizer.h | 119 +++++++++++++++--- vpr/src/pack/cluster_util.cpp | 136 ++------------------ vpr/src/pack/cluster_util.h | 193 ++++++++++++++++++++++++++++- vpr/src/pack/output_clustering.h | 9 ++ vpr/src/pack/pack.cpp | 4 + vpr/src/util/vpr_utils.h | 2 + 8 files changed, 369 insertions(+), 190 deletions(-) diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index 4794724f234..a8c95274552 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -15,6 +15,7 @@ #include #include +#include "cluster_util.h" #include "vpr_context.h" #include "vtr_assert.h" #include "vtr_math.h" @@ -616,7 +617,7 @@ bool vpr_pack_flow(t_vpr_setup& vpr_setup, const t_arch& arch) { } - // Load cluster_constraints data structure here since loading pack file + // Load cluster_constraints data structure. load_cluster_constraints(); /* Sanity check the resulting netlist */ @@ -708,11 +709,7 @@ void vpr_load_packing(t_vpr_setup& vpr_setup, const t_arch& arch) { vpr_setup.PackerOpts.pack_verbosity); /* Load the mapping between clusters and their atoms */ - cluster_ctx.atoms_lookup.resize(cluster_ctx.clb_nlist.blocks().size()); - for (AtomBlockId atom_blk_id : atom_ctx.nlist.blocks()) { - ClusterBlockId atom_cluster_blk_id = atom_ctx.lookup.atom_clb(atom_blk_id); - cluster_ctx.atoms_lookup[atom_cluster_blk_id].insert(atom_blk_id); - } + init_clb_atoms_lookup(cluster_ctx.atoms_lookup, atom_ctx, cluster_ctx.clb_nlist); process_constant_nets(g_vpr_ctx.mutable_atom().nlist, atom_ctx.lookup, diff --git a/vpr/src/pack/cluster_legalizer.cpp b/vpr/src/pack/cluster_legalizer.cpp index 1ad897f6b64..f4676eea195 100644 --- a/vpr/src/pack/cluster_legalizer.cpp +++ b/vpr/src/pack/cluster_legalizer.cpp @@ -4,7 +4,7 @@ * @date September 2024 * @brief The implementation of the Cluster Legalizer class. 
* - * Most of the code in this file was original part of cluster_util.cpp and was + * Most of the code in this file was originally part of cluster_util.cpp and was * highly integrated with the clusterer in VPR. All code that was used for * legalizing the clusters was moved into this file and all the functionality * was moved into the ClusterLegalizer class. @@ -40,6 +40,8 @@ /* * @brief Gets the max cluster size that any logical block can have. + * + * This is the maximum number of primitives any cluster can contain. */ static size_t calc_max_cluster_size(const std::vector& logical_block_types) { size_t max_cluster_size = 0; @@ -63,11 +65,6 @@ static void alloc_and_load_pb_stats(t_pb* pb, const int feasible_block_array_siz pb->pb_stats = new t_pb_stats; - /* If statement below is for speed. If nets are reasonably low-fanout, * - * only a relatively small number of blocks will be marked, and updating * - * only those atom block structures will be fastest. If almost all blocks * - * have been touched it should be faster to just run through them all * - * in order (less addressing and better cache locality). */ pb->pb_stats->input_pins_used = std::vector>(pb->pb_graph_node->num_input_pin_class); pb->pb_stats->output_pins_used = std::vector>(pb->pb_graph_node->num_output_pin_class); pb->pb_stats->lookahead_input_pins_used = std::vector>(pb->pb_graph_node->num_input_pin_class); @@ -304,10 +301,10 @@ static bool check_cluster_noc_group(AtomBlockId atom_blk_id, } /** - * This function takes the root block of a chain molecule and a proposed - * placement primitive for this block. The function then checks if this - * chain root block has a placement constraint (such as being driven from - * outside the cluster) and returns the status of the placement accordingly. + * @brief This function takes the root block of a chain molecule and a proposed + * placement primitive for this block. 
The function then checks if this + * chain root block has a placement constraint (such as being driven from + * outside the cluster) and returns the status of the placement accordingly. */ static enum e_block_pack_status check_chain_root_placement_feasibility(const t_pb_graph_node* pb_graph_node, const t_pack_molecule* molecule, @@ -368,7 +365,7 @@ static enum e_block_pack_status check_chain_root_placement_feasibility(const t_p /* * @brief Check that the two atom blocks blk_id and sibling_blk_id (which should - * both be memory slices) are feasible, in the sence that they have + * both be memory slices) are feasible, in the sense that they have * precicely the same net connections (with the exception of nets in data * port classes). * @@ -480,7 +477,7 @@ static bool primitive_feasible(const AtomBlockId blk_id, t_pb* cur_pb) { } /** - * Try place atom block into current primitive location + * @brief Try to place atom block into current primitive location */ static enum e_block_pack_status try_place_atom_block_rec(const t_pb_graph_node* pb_graph_node, @@ -613,7 +610,10 @@ try_place_atom_block_rec(const t_pb_graph_node* pb_graph_node, return block_pack_status; } -/* Resets nets used at different pin classes for determining pin feasibility */ +/* + * @brief Resets nets used at different pin classes for determining pin + * feasibility. 
+ */ static void reset_lookahead_pins_used(t_pb* cur_pb) { const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; if (cur_pb->pb_stats == nullptr) { @@ -674,7 +674,7 @@ static int net_sinks_reachable_in_cluster(const t_pb_graph_pin* driver_pb_gpin, } /** - * Returns the pb_graph_pin of the atom pin defined by the driver_pin_id in the driver_pb + * @brief Returns the pb_graph_pin of the atom pin defined by the driver_pin_id in the driver_pb */ static t_pb_graph_pin* get_driver_pb_graph_pin(const t_pb* driver_pb, const AtomPinId driver_pin_id) { const AtomContext& atom_ctx = g_vpr_ctx.atom(); @@ -701,12 +701,12 @@ static t_pb_graph_pin* get_driver_pb_graph_pin(const t_pb* driver_pb, const Atom } /** - * Given a pin and its assigned net, mark all pin classes that are affected. - * Check if connecting this pin to it's driver pin or to all sink pins will - * require leaving a pb_block starting from the parent pb_block of the - * primitive till the root block (depth = 0). If leaving a pb_block is - * required add this net to the pin class (to increment the number of used - * pins from this class) that should be used to leave the pb_block. + * @brief Given a pin and its assigned net, mark all pin classes that are affected. + * Check if connecting this pin to it's driver pin or to all sink pins will + * require leaving a pb_block starting from the parent pb_block of the + * primitive till the root block (depth = 0). If leaving a pb_block is + * required add this net to the pin class (to increment the number of used + * pins from this class) that should be used to leave the pb_block. 
*/ static void compute_and_mark_lookahead_pins_used_for_pin(const t_pb_graph_pin* pb_graph_pin, const t_pb* primitive_pb, @@ -834,7 +834,9 @@ static void compute_and_mark_lookahead_pins_used_for_pin(const t_pb_graph_pin* p } -/* Determine if pins of speculatively packed pb are legal */ +/* + * @brief Determine if pins of speculatively packed pb are legal + */ static void compute_and_mark_lookahead_pins_used(const AtomBlockId blk_id, const vtr::vector_map& atom_cluster) { const AtomContext& atom_ctx = g_vpr_ctx.atom(); @@ -851,7 +853,9 @@ static void compute_and_mark_lookahead_pins_used(const AtomBlockId blk_id, } } -/* Determine if speculatively packed cur_pb is pin feasible +/* + * @brief Determine if speculatively packed cur_pb is pin feasible + * * Runtime is actually not that bad for this. It's worst case O(k^2) where k is the * number of pb_graph pins. Can use hash tables or make incremental if becomes an issue. */ @@ -881,7 +885,10 @@ static void try_update_lookahead_pins_used(t_pb* cur_pb, } } -/* Check if the number of available inputs/outputs for a pin class is sufficient for speculatively packed blocks */ +/* + * @brief Check if the number of available inputs/outputs for a pin class is + * sufficient for speculatively packed blocks. + */ static bool check_lookahead_pins_used(t_pb* cur_pb, t_ext_pin_util max_external_pin_util) { const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; @@ -943,11 +950,11 @@ static bool check_lookahead_pins_used(t_pb* cur_pb, t_ext_pin_util max_external_ } /** - * This function takes a chain molecule, and the pb_graph_node that is chosen - * for packing the molecule's root block. Using the given root_primitive, this - * function will identify which chain id this molecule is being mapped to and - * will update the chain id value inside the chain info data structure of this - * molecule + * @brief This function takes a chain molecule, and the pb_graph_node that is + * chosen for packing the molecule's root block. 
Using the given + * root_primitive, this function will identify which chain id this + * molecule is being mapped to and will update the chain id value inside + * the chain info data structure of this molecule. */ static void update_molecule_chain_info(t_pack_molecule* chain_molecule, const t_pb_graph_node* root_primitive) { VTR_ASSERT(chain_molecule->chain_info->chain_id == -1 && chain_molecule->chain_info->is_long_chain); @@ -969,7 +976,8 @@ static void update_molecule_chain_info(t_pack_molecule* chain_molecule, const t_ VTR_ASSERT(false); } -/* Revert trial atom block iblock and free up memory space accordingly +/* + * @brief Revert trial atom block iblock and free up memory space accordingly. */ static void revert_place_atom_block(const AtomBlockId blk_id, t_lb_router_data* router_data, @@ -1021,7 +1029,9 @@ static void revert_place_atom_block(const AtomBlockId blk_id, mutable_atom_ctx.lookup.set_atom_pb(blk_id, nullptr); } -/* Speculation successful, commit input/output pins used */ +/* + * @brief Speculation successful, commit input/output pins used. + */ static void commit_lookahead_pins_used(t_pb* cur_pb) { const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; @@ -1055,7 +1065,7 @@ static void commit_lookahead_pins_used(t_pb* cur_pb) { } /** - * Cleans up a pb after unsuccessful molecule packing + * @brief Cleans up a pb after unsuccessful molecule packing * * Recursively frees pbs from a t_pb tree. The given root pb itself is not * deleted. @@ -1135,7 +1145,7 @@ e_block_pack_status ClusterLegalizer::try_pack_molecule(t_pack_molecule* molecul VTR_ASSERT_DEBUG(cluster.pb != nullptr); VTR_ASSERT_DEBUG(cluster.type != nullptr); - // TODO: Remove these global accesses. + // TODO: Remove these global accesses to the contexts. 
// AtomContext used for: // - printing verbose statements // - Looking up the primitive pb @@ -1349,7 +1359,7 @@ e_block_pack_status ClusterLegalizer::try_pack_molecule(t_pack_molecule* molecul cluster.noc_grp_id = new_cluster_noc_grp_id; // Insert the molecule into the cluster for bookkeeping. - cluster.molecules.insert(molecule); + cluster.molecules.push_back(molecule); for (int i = 0; i < molecule_size; i++) { AtomBlockId atom_blk_id = molecule->atom_block_ids[i]; @@ -1653,10 +1663,13 @@ ClusterLegalizer::ClusterLegalizer(const AtomNetlist& atom_netlist, feasible_block_array_size_ = feasible_block_array_size; log_verbosity_ = log_verbosity; // Get the target external pin utilization - // NOTE: This is really silly, but this can potentially fail. If it does - // it is important that everything is allocated. If not, when it fails - // it will call the reset method when only parts of the class are - // allocated which may cause havoc... + // NOTE: This has to be initialized last due to the fact that VPR_FATAL_ERROR + // may be called within the constructor of t_ext_pin_util_targets. If + // this occurs, an exception is thrown which will unwind the stack. If + // the cluster legalizer object is stored on the stack, this can call + // the destructor prematurely (before certain structures are allocated). + // Therefore, this is created at the end, when the class is in a state + // where it can be destroyed. target_external_pin_util_ = t_ext_pin_util_targets(target_external_pin_util_str); } diff --git a/vpr/src/pack/cluster_legalizer.h b/vpr/src/pack/cluster_legalizer.h index 744fa3c95f2..e3aee27be57 100644 --- a/vpr/src/pack/cluster_legalizer.h +++ b/vpr/src/pack/cluster_legalizer.h @@ -1,7 +1,19 @@ +/** + * @file + * @author Alex Singer + * @date September 2024 + * @brief The declaration of the Cluster Legalizer class. + * + * This file declares a class called the ClusterLegalizer which encapsulates all + * logic for creating legal clusters from prepacked molecules. This class is
This class is + * designed to be self-contained to the point that it is able to be called + * externally to the Packer in VPR. + */ + #pragma once -#include #include +#include #include "atom_netlist_fwd.h" #include "noc_data_types.h" #include "partition_region.h" @@ -49,12 +61,15 @@ struct LegalizationCluster { /// @brief A list of the molecules in the cluster. By design, a cluster will /// only contain molecules which have been previously legalized into /// the cluster using a legalization strategy. - std::set molecules; + std::vector molecules; /// @brief The logical block of this cluster. /// TODO: We should be more careful with how this is allocated. Instead of /// pointers, we really should use IDs and store them in a standard - /// container. + /// container. Currently this is being allocated with the new keyword + /// and freed when the cluster is destroyed; however this is used + /// externally to the class and it can be dangerous to pass around + /// a pointer to this object. t_pb* pb; /// @brief The logical block type this cluster represents. @@ -79,7 +94,7 @@ struct LegalizationCluster { }; /* - * @brief A manager class which manages the legalization of cluster. As clusters + * @brief A manager class which manages the legalization of clusters. As clusters * are created, this class will legalize for each molecule added. It also * provides methods which are helpful for clustering. * @@ -99,13 +114,15 @@ struct LegalizationCluster { * look something like this. Note, this example is simplified and the result * of the packings should be checked and handled. * - * ClusterLegalizer legalizer(...); + * ClusterLegalizer legalizer(..., + * ClusterLegalizationStrategy::SKIP_INTRA_LB_ROUTE, + * ...); * * std::tie(status, new_cluster_id) = legalizer.start_new_cluster(seed_mol, * cluster_type, * mode); * for mol in molecules_to_add: - * // Cheaper additions, but may pack a molecule that wouldnt route. 
+ * // Cheaper additions, but may pack a molecule that wouldn't route. * status = legalizer.add_mol_to_cluster(mol, new_cluster_id); * if (status != e_block_pack_status::BLK_PASSED) * break; @@ -114,6 +131,8 @@ struct LegalizationCluster { * if (!legalizer.check_cluster_legality(new_cluster_id)) * // Destroy the illegal cluster. * legalizer.destroy_cluster(new_cluster_id); + * // Clean-up the internal bookkeeping of the class (required after + * // destroying a cluster). * legalizer.compress(); * // Handle how to try again (maybe use FULL strategy). * @@ -121,9 +140,11 @@ struct LegalizationCluster { * This strategy will fully route the internal connections of the clusters for * each molecule added. This is much more expensive to run; however, will ensure * that the cluster is fully legalized while it is being created. An example - * of how to sue this strategy would look something like this: + * of how to use this strategy would look something like this: * - * Clusterlegalizer legalizer(...); + * ClusterLegalizer legalizer(..., + * ClusterLegalizationStrategy::FULL, + * ...); * * std::tie(pack_result, new_cluster_id) = legalizer.start_new_cluster(seed_mol, * cluster_type, @@ -174,6 +195,46 @@ class ClusterLegalizer { * @brief Initialize the ClusterLegalizer class. * * Allocates internal state. + * + * @param atom_netlist The complete atom netlist. Used to allocate + * internal structures to the correct size. + * @param prepacker The prepacker object used to prepack the atoms + * into molecules. A reference to this object is + * stored internally to be used to look up the + * molecules of atoms. + * @param logical_block_types Used to allocate internal objects. Used to + * get the max number of primitives in any block + * type. + * @param lb_type_rr_graphs The routing resource graph internal to the + * different cluster types. A reference is stored + * in the class to be used to allocate and load + * the router data.
+ * @param num_models The total number of models in the architecture. + * This is the sum of the number of the user and + * library models. Used internally to allocate data + * structures. + * @param target_external_pin_util_str A string used to initialize the + * target external pin utilization of + * each cluster type. + * @param high_fanout_thresholds An object that stores the thresholds for + * a net to be considered high fanout for + * different block types. + * @param cluster_legalization_strategy The legalization strategy to be + * used when creating clusters and + * adding molecules to clusters. + * Controls the checks that are performed. + * @param enable_pin_feasibility_filter A flag to turn on/off the check for + * pin usage feasibility. + * @param feasible_block_array_size The largest number of feasible blocks + * that can be stored in a cluster. Used + * to allocate an internal structure. + * @param log_verbosity Controls how verbose the log messages will + * be within this class. + * + * TODO: A lot of these arguments are only used to allocate C-style arrays + * since the original author was avoiding dynamic allocations. It may + * be more space efficient (and cleaner) to make these dynamic arrays + * and not pass these arguments in. */ ClusterLegalizer(const AtomNetlist& atom_netlist, const Prepacker& prepacker, @@ -211,7 +272,12 @@ class ClusterLegalizer { /* * @brief Add an unclustered molecule to the given legalization cluster. * - * If the addition was unsuccessful, the molecule will remain unclustered. + * The ClusterLegalizationStrategy (set either in the constructor or by the + * set_cluster_legalization_strategy method) decides what checks are + * performed when adding a molecule to the cluster. + * + * If the addition was unsuccessful (i.e. a check fails), the molecule will + * remain unclustered. * * @param molecule The molecule to add to the cluster. * @param cluster_id The ID of the cluster to add the molecule to. 
@@ -234,8 +300,8 @@ class ClusterLegalizer { void destroy_cluster(LegalizationClusterId cluster_id); /* - * @brief Compress the internal storage of clusters. Should be called after - * a cluster is destroyed. + * @brief Compress the internal storage of clusters. Should be called + * eventually after one or more clusters are destroyed. * * Similar to the Netlist compress method. Will invalidate all Legalization * Cluster IDs. @@ -246,7 +312,7 @@ class ClusterLegalizer { void compress(); /* - * @brief A list of all cluster IDs in the legalizer. + * @brief A range of all cluster IDs in the legalizer. * * If the legalizer has been compressed (or no clusters have been destroyed) * then all cluster IDs in this list will be valid and represent a non-empty @@ -288,14 +354,15 @@ class ClusterLegalizer { void clean_cluster(LegalizationClusterId cluster_id); /* - * @brief Verify that all atoms have been clustered into a cluster. + * @brief Verify that all atoms have been clustered into some cluster. * * This will not verify if all the clusters are fully legal. */ void verify(); /* - * @brief Finalize the clustering. + * @brief Finalize the clustering. Required for generating a Clustered + * Netlist. * * Before generating a Clustered Netlist, each cluster needs to allocate and * load a pb_route. This method will generate a pb_route for each cluster @@ -339,6 +406,14 @@ class ClusterLegalizer { } /// @brief Gets the cluster placement stats of the given cluster. + /// + /// The cluster placement stats are statistics used to monitor which atoms + /// have been physically clustered into the pb (more specifically what site + /// they will go). This can be used externally to the legalizer to detect + /// if an atom could physically go into a cluster (exists_free_primitive_for_atom_block). + /// + /// TODO: Releasing the whole stats can be dangerous. Ideally there should + /// just be a method to see if an atom could physically go in a cluster. 
inline t_cluster_placement_stats* get_cluster_placement_stats(LegalizationClusterId cluster_id) const { VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); return &(cluster_placement_stats_[get_cluster_type(cluster_id)->index]); @@ -358,6 +433,9 @@ class ClusterLegalizer { } /// @bried Gets the max size a cluster could physically be. + /// + /// This is the maximum number of primitives any cluster could ever have + /// in the architecture. inline size_t get_max_cluster_size() const { return max_cluster_size_; } @@ -411,7 +489,10 @@ class ClusterLegalizer { /// @brief Stores the NoC group ID of each atom block. Atom blocks that /// belong to different NoC groups can't be clustered with each other - /// into the same clustered block. + /// into the same clustered block. Used under some optimization settings + /// to improve placement locality / NoC usage. Atoms with different + /// NoC group IDs belong to logic that is disjoint except through + /// NoC traffic. vtr::vector atom_noc_grp_id_; /// @brief Stats keeper for placement information during packing/clustering. @@ -422,8 +503,8 @@ class ClusterLegalizer { /// should stored per cluster. t_cluster_placement_stats* cluster_placement_stats_ = nullptr; - /// @brief The utilization of external input/output pins during packing - /// (between 0 and 1). + /// @brief The maximum fractional utilization of cluster external + /// input/output pins during packing (between 0 and 1). t_ext_pin_util_targets target_external_pin_util_; /// @brief The max size of any molecule. This is used to allocate a dynamic @@ -431,8 +512,8 @@ class ClusterLegalizer { /// expensive to calculate from the prepacker. size_t max_molecule_size_; - /// @brief The max size a cluster could physically be. This is used to - /// allocate dynamic arrays. + /// @brief The max number of primitives a cluster could physically have. + /// This is used to allocate dynamic arrays.
size_t max_cluster_size_; /// @brief A vector of routing resource nodes within each logical block type diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index 0813b2e8b01..39940410b40 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -7,6 +7,7 @@ #include "atom_netlist.h" #include "cluster_legalizer.h" #include "cluster_placement.h" +#include "clustered_netlist.h" #include "concrete_timing_info.h" #include "output_clustering.h" #include "prepack.h" @@ -72,7 +73,6 @@ static void echo_clusters(char* filename, const ClusterLegalizer& cluster_legali fclose(fp); } -//calculate the initial timing at the start of packing stage void calc_init_packing_timing(const t_packer_opts& packer_opts, const t_analysis_opts& analysis_opts, const Prepacker& prepacker, @@ -125,7 +125,6 @@ void calc_init_packing_timing(const t_packer_opts& packer_opts, } } -//Free the clustering data structures void free_clustering_data(const t_packer_opts& packer_opts, t_clustering_data& clustering_data) { @@ -136,7 +135,6 @@ void free_clustering_data(const t_packer_opts& packer_opts, delete[] clustering_data.memory_pool; } -//check the clustering and output it void check_and_output_clustering(ClusterLegalizer& cluster_legalizer, const t_packer_opts& packer_opts, const std::unordered_set& is_clock, @@ -156,7 +154,6 @@ void check_and_output_clustering(ClusterLegalizer& cluster_legalizer, true /*from_legalizer*/); } -/*print the header for the clustering progress table*/ void print_pack_status_header() { VTR_LOG("Starting Clustering - Clustering Progress: \n"); VTR_LOG("------------------- -------------------------- ---------\n"); @@ -164,7 +161,6 @@ void print_pack_status_header() { VTR_LOG("------------------- -------------------------- ---------\n"); } -/*incrementally print progress updates during clustering*/ void print_pack_status(int num_clb, int tot_num_molecules, int num_molecules_processed, @@ -203,11 +199,6 @@ void print_pack_status(int 
num_clb, } } -/* - * Periodically rebuild the attraction groups to reflect which atoms in them - * are still available for new clusters (i.e. remove the atoms that have already - * been packed from the attraction group). - */ void rebuild_attraction_groups(AttractionInfo& attraction_groups, const ClusterLegalizer& cluster_legalizer) { @@ -226,7 +217,6 @@ void rebuild_attraction_groups(AttractionInfo& attraction_groups, } } -/* Determine if atom block is in pb */ bool is_atom_blk_in_pb(const AtomBlockId blk_id, const t_pb* pb) { const AtomContext& atom_ctx = g_vpr_ctx.atom(); @@ -240,9 +230,6 @@ bool is_atom_blk_in_pb(const AtomBlockId blk_id, const t_pb* pb) { return false; } -/* Remove blk from list of feasible blocks sorted according to gain - * Useful for removing blocks that are repeatedly failing. If a block - * has been found to be illegal, we don't repeatedly consider it.*/ void remove_molecule_from_pb_stats_candidates(t_pack_molecule* molecule, t_pb* pb) { int molecule_index; @@ -268,7 +255,6 @@ void remove_molecule_from_pb_stats_candidates(t_pack_molecule* molecule, pb->pb_stats->num_feasible_blocks--; } -/* Add blk to list of feasible blocks sorted according to gain */ void add_molecule_to_pb_stats_candidates(t_pack_molecule* molecule, std::map& gain, t_pb* pb, @@ -402,14 +388,6 @@ t_pack_molecule* get_molecule_by_num_ext_inputs(const int ext_inps, const enum e_removal_policy remove_flag, t_cluster_placement_stats* cluster_placement_stats_ptr, t_molecule_link* unclustered_list_head) { - /* This routine returns an atom block which has not been clustered, has * - * no connection to the current cluster, satisfies the cluster * - * clock constraints, is a valid subblock inside the cluster, does not exceed the cluster subblock units available, - * and has ext_inps external inputs. Remove_flag * - * controls whether or not blocks that have already been clustered * - * are removed from the unclustered_list data structures. 
NB: * - * to get a atom block regardless of clock constraints just set clocks_ * - * avail > 0. */ t_molecule_link *ptr, *prev_ptr; int i; @@ -455,12 +433,7 @@ t_pack_molecule* get_free_molecule_with_most_ext_inputs_for_cluster(t_pb* cur_pb t_cluster_placement_stats* cluster_placement_stats_ptr, t_molecule_link* unclustered_list_head, const int& unclustered_list_head_size) { - /* This routine is used to find new blocks for clustering when there are no feasible * - * blocks with any attraction to the current cluster (i.e. it finds * - * blocks which are unconnected from the current cluster). It returns * - * the atom block with the largest number of used inputs that satisfies the * - * clocking and number of inputs constraints. If no suitable atom block is * - * found, the routine returns nullptr. + /* * TODO: Analyze if this function is useful in more detail, also, should probably not include clock in input count */ @@ -786,13 +759,6 @@ void mark_and_update_partial_gain(const AtomNetId net_id, const std::unordered_set& is_global, const int high_fanout_net_threshold, std::unordered_map& net_output_feeds_driving_block_input) { - /* Updates the marked data structures, and if gain_flag is GAIN, * - * the gain when an atom block is added to a cluster. The * - * sharinggain is the number of inputs that a atom block shares with * - * blocks that are already in the cluster. Hillgain is the * - * reduction in number of pins-required by adding a atom block to the * - * cluster. The timinggain is the criticality of the most critical* - * atom net between this atom block and an atom block in the cluster. 
*/ const AtomContext& atom_ctx = g_vpr_ctx.atom(); t_pb* cur_pb = atom_ctx.lookup.atom_pb(clustered_blk_id)->parent_pb; @@ -869,9 +835,6 @@ void mark_and_update_partial_gain(const AtomNetId net_id, /*****************************************/ void update_total_gain(float alpha, float beta, bool timing_driven, bool connection_driven, t_pb* pb, AttractionInfo& attraction_groups) { - /*Updates the total gain array to reflect the desired tradeoff between* - *input sharing (sharinggain) and path_length minimization (timinggain) - *input each time a new molecule is added to the cluster.*/ const AtomContext& atom_ctx = g_vpr_ctx.atom(); t_pb* cur_pb = pb; @@ -940,10 +903,6 @@ void update_cluster_stats(const t_pack_molecule* molecule, const SetupTimingInfo& timing_info, AttractionInfo& attraction_groups, std::unordered_map& net_output_feeds_driving_block_input) { - /* Routine that is called each time a new molecule is added to the cluster. - * Makes calls to update cluster stats such as the gain map for atoms, used pins, and clock structures, - * in order to reflect the new content of the cluster. - * Also keeps track of which attraction group the cluster belongs to. */ int molecule_size; int iblock; @@ -1057,9 +1016,6 @@ void start_new_cluster(ClusterLegalizer& cluster_legalizer, const std::map>& primitive_candidate_block_types, int verbosity, bool balance_block_type_utilization) { - /* Given a starting seed block, start_new_cluster determines the next cluster type to use - * It expands the FPGA if it cannot find a legal cluster for the atom block - */ const AtomContext& atom_ctx = g_vpr_ctx.atom(); DeviceContext& mutable_device_ctx = g_vpr_ctx.mutable_device(); @@ -1159,14 +1115,6 @@ void start_new_cluster(ClusterLegalizer& cluster_legalizer, } } -/* - * Get candidate molecule to pack into currently open cluster - * Molecule selection priority: - * 1. 
Find unpacked molecules based on criticality and strong connectedness (connected by low fanout nets) with current cluster - * 2. Find unpacked molecules based on transitive connections (eg. 2 hops away) with current cluster - * 3. Find unpacked molecules based on weak connectedness (connected by high fanout nets) with current cluster - * 4. Find unpacked molecules based on attraction group of the current cluster (if the cluster has an attraction group) - */ t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, AttractionInfo& attraction_groups, const enum e_gain_type gain_mode, @@ -1271,7 +1219,6 @@ t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, return molecule; } -/* Add molecules with strong connectedness to the current cluster to the list of feasible blocks. */ void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, const Prepacker& prepacker, @@ -1297,7 +1244,6 @@ void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur_pb, } } -/* Add molecules based on weak connectedness (connected by high fanout nets) with current cluster */ void add_cluster_molecule_candidates_by_highfanout_connectivity(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, const Prepacker& prepacker, @@ -1336,14 +1282,6 @@ void add_cluster_molecule_candidates_by_highfanout_connectivity(t_pb* cur_pb, cur_pb->pb_stats->tie_break_high_fanout_net = AtomNetId::INVALID(); /* Mark off that this high fanout net has been considered */ } -/* - * If the current cluster being packed has an attraction group associated with it - * (i.e. there are atoms in it that belong to an attraction group), this routine adds molecules - * from the associated attraction group to the list of feasible blocks for the cluster. - * Attraction groups can be very large, so we only add some randomly selected molecules for efficiency - * if the number of atoms in the group is greater than 500. 
Therefore, the molecules added to the candidates - * will vary each time you call this function. - */ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, const Prepacker& prepacker, @@ -1455,7 +1393,6 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, } } -/* Add molecules based on transitive connections (eg. 2 hops away) with current cluster*/ void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, const Prepacker& prepacker, @@ -1490,7 +1427,6 @@ void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur_pb, } } -/*Check whether a free primitive exists for each atom block in the molecule*/ bool check_free_primitives_for_molecule_atoms(t_pack_molecule* molecule, t_cluster_placement_stats* cluster_placement_stats_ptr, const ClusterLegalizer& cluster_legalizer) { @@ -1568,7 +1504,6 @@ t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, return best_molecule; } -//Calculates molecule statistics for a single molecule t_molecule_stats calc_molecule_stats(const t_pack_molecule* molecule, const AtomNetlist& atom_nlist) { t_molecule_stats molecule_stats; @@ -1787,12 +1722,6 @@ t_pack_molecule* get_highest_gain_seed_molecule(int& seed_index, return nullptr; } -/* get gain of packing molecule into current cluster - * gain is equal to: - * total_block_gain - * + molecule_base_gain*some_factor - * - introduced_input_nets_of_unrelated_blocks_pulled_in_by_molecule*some_other_factor - */ float get_molecule_gain(t_pack_molecule* molecule, std::map& blk_gain, AttractGroupId cluster_attraction_group_id, AttractionInfo& attraction_groups, int num_molecule_failures) { float gain; int i; @@ -1850,13 +1779,6 @@ float get_molecule_gain(t_pack_molecule* molecule, std::map& return gain; } -/** - * Score unclustered atoms that are two hops away from current cluster - * For example, consider a 
cluster that has a FF feeding an adder in another - * cluster. Since this FF is feeding an adder that is packed in another cluster - * this function should find other FFs that are feeding other inputs of this adder - * since they are two hops away from the FF packed in this cluster - */ void load_transitive_fanout_candidates(LegalizationClusterId legalization_cluster_id, t_pb_stats* pb_stats, const Prepacker& prepacker, @@ -1963,10 +1885,6 @@ void print_seed_gains(const char* fname, const std::vector& seed_at fclose(fp); } -/** - * This function update the pb_type_count data structure by incrementing - * the number of used pb_types in the given packed cluster t_pb - */ size_t update_pb_type_count(const t_pb* pb, std::map& pb_type_count, size_t depth) { size_t max_depth = depth; @@ -2010,10 +1928,6 @@ void print_pb_type_count_recurr(t_pb_type* pb_type, size_t max_name_chars, size_ } } -/** - * This function identifies the logic block type which is - * defined by the block type which has a lut primitive - */ t_logical_block_type_ptr identify_logic_block_type(std::map>& primitive_candidate_block_types) { std::string lut_name = ".names"; @@ -2026,12 +1940,6 @@ t_logical_block_type_ptr identify_logic_block_type(std::mapLE) that has more than one instance within the cluster. 
- */ t_pb_type* identify_le_block_type(t_logical_block_type_ptr logic_block_type) { // if there is no CLB-like cluster, then there is no LE pb_block if (!logic_block_type) @@ -2056,9 +1964,6 @@ t_pb_type* identify_le_block_type(t_logical_block_type_ptr logic_block_type) { return nullptr; } -/** - * This function updates the le_count data structure from the given packed cluster - */ void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_type, const t_pb_type* le_pb_type, std::vector& le_count) { // if this cluster doesn't contain LEs or there // are no les in this architecture, ignore it @@ -2098,10 +2003,6 @@ void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_ } } -/** - * This function returns true if the given physical block has - * a primitive matching the given blif model and is used - */ bool pb_used_for_blif_model(const t_pb* pb, const std::string& blif_model_name) { auto pb_graph_node = pb->pb_graph_node; auto pb_type = pb_graph_node->pb_type; @@ -2129,9 +2030,6 @@ bool pb_used_for_blif_model(const t_pb* pb, const std::string& blif_model_name) return false; } -/** - * Print the LE count data strurture - */ void print_le_count(std::vector& le_count, const t_pb_type* le_pb_type) { VTR_LOG("\nLogic Element (%s) detailed count:\n", le_pb_type->name); VTR_LOG(" Total number of Logic Elements used : %d\n", le_count[0] + le_count[1] + le_count[2]); @@ -2140,11 +2038,6 @@ void print_le_count(std::vector& le_count, const t_pb_type* le_pb_type) { VTR_LOG(" LEs used for registers only : %d\n\n", le_count[2]); } -/** - * Given a pointer to a pb in a cluster, this routine returns - * a pointer to the top-level pb of the given pb. - * This is needed when updating the gain for a cluster. 
- */ t_pb* get_top_level_pb(t_pb* pb) { t_pb* top_level_pb = pb; @@ -2158,20 +2051,17 @@ t_pb* get_top_level_pb(t_pb* pb) { return top_level_pb; } -void init_clb_atoms_lookup(vtr::vector>& atoms_lookup) { - const AtomContext& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - - atoms_lookup.resize(cluster_ctx.clb_nlist.blocks().size()); - - for (auto atom_blk_id : atom_ctx.nlist.blocks()) { +void init_clb_atoms_lookup(vtr::vector>& atoms_lookup, + const AtomContext& atom_ctx, + const ClusteredNetlist& clb_nlist) { + // Resize the atoms lookup to the number of clusters. + atoms_lookup.resize(clb_nlist.blocks().size()); + for (AtomBlockId atom_blk_id : atom_ctx.nlist.blocks()) { + // Get the CLB that this atom is packed into. ClusterBlockId clb_index = atom_ctx.lookup.atom_clb(atom_blk_id); - - /* if this data structure is being built alongside the clustered netlist */ - /* e.g. when ingesting and legalizing a flat placement solution, some atoms */ - /* may not yet be mapped to a valid clb_index */ - if (clb_index != ClusterBlockId::INVALID()) { - atoms_lookup[clb_index].insert(atom_blk_id); - } + // Every atom block should be in a cluster. + VTR_ASSERT_SAFE(clb_index.is_valid()); + // Insert this atom into the set of atoms for its CLB. + atoms_lookup[clb_index].insert(atom_blk_id); } } diff --git a/vpr/src/pack/cluster_util.h b/vpr/src/pack/cluster_util.h index 74afdefa9fb..d25a3b1ab44 100644 --- a/vpr/src/pack/cluster_util.h +++ b/vpr/src/pack/cluster_util.h @@ -8,10 +8,12 @@ class AtomNetId; class ClusterBlockId; +class ClusteredNetlist; class PreClusterDelayCalculator; class Prepacker; class SetupTimingInfo; class t_pack_molecule; +struct AtomContext; /** * @file @@ -97,7 +99,9 @@ struct t_clustering_data { /* Clustering helper functions */ /***********************************/ -//calculate the initial timing at the start of packing stage +/* + * @brief Calculate the initial timing at the start of the packing stage.
+ */ void calc_init_packing_timing(const t_packer_opts& packer_opts, const t_analysis_opts& analysis_opts, const Prepacker& prepacker, @@ -105,27 +109,48 @@ void calc_init_packing_timing(const t_packer_opts& packer_opts, std::shared_ptr& timing_info, vtr::vector& atom_criticality); -//free the clustering data structures +/* + * @brief Free the clustering data structures. + */ void free_clustering_data(const t_packer_opts& packer_opts, t_clustering_data& clustering_data); -//check clustering legality and output it +/* + * @brief Check clustering legality and output it. + */ void check_and_output_clustering(ClusterLegalizer& cluster_legalizer, const t_packer_opts& packer_opts, const std::unordered_set& is_clock, const t_arch* arch); +/* + * @brief Determine if atom block is in pb. + */ bool is_atom_blk_in_pb(const AtomBlockId blk_id, const t_pb* pb); +/* + * @brief Add blk to list of feasible blocks sorted according to gain. + */ void add_molecule_to_pb_stats_candidates(t_pack_molecule* molecule, std::map& gain, t_pb* pb, int max_queue_size, AttractionInfo& attraction_groups); +/* + * @brief Remove blk from list of feasible blocks sorted according to gain. + * + * Useful for removing blocks that are repeatedly failing. If a block + * has been found to be illegal, we don't repeatedly consider it. + */ void remove_molecule_from_pb_stats_candidates(t_pack_molecule* molecule, t_pb* pb); - +/* + * @brief Allocates and inits the data structures used for clustering. + * + * This method initializes the list of molecules to pack, the clustering data, + * and the net info. 
+ */ void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, const Prepacker& prepacker, t_clustering_data& clustering_data, @@ -133,18 +158,41 @@ void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, int& unclustered_list_head_size, int num_molecules); +/* + * @brief This routine returns an atom block which has not been clustered, has + * no connection to the current cluster, satisfies the cluster clock + * constraints, is a valid subblock inside the cluster, does not exceed + * the cluster subblock units available, and has ext_inps external inputs. + * Remove_flag controls whether or not blocks that have already been + * clustered are removed from the unclustered_list data structures. + * NB: to get an atom block regardless of clock constraints just set + * clocks_avail > 0. + */ t_pack_molecule* get_molecule_by_num_ext_inputs(const int ext_inps, const enum e_removal_policy remove_flag, t_cluster_placement_stats* cluster_placement_stats_ptr, t_molecule_link* unclustered_list_head); +/* @brief This routine is used to find new blocks for clustering when there are + * no feasible blocks with any attraction to the current cluster (i.e. + * it finds blocks which are unconnected from the current cluster). It + * returns the atom block with the largest number of used inputs that + * satisfies the clocking and number of inputs constraints. If no + * suitable atom block is found, the routine returns nullptr. + */ t_pack_molecule* get_free_molecule_with_most_ext_inputs_for_cluster(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, t_molecule_link* unclustered_list_head, const int& unclustered_list_head_size); +/* + * @brief Print the header for the clustering progress table. + */ void print_pack_status_header(); +/* + * @brief Incrementally print progress updates during clustering.
+ */ void print_pack_status(int num_clb, int tot_num_molecules, int num_molecules_processed, @@ -154,9 +202,20 @@ void print_pack_status(int num_clb, AttractionInfo& attraction_groups, const ClusterLegalizer& cluster_legalizer); +/* + * @brief Periodically rebuild the attraction groups to reflect which atoms in + * them are still available for new clusters (i.e. remove the atoms that + * have already been packed from the attraction group). + */ void rebuild_attraction_groups(AttractionInfo& attraction_groups, const ClusterLegalizer& cluster_legalizer); +/* + * @brief Try to pack next_molecule into the given cluster. If this succeeds + * prepares the next_molecule with a new value to pack next iteration. + * + * This method will print the pack status and update the cluster stats. + */ void try_fill_cluster(ClusterLegalizer& cluster_legalizer, const Prepacker& prepacker, const t_packer_opts& packer_opts, @@ -202,6 +261,15 @@ void update_timing_gain_values(const AtomNetId net_id, const std::unordered_set& is_global, std::unordered_map& net_output_feeds_driving_block_input); +/* + * @brief Updates the marked data structures, and if gain_flag is GAIN, the gain + * when an atom block is added to a cluster. The sharinggain is the + * number of inputs that an atom block shares with blocks that are already + * in the cluster. Hillgain is the reduction in number of pins-required + * by adding an atom block to the cluster. The timinggain is the + * criticality of the most critical atom net between this atom block and + * an atom block in the cluster.
+ */ void mark_and_update_partial_gain(const AtomNetId net_id, enum e_gain_update gain_flag, const AtomBlockId clustered_blk_id, @@ -214,8 +282,20 @@ void mark_and_update_partial_gain(const AtomNetId net_id, const int high_fanout_net_threshold, std::unordered_map& net_output_feeds_driving_block_input); +/* + * @brief Updates the total gain array to reflect the desired tradeoff between + * input sharing (sharinggain) and path_length minimization (timinggain) + * each time a new molecule is added to the cluster. + */ void update_total_gain(float alpha, float beta, bool timing_driven, bool connection_driven, t_pb* pb, AttractionInfo& attraction_groups); +/* + * @brief Routine that is called each time a new molecule is added to the cluster. + * + * Makes calls to update cluster stats such as the gain map for atoms, used pins, + * and clock structures, in order to reflect the new content of the cluster. + * Also keeps track of which attraction group the cluster belongs to. + */ void update_cluster_stats(const t_pack_molecule* molecule, const ClusterLegalizer& cluster_legalizer, const std::unordered_set& is_clock, @@ -230,6 +310,12 @@ void update_cluster_stats(const t_pack_molecule* molecule, AttractionInfo& attraction_groups, std::unordered_map& net_output_feeds_driving_block_input); +/* + * @brief Given a starting seed block, start_new_cluster determines the next + * cluster type to use. + * + * It expands the FPGA if it cannot find a legal cluster for the atom block + */ void start_new_cluster(ClusterLegalizer& cluster_legalizer, LegalizationClusterId& legalization_cluster_id, t_pack_molecule* molecule, @@ -241,6 +327,19 @@ void start_new_cluster(ClusterLegalizer& cluster_legalizer, int verbosity, bool balance_block_type_utilization); +/* + * @brief Get candidate molecule to pack into currently open cluster + * + * Molecule selection priority: + * 1.
Find unpacked molecules based on criticality and strong connectedness + * (connected by low fanout nets) with current cluster. + * 2. Find unpacked molecules based on transitive connections (eg. 2 hops away) + * with current cluster. + * 3. Find unpacked molecules based on weak connectedness (connected by high + * fanout nets) with current cluster. + * 4. Find unpacked molecules based on attraction group of the current cluster + * (if the cluster has an attraction group). + */ t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, AttractionInfo& attraction_groups, const enum e_gain_type gain_mode, @@ -254,6 +353,10 @@ t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, const int feasible_block_array_size, std::map>& primitive_candidate_block_types); +/* + * @brief Add molecules with strong connectedness to the current cluster to the + * list of feasible blocks. + */ void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, const Prepacker& prepacker, @@ -261,6 +364,10 @@ void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur_pb, const int feasible_block_array_size, AttractionInfo& attraction_groups); +/* + * @brief Add molecules based on weak connectedness (connected by high fanout + * nets) with current cluster. + */ void add_cluster_molecule_candidates_by_highfanout_connectivity(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, const Prepacker& prepacker, @@ -268,6 +375,17 @@ void add_cluster_molecule_candidates_by_highfanout_connectivity(t_pb* cur_pb, const int feasible_block_array_size, AttractionInfo& attraction_groups); +/* + * @brief If the current cluster being packed has an attraction group associated + * with it (i.e. there are atoms in it that belong to an attraction group), + * this routine adds molecules from the associated attraction group to + * the list of feasible blocks for the cluster. 
+ * + * Attraction groups can be very large, so we only add some randomly selected + * molecules for efficiency if the number of atoms in the group is greater than + * 500. Therefore, the molecules added to the candidates will vary each time you + * call this function. + */ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, const Prepacker& prepacker, @@ -277,6 +395,10 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, LegalizationClusterId clb_index, std::map>& primitive_candidate_block_types); +/* + * @brief Add molecules based on transitive connections (eg. 2 hops away) with + * current cluster. + */ void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, const Prepacker& prepacker, @@ -287,6 +409,10 @@ void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur_pb, const int feasible_block_array_size, AttractionInfo& attraction_groups); +/* + * @brief Check whether a free primitive exists for each atom block in the + * molecule. + */ bool check_free_primitives_for_molecule_atoms(t_pack_molecule* molecule, t_cluster_placement_stats* cluster_placement_stats_ptr, const ClusterLegalizer& cluster_legalizer); @@ -308,6 +434,9 @@ t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, const int& unclustered_list_head_size, std::map>& primitive_candidate_block_types); +/* + * @brief Calculates molecule statistics for a single molecule. + */ t_molecule_stats calc_molecule_stats(const t_pack_molecule* molecule, const AtomNetlist& atom_nlist); std::vector initialize_seed_atoms(const e_cluster_seed seed_type, @@ -320,10 +449,26 @@ t_pack_molecule* get_highest_gain_seed_molecule(int& seed_index, const Prepacker& prepacker, const ClusterLegalizer& cluster_legalizer); +/* + * @brief Get gain of packing molecule into current cluster. 
+ * + * gain is equal to: + * total_block_gain + * + molecule_base_gain*some_factor + * - introduced_input_nets_of_unrelated_blocks_pulled_in_by_molecule*some_other_factor + */ float get_molecule_gain(t_pack_molecule* molecule, std::map& blk_gain, AttractGroupId cluster_attraction_group_id, AttractionInfo& attraction_groups, int num_molecule_failures); void print_seed_gains(const char* fname, const std::vector& seed_atoms, const vtr::vector& atom_gain, const vtr::vector& atom_criticality); +/** + * @brief Score unclustered atoms that are two hops away from current cluster + * + * For example, consider a cluster that has a FF feeding an adder in another + * cluster. Since this FF is feeding an adder that is packed in another cluster + * this function should find other FFs that are feeding other inputs of this adder + * since they are two hops away from the FF packed in this cluster + */ void load_transitive_fanout_candidates(LegalizationClusterId cluster_index, t_pb_stats* pb_stats, const Prepacker& prepacker, @@ -333,21 +478,59 @@ void load_transitive_fanout_candidates(LegalizationClusterId cluster_index, std::map> identify_primitive_candidate_block_types(); +/** + * @brief This function updates the pb_type_count data structure by incrementing + * the number of used pb_types in the given packed cluster t_pb + */ size_t update_pb_type_count(const t_pb* pb, std::map& pb_type_count, size_t depth); +/* + * @brief This function updates the le_count data structure from the given + * packed cluster. + */ void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_type, const t_pb_type* le_pb_type, std::vector& le_count); void print_pb_type_count_recurr(t_pb_type* type, size_t max_name_chars, size_t curr_depth, std::map& pb_type_count); +/* + * @brief This function identifies the logic block type which is defined by the + * block type which has a lut primitive.
+ */ t_logical_block_type_ptr identify_logic_block_type(std::map>& primitive_candidate_block_types); +/* + * @brief This function returns the pb_type that is similar to Logic Element (LE) + * in an FPGA. + * + * The LE is defined as a physical block that contains a LUT primitive and + * is found by searching a cluster type to find the first pb_type (from the top + * of the hierarchy clb->LE) that has more than one instance within the cluster. + */ t_pb_type* identify_le_block_type(t_logical_block_type_ptr logic_block_type); +/* + * @brief This function returns true if the given physical block has a primitive + * matching the given blif model and is used. + */ bool pb_used_for_blif_model(const t_pb* pb, const std::string& blif_model_name); +/* + * @brief Print the LE count data structure. + */ void print_le_count(std::vector& le_count, const t_pb_type* le_pb_type); +/* + * @brief Given a pointer to a pb in a cluster, this routine returns a pointer + * to the top-level pb of the given pb. + * + * This is needed when updating the gain for a cluster. + */ t_pb* get_top_level_pb(t_pb* pb); -void init_clb_atoms_lookup(vtr::vector>& atoms_lookup); +/* + * @brief Load the mapping between clusters and their atoms. + */ +void init_clb_atoms_lookup(vtr::vector>& atoms_lookup, + const AtomContext& atom_ctx, + const ClusteredNetlist& clb_nlist); #endif diff --git a/vpr/src/pack/output_clustering.h b/vpr/src/pack/output_clustering.h index df8993b957d..92d734248d1 100644 --- a/vpr/src/pack/output_clustering.h +++ b/vpr/src/pack/output_clustering.h @@ -7,6 +7,15 @@ class AtomNetId; class ClusterLegalizer; +/// @brief Output the clustering, given by the ClusterLegalizer or a clustered +/// netlist, to a clustered netlist file. +/// +/// The clustering can be output from the following sources: +/// 1) From the clustering +/// 2) From another clustered netlist +/// If from_legalizer is true, the ClusterLegalizer will be used to generate the +/// clustered netlist.
If from_legalizer is false, the clustered netlist currently +/// in the global scope will be used. void output_clustering(ClusterLegalizer* cluster_legalizer_ptr, bool global_clocks, const std::unordered_set& is_clock, diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index 1335590f53d..a4a1dcc09ee 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -200,6 +200,10 @@ bool try_pack(t_packer_opts* packer_opts, t_ext_pin_util pin_util(1.0, 1.0); // TODO: This line assumes the logic block name is "clb" which // may not be the case. This may need to be investigated. + // Probably we should do this update of ext_pin_util for + // all types that were overused. Or if that is hard, just + // do it for all block types. Doing it only for a clb + // string is dangerous -VB. cluster_legalizer.get_target_external_pin_util().set_block_pin_util("clb", pin_util); } diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h index 867670517a4..9f08dcc0d2b 100644 --- a/vpr/src/util/vpr_utils.h +++ b/vpr/src/util/vpr_utils.h @@ -193,6 +193,8 @@ int get_max_nets_in_pb_type(const t_pb_type* pb_type); bool primitive_type_feasible(AtomBlockId blk_id, const t_pb_type* cur_pb_type); t_pb_graph_pin* get_pb_graph_node_pin_from_model_port_pin(const t_model_ports* model_port, const int model_pin, const t_pb_graph_node* pb_graph_node); const t_pb_graph_pin* find_pb_graph_pin(const AtomNetlist& netlist, const AtomLookup& netlist_lookup, const AtomPinId pin_id); +/// @brief Gets the pb_graph_node pin at the given pin index for the given +/// pb_graph_node. t_pb_graph_pin* get_pb_graph_node_pin_from_pb_graph_node(t_pb_graph_node* pb_graph_node, int ipin); t_pb_graph_pin* get_pb_graph_node_pin_from_block_pin(ClusterBlockId iblock, int ipin); vtr::vector alloc_and_load_pin_id_to_pb_mapping();