diff --git a/vpr/src/base/SetupGrid.h b/vpr/src/base/SetupGrid.h index da9b2631c94..b65a9eb23da 100644 --- a/vpr/src/base/SetupGrid.h +++ b/vpr/src/base/SetupGrid.h @@ -12,6 +12,8 @@ #include #include "physical_types.h" +class DeviceGrid; + ///@brief Find the device satisfying the specified minimum resources /// minimum_instance_counts and target_device_utilization are not required when specifying a fixed layout DeviceGrid create_device_grid(const std::string& layout_name, diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index 2bc4dd2a5f9..a8c95274552 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -15,6 +15,8 @@ #include #include +#include "cluster_util.h" +#include "vpr_context.h" #include "vtr_assert.h" #include "vtr_math.h" #include "vtr_log.h" @@ -359,9 +361,7 @@ void vpr_init_with_options(const t_options* options, t_vpr_setup* vpr_setup, t_a fflush(stdout); - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); auto& device_ctx = g_vpr_ctx.mutable_device(); - helper_ctx.lb_type_rr_graphs = vpr_setup->PackerRRGraph; device_ctx.pad_loc_type = vpr_setup->PlacerOpts.pad_loc_type; } @@ -613,12 +613,13 @@ bool vpr_pack_flow(t_vpr_setup& vpr_setup, const t_arch& arch) { //Load a previous packing from the .net file vpr_load_packing(vpr_setup, arch); - //Load cluster_constraints data structure here since loading pack file - load_cluster_constraints(); } } + // Load cluster_constraints data structure. 
+ load_cluster_constraints(); + /* Sanity check the resulting netlist */ check_netlist(packer_opts.pack_verbosity); @@ -696,6 +697,7 @@ void vpr_load_packing(t_vpr_setup& vpr_setup, const t_arch& arch) { "Must have valid .net filename to load packing"); auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); /* Ensure we have a clean start with void net remapping information */ cluster_ctx.post_routing_clb_pin_nets.clear(); @@ -706,8 +708,11 @@ void vpr_load_packing(t_vpr_setup& vpr_setup, const t_arch& arch) { vpr_setup.FileNameOpts.verify_file_digests, vpr_setup.PackerOpts.pack_verbosity); + /* Load the mapping between clusters and their atoms */ + init_clb_atoms_lookup(cluster_ctx.atoms_lookup, atom_ctx, cluster_ctx.clb_nlist); + process_constant_nets(g_vpr_ctx.mutable_atom().nlist, - g_vpr_ctx.atom().lookup, + atom_ctx.lookup, cluster_ctx.clb_nlist, vpr_setup.constant_net_method, vpr_setup.PackerOpts.pack_verbosity); diff --git a/vpr/src/base/vpr_constraints_writer.cpp b/vpr/src/base/vpr_constraints_writer.cpp index 0de68549aec..29842028754 100644 --- a/vpr/src/base/vpr_constraints_writer.cpp +++ b/vpr/src/base/vpr_constraints_writer.cpp @@ -7,17 +7,15 @@ #include "vpr_constraints_serializer.h" #include "vpr_constraints_uxsdcxx.h" -#include "vtr_time.h" +#include "vpr_context.h" #include "globals.h" #include "pugixml.hpp" -#include "pugixml_util.hpp" -#include "clustered_netlist_utils.h" #include +#include #include "vpr_constraints_writer.h" #include "region.h" -#include "re_cluster_util.h" /** * @brief Create a partition with the given name and a single region. 
@@ -30,7 +28,6 @@ static Partition create_partition(const std::string& part_name, const Region& re void write_vpr_floorplan_constraints(const char* file_name, int expand, bool subtile, int horizontal_partitions, int vertical_partitions) { VprConstraints constraints; - if (horizontal_partitions != 0 && vertical_partitions != 0) { setup_vpr_floorplan_constraints_cutpoints(constraints, horizontal_partitions, vertical_partitions); } else { @@ -83,8 +80,7 @@ void setup_vpr_floorplan_constraints_one_loc(VprConstraints& constraints, int ex part.set_part_region(pr); constraints.mutable_place_constraints().add_partition(part); - const std::unordered_set& atoms = cluster_to_atoms(blk_id); - + const std::unordered_set& atoms = cluster_ctx.atoms_lookup[blk_id]; for (AtomBlockId atom_id : atoms) { constraints.mutable_place_constraints().add_constrained_atom(atom_id, partid); } @@ -92,7 +88,9 @@ void setup_vpr_floorplan_constraints_one_loc(VprConstraints& constraints, int ex } } -void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, int horizontal_cutpoints, int vertical_cutpoints) { +void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, + int horizontal_cutpoints, + int vertical_cutpoints) { auto& cluster_ctx = g_vpr_ctx.clustering(); auto& block_locs = g_vpr_ctx.placement().block_locs(); auto& device_ctx = g_vpr_ctx.device(); @@ -158,7 +156,7 @@ void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, int * appropriate region accordingly */ for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { - const std::unordered_set& atoms = cluster_to_atoms(blk_id); + const std::unordered_set& atoms = cluster_ctx.atoms_lookup[blk_id]; int x = block_locs[blk_id].loc.x; int y = block_locs[blk_id].loc.y; int width = device_ctx.grid.width(); diff --git a/vpr/src/base/vpr_constraints_writer.h b/vpr/src/base/vpr_constraints_writer.h index 25dd7fc08ce..ddfcd259b43 100644 --- a/vpr/src/base/vpr_constraints_writer.h +++ 
b/vpr/src/base/vpr_constraints_writer.h @@ -25,6 +25,8 @@ #ifndef VPR_SRC_BASE_VPR_CONSTRAINTS_WRITER_H_ #define VPR_SRC_BASE_VPR_CONSTRAINTS_WRITER_H_ +class VprConstraints; + /** * @brief Write out floorplan constraints to an XML file based on current placement * @@ -35,7 +37,11 @@ * @param subtile Specifies whether to write out the constraint regions with or without * subtile values. */ -void write_vpr_floorplan_constraints(const char* file_name, int expand, bool subtile, int horizontal_partitions, int vertical_partitions); +void write_vpr_floorplan_constraints(const char* file_name, + int expand, + bool subtile, + int horizontal_partitions, + int vertical_partitions); /** * @brief Populates VprConstraints by creating a partition for each clustered block. @@ -50,7 +56,9 @@ void write_vpr_floorplan_constraints(const char* file_name, int expand, bool sub * @param subtile Specifies whether to write out the constraint regions with or without * subtile values. */ -void setup_vpr_floorplan_constraints_one_loc(VprConstraints& constraints, int expand, bool subtile); +void setup_vpr_floorplan_constraints_one_loc(VprConstraints& constraints, + int expand, + bool subtile); /** * @brief Populates VprConstraints by dividing the grid into multiple partitions. @@ -62,6 +70,8 @@ void setup_vpr_floorplan_constraints_one_loc(VprConstraints& constraints, int ex * @param horizontal_cutpoints The number of horizontal cut-lines. * @param vertical_cutpoints The number of vertical cut_lines. 
*/ -void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, int horizontal_cutpoints, int vertical_cutpoints); +void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, + int horizontal_cutpoints, + int vertical_cutpoints); #endif /* VPR_SRC_BASE_VPR_CONSTRAINTS_WRITER_H_ */ diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 7ff7205024a..f69c58e94ab 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -10,6 +10,7 @@ #include "vtr_ndmatrix.h" #include "vtr_optional.h" #include "vtr_vector.h" +#include "vtr_vector_map.h" #include "atom_netlist.h" #include "clustered_netlist.h" #include "rr_graph_view.h" @@ -78,12 +79,6 @@ struct AtomContext : public Context { /// @brief Mappings to/from the Atom Netlist to physically described .blif models AtomLookup lookup; - - /// @brief Prepacker object which performs prepacking and stores the pack - /// molecules. Has a method to get the pack molecule of an AtomBlock. - /// TODO: This is mainly only used in the clusterer. It can probably be - /// removed from the AtomContext entirely. - Prepacker prepacker; }; /** @@ -286,69 +281,23 @@ struct ClusteringContext : public Context { * CLB Netlist ********************************************************************/ - ///@brief New netlist class derived from Netlist + /// @brief New netlist class derived from Netlist ClusteredNetlist clb_nlist; - /* Database for nets of each clb block pin after routing stage - * - post_routing_clb_pin_nets: - * mapping of pb_type pins to clustered net ids - * - pre_routing_net_pin_mapping: - * a copy of mapping for current pb_route index to previous pb_route index - * Record the previous pin mapping for finding the correct pin index during timing analysis - */ + /// @brief Database for nets of each clb block pin after routing stage. + /// - post_routing_clb_pin_nets: + /// mapping of pb_type pins to clustered net ids. 
+ /// - pre_routing_net_pin_mapping: + /// a copy of mapping for current pb_route index to previous pb_route index + /// Record the previous pin mapping for finding the correct pin index during + /// timing analysis. std::map> post_routing_clb_pin_nets; std::map> pre_routing_net_pin_mapping; -}; - -/** - * @brief State relating to helper data structure using in the clustering stage - * - * This should contain helper data structures that are useful in the clustering/packing stage. - * They are encapsulated here as they are useful in clustering and reclustering algorithms that may be used - * in packing or placement stages. - */ -struct ClusteringHelperContext : public Context { - // A map used to save the number of used instances from each logical block type. - std::map num_used_type_instances; - - // Stats keeper for placement information during packing/clustering - t_cluster_placement_stats* cluster_placement_stats; - - // total number of models in the architecture - int num_models; - - int max_cluster_size; - t_pb_graph_node** primitives_list; - bool enable_pin_feasibility_filter; - int feasible_block_array_size; - - // total number of CLBs - int total_clb_num; - - // A vector of routing resource nodes within each of logic cluster_ctx.blocks types [0 .. num_logical_block_type-1] - std::vector* lb_type_rr_graphs; - - // the utilization of external input/output pins during packing (between 0 and 1) - t_ext_pin_util_targets target_external_pin_util; - - // During clustering, a block is related to un-clustered primitives with nets. - // This relation has three types: low fanout, high fanout, and transitive - // high_fanout_thresholds stores the threshold for nets to a block type to be considered high fanout - t_pack_high_fanout_thresholds high_fanout_thresholds; - - // A vector of unordered_sets of AtomBlockIds that are inside each clustered block [0 .. 
num_clustered_blocks-1] - // unordered_set for faster insertion/deletion during the iterative improvement process of packing + /// @brief A vector of unordered_sets of AtomBlockIds that are inside each + /// clustered block [0 .. num_clustered_blocks-1] + /// This is populated when the packing is loaded. vtr::vector> atoms_lookup; - - /** Stores the NoC group ID of each atom block. Atom blocks that belong - * to different NoC groups can't be clustered with each other into the - * same clustered block.*/ - vtr::vector atom_noc_grp_id; - - ~ClusteringHelperContext() { - delete[] primitives_list; - } }; /** @@ -728,9 +677,6 @@ class VprContext : public Context { const ClusteringContext& clustering() const { return clustering_; } ClusteringContext& mutable_clustering() { return clustering_; } - const ClusteringHelperContext& cl_helper() const { return helper_; } - ClusteringHelperContext& mutable_cl_helper() { return helper_; } - const PlacementContext& placement() const { return placement_; } PlacementContext& mutable_placement() { return placement_; } @@ -760,8 +706,6 @@ class VprContext : public Context { PowerContext power_; ClusteringContext clustering_; - ClusteringHelperContext helper_; - PlacementContext placement_; RoutingContext routing_; FloorplanningContext constraints_; diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index cf5f1062f96..b63a2f7d501 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -168,15 +168,6 @@ enum class e_cluster_seed { BLEND2 }; -enum class e_block_pack_status { - BLK_PASSED, - BLK_FAILED_FEASIBLE, - BLK_FAILED_ROUTE, - BLK_FAILED_FLOORPLANNING, - BLK_FAILED_NOC_GROUP, - BLK_STATUS_UNDEFINED -}; - struct t_ext_pin_util { t_ext_pin_util() = default; t_ext_pin_util(float in, float out) diff --git a/vpr/src/pack/cluster.cpp b/vpr/src/pack/cluster.cpp index a5ee38b8d0c..607e4b530f3 100644 --- a/vpr/src/pack/cluster.cpp +++ b/vpr/src/pack/cluster.cpp @@ -44,11 +44,10 @@ #include 
"PreClusterDelayCalculator.h" #include "atom_netlist.h" -#include "cluster_router.h" +#include "cluster_legalizer.h" #include "cluster_util.h" #include "constraints_report.h" #include "globals.h" -#include "pack_types.h" #include "prepack.h" #include "timing_info.h" #include "vpr_types.h" @@ -70,13 +69,14 @@ std::map do_clustering(const t_packer_opts& pa const t_analysis_opts& analysis_opts, const t_arch* arch, Prepacker& prepacker, + ClusterLegalizer& cluster_legalizer, const std::unordered_set& is_clock, const std::unordered_set& is_global, bool allow_unrelated_clustering, bool balance_block_type_utilization, - std::vector* lb_type_rr_graphs, AttractionInfo& attraction_groups, bool& floorplan_regions_overfull, + const t_pack_high_fanout_thresholds& high_fanout_thresholds, t_clustering_data& clustering_data) { /* Does the actual work of clustering multiple netlist blocks * * into clusters. */ @@ -102,7 +102,7 @@ std::map do_clustering(const t_packer_opts& pa t_cluster_progress_stats cluster_stats; //int num_molecules, num_molecules_processed, mols_since_last_print, blocks_since_last_analysis, - int num_blocks_hill_added, max_pb_depth, detailed_routing_stage; + int num_blocks_hill_added; const int verbosity = packer_opts.pack_verbosity; @@ -116,17 +116,11 @@ std::map do_clustering(const t_packer_opts& pa enum e_block_pack_status block_pack_status; - t_cluster_placement_stats* cur_cluster_placement_stats_ptr; - t_lb_router_data* router_data = nullptr; + t_cluster_placement_stats* cur_cluster_placement_stats_ptr = nullptr; t_pack_molecule *istart, *next_molecule, *prev_molecule; auto& atom_ctx = g_vpr_ctx.atom(); auto& device_ctx = g_vpr_ctx.mutable_device(); - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - - helper_ctx.enable_pin_feasibility_filter = packer_opts.enable_pin_feasibility_filter; - helper_ctx.feasible_block_array_size = packer_opts.feasible_block_array_size; std::shared_ptr 
clustering_delay_calc; std::shared_ptr timing_info; @@ -141,30 +135,25 @@ std::map do_clustering(const t_packer_opts& pa // Index 2 holds the number of LEs that are used for registers only. std::vector le_count(3, 0); - helper_ctx.total_clb_num = 0; + int total_clb_num = 0; /* TODO: This is memory inefficient, fix if causes problems */ /* Store stats on nets used by packed block, useful for determining transitively connected blocks * (eg. [A1, A2, ..]->[B1, B2, ..]->C implies cluster [A1, A2, ...] and C have a weak link) */ - vtr::vector> clb_inter_blk_nets(atom_ctx.nlist.blocks().size()); + vtr::vector> clb_inter_blk_nets(atom_ctx.nlist.blocks().size()); istart = nullptr; - /* determine bound on cluster size and primitive input size */ - helper_ctx.max_cluster_size = 0; - max_pb_depth = 0; - const t_molecule_stats max_molecule_stats = prepacker.calc_max_molecule_stats(atom_ctx.nlist); prepacker.mark_all_molecules_valid(); cluster_stats.num_molecules = prepacker.get_num_molecules(); - get_max_cluster_size_and_pb_depth(helper_ctx.max_cluster_size, max_pb_depth); - if (packer_opts.hill_climbing_flag) { - clustering_data.hill_climbing_inputs_avail = new int[helper_ctx.max_cluster_size + 1]; - for (int i = 0; i < helper_ctx.max_cluster_size + 1; i++) + size_t max_cluster_size = cluster_legalizer.get_max_cluster_size(); + clustering_data.hill_climbing_inputs_avail = new int[max_cluster_size + 1]; + for (size_t i = 0; i < max_cluster_size + 1; i++) clustering_data.hill_climbing_inputs_avail[i] = 0; } else { clustering_data.hill_climbing_inputs_avail = nullptr; /* if used, die hard */ @@ -173,8 +162,9 @@ std::map do_clustering(const t_packer_opts& pa #if 0 check_for_duplicate_inputs (); #endif + alloc_and_init_clustering(max_molecule_stats, - &(helper_ctx.cluster_placement_stats), &(helper_ctx.primitives_list), prepacker, + prepacker, clustering_data, net_output_feeds_driving_block_input, unclustered_list_head_size, cluster_stats.num_molecules); @@ -187,9 +177,6 @@ 
std::map do_clustering(const t_packer_opts& pa cluster_stats.blocks_since_last_analysis = 0; num_blocks_hill_added = 0; - VTR_ASSERT(helper_ctx.max_cluster_size < MAX_SHORT); - /* Limit maximum number of elements for each cluster */ - //Default criticalities set to zero (e.g. if not timing driven) vtr::vector atom_criticality(atom_ctx.nlist.blocks().size(), 0.); @@ -199,11 +186,17 @@ std::map do_clustering(const t_packer_opts& pa } // Assign gain scores to atoms and sort them based on the scores. - auto seed_atoms = initialize_seed_atoms(packer_opts.cluster_seed_type, max_molecule_stats, atom_criticality); + auto seed_atoms = initialize_seed_atoms(packer_opts.cluster_seed_type, + max_molecule_stats, + prepacker, + atom_criticality); /* index of next most timing critical block */ int seed_index = 0; - istart = get_highest_gain_seed_molecule(seed_index, seed_atoms); + istart = get_highest_gain_seed_molecule(seed_index, + seed_atoms, + prepacker, + cluster_legalizer); print_pack_status_header(); @@ -214,61 +207,58 @@ std::map do_clustering(const t_packer_opts& pa while (istart != nullptr) { bool is_cluster_legal = false; int saved_seed_index = seed_index; - for (detailed_routing_stage = (int)E_DETAILED_ROUTE_AT_END_ONLY; !is_cluster_legal && detailed_routing_stage != (int)E_DETAILED_ROUTE_INVALID; detailed_routing_stage++) { - // Use the total number created clusters so far as the ID for the new cluster - ClusterBlockId clb_index(helper_ctx.total_clb_num); - - VTR_LOGV(verbosity > 2, "Complex block %d:\n", helper_ctx.total_clb_num); - - /*Used to store cluster's PartitionRegion as primitives are added to it. - * Since some of the primitives might fail legality, this structure temporarily - * stores PartitionRegion information while the cluster is packed*/ - PartitionRegion temp_cluster_pr; - /* - * Stores the cluster's NoC group ID as more primitives are added to it. 
- * This is used to check if a candidate primitive is in the same NoC group - * as the atom blocks that have already been added to the primitive. - */ - NocGroupId temp_cluster_noc_grp_id = NocGroupId::INVALID(); - - start_new_cluster(helper_ctx.cluster_placement_stats, helper_ctx.primitives_list, - clb_index, istart, + // The basic algorithm: + // 1) Try to put all the molecules in that you can without doing the + // full intra-lb route. Then do full legalization at the end. + // 2) If the legalization at the end fails, try again, but this time + // do full legalization for each molecule added to the cluster. + const ClusterLegalizationStrategy legalization_strategies[] = {ClusterLegalizationStrategy::SKIP_INTRA_LB_ROUTE, + ClusterLegalizationStrategy::FULL}; + for (const ClusterLegalizationStrategy strategy : legalization_strategies) { + // If the cluster is legal, no need to try a stronger cluster legalizer + // mode. + if (is_cluster_legal) + break; + // Set the legalization strategy of the cluster legalizer. 
+ cluster_legalizer.set_legalization_strategy(strategy); + + LegalizationClusterId legalization_cluster_id; + + VTR_LOGV(verbosity > 2, "Complex block %d:\n", total_clb_num); + + start_new_cluster(cluster_legalizer, + legalization_cluster_id, + istart, num_used_type_instances, packer_opts.target_device_utilization, - helper_ctx.num_models, helper_ctx.max_cluster_size, arch, packer_opts.device_layout, - lb_type_rr_graphs, &router_data, - detailed_routing_stage, &cluster_ctx.clb_nlist, primitive_candidate_block_types, verbosity, - packer_opts.enable_pin_feasibility_filter, - balance_block_type_utilization, - packer_opts.feasible_block_array_size, - temp_cluster_pr, - temp_cluster_noc_grp_id); + balance_block_type_utilization); //initial molecule in cluster has been processed cluster_stats.num_molecules_processed++; cluster_stats.mols_since_last_print++; - print_pack_status(helper_ctx.total_clb_num, + print_pack_status(total_clb_num, cluster_stats.num_molecules, cluster_stats.num_molecules_processed, cluster_stats.mols_since_last_print, device_ctx.grid.width(), device_ctx.grid.height(), - attraction_groups); + attraction_groups, + cluster_legalizer); VTR_LOGV(verbosity > 2, - "Complex block %d: '%s' (%s) ", helper_ctx.total_clb_num, - cluster_ctx.clb_nlist.block_name(clb_index).c_str(), - cluster_ctx.clb_nlist.block_type(clb_index)->name); + "Complex block %d: '%s' (%s) ", total_clb_num, + cluster_legalizer.get_cluster_pb(legalization_cluster_id)->name, + cluster_legalizer.get_cluster_type(legalization_cluster_id)->name); VTR_LOGV(verbosity > 2, "."); //Progress dot for seed-block fflush(stdout); - t_ext_pin_util target_ext_pin_util = helper_ctx.target_external_pin_util.get_pin_util(cluster_ctx.clb_nlist.block_type(clb_index)->name); - int high_fanout_threshold = helper_ctx.high_fanout_thresholds.get_threshold(cluster_ctx.clb_nlist.block_type(clb_index)->name); - update_cluster_stats(istart, clb_index, + int high_fanout_threshold = 
high_fanout_thresholds.get_threshold(cluster_legalizer.get_cluster_type(legalization_cluster_id)->name); + update_cluster_stats(istart, + cluster_legalizer, is_clock, //Set of clock nets is_global, //Set of global nets (currently all clocks) packer_opts.global_clocks, @@ -278,16 +268,16 @@ std::map do_clustering(const t_packer_opts& pa *timing_info, attraction_groups, net_output_feeds_driving_block_input); - helper_ctx.total_clb_num++; + total_clb_num++; if (packer_opts.timing_driven) { cluster_stats.blocks_since_last_analysis++; /*it doesn't make sense to do a timing analysis here since there* *is only one atom block clustered it would not change anything */ } - cur_cluster_placement_stats_ptr = &(helper_ctx.cluster_placement_stats[cluster_ctx.clb_nlist.block_type(clb_index)->index]); + cur_cluster_placement_stats_ptr = cluster_legalizer.get_cluster_placement_stats(legalization_cluster_id); cluster_stats.num_unrelated_clustering_attempts = 0; - next_molecule = get_molecule_for_cluster(cluster_ctx.clb_nlist.block_pb(clb_index), + next_molecule = get_molecule_for_cluster(cluster_legalizer.get_cluster_pb(legalization_cluster_id), attraction_groups, allow_unrelated_clustering, packer_opts.prioritize_transitive_connectivity, @@ -295,8 +285,10 @@ std::map do_clustering(const t_packer_opts& pa packer_opts.feasible_block_array_size, &cluster_stats.num_unrelated_clustering_attempts, cur_cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, clb_inter_blk_nets, - clb_index, + legalization_cluster_id, verbosity, clustering_data.unclustered_list_head, unclustered_list_head_size, @@ -322,18 +314,16 @@ std::map do_clustering(const t_packer_opts& pa while (next_molecule != nullptr && num_repeated_molecules < max_num_repeated_molecules) { prev_molecule = next_molecule; - try_fill_cluster(packer_opts, + try_fill_cluster(cluster_legalizer, + prepacker, + packer_opts, cur_cluster_placement_stats_ptr, prev_molecule, next_molecule, num_repeated_molecules, - 
helper_ctx.primitives_list, cluster_stats, - helper_ctx.total_clb_num, - helper_ctx.num_models, - helper_ctx.max_cluster_size, - clb_index, - detailed_routing_stage, + total_clb_num, + legalization_cluster_id, attraction_groups, clb_inter_blk_nets, allow_unrelated_clustering, @@ -341,10 +331,6 @@ std::map do_clustering(const t_packer_opts& pa is_clock, is_global, timing_info, - router_data, - target_ext_pin_util, - temp_cluster_pr, - temp_cluster_noc_grp_id, block_pack_status, clustering_data.unclustered_list_head, unclustered_list_head_size, @@ -352,16 +338,41 @@ std::map do_clustering(const t_packer_opts& pa primitive_candidate_block_types); } - is_cluster_legal = check_cluster_legality(verbosity, detailed_routing_stage, router_data); + if (strategy == ClusterLegalizationStrategy::FULL) { + // If the legalizer fully legalized for every molecule added, + // the cluster should be legal. + is_cluster_legal = true; + } else { + // If the legalizer did not check everything for every molecule, + // need to check that the full cluster is legal (need to perform + // intra-lb routing). + is_cluster_legal = cluster_legalizer.check_cluster_legality(legalization_cluster_id); + } if (is_cluster_legal) { - istart = save_cluster_routing_and_pick_new_seed(packer_opts, helper_ctx.total_clb_num, seed_atoms, num_blocks_hill_added, clustering_data.intra_lb_routing, seed_index, cluster_stats, router_data); - store_cluster_info_and_free(packer_opts, clb_index, logic_block_type, le_pb_type, le_count, clb_inter_blk_nets); + // Pick new seed. + istart = get_highest_gain_seed_molecule(seed_index, + seed_atoms, + prepacker, + cluster_legalizer); + // Update cluster stats. 
+ if (packer_opts.timing_driven && num_blocks_hill_added > 0) + cluster_stats.blocks_since_last_analysis += num_blocks_hill_added; + + store_cluster_info_and_free(packer_opts, legalization_cluster_id, logic_block_type, le_pb_type, le_count, cluster_legalizer, clb_inter_blk_nets); + // Since the cluster will no longer be added to beyond this point, + // clean the cluster of any data not strictly necessary for + // creating the clustered netlist. + cluster_legalizer.clean_cluster(legalization_cluster_id); } else { - free_data_and_requeue_used_mols_if_illegal(clb_index, saved_seed_index, num_used_type_instances, helper_ctx.total_clb_num, seed_index); + // If the cluster is not legal, requeue used mols. + num_used_type_instances[cluster_legalizer.get_cluster_type(legalization_cluster_id)]--; + total_clb_num--; + seed_index = saved_seed_index; + // Destroy the illegal cluster. + cluster_legalizer.destroy_cluster(legalization_cluster_id); + cluster_legalizer.compress(); } - free_router_data(router_data); - router_data = nullptr; } } @@ -371,7 +382,12 @@ std::map do_clustering(const t_packer_opts& pa } //check_floorplan_regions(floorplan_regions_overfull); - floorplan_regions_overfull = floorplan_constraints_regions_overfull(); + floorplan_regions_overfull = floorplan_constraints_regions_overfull(cluster_legalizer); + + // Ensure that we have kept track of the number of clusters correctly. + // TODO: The total_clb_num variable could probably just be replaced by + // clusters().size(). 
+ VTR_ASSERT(cluster_legalizer.clusters().size() == (size_t)total_clb_num); return num_used_type_instances; } diff --git a/vpr/src/pack/cluster.h b/vpr/src/pack/cluster.h index 76b2315ceae..a10d7ccf21a 100644 --- a/vpr/src/pack/cluster.h +++ b/vpr/src/pack/cluster.h @@ -1,32 +1,32 @@ #ifndef CLUSTER_H #define CLUSTER_H -#include #include -#include +#include #include "physical_types.h" #include "vpr_types.h" -#include "atom_netlist_fwd.h" -#include "attraction_groups.h" -#include "cluster_util.h" +class AtomNetid; +class AttractionInfo; +class ClusterLegalizer; +class ClusteredNetlist; class Prepacker; +struct t_clustering_data; std::map do_clustering(const t_packer_opts& packer_opts, const t_analysis_opts& analysis_opts, const t_arch* arch, Prepacker& prepacker, + ClusterLegalizer& cluster_legalizer, const std::unordered_set& is_clock, const std::unordered_set& is_global, bool allow_unrelated_clustering, bool balance_block_type_utilization, - std::vector* lb_type_rr_graphs, AttractionInfo& attraction_groups, bool& floorplan_regions_overfull, + const t_pack_high_fanout_thresholds& high_fanout_thresholds, t_clustering_data& clustering_data); -int get_cluster_of_block(int blkidx); - void print_pb_type_count(const ClusteredNetlist& clb_nlist); #endif diff --git a/vpr/src/pack/cluster_legalizer.cpp b/vpr/src/pack/cluster_legalizer.cpp new file mode 100644 index 00000000000..f4676eea195 --- /dev/null +++ b/vpr/src/pack/cluster_legalizer.cpp @@ -0,0 +1,1781 @@ +/** + * @file + * @author Alex Singer + * @date September 2024 + * @brief The implementation of the Cluster Legalizer class. + * + * Most of the code in this file was originally part of cluster_util.cpp and was + * highly integrated with the clusterer in VPR. All code that was used for + * legalizing the clusters was moved into this file and all the functionality + * was moved into the ClusterLegalizer class. 
+ */ + +#include "cluster_legalizer.h" +#include +#include +#include +#include +#include "atom_lookup.h" +#include "atom_netlist.h" +#include "cluster_placement.h" +#include "cluster_router.h" +#include "cluster_util.h" +#include "globals.h" +#include "logic_types.h" +#include "netlist_utils.h" +#include "noc_aware_cluster_util.h" +#include "noc_data_types.h" +#include "pack_types.h" +#include "partition.h" +#include "partition_region.h" +#include "physical_types.h" +#include "prepack.h" +#include "user_place_constraints.h" +#include "vpr_context.h" +#include "vpr_types.h" +#include "vpr_utils.h" +#include "vtr_assert.h" +#include "vtr_vector.h" +#include "vtr_vector_map.h" + +/* + * @brief Gets the max cluster size that any logical block can have. + * + * This is the maximum number of primitives any cluster can contain. + */ +static size_t calc_max_cluster_size(const std::vector& logical_block_types) { + size_t max_cluster_size = 0; + for (const t_logical_block_type& blk_type : logical_block_types) { + if (is_empty_type(&blk_type)) + continue; + int cur_cluster_size = get_max_primitives_in_pb_type(blk_type.pb_type); + max_cluster_size = std::max(max_cluster_size, cur_cluster_size); + } + return max_cluster_size; +} + +/* + * @brief Allocates the stats stored within the pb of a cluster. + * + * Used to store information used during clustering. + */ +static void alloc_and_load_pb_stats(t_pb* pb, const int feasible_block_array_size) { + /* Call this routine when starting to fill up a new cluster. It resets * + * the gain vector, etc. 
*/ + + pb->pb_stats = new t_pb_stats; + + pb->pb_stats->input_pins_used = std::vector>(pb->pb_graph_node->num_input_pin_class); + pb->pb_stats->output_pins_used = std::vector>(pb->pb_graph_node->num_output_pin_class); + pb->pb_stats->lookahead_input_pins_used = std::vector>(pb->pb_graph_node->num_input_pin_class); + pb->pb_stats->lookahead_output_pins_used = std::vector>(pb->pb_graph_node->num_output_pin_class); + pb->pb_stats->num_feasible_blocks = NOT_VALID; + pb->pb_stats->feasible_blocks = new t_pack_molecule*[feasible_block_array_size]; + + for (int i = 0; i < feasible_block_array_size; i++) + pb->pb_stats->feasible_blocks[i] = nullptr; + + pb->pb_stats->tie_break_high_fanout_net = AtomNetId::INVALID(); + + pb->pb_stats->pulled_from_atom_groups = 0; + pb->pb_stats->num_att_group_atoms_used = 0; + + pb->pb_stats->gain.clear(); + pb->pb_stats->timinggain.clear(); + pb->pb_stats->connectiongain.clear(); + pb->pb_stats->sharinggain.clear(); + pb->pb_stats->hillgain.clear(); + pb->pb_stats->transitive_fanout_candidates.clear(); + + pb->pb_stats->num_pins_of_net_in_pb.clear(); + + pb->pb_stats->num_child_blocks_in_pb = 0; + + pb->pb_stats->explore_transitive_fanout = true; +} + +/* + * @brief Check the atom blocks of a cluster pb. Used in the verify method. 
+ */ +/* TODO: May want to check that all atom blocks are actually reached */ +static void check_cluster_atom_blocks(t_pb* pb, std::unordered_set& blocks_checked) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + const t_pb_type* pb_type = pb->pb_graph_node->pb_type; + if (pb_type->num_modes == 0) { + /* primitive */ + AtomBlockId blk_id = atom_ctx.lookup.pb_atom(pb); + if (blk_id) { + if (blocks_checked.count(blk_id)) { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "pb %s contains atom block %s but atom block is already contained in another pb.\n", + pb->name, atom_ctx.nlist.block_name(blk_id).c_str()); + } + blocks_checked.insert(blk_id); + if (pb != atom_ctx.lookup.atom_pb(blk_id)) { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "pb %s contains atom block %s but atom block does not link to pb.\n", + pb->name, atom_ctx.nlist.block_name(blk_id).c_str()); + } + } + } else { + /* this is a container pb, all container pbs must contain children */ + bool has_child = false; + for (int i = 0; i < pb_type->modes[pb->mode].num_pb_type_children; i++) { + for (int j = 0; j < pb_type->modes[pb->mode].pb_type_children[i].num_pb; j++) { + if (pb->child_pbs[i] != nullptr) { + if (pb->child_pbs[i][j].name != nullptr) { + has_child = true; + check_cluster_atom_blocks(&pb->child_pbs[i][j], blocks_checked); + } + } + } + } + VTR_ASSERT(has_child); + } +} + +/// @brief Recursively frees the pb stats of the given pb, without freeing the +/// pb itself. +static void free_pb_stats_recursive(t_pb* pb) { + /* Releases all the memory used by clustering data structures. 
*/ + if (pb) { + if (pb->pb_graph_node != nullptr) { + if (!pb->pb_graph_node->is_primitive()) { + for (int i = 0; i < pb->pb_graph_node->pb_type->modes[pb->mode].num_pb_type_children; i++) { + for (int j = 0; j < pb->pb_graph_node->pb_type->modes[pb->mode].pb_type_children[i].num_pb; j++) { + if (pb->child_pbs && pb->child_pbs[i]) { + free_pb_stats_recursive(&pb->child_pbs[i][j]); + } + } + } + } + } + free_pb_stats(pb); + } +} + +/* Record the failure of the molecule in this cluster in the current pb stats. + * If a molecule fails repeatedly, it's gain will be penalized if packing with + * attraction groups on. */ +static void record_molecule_failure(t_pack_molecule* molecule, t_pb* pb) { + //Only have to record the failure for the first atom in the molecule. + //The convention when checking if a molecule has failed to pack in the cluster + //is to check whether the first atoms has been recorded as having failed + + auto got = pb->pb_stats->atom_failures.find(molecule->atom_block_ids[0]); + if (got == pb->pb_stats->atom_failures.end()) { + pb->pb_stats->atom_failures.insert({molecule->atom_block_ids[0], 1}); + } else { + got->second++; + } +} + +/** + * @brief Checks whether an atom block can be added to a clustered block + * without violating floorplanning constraints. It also updates the + * clustered block's floorplanning region by taking the intersection of + * its current region and the floorplanning region of the given atom block. + * + * @param atom_blk_id A unique ID for the candidate atom block to + * be added to the growing cluster. + * @param cluster_pr The floorplanning regions of the clustered + * block. This function may update the given + * region. + * @param constraints The set of user-given place constraints. + * @param log_verbosity Controls the detail level of log information + * printed by this function. + * @param cluster_pr_needs_update Indicates whether the floorplanning region + * of the clustered block have updated. 
+ * + * @return True if adding the given atom block to the clustered block does not + * violated any floorplanning constraints. + */ +static bool check_cluster_floorplanning(AtomBlockId atom_blk_id, + PartitionRegion& cluster_pr, + const UserPlaceConstraints& constraints, + int log_verbosity, + bool& cluster_pr_needs_update) { + // Get the partition ID of the atom. + PartitionId part_id = constraints.get_atom_partition(atom_blk_id); + // If the partition ID is invalid, then it can be put in the cluster + // regardless of what the cluster's PartitionRegion is since it is not + // constrained. + if (!part_id.is_valid()) { + VTR_LOGV(log_verbosity > 3, + "\t\t\t Intersect: Atom block %d has no floorplanning constraints\n", + atom_blk_id); + cluster_pr_needs_update = false; + return true; + } + + // Get the Atom and Cluster Partition Regions + const PartitionRegion& atom_pr = constraints.get_partition_pr(part_id); + + // If the Cluster's PartitionRegion is empty, then this atom's PR becomes + // the Cluster's new PartitionRegion. + if (cluster_pr.empty()) { + VTR_LOGV(log_verbosity > 3, + "\t\t\t Intersect: Atom block %d has floorplanning constraints\n", + atom_blk_id); + cluster_pr = atom_pr; + cluster_pr_needs_update = true; + return true; + } + + // The Cluster's new PartitionRegion is the intersection of the Cluster's + // original PartitionRegion and the atom's PartitionRegion. + update_cluster_part_reg(cluster_pr, atom_pr); + + // If the intersection is empty, then the atom cannot be placed in this + // Cluster due to floorplanning constraints. + if (cluster_pr.empty()) { + VTR_LOGV(log_verbosity > 3, + "\t\t\t Intersect: Atom block %d failed floorplanning check for cluster\n", + atom_blk_id); + cluster_pr_needs_update = false; + return false; + } + + // If the Cluster's new PartitionRegion is non-empty, then this atom passes + // the floorplanning constraints and the cluster's PartitionRegion should be + // updated. 
+ cluster_pr_needs_update = true; + VTR_LOGV(log_verbosity > 3, + "\t\t\t Intersect: Atom block %d passed cluster, cluster PR was updated with intersection result \n", + atom_blk_id); + return true; +} + +/** + * @brief Checks if an atom block can be added to a clustered block without + * violating NoC group constraints. For passing this check, either both + * clustered and atom blocks must belong to the same NoC group, or at + * least one of them should not belong to any NoC group. If the atom block + * is associated with a NoC group while the clustered block does not + * belong to any NoC groups, the NoC group ID of the atom block is assigned + * to the clustered block when the atom is added to it. + * + * @param atom_blk_id A unique ID for the candidate atom block to be + * added to the growing cluster. + * @param cluster_noc_grp_id The NoC group ID of the clustered block. This + * function may update this ID. + * @param atom_noc_grp_ids A mapping from atoms to NoC group IDs. + * @param log_verbosity Controls the detail level of log information + * printed by this function. + * + * @return True if adding the atom block the cluster does not violate NoC group + * constraints. + */ +static bool check_cluster_noc_group(AtomBlockId atom_blk_id, + NocGroupId& cluster_noc_grp_id, + const vtr::vector& atom_noc_grp_ids, + int log_verbosity) { + const NocGroupId atom_noc_grp_id = atom_noc_grp_ids.empty() ? NocGroupId::INVALID() : atom_noc_grp_ids[atom_blk_id]; + + if (!cluster_noc_grp_id.is_valid()) { + // If the cluster does not have a NoC group, assign the atom's NoC group + // to the cluster. + VTR_LOGV(log_verbosity > 3, + "\t\t\t NoC Group: Atom block %d passed cluster, cluster's NoC group was updated with the atom's group %d\n", + atom_blk_id, (size_t)atom_noc_grp_id); + cluster_noc_grp_id = atom_noc_grp_id; + return true; + } + + if (cluster_noc_grp_id == atom_noc_grp_id) { + // If the cluster has the same NoC group ID as the atom, they are + // compatible. 
+ VTR_LOGV(log_verbosity > 3, + "\t\t\t NoC Group: Atom block %d passed cluster, cluster's NoC group was compatible with the atom's group %d\n", + atom_blk_id, (size_t)atom_noc_grp_id); + return true; + } + + // If the cluster belongs to a different NoC group than the atom's group, + // they are incompatible. + VTR_LOGV(log_verbosity > 3, + "\t\t\t NoC Group: Atom block %d failed NoC group check for cluster. Cluster's NoC group: %d, atom's NoC group: %d\n", + atom_blk_id, (size_t)cluster_noc_grp_id, (size_t)atom_noc_grp_id); + return false; +} + +/** + * @brief This function takes the root block of a chain molecule and a proposed + * placement primitive for this block. The function then checks if this + * chain root block has a placement constraint (such as being driven from + * outside the cluster) and returns the status of the placement accordingly. + */ +static enum e_block_pack_status check_chain_root_placement_feasibility(const t_pb_graph_node* pb_graph_node, + const t_pack_molecule* molecule, + const AtomBlockId blk_id) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + enum e_block_pack_status block_pack_status = e_block_pack_status::BLK_PASSED; + + bool is_long_chain = molecule->chain_info->is_long_chain; + + const auto& chain_root_pins = molecule->pack_pattern->chain_root_pins; + + t_model_ports* root_port = chain_root_pins[0][0]->port->model_port; + AtomNetId chain_net_id; + auto port_id = atom_ctx.nlist.find_atom_port(blk_id, root_port); + + if (port_id) { + chain_net_id = atom_ctx.nlist.port_net(port_id, chain_root_pins[0][0]->pin_number); + } + + // if this block is part of a long chain or it is driven by a cluster + // input pin we need to check the placement legality of this block + // Depending on the logic synthesis even small chains that can fit within one + // cluster might need to start at the top of the cluster as their input can be + // driven by a global gnd or vdd. 
Therefore even if this is not a long chain + // but its input pin is driven by a net, the placement legality is checked. + if (is_long_chain || chain_net_id) { + auto chain_id = molecule->chain_info->chain_id; + // if this chain has a chain id assigned to it (implies is_long_chain too) + if (chain_id != -1) { + // the chosen primitive should be a valid starting point for the chain + // long chains should only be placed at the top of the chain tieOff = 0 + if (pb_graph_node != chain_root_pins[chain_id][0]->parent_node) { + block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; + } + // the chain doesn't have an assigned chain_id yet + } else { + block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; + for (const auto& chain : chain_root_pins) { + for (auto tieOff : chain) { + // check if this chosen primitive is one of the possible + // starting points for this chain. + if (pb_graph_node == tieOff->parent_node) { + // this location matches with the one of the dedicated chain + // input from outside logic block, therefore it is feasible + block_pack_status = e_block_pack_status::BLK_PASSED; + break; + } + // long chains should only be placed at the top of the chain tieOff = 0 + if (is_long_chain) break; + } + } + } + } + + return block_pack_status; +} + +/* + * @brief Check that the two atom blocks blk_id and sibling_blk_id (which should + * both be memory slices) are feasible, in the sense that they have + * precicely the same net connections (with the exception of nets in data + * port classes). + * + * Note that this routine does not check pin feasibility against the cur_pb_type; so + * primitive_type_feasible() should also be called on blk_id before concluding it is feasible. 
+ */ +static bool primitive_memory_sibling_feasible(const AtomBlockId blk_id, const t_pb_type* cur_pb_type, const AtomBlockId sibling_blk_id) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + VTR_ASSERT(cur_pb_type->class_type == MEMORY_CLASS); + + //First, identify the 'data' ports by looking at the cur_pb_type + std::unordered_set data_ports; + for (int iport = 0; iport < cur_pb_type->num_ports; ++iport) { + const char* port_class = cur_pb_type->ports[iport].port_class; + if (port_class && strstr(port_class, "data") == port_class) { + //The port_class starts with "data", so it is a data port + + //Record the port + data_ports.insert(cur_pb_type->ports[iport].model_port); + } + } + + //Now verify that all nets (except those connected to data ports) are equivalent + //between blk_id and sibling_blk_id + + //Since the atom netlist stores only in-use ports, we iterate over the model to ensure + //all ports are compared + const t_model* model = cur_pb_type->model; + for (t_model_ports* port : {model->inputs, model->outputs}) { + for (; port; port = port->next) { + if (data_ports.count(port)) { + //Don't check data ports + continue; + } + + //Note: VPR doesn't support multi-driven nets, so all outputs + //should be data ports, otherwise the siblings will both be + //driving the output net + + //Get the ports from each primitive + auto blk_port_id = atom_ctx.nlist.find_atom_port(blk_id, port); + auto sib_port_id = atom_ctx.nlist.find_atom_port(sibling_blk_id, port); + + //Check that all nets (including unconnected nets) match + for (int ipin = 0; ipin < port->size; ++ipin) { + //The nets are initialized as invalid (i.e. 
disconnected) + AtomNetId blk_net_id; + AtomNetId sib_net_id; + + //We can get the actual net provided the port exists + // + //Note that if the port did not exist, the net is left + //as invalid/disconneced + if (blk_port_id) { + blk_net_id = atom_ctx.nlist.port_net(blk_port_id, ipin); + } + if (sib_port_id) { + sib_net_id = atom_ctx.nlist.port_net(sib_port_id, ipin); + } + + //The sibling and block must have the same (possibly disconnected) + //net on this pin + if (blk_net_id != sib_net_id) { + //Nets do not match, not feasible + return false; + } + } + } + } + + return true; +} + +/* + * @brief Check if the given atom is feasible in the given pb. + */ +static bool primitive_feasible(const AtomBlockId blk_id, t_pb* cur_pb) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + const t_pb_type* cur_pb_type = cur_pb->pb_graph_node->pb_type; + + VTR_ASSERT(cur_pb_type->num_modes == 0); /* primitive */ + + AtomBlockId cur_pb_blk_id = atom_ctx.lookup.pb_atom(cur_pb); + if (cur_pb_blk_id && cur_pb_blk_id != blk_id) { + /* This pb already has a different logical block */ + return false; + } + + if (cur_pb_type->class_type == MEMORY_CLASS) { + /* Memory class has additional feasibility requirements: + * - all siblings must share all nets, including open nets, with the exception of data nets */ + + /* find sibling if one exists */ + AtomBlockId sibling_memory_blk_id = find_memory_sibling(cur_pb); + + if (sibling_memory_blk_id) { + //There is a sibling, see if the current block is feasible with it + bool sibling_feasible = primitive_memory_sibling_feasible(blk_id, cur_pb_type, sibling_memory_blk_id); + if (!sibling_feasible) { + return false; + } + } + } + + //Generic feasibility check + return primitive_type_feasible(blk_id, cur_pb_type); +} + +/** + * @brief Try to place atom block into current primitive location + */ +static enum e_block_pack_status +try_place_atom_block_rec(const t_pb_graph_node* pb_graph_node, + const AtomBlockId blk_id, + t_pb* cb, + t_pb** parent, 
+ const int max_models, + const int max_cluster_size, + const LegalizationClusterId cluster_id, + vtr::vector_map& atom_cluster, + const t_cluster_placement_stats* cluster_placement_stats_ptr, + const t_pack_molecule* molecule, + t_lb_router_data* router_data, + int verbosity, + const int feasible_block_array_size) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + AtomContext& mutable_atom_ctx = g_vpr_ctx.mutable_atom(); + + VTR_ASSERT_SAFE(cb != nullptr); + e_block_pack_status block_pack_status = e_block_pack_status::BLK_PASSED; + + /* Discover parent */ + t_pb* parent_pb = nullptr; + if (pb_graph_node->parent_pb_graph_node != cb->pb_graph_node) { + t_pb* my_parent = nullptr; + block_pack_status = try_place_atom_block_rec(pb_graph_node->parent_pb_graph_node, blk_id, cb, + &my_parent, max_models, max_cluster_size, cluster_id, + atom_cluster, + cluster_placement_stats_ptr, molecule, router_data, + verbosity, feasible_block_array_size); + parent_pb = my_parent; + } else { + parent_pb = cb; + } + + /* Create siblings if siblings are not allocated */ + VTR_ASSERT(parent_pb != nullptr); + if (parent_pb->child_pbs == nullptr) { + VTR_ASSERT(parent_pb->name == nullptr); + parent_pb->name = vtr::strdup(atom_ctx.nlist.block_name(blk_id).c_str()); + parent_pb->mode = pb_graph_node->pb_type->parent_mode->index; + set_reset_pb_modes(router_data, parent_pb, true); + const t_mode* mode = &parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode]; + parent_pb->child_pbs = new t_pb*[mode->num_pb_type_children]; + + for (int i = 0; i < mode->num_pb_type_children; i++) { + parent_pb->child_pbs[i] = new t_pb[mode->pb_type_children[i].num_pb]; + + for (int j = 0; j < mode->pb_type_children[i].num_pb; j++) { + parent_pb->child_pbs[i][j].parent_pb = parent_pb; + parent_pb->child_pbs[i][j].pb_graph_node = &(parent_pb->pb_graph_node->child_pb_graph_nodes[parent_pb->mode][i][j]); + } + } + } else { + /* if this is not the first child of this parent, must match existing parent mode */ + 
if (parent_pb->mode != pb_graph_node->pb_type->parent_mode->index) { + return e_block_pack_status::BLK_FAILED_FEASIBLE; + } + } + + const t_mode* mode = &parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode]; + int i; + for (i = 0; i < mode->num_pb_type_children; i++) { + if (pb_graph_node->pb_type == &mode->pb_type_children[i]) { + break; + } + } + VTR_ASSERT(i < mode->num_pb_type_children); + t_pb* pb = &parent_pb->child_pbs[i][pb_graph_node->placement_index]; + VTR_ASSERT_SAFE(pb != nullptr); + *parent = pb; /* this pb is parent of it's child that called this function */ + VTR_ASSERT(pb->pb_graph_node == pb_graph_node); + if (pb->pb_stats == nullptr) { + alloc_and_load_pb_stats(pb, feasible_block_array_size); + } + const t_pb_type* pb_type = pb_graph_node->pb_type; + + /* Any pb_type under an mode, which is disabled for packing, should not be considerd for mapping + * Early exit to flag failure + */ + if (true == pb_type->parent_mode->disable_packing) { + return e_block_pack_status::BLK_FAILED_FEASIBLE; + } + + bool is_primitive = (pb_type->num_modes == 0); + + if (is_primitive) { + VTR_ASSERT(!atom_ctx.lookup.pb_atom(pb) + && atom_ctx.lookup.atom_pb(blk_id) == nullptr + && atom_cluster[blk_id] == LegalizationClusterId::INVALID()); + /* try pack to location */ + VTR_ASSERT(pb->name == nullptr); + pb->name = vtr::strdup(atom_ctx.nlist.block_name(blk_id).c_str()); + + //Update the atom netlist mappings + atom_cluster[blk_id] = cluster_id; + // NOTE: This pb is different from the pb of the cluster. It is the pb + // of the actual primitive. + // TODO: It would be a good idea to remove the use of this global + // variables to prevent external users from modifying this by + // mistake. 
+ mutable_atom_ctx.lookup.set_atom_pb(blk_id, pb); + + add_atom_as_target(router_data, blk_id); + if (!primitive_feasible(blk_id, pb)) { + /* failed location feasibility check, revert pack */ + block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; + } + + // if this block passed and is part of a chained molecule + if (block_pack_status == e_block_pack_status::BLK_PASSED && molecule->is_chain()) { + auto molecule_root_block = molecule->atom_block_ids[molecule->root]; + // if this is the root block of the chain molecule check its placmeent feasibility + if (blk_id == molecule_root_block) { + block_pack_status = check_chain_root_placement_feasibility(pb_graph_node, molecule, blk_id); + } + } + + VTR_LOGV(verbosity > 4 && block_pack_status == e_block_pack_status::BLK_PASSED, + "\t\t\tPlaced atom '%s' (%s) at %s\n", + atom_ctx.nlist.block_name(blk_id).c_str(), + atom_ctx.nlist.block_model(blk_id)->name, + pb->hierarchical_type_name().c_str()); + } + + if (block_pack_status != e_block_pack_status::BLK_PASSED) { + free(pb->name); + pb->name = nullptr; + } + return block_pack_status; +} + +/* + * @brief Resets nets used at different pin classes for determining pin + * feasibility. 
+ */ +static void reset_lookahead_pins_used(t_pb* cur_pb) { + const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; + if (cur_pb->pb_stats == nullptr) { + return; /* No pins used, no need to continue */ + } + + if (pb_type->num_modes > 0 && cur_pb->name != nullptr) { + for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { + cur_pb->pb_stats->lookahead_input_pins_used[i].clear(); + } + + for (int i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { + cur_pb->pb_stats->lookahead_output_pins_used[i].clear(); + } + + if (cur_pb->child_pbs != nullptr) { + for (int i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { + if (cur_pb->child_pbs[i] != nullptr) { + for (int j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { + reset_lookahead_pins_used(&cur_pb->child_pbs[i][j]); + } + } + } + } + } +} + +/* + * @brief Checks if the sinks of the given net are reachable from the driver + * pb gpin. + */ +static int net_sinks_reachable_in_cluster(const t_pb_graph_pin* driver_pb_gpin, const int depth, const AtomNetId net_id) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + //Record the sink pb graph pins we are looking for + std::unordered_set sink_pb_gpins; + for (const AtomPinId pin_id : atom_ctx.nlist.net_sinks(net_id)) { + const t_pb_graph_pin* sink_pb_gpin = find_pb_graph_pin(atom_ctx.nlist, atom_ctx.lookup, pin_id); + VTR_ASSERT(sink_pb_gpin); + + sink_pb_gpins.insert(sink_pb_gpin); + } + + //Count how many sink pins are reachable + size_t num_reachable_sinks = 0; + for (int i_prim_pin = 0; i_prim_pin < driver_pb_gpin->num_connectable_primitive_input_pins[depth]; ++i_prim_pin) { + const t_pb_graph_pin* reachable_pb_gpin = driver_pb_gpin->list_of_connectable_input_pin_ptrs[depth][i_prim_pin]; + + if (sink_pb_gpins.count(reachable_pb_gpin)) { + ++num_reachable_sinks; + if (num_reachable_sinks == atom_ctx.nlist.net_sinks(net_id).size()) { + return true; + } + } + } + + return false; +} + +/** + * 
@brief Returns the pb_graph_pin of the atom pin defined by the driver_pin_id in the driver_pb + */ +static t_pb_graph_pin* get_driver_pb_graph_pin(const t_pb* driver_pb, const AtomPinId driver_pin_id) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + const auto driver_pb_type = driver_pb->pb_graph_node->pb_type; + int output_port = 0; + // find the port of the pin driving the net as well as the port model + auto driver_port_id = atom_ctx.nlist.pin_port(driver_pin_id); + auto driver_model_port = atom_ctx.nlist.port_model(driver_port_id); + // find the port id of the port containing the driving pin in the driver_pb_type + for (int i = 0; i < driver_pb_type->num_ports; i++) { + auto& prim_port = driver_pb_type->ports[i]; + if (prim_port.type == OUT_PORT) { + if (prim_port.model_port == driver_model_port) { + // get the output pb_graph_pin driving this input net + return &(driver_pb->pb_graph_node->output_pins[output_port][atom_ctx.nlist.pin_port_bit(driver_pin_id)]); + } + output_port++; + } + } + // the pin should be found + VTR_ASSERT(false); + return nullptr; +} + +/** + * @brief Given a pin and its assigned net, mark all pin classes that are affected. + * Check if connecting this pin to it's driver pin or to all sink pins will + * require leaving a pb_block starting from the parent pb_block of the + * primitive till the root block (depth = 0). If leaving a pb_block is + * required add this net to the pin class (to increment the number of used + * pins from this class) that should be used to leave the pb_block. 
+ */ +static void compute_and_mark_lookahead_pins_used_for_pin(const t_pb_graph_pin* pb_graph_pin, + const t_pb* primitive_pb, + const AtomNetId net_id, + const vtr::vector_map& atom_cluster) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + // starting from the parent pb of the input primitive go up in the hierarchy till the root block + for (auto cur_pb = primitive_pb->parent_pb; cur_pb; cur_pb = cur_pb->parent_pb) { + const auto depth = cur_pb->pb_graph_node->pb_type->depth; + const auto pin_class = pb_graph_pin->parent_pin_class[depth]; + VTR_ASSERT(pin_class != OPEN); + + const auto driver_blk_id = atom_ctx.nlist.net_driver_block(net_id); + + // if this primitive pin is an input pin + if (pb_graph_pin->port->type == IN_PORT) { + /* find location of net driver if exist in clb, NULL otherwise */ + // find the driver of the input net connected to the pin being studied + const auto driver_pin_id = atom_ctx.nlist.net_driver(net_id); + // find the id of the atom occupying the input primitive_pb + const auto prim_blk_id = atom_ctx.lookup.pb_atom(primitive_pb); + // find the pb block occupied by the driving atom + const auto driver_pb = atom_ctx.lookup.atom_pb(driver_blk_id); + // pb_graph_pin driving net_id in the driver pb block + t_pb_graph_pin* output_pb_graph_pin = nullptr; + // if the driver block is in the same clb as the input primitive block + LegalizationClusterId driver_cluster_id = atom_cluster[driver_blk_id]; + LegalizationClusterId prim_cluster_id = atom_cluster[prim_blk_id]; + if (driver_cluster_id == prim_cluster_id) { + // get pb_graph_pin driving the given net + output_pb_graph_pin = get_driver_pb_graph_pin(driver_pb, driver_pin_id); + } + + bool is_reachable = false; + + // if the driver pin is within the cluster + if (output_pb_graph_pin) { + // find if the driver pin can reach the input pin of the primitive or not + const t_pb* check_pb = driver_pb; + while (check_pb && check_pb != cur_pb) { + check_pb = check_pb->parent_pb; + } + if 
(check_pb) { + for (int i = 0; i < output_pb_graph_pin->num_connectable_primitive_input_pins[depth]; i++) { + if (pb_graph_pin == output_pb_graph_pin->list_of_connectable_input_pin_ptrs[depth][i]) { + is_reachable = true; + break; + } + } + } + } + + // Must use an input pin to connect the driver to the input pin of the given primitive, either the + // driver atom is not contained in the cluster or is contained but cannot reach the primitive pin + if (!is_reachable) { + // add net to lookahead_input_pins_used if not already added + auto it = std::find(cur_pb->pb_stats->lookahead_input_pins_used[pin_class].begin(), + cur_pb->pb_stats->lookahead_input_pins_used[pin_class].end(), net_id); + if (it == cur_pb->pb_stats->lookahead_input_pins_used[pin_class].end()) { + cur_pb->pb_stats->lookahead_input_pins_used[pin_class].push_back(net_id); + } + } + } else { + VTR_ASSERT(pb_graph_pin->port->type == OUT_PORT); + /* + * Determine if this net (which is driven from within this cluster) leaves this cluster + * (and hence uses an output pin). + */ + + bool net_exits_cluster = true; + int num_net_sinks = static_cast(atom_ctx.nlist.net_sinks(net_id).size()); + + if (pb_graph_pin->num_connectable_primitive_input_pins[depth] >= num_net_sinks) { + //It is possible the net is completely absorbed in the cluster, + //since this pin could (potentially) drive all the net's sinks + + /* Important: This runtime penalty looks a lot scarier than it really is. + * For high fan-out nets, I at most look at the number of pins within the + * cluster which limits runtime. + * + * DO NOT REMOVE THIS INITIAL FILTER WITHOUT CAREFUL ANALYSIS ON RUNTIME!!! + * + * Key Observation: + * For LUT-based designs it is impossible for the average fanout to exceed + * the number of LUT inputs so it's usually around 4-5 (pigeon-hole argument, + * if the average fanout is greater than the number of LUT inputs, where do + * the extra connections go? 
Therefore, average fanout must be capped to a + * small constant where the constant is equal to the number of LUT inputs). + * The real danger to runtime is when the number of sinks of a net gets doubled + */ + + //Check if all the net sinks are, in fact, inside this cluster + bool all_sinks_in_cur_cluster = true; + LegalizationClusterId driver_cluster = atom_cluster[driver_blk_id]; + for (auto pin_id : atom_ctx.nlist.net_sinks(net_id)) { + auto sink_blk_id = atom_ctx.nlist.pin_block(pin_id); + if (atom_cluster[sink_blk_id] != driver_cluster) { + all_sinks_in_cur_cluster = false; + break; + } + } + + if (all_sinks_in_cur_cluster) { + //All the sinks are part of this cluster, so the net may be fully absorbed. + // + //Verify this, by counting the number of net sinks reachable from the driver pin. + //If the count equals the number of net sinks then the net is fully absorbed and + //the net does not exit the cluster + /* TODO: I should cache the absorbed outputs, once net is absorbed, + * net is forever absorbed, no point in rechecking every time */ + if (net_sinks_reachable_in_cluster(pb_graph_pin, depth, net_id)) { + //All the sinks are reachable inside the cluster + net_exits_cluster = false; + } + } + } + + if (net_exits_cluster) { + /* This output must exit this cluster */ + cur_pb->pb_stats->lookahead_output_pins_used[pin_class].push_back(net_id); + } + } + } +} + + +/* + * @brief Determine if pins of speculatively packed pb are legal + */ +static void compute_and_mark_lookahead_pins_used(const AtomBlockId blk_id, + const vtr::vector_map& atom_cluster) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + const t_pb* cur_pb = atom_ctx.lookup.atom_pb(blk_id); + VTR_ASSERT(cur_pb != nullptr); + + /* Walk through inputs, outputs, and clocks marking pins off of the same class */ + for (auto pin_id : atom_ctx.nlist.block_pins(blk_id)) { + auto net_id = atom_ctx.nlist.pin_net(pin_id); + + const t_pb_graph_pin* pb_graph_pin = find_pb_graph_pin(atom_ctx.nlist, 
atom_ctx.lookup, pin_id); + compute_and_mark_lookahead_pins_used_for_pin(pb_graph_pin, cur_pb, net_id, atom_cluster); + } +} + +/* + * @brief Determine if speculatively packed cur_pb is pin feasible + * + * Runtime is actually not that bad for this. It's worst case O(k^2) where k is the + * number of pb_graph pins. Can use hash tables or make incremental if becomes an issue. + */ +static void try_update_lookahead_pins_used(t_pb* cur_pb, + const vtr::vector_map& atom_cluster) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + // run recursively till a leaf (primitive) pb block is reached + const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; + if (pb_type->num_modes > 0 && cur_pb->name != nullptr) { + if (cur_pb->child_pbs != nullptr) { + for (int i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { + if (cur_pb->child_pbs[i] != nullptr) { + for (int j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { + try_update_lookahead_pins_used(&cur_pb->child_pbs[i][j], atom_cluster); + } + } + } + } + } else { + // find if this child (primitive) pb block has an atom mapped to it, + // if yes compute and mark lookahead pins used for that pb block + AtomBlockId blk_id = atom_ctx.lookup.pb_atom(cur_pb); + if (pb_type->blif_model != nullptr && blk_id) { + compute_and_mark_lookahead_pins_used(blk_id, atom_cluster); + } + } +} + +/* + * @brief Check if the number of available inputs/outputs for a pin class is + * sufficient for speculatively packed blocks. 
+ */ +static bool check_lookahead_pins_used(t_pb* cur_pb, t_ext_pin_util max_external_pin_util) { + const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; + + if (pb_type->num_modes > 0 && cur_pb->name) { + for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { + size_t class_size = cur_pb->pb_graph_node->input_pin_class_size[i]; + + if (cur_pb->is_root()) { + // Scale the class size by the maximum external pin utilization factor + // Use ceil to avoid classes of size 1 from being scaled to zero + class_size = std::ceil(max_external_pin_util.input_pin_util * class_size); + // if the number of pins already used is larger than class size, then the number of + // cluster inputs already used should be our constraint. Why is this needed? This is + // needed since when packing the seed block the maximum external pin utilization is + // used as 1.0 allowing molecules that are using up to all the cluster inputs to be + // packed legally. Therefore, if the seed block is already using more inputs than + // the allowed maximum utilization, this should become the new maximum pin utilization. + class_size = std::max(class_size, cur_pb->pb_stats->input_pins_used[i].size()); + } + + if (cur_pb->pb_stats->lookahead_input_pins_used[i].size() > class_size) { + return false; + } + } + + for (int i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { + size_t class_size = cur_pb->pb_graph_node->output_pin_class_size[i]; + if (cur_pb->is_root()) { + // Scale the class size by the maximum external pin utilization factor + // Use ceil to avoid classes of size 1 from being scaled to zero + class_size = std::ceil(max_external_pin_util.output_pin_util * class_size); + // if the number of pins already used is larger than class size, then the number of + // cluster outputs already used should be our constraint. Why is this needed? 
This is + // needed since when packing the seed block the maximum external pin utilization is + // used as 1.0 allowing molecules that are using up to all the cluster inputs to be + // packed legally. Therefore, if the seed block is already using more inputs than + // the allowed maximum utilization, this should become the new maximum pin utilization. + class_size = std::max(class_size, cur_pb->pb_stats->output_pins_used[i].size()); + } + + if (cur_pb->pb_stats->lookahead_output_pins_used[i].size() > class_size) { + return false; + } + } + + if (cur_pb->child_pbs) { + for (int i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { + if (cur_pb->child_pbs[i]) { + for (int j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { + if (!check_lookahead_pins_used(&cur_pb->child_pbs[i][j], max_external_pin_util)) + return false; + } + } + } + } + } + + return true; +} + +/** + * @brief This function takes a chain molecule, and the pb_graph_node that is + * chosen for packing the molecule's root block. Using the given + * root_primitive, this function will identify which chain id this + * molecule is being mapped to and will update the chain id value inside + * the chain info data structure of this molecule. + */ +static void update_molecule_chain_info(t_pack_molecule* chain_molecule, const t_pb_graph_node* root_primitive) { + VTR_ASSERT(chain_molecule->chain_info->chain_id == -1 && chain_molecule->chain_info->is_long_chain); + + auto chain_root_pins = chain_molecule->pack_pattern->chain_root_pins; + + // long chains should only be placed at the beginning of the chain + // Since for long chains the molecule size is already equal to the + // total number of adders in the cluster. Therefore, it should + // always be placed at the very first adder in this cluster. 
+ for (size_t chainId = 0; chainId < chain_root_pins.size(); chainId++) { + if (chain_root_pins[chainId][0]->parent_node == root_primitive) { + chain_molecule->chain_info->chain_id = chainId; + chain_molecule->chain_info->first_packed_molecule = chain_molecule; + return; + } + } + + VTR_ASSERT(false); +} + +/* + * @brief Revert the trial placement of atom block blk_id and free up memory space accordingly. + */ +static void revert_place_atom_block(const AtomBlockId blk_id, + t_lb_router_data* router_data, + const Prepacker& prepacker, + vtr::vector_map& atom_cluster) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + AtomContext& mutable_atom_ctx = g_vpr_ctx.mutable_atom(); + + //We cast away const here since we may free the pb, and it is + //being removed from the active mapping. + // + //In general most code works fine accessing const t_pb*, + //which is why we store them as such in atom_ctx.lookup + t_pb* pb = const_cast(atom_ctx.lookup.atom_pb(blk_id)); + + if (pb != nullptr) { + /* When freeing molecules, the current block might already have been freed by a prior revert + * When this happens, no need to do anything beyond basic book keeping at the atom block + */ + + t_pb* next = pb->parent_pb; + revalid_molecules(pb, prepacker); + free_pb(pb); + pb = next; + + while (pb != nullptr) { + /* If this pb was created only for the purposes of holding the new molecule, remove it + * Must check if cluster is already freed (which can be the case) + */ + next = pb->parent_pb; + + if (pb->child_pbs != nullptr && pb->pb_stats != nullptr + && pb->pb_stats->num_child_blocks_in_pb == 0) { + set_reset_pb_modes(router_data, pb, false); + if (next != nullptr) { + /* If the code gets here, then that means that placing the initial seed molecule + * failed, don't free the actual complex block itself as the seed needs to find + * another placement */ + revalid_molecules(pb, prepacker); + free_pb(pb); + } + } + pb = next; + } + } + + //Update the atom netlist mapping + atom_cluster[blk_id] =
LegalizationClusterId::INVALID(); + mutable_atom_ctx.lookup.set_atom_pb(blk_id, nullptr); +} + +/* + * @brief Speculation successful, commit input/output pins used. + */ +static void commit_lookahead_pins_used(t_pb* cur_pb) { + const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; + + if (pb_type->num_modes > 0 && cur_pb->name) { + for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { + VTR_ASSERT(cur_pb->pb_stats->lookahead_input_pins_used[i].size() <= (unsigned int)cur_pb->pb_graph_node->input_pin_class_size[i]); + for (size_t j = 0; j < cur_pb->pb_stats->lookahead_input_pins_used[i].size(); j++) { + VTR_ASSERT(cur_pb->pb_stats->lookahead_input_pins_used[i][j]); + cur_pb->pb_stats->input_pins_used[i].insert({j, cur_pb->pb_stats->lookahead_input_pins_used[i][j]}); + } + } + + for (int i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { + VTR_ASSERT(cur_pb->pb_stats->lookahead_output_pins_used[i].size() <= (unsigned int)cur_pb->pb_graph_node->output_pin_class_size[i]); + for (size_t j = 0; j < cur_pb->pb_stats->lookahead_output_pins_used[i].size(); j++) { + VTR_ASSERT(cur_pb->pb_stats->lookahead_output_pins_used[i][j]); + cur_pb->pb_stats->output_pins_used[i].insert({j, cur_pb->pb_stats->lookahead_output_pins_used[i][j]}); + } + } + + if (cur_pb->child_pbs) { + for (int i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { + if (cur_pb->child_pbs[i]) { + for (int j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { + commit_lookahead_pins_used(&cur_pb->child_pbs[i][j]); + } + } + } + } + } +} + +/** + * @brief Cleans up a pb after unsuccessful molecule packing + * + * Recursively frees pbs from a t_pb tree. The given root pb itself is not + * deleted. + * + * If a pb object has its children allocated then before freeing them the + * function checks if there is no atom that corresponds to any of them. The + * check is performed only for leaf (primitive) pbs. 
The function recurses for + * non-primitive pbs. + * + * The cleaning itself includes deleting all child pbs, resetting mode of the + * pb and also freeing its name. This prepares the pb for another round of + * molecule packing tryout. + */ +static bool cleanup_pb(t_pb* pb) { + bool can_free = true; + + /* Recursively check if there are any children with already assigned atoms */ + if (pb->child_pbs != nullptr) { + const t_mode* mode = &pb->pb_graph_node->pb_type->modes[pb->mode]; + VTR_ASSERT(mode != nullptr); + + /* Check each child pb type of the current mode */ + for (int i = 0; i < mode->num_pb_type_children; ++i) { + /* Check each instance of this child pb type */ + if (pb->child_pbs[i] != nullptr) { + for (int j = 0; j < mode->pb_type_children[i].num_pb; ++j) { + t_pb* pb_child = &pb->child_pbs[i][j]; + t_pb_type* pb_type = pb_child->pb_graph_node->pb_type; + + /* Primitive, check occupancy */ + if (pb_type->num_modes == 0) { + if (pb_child->name != nullptr) { + can_free = false; + } + } + + /* Non-primitive, recurse */ + else { + if (!cleanup_pb(pb_child)) { + can_free = false; + } + } + } + } + } + + /* Free the children only if none of them holds an atom */ + if (can_free) { + for (int i = 0; i < mode->num_pb_type_children; ++i) { + if (pb->child_pbs[i] != nullptr) { + delete[] pb->child_pbs[i]; + } + } + + delete[] pb->child_pbs; + pb->child_pbs = nullptr; + pb->mode = 0; + + if (pb->name) { + free(pb->name); + pb->name = nullptr; + } + } + } + + return can_free; +} + +e_block_pack_status ClusterLegalizer::try_pack_molecule(t_pack_molecule* molecule, + LegalizationCluster& cluster, + LegalizationClusterId cluster_id, + const t_ext_pin_util& max_external_pin_util) { + // Try to pack the molecule into a cluster with this pb type. + + // Safety debug asserts. + VTR_ASSERT_DEBUG(molecule != nullptr); + VTR_ASSERT_DEBUG(cluster.pb != nullptr); + VTR_ASSERT_DEBUG(cluster.type != nullptr); + + // TODO: Remove these global accesses to the contexts.
+ // AtomContext used for: + // - printing verbose statements + // - Looking up the primitive pb + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + // FloorplanningContext used for: + // - Checking if the atom can be placed in the cluster for floorplanning + // constraints. + const FloorplanningContext& floorplanning_ctx = g_vpr_ctx.floorplanning(); + if (log_verbosity_ > 3) { + AtomBlockId root_atom = molecule->atom_block_ids[molecule->root]; + VTR_LOG("\t\tTry pack molecule: '%s' (%s)", + atom_ctx.nlist.block_name(root_atom).c_str(), + atom_ctx.nlist.block_model(root_atom)->name); + VTR_LOGV(molecule->pack_pattern, + " molecule_type %s molecule_size %zu", + molecule->pack_pattern->name, + molecule->atom_block_ids.size()); + VTR_LOG("\n"); + } + + // if this cluster has a molecule placed in it that is part of a long chain + // (a chain that consists of more than one molecule), don't allow more long chain + // molecules to be placed in this cluster. To avoid possibly creating cluster level + // blocks that have incompatible placement constraints or form very long placement + // macros that limit placement flexibility. + t_cluster_placement_stats* cluster_placement_stats_ptr = &(cluster_placement_stats_[cluster.type->index]); + if (cluster_placement_stats_ptr->has_long_chain && molecule->is_chain() && molecule->chain_info->is_long_chain) { + VTR_LOGV(log_verbosity_ > 4, "\t\t\tFAILED Placement Feasibility Filter: Only one long chain per cluster is allowed\n"); + //Record the failure of this molecule in the current pb stats + record_molecule_failure(molecule, cluster.pb); + // Free the allocated data. + return e_block_pack_status::BLK_FAILED_FEASIBLE; + } + + // Check if every atom in the molecule is legal in the cluster from a + // floorplanning perspective + bool cluster_pr_update_check = false; + PartitionRegion new_cluster_pr = cluster.pr; + // TODO: This can be made more efficient by pre-computing the intersection + // of all the atoms' PRs in the molecule. 
+ int molecule_size = get_array_size_of_molecule(molecule); + for (int i_mol = 0; i_mol < molecule_size; i_mol++) { + // Try to intersect with atom PartitionRegion if atom exists + AtomBlockId atom_blk_id = molecule->atom_block_ids[i_mol]; + if (atom_blk_id) { + bool cluster_pr_needs_update = false; + bool block_pack_floorplan_status = check_cluster_floorplanning(atom_blk_id, + new_cluster_pr, + floorplanning_ctx.constraints, + log_verbosity_, + cluster_pr_needs_update); + if (!block_pack_floorplan_status) { + // Record the failure of this molecule in the current pb stats + record_molecule_failure(molecule, cluster.pb); + return e_block_pack_status::BLK_FAILED_FLOORPLANNING; + } + + if (cluster_pr_needs_update) { + cluster_pr_update_check = true; + } + } + } + + // Check if all atoms in the molecule can be added to the cluster without + // NoC group conflicts + NocGroupId new_cluster_noc_grp_id = cluster.noc_grp_id; + for (int i_mol = 0; i_mol < molecule_size; i_mol++) { + AtomBlockId atom_blk_id = molecule->atom_block_ids[i_mol]; + if (atom_blk_id) { + bool block_pack_noc_grp_status = check_cluster_noc_group(atom_blk_id, + new_cluster_noc_grp_id, + atom_noc_grp_id_, + log_verbosity_); + if (!block_pack_noc_grp_status) { + // Record the failure of this molecule in the current pb stats + record_molecule_failure(molecule, cluster.pb); + return e_block_pack_status::BLK_FAILED_NOC_GROUP; + } + } + } + + std::vector primitives_list(max_molecule_size_, nullptr); + e_block_pack_status block_pack_status = e_block_pack_status::BLK_STATUS_UNDEFINED; + while (block_pack_status != e_block_pack_status::BLK_PASSED) { + if (!get_next_primitive_list(cluster_placement_stats_ptr, + molecule, + primitives_list.data())) { + VTR_LOGV(log_verbosity_ > 3, "\t\tFAILED No candidate primitives available\n"); + block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; + break; /* no more candidate primitives available, this molecule will not pack, return fail */ + } + + block_pack_status 
= e_block_pack_status::BLK_PASSED; + int failed_location = 0; + for (int i_mol = 0; i_mol < molecule_size && block_pack_status == e_block_pack_status::BLK_PASSED; i_mol++) { + VTR_ASSERT((primitives_list[i_mol] == nullptr) == (!molecule->atom_block_ids[i_mol])); + failed_location = i_mol + 1; + AtomBlockId atom_blk_id = molecule->atom_block_ids[i_mol]; + if (!atom_blk_id.is_valid()) + continue; + // NOTE: This parent variable is only used in the recursion of this + // function. + t_pb* parent = nullptr; + block_pack_status = try_place_atom_block_rec(primitives_list[i_mol], + atom_blk_id, + cluster.pb, + &parent, + num_models_, + max_cluster_size_, + cluster_id, + atom_cluster_, + cluster_placement_stats_ptr, + molecule, + cluster.router_data, + log_verbosity_, + feasible_block_array_size_); + } + + if (enable_pin_feasibility_filter_ && block_pack_status == e_block_pack_status::BLK_PASSED) { + // Check if pin usage is feasible for the current packing assignment + reset_lookahead_pins_used(cluster.pb); + try_update_lookahead_pins_used(cluster.pb, atom_cluster_); + if (!check_lookahead_pins_used(cluster.pb, max_external_pin_util)) { + VTR_LOGV(log_verbosity_ > 4, "\t\t\tFAILED Pin Feasibility Filter\n"); + block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; + } else { + VTR_LOGV(log_verbosity_ > 3, "\t\t\tPin Feasibility: Passed pin feasibility filter\n"); + } + } + + if (block_pack_status == e_block_pack_status::BLK_PASSED) { + /* + * during the clustering step of `do_clustering`, `detailed_routing_stage` is incremented at each iteration until it a cluster + * is correctly generated or `detailed_routing_stage` assumes an invalid value (E_DETAILED_ROUTE_INVALID). + * depending on its value we have different behaviors: + * - E_DETAILED_ROUTE_AT_END_ONLY: Skip routing if heuristic is to route at the end of packing complex block. + * - E_DETAILED_ROUTE_FOR_EACH_ATOM: Try to route if heuristic is to route for every atom. 
If the clusterer arrives at this stage, + * it means that more checks have to be performed as the previous stage failed to generate a new cluster. + * + * mode_status is a data structure containing the status of the mode selection. Its members are: + * - bool is_mode_conflict + * - bool try_expand_all_modes + * - bool expand_all_modes + * + * is_mode_conflict affects this stage. Its value determines whether the cluster failed to pack after a mode conflict issue. + * It holds a flag that is used to verify whether try_intra_lb_route ended in a mode conflict issue. + * + * Until is_mode_conflict is set to FALSE by try_intra_lb_route, the loop re-iterates. If all the available modes are exhausted + * an error will be thrown during mode conflicts checks (this to prevent infinite loops). + * + * If the value is TRUE the cluster has to be re-routed, and its internal pb_graph_nodes will have more restrict choices + * for what regards the mode that has to be selected. + * + * is_mode_conflict is initially set to TRUE, and, unless a mode conflict is found, it is set to false in `try_intra_lb_route`. + * + * try_expand_all_modes is set if the node expansion failed to find a valid routing path. The clusterer tries to find another route + * by using all the modes during node expansion. + * + * expand_all_modes is used to enable the expansion of all the nodes using all the possible modes. 
+ */ + t_mode_selection_status mode_status; + bool is_routed = false; + bool do_detailed_routing_stage = (cluster_legalization_strategy_ == ClusterLegalizationStrategy::FULL); + if (do_detailed_routing_stage) { + do { + reset_intra_lb_route(cluster.router_data); + is_routed = try_intra_lb_route(cluster.router_data, log_verbosity_, &mode_status); + } while (do_detailed_routing_stage && mode_status.is_mode_issue()); + } + + if (do_detailed_routing_stage && !is_routed) { + /* Cannot pack */ + VTR_LOGV(log_verbosity_ > 4, "\t\t\tFAILED Detailed Routing Legality\n"); + block_pack_status = e_block_pack_status::BLK_FAILED_ROUTE; + } else { + /* Pack successful, commit + * TODO: SW Engineering note - may want to update cluster stats here too instead of doing it outside + */ + VTR_ASSERT(block_pack_status == e_block_pack_status::BLK_PASSED); + if (molecule->is_chain()) { + /* Chained molecules often take up lots of area and are important, + * if a chain is packed in, want to rename logic block to match chain name */ + AtomBlockId chain_root_blk_id = molecule->atom_block_ids[molecule->pack_pattern->root_block->block_id]; + t_pb* cur_pb = atom_ctx.lookup.atom_pb(chain_root_blk_id)->parent_pb; + while (cur_pb != nullptr) { + free(cur_pb->name); + cur_pb->name = vtr::strdup(atom_ctx.nlist.block_name(chain_root_blk_id).c_str()); + cur_pb = cur_pb->parent_pb; + } + // if this molecule is part of a chain, mark the cluster as having a long chain + // molecule. Also check if it's the first molecule in the chain to be packed. 
+ // If so, update the chain id for this chain of molecules to make sure all + // molecules will be packed to the same chain id and can reach each other using + // the chain direct links between clusters + if (molecule->chain_info->is_long_chain) { + cluster_placement_stats_ptr->has_long_chain = true; + if (molecule->chain_info->chain_id == -1) { + update_molecule_chain_info(molecule, primitives_list[molecule->root]); + } + } + } + + //update cluster PartitionRegion if atom with floorplanning constraints was added + if (cluster_pr_update_check) { + cluster.pr = new_cluster_pr; + VTR_LOGV(log_verbosity_ > 2, "\nUpdated PartitionRegion of cluster\n"); + } + + // Update the cluster's NoC group ID. This is cheap so it does + // not need the check like the what the PR did above. + cluster.noc_grp_id = new_cluster_noc_grp_id; + + // Insert the molecule into the cluster for bookkeeping. + cluster.molecules.push_back(molecule); + + for (int i = 0; i < molecule_size; i++) { + AtomBlockId atom_blk_id = molecule->atom_block_ids[i]; + if (!atom_blk_id.is_valid()) + continue; + + /* invalidate all molecules that share atom block with current molecule */ + t_pack_molecule* cur_molecule = prepacker_.get_atom_molecule(atom_blk_id); + // TODO: This should really be named better. Something like + // "is_clustered". and then it should be set to true. + // Right now, valid implies "not clustered" which is + // confusing. + cur_molecule->valid = false; + + commit_primitive(cluster_placement_stats_ptr, primitives_list[i]); + + atom_cluster_[atom_blk_id] = cluster_id; + + // Update the num child blocks in pb + const t_pb* atom_pb = atom_ctx.lookup.atom_pb(atom_blk_id); + VTR_ASSERT_SAFE(atom_pb != nullptr); + t_pb* cur_pb = atom_pb->parent_pb; + while (cur_pb != nullptr) { + cur_pb->pb_stats->num_child_blocks_in_pb++; + cur_pb = cur_pb->parent_pb; + } + } + + // Update the lookahead pins used. 
+ commit_lookahead_pins_used(cluster.pb); + } + } + + if (block_pack_status != e_block_pack_status::BLK_PASSED) { + /* Pack unsuccessful, undo inserting molecule into cluster */ + for (int i = 0; i < failed_location; i++) { + AtomBlockId atom_blk_id = molecule->atom_block_ids[i]; + if (atom_blk_id) { + remove_atom_from_target(cluster.router_data, atom_blk_id); + } + } + for (int i = 0; i < failed_location; i++) { + AtomBlockId atom_blk_id = molecule->atom_block_ids[i]; + if (atom_blk_id) { + revert_place_atom_block(atom_blk_id, cluster.router_data, prepacker_, atom_cluster_); + } + } + + // Record the failure of this molecule in the current pb stats + record_molecule_failure(molecule, cluster.pb); + + /* Packing failed, but a part of the pb tree is still allocated and pbs have their modes set. + * Before trying to pack next molecule the unused pbs need to be freed and, the most important, + * their modes reset. This task is performed by the cleanup_pb() function below. */ + cleanup_pb(cluster.pb); + } else { + VTR_LOGV(log_verbosity_ > 3, "\t\tPASSED pack molecule\n"); + } + } + return block_pack_status; +} + +std::tuple +ClusterLegalizer::start_new_cluster(t_pack_molecule* molecule, + t_logical_block_type_ptr cluster_type, + int cluster_mode) { + // Safety asserts to ensure the API is being called with valid arguments. + VTR_ASSERT_DEBUG(molecule != nullptr); + VTR_ASSERT_DEBUG(cluster_type != nullptr); + VTR_ASSERT_DEBUG(cluster_mode < cluster_type->pb_graph_head->pb_type->num_modes); + // Ensure that the molecule has not already been placed. + VTR_ASSERT_SAFE(molecule_cluster_.find(molecule) == molecule_cluster_.end() || + !molecule_cluster_[molecule].is_valid()); + // Safety asserts to ensure that the API was initialized properly. + VTR_ASSERT_DEBUG(cluster_placement_stats_ != nullptr && + lb_type_rr_graphs_ != nullptr); + + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; + + // Create the physical block for this cluster based on the type. 
+ t_pb* cluster_pb = new t_pb; + cluster_pb->pb_graph_node = cluster_type->pb_graph_head; + alloc_and_load_pb_stats(cluster_pb, feasible_block_array_size_); + cluster_pb->parent_pb = nullptr; + cluster_pb->mode = cluster_mode; + + // Allocate and load the LB router data + t_lb_router_data* router_data = alloc_and_load_router_data(&lb_type_rr_graphs_[cluster_type->index], + cluster_type); + + // Reset the cluster placement stats + t_cluster_placement_stats* cluster_placement_stats_ptr = &(cluster_placement_stats_[cluster_type->index]); + reset_cluster_placement_stats(cluster_placement_stats_ptr); + set_mode_cluster_placement_stats(cluster_pb->pb_graph_node, cluster_pb->mode); + + // Create the new cluster + LegalizationCluster new_cluster; + new_cluster.pb = cluster_pb; + new_cluster.router_data = router_data; + new_cluster.pr = PartitionRegion(); + new_cluster.noc_grp_id = NocGroupId::INVALID(); + new_cluster.type = cluster_type; + + // Try to pack the molecule into the new_cluster. + // When starting a new cluster, we set the external pin utilization to full + // (meaning all cluster pins are allowed to be used). + const t_ext_pin_util FULL_EXTERNAL_PIN_UTIL(1., 1.); + LegalizationClusterId new_cluster_id = LegalizationClusterId(legalization_cluster_ids_.size()); + e_block_pack_status pack_status = try_pack_molecule(molecule, + new_cluster, + new_cluster_id, + FULL_EXTERNAL_PIN_UTIL); + + if (pack_status == e_block_pack_status::BLK_PASSED) { + // Give the new cluster pb a name. The current convention is to name the + // cluster after the root atom of the first molecule packed into it. + AtomBlockId root_atom = molecule->atom_block_ids[molecule->root]; + const std::string& root_atom_name = atom_nlist.block_name(root_atom); + if (new_cluster.pb->name != nullptr) + free(new_cluster.pb->name); + new_cluster.pb->name = vtr::strdup(root_atom_name.c_str()); + // Move the cluster into the vector of clusters and ids. 
+ legalization_cluster_ids_.push_back(new_cluster_id); + legalization_clusters_.push_back(std::move(new_cluster)); + // Update the molecule to cluster map. + molecule_cluster_[molecule] = new_cluster_id; + } else { + // Delete the new_cluster. + free_pb(new_cluster.pb); + delete new_cluster.pb; + free_router_data(new_cluster.router_data); + new_cluster_id = LegalizationClusterId::INVALID(); + } + + return {pack_status, new_cluster_id}; +} + +e_block_pack_status ClusterLegalizer::add_mol_to_cluster(t_pack_molecule* molecule, + LegalizationClusterId cluster_id) { + // Safety asserts to make sure the inputs are valid. + VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + VTR_ASSERT(legalization_cluster_ids_[cluster_id].is_valid() && "Cannot add to a destroyed cluster"); + // Ensure that the molecule has not already been placed. + VTR_ASSERT(molecule_cluster_.find(molecule) == molecule_cluster_.end() || + !molecule_cluster_[molecule].is_valid()); + // Safety asserts to ensure that the API was initialized properly. + VTR_ASSERT_DEBUG(cluster_placement_stats_ != nullptr && + lb_type_rr_graphs_ != nullptr); + + // Get the cluster. + LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + VTR_ASSERT(cluster.router_data != nullptr && "Cannot add molecule to cleaned cluster!"); + // Set the target_external_pin_util. + t_ext_pin_util target_ext_pin_util = target_external_pin_util_.get_pin_util(cluster.type->name); + // Try to pack the molecule into the cluster. + e_block_pack_status pack_status = try_pack_molecule(molecule, + cluster, + cluster_id, + target_ext_pin_util); + + // If the packing was successful, set the molecules' cluster to this one. + if (pack_status == e_block_pack_status::BLK_PASSED) + molecule_cluster_[molecule] = cluster_id; + + return pack_status; +} + +void ClusterLegalizer::destroy_cluster(LegalizationClusterId cluster_id) { + // Safety asserts to make sure the inputs are valid. 
+ VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + VTR_ASSERT(legalization_cluster_ids_[cluster_id].is_valid() && "Cannot destroy an already destroyed cluster"); + // Get the cluster. + LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + // Remove all molecules from the cluster. + for (t_pack_molecule* mol : cluster.molecules) { + VTR_ASSERT_SAFE(molecule_cluster_.find(mol) != molecule_cluster_.end() && + molecule_cluster_[mol] == cluster_id); + molecule_cluster_[mol] = LegalizationClusterId::INVALID(); + // The overall clustering algorithm uses this valid flag to indicate + // that a molecule has not been packed (clustered) yet. Since we are + // destroying a cluster, all of its molecules are now no longer clustered + // so they are all validated. + mol->valid = true; + // Revert the placement of all blocks in the molecule. + int molecule_size = get_array_size_of_molecule(mol); + for (int i = 0; i < molecule_size; i++) { + AtomBlockId atom_blk_id = mol->atom_block_ids[i]; + if (atom_blk_id) { + revert_place_atom_block(atom_blk_id, cluster.router_data, prepacker_, atom_cluster_); + } + } + } + cluster.molecules.clear(); + // Free the rest of the cluster data. + // Casting things to nullptr for safety just in case someone is trying to use it. + free_pb(cluster.pb); + delete cluster.pb; + cluster.pb = nullptr; + free_router_data(cluster.router_data); + cluster.router_data = nullptr; + cluster.pr = PartitionRegion(); + + // Mark the cluster as invalid. + legalization_cluster_ids_[cluster_id] = LegalizationClusterId::INVALID(); +} + +void ClusterLegalizer::compress() { + // Create a map from the old ids to the new (compressed) one. + vtr::vector_map cluster_id_map; + cluster_id_map = compress_ids(legalization_cluster_ids_); + // Update all cluster values. 
+ legalization_cluster_ids_ = clean_and_reorder_ids(cluster_id_map); + legalization_clusters_ = clean_and_reorder_values(legalization_clusters_, cluster_id_map); + // Update the reverse lookups. + for (auto& it : molecule_cluster_) { + if (!it.second.is_valid()) + continue; + molecule_cluster_[it.first] = cluster_id_map[it.second]; + } + for (size_t i = 0; i < atom_cluster_.size(); i++) { + AtomBlockId atom_blk_id = AtomBlockId(i); + LegalizationClusterId old_cluster_id = atom_cluster_[atom_blk_id]; + if (!old_cluster_id.is_valid()) + continue; + atom_cluster_[atom_blk_id] = cluster_id_map[old_cluster_id]; + } + // Shrink everything to fit + legalization_cluster_ids_.shrink_to_fit(); + legalization_clusters_.shrink_to_fit(); + atom_cluster_.shrink_to_fit(); +} + +void ClusterLegalizer::clean_cluster(LegalizationClusterId cluster_id) { + // Safety asserts to make sure the inputs are valid. + VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + // Get the cluster. + LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + VTR_ASSERT(cluster.router_data != nullptr && "Should not clean an already cleaned cluster!"); + // Free the pb stats. + free_pb_stats_recursive(cluster.pb); + // Load the pb_route so we can free the cluster router data. + // The pb_route is used when creating a netlist from the legalized clusters. + std::vector* saved_lb_nets = cluster.router_data->saved_lb_nets; + t_pb_graph_node* pb_graph_node = cluster.pb->pb_graph_node; + cluster.pb->pb_route = alloc_and_load_pb_route(saved_lb_nets, pb_graph_node); + // Free the router data. + free_router_data(cluster.router_data); + cluster.router_data = nullptr; +} + +// TODO: This is fine for the current implementation of the legalizer. But if +// more complex strategies are added, this will need to be updated to +// check more than just routing (such as PR and NoC groups). 
+bool ClusterLegalizer::check_cluster_legality(LegalizationClusterId cluster_id) { + // Safety asserts to make sure the inputs are valid. + VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + // To check if a cluster is fully legal, try to perform an intra logic block + // route on the cluster. If it succeeds, the cluster is fully legal. + t_mode_selection_status mode_status; + LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + // NOTE(review): router_data is not null-checked here although clean_cluster() + // sets it to nullptr; presumably callers must not pass a cleaned cluster -- confirm. + return try_intra_lb_route(cluster.router_data, log_verbosity_, &mode_status); +} + +ClusterLegalizer::ClusterLegalizer(const AtomNetlist& atom_netlist, + const Prepacker& prepacker, + const std::vector& logical_block_types, + std::vector* lb_type_rr_graphs, + size_t num_models, + const std::vector& target_external_pin_util_str, + const t_pack_high_fanout_thresholds& high_fanout_thresholds, + ClusterLegalizationStrategy cluster_legalization_strategy, + bool enable_pin_feasibility_filter, + int feasible_block_array_size, + int log_verbosity) : prepacker_(prepacker) { + // Verify that the inputs are valid. + VTR_ASSERT_SAFE(lb_type_rr_graphs != nullptr); + + // Resize the atom_cluster lookup to make the accesses much cheaper. + atom_cluster_.resize(atom_netlist.blocks().size(), LegalizationClusterId::INVALID()); + // Allocate the cluster_placement_stats + cluster_placement_stats_ = alloc_and_load_cluster_placement_stats(); + // Pre-compute the max size of any molecule. + max_molecule_size_ = prepacker.get_max_molecule_size(); + // Calculate the max cluster size + // - Limit maximum number of elements for each cluster to MAX_SHORT + max_cluster_size_ = calc_max_cluster_size(logical_block_types); + VTR_ASSERT(max_cluster_size_ < MAX_SHORT); + // Store the caller-provided pointer to the rr graphs. + lb_type_rr_graphs_ = lb_type_rr_graphs; + // Get the number of models in the architecture. + num_models_ = num_models; + // Find all NoC router atoms.
+ std::vector noc_atoms = find_noc_router_atoms(atom_netlist); + update_noc_reachability_partitions(noc_atoms, + atom_netlist, + high_fanout_thresholds, + atom_noc_grp_id_); + // Copy the options passed by the user + cluster_legalization_strategy_ = cluster_legalization_strategy; + enable_pin_feasibility_filter_ = enable_pin_feasibility_filter; + feasible_block_array_size_ = feasible_block_array_size; + log_verbosity_ = log_verbosity; + // Get the target external pin utilization + // NOTE: This has to be initialized last due to the fact that VPR_FATAL_ERROR + // may be called within the constructor of t_ext_pin_util_targets. If + // this occurs, an exception is thrown which will unwind the stack. If + // the cluster legalizer object is stored on the stack, this can call + // the destructor prematurely (before certain structures are allocated). + // Therefore, this is created at the end, when the class is in a state + // where it can be destroyed. + // NOTE(review): C++ does not run an object's own destructor when its + // constructor exits via an exception -- confirm which destructor is meant. + target_external_pin_util_ = t_ext_pin_util_targets(target_external_pin_util_str); +} + +void ClusterLegalizer::reset() { + // Destroy all of the clusters and compress. + for (LegalizationClusterId cluster_id : legalization_cluster_ids_) { + if (!cluster_id.is_valid()) + continue; + destroy_cluster(cluster_id); + } + compress(); + // Reset the molecule_cluster map + molecule_cluster_.clear(); + // Reset the cluster placement stats.
+ free_cluster_placement_stats(cluster_placement_stats_); + cluster_placement_stats_ = alloc_and_load_cluster_placement_stats(); +} + +void ClusterLegalizer::verify() { + std::unordered_set atoms_checked; + auto& atom_ctx = g_vpr_ctx.atom(); + + if (clusters().size() == 0) { + VTR_LOG_WARN("Packing produced no clustered blocks"); + } + + /* + * Check that each atom block connects to one physical primitive and that the primitive links up to the parent clb + */ + for (auto blk_id : atom_ctx.nlist.blocks()) { + //Each atom should be part of a pb + const t_pb* atom_pb = atom_ctx.lookup.atom_pb(blk_id); + if (!atom_pb) { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "Atom block %s is not mapped to a pb\n", + atom_ctx.nlist.block_name(blk_id).c_str()); + } + + //Check the reverse mapping is consistent + if (atom_ctx.lookup.pb_atom(atom_pb) != blk_id) { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "pb %s does not contain atom block %s but atom block %s maps to pb.\n", + atom_pb->name, + atom_ctx.nlist.block_name(blk_id).c_str(), + atom_ctx.nlist.block_name(blk_id).c_str()); + } + + VTR_ASSERT(atom_ctx.nlist.block_name(blk_id) == atom_pb->name); + + const t_pb* cur_pb = atom_pb; + while (cur_pb->parent_pb) { + cur_pb = cur_pb->parent_pb; + VTR_ASSERT(cur_pb->name); + } + + LegalizationClusterId cluster_id = get_atom_cluster(blk_id); + if (cluster_id == LegalizationClusterId::INVALID()) { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "Atom %s is not mapped to a CLB\n", + atom_ctx.nlist.block_name(blk_id).c_str()); + } + + if (cur_pb != get_cluster_pb(cluster_id)) { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "CLB %s does not match CLB contained by pb %s.\n", + cur_pb->name, atom_pb->name); + } + } + + /* Check that I do not have spurious links in children pbs */ + for (LegalizationClusterId cluster_id : clusters()) { + if (!cluster_id.is_valid()) + continue; + check_cluster_atom_blocks(get_cluster_pb(cluster_id), + atoms_checked); + } + + for (auto blk_id : atom_ctx.nlist.blocks()) { + if 
(!atoms_checked.count(blk_id)) { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "Atom block %s not found in any cluster.\n", + atom_ctx.nlist.block_name(blk_id).c_str()); + } + } +} + +void ClusterLegalizer::finalize() { + for (LegalizationClusterId cluster_id : legalization_cluster_ids_) { + if (!cluster_id.is_valid()) + continue; + // If the cluster has not already been cleaned, clean it. This will + // generate the pb_route necessary for generating a clustered netlist. + const LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + if (cluster.router_data != nullptr) + clean_cluster(cluster_id); + } +} + +ClusterLegalizer::~ClusterLegalizer() { + // Destroy all clusters (no need to compress). + for (LegalizationClusterId cluster_id : legalization_cluster_ids_) { + if (!cluster_id.is_valid()) + continue; + destroy_cluster(cluster_id); + } + // Free the cluster_placement_stats + free_cluster_placement_stats(cluster_placement_stats_); +} + diff --git a/vpr/src/pack/cluster_legalizer.h b/vpr/src/pack/cluster_legalizer.h new file mode 100644 index 00000000000..e3aee27be57 --- /dev/null +++ b/vpr/src/pack/cluster_legalizer.h @@ -0,0 +1,559 @@ +/** + * @file + * @author Alex Singer + * @date September 2024 + * @brief The declaration of the Cluster Legalizer class. + * + * This file declares a class called the ClusterLegalizer which encapsulates all + * logic for creating legal clusters from prepacked molecules. This class is + * designed to be self-contained to the point that it is able to be called + * externally to the Packer in VPR. + */ + +#pragma once + +#include +#include +#include "atom_netlist_fwd.h" +#include "noc_data_types.h" +#include "partition_region.h" +#include "vpr_types.h" +#include "vtr_range.h" +#include "vtr_strong_id.h" +#include "vtr_vector.h" +#include "vtr_vector_map.h" + +class Prepacker; +class t_pb_graph_node; +struct t_lb_router_data; + +// A special ID to identify the legalization clusters. 
This is separate from the +// ClusterBlockId since this legalizer is not necessarily tied to the Clustered +// netlist, but is used as a sub-routine to it. +struct legalization_cluster_id_tag; +typedef vtr::StrongId LegalizationClusterId; + +/// @brief The different legalization strategies the cluster legalizer can perform. +/// +/// Allows the user of the API to select how thorough the legalizer should be +/// when adding molecules into clusters. +enum class ClusterLegalizationStrategy { + FULL, // Run the full legalizer (including intra-lb routing) + SKIP_INTRA_LB_ROUTE // Do all legality checks except intra-lb routing +}; + +/// @brief The status of the cluster legalization. +enum class e_block_pack_status { + BLK_PASSED, // Passed legalization. + BLK_FAILED_FEASIBLE, // Failed due to block not feasibly being able to go in the cluster. + BLK_FAILED_ROUTE, // Failed due to intra-lb routing failure. + BLK_FAILED_FLOORPLANNING, // Failed due to not being compatible with the cluster's current PartitionRegion. + BLK_FAILED_NOC_GROUP, // Failed due to not being compatible with the cluster's NoC group. + BLK_STATUS_UNDEFINED // Undefined status. Something went wrong. +}; + +/* + * @brief A struct containing information about the cluster. + * + * This contains necessary information for legalizing a cluster. + */ +struct LegalizationCluster { + /// @brief A list of the molecules in the cluster. By design, a cluster will + /// only contain molecules which have been previously legalized into + /// the cluster using a legalization strategy. + std::vector molecules; + + /// @brief The logical block of this cluster. + /// TODO: We should be more careful with how this is allocated. Instead of + /// pointers, we really should use IDs and store them in a standard + /// container. 
Currently this is being allocated with the new keyword + /// and freed when the cluster is destroyed; however this is used + /// externally to the class and it can be dangerous to pass around + /// a pointer to this object. + t_pb* pb; + + /// @brief The logical block type this cluster represents. + t_logical_block_type_ptr type; + + /// @brief The partition region of legal positions this cluster can be placed. + /// Used to detect if a molecule can physically be placed in a cluster. + /// It is derived from the partition region constraints on the atoms + /// in the cluster (not fundamental but good for performance). + PartitionRegion pr; + + /// @brief The NoC group that this cluster is a part of. Is used to check if + /// a candidate primitive is in the same NoC group as the atom blocks + /// that have already been added to the primitive. This can be helpful + /// for optimization. + NocGroupId noc_grp_id; + + /// @brief The router data of the intra lb router used for this cluster. + /// Contains information about the atoms in the cluster and how they + /// can be routed within. + t_lb_router_data* router_data; +}; + +/* + * @brief A manager class which manages the legalization of clusters. As clusters + * are created, this class will legalize for each molecule added. It also + * provides methods which are helpful for clustering. + * + * Usage: + * The ClusterLegalizer class maintains the clusters within itself since the + * legalization of a cluster depends on the molecules which have already been + * inserted into the clusters prior. + * + * The class provides different legalization strategies the user may use to + * legalize: + * 1) SKIP_INTRA_LB_ROUTE + * 2) FULL + * + * 1) SKIP_INTRA_LB_ROUTE Legalization Strategy Example: + * This strategy will not fully route the internal connections of the clusters + * until the user requests it. An example of how to use this strategy would + * look something like this.
Note, this example is simplified and the result + * of the packings should be checked and handled. + * + * ClusterLegalizer legalizer(..., + * ClusterLegalizationStrategy::SKIP_INTRA_LB_ROUTE, + * ...); + * + * std::tie(status, new_cluster_id) = legalizer.start_new_cluster(seed_mol, + * cluster_type, + * mode); + * for mol in molecules_to_add: + * // Cheaper additions, but may pack a molecule that wouldn't route. + * status = legalizer.add_mol_to_cluster(mol, new_cluster_id); + * if (status != e_block_pack_status::BLK_PASSED) + * break; + * + * // Do the expensive check once all molecules are in. + * if (!legalizer.check_cluster_legality(new_cluster_id)) + * // Destroy the illegal cluster. + * legalizer.destroy_cluster(new_cluster_id); + * // Clean-up the internal bookkeeping of the class (required after + * // destroying a cluster). + * legalizer.compress(); + * // Handle how to try again (maybe use FULL strategy). + * + * 2) FULL Legalization Strategy Example: + * This strategy will fully route the internal connections of the clusters for + * each molecule added. This is much more expensive to run; however, it will ensure + * that the cluster is fully legalized while it is being created. An example + * of how to use this strategy would look something like this: + * + * ClusterLegalizer legalizer(..., + * ClusterLegalizationStrategy::FULL, + * ...); + * + * std::tie(pack_result, new_cluster_id) = legalizer.start_new_cluster(seed_mol, + * cluster_type, + * mode); + * for mol in molecules_to_add: + * // Do the expensive check for each molecule added. + * status = legalizer.add_mol_to_cluster(mol, new_cluster_id); + * if (status != e_block_pack_status::BLK_PASSED) + * break; + * + * // new_cluster_id now contains a fully legalized cluster.
+ */ +class ClusterLegalizer { +public: + // Iterator for the legalization cluster IDs + typedef typename vtr::vector_map::const_iterator cluster_iterator; + + // Range for the legalization cluster IDs + typedef typename vtr::Range cluster_range; + +private: + + /* + * @brief Helper method that tries to pack the given molecule into a cluster. + * + * This method runs all the legality checks specified by the legalization + * strategy. If the molecule can be packed into the cluster, it will insert + * it into the cluster. + * + * @param molecule The molecule to insert into the cluster. + * @param cluster The cluster to try to insert the molecule into. + * @param cluster_id The ID of the cluster. + * @param max_external_pin_util The max external pin utilization for a + * cluster of this type. + */ + e_block_pack_status try_pack_molecule(t_pack_molecule* molecule, + LegalizationCluster& cluster, + LegalizationClusterId cluster_id, + const t_ext_pin_util& max_external_pin_util); + +public: + + // Explicitly deleted default constructor. Need to use other constructor to + // initialize state correctly. + ClusterLegalizer() = delete; + + /* + * @brief Initialize the ClusterLegalizer class. + * + * Allocates internal state. + * + * @param atom_netlist The complete atom netlist. Used to allocate + * internal structures to the correct size. + * @param prepacker The prepacker object used to prepack the atoms + * into molecules. A reference to this object is + * stored internally to be used to lookup the + * molecules of atoms. + * @param logical_block_types Used to allocate internal objects. Used to + * get the max number of primitives in any block + * type. + * @param lb_type_rr_graphs The routing resource graph internal to the + * different cluster types. A reference is stored + * in the class to be used to allocate and load + * the router data. + * @param num_models The total number of models in the architecture. 
+ * This is the sum of the number of the user and + * library models. Used internally to allocate data + * structures. + * @param target_external_pin_util_str A string used to initialize the + * target external pin utilization of + * each cluster type. + * @param high_fanout_thresholds An object that stores the thresholds for + * a net to be considered high fanout for + * different block types. + * @param cluster_legalization_strategy The legalization strategy to be + * used when creating clusters and + * adding molecules to clusters. + * Controls the checks that are performed. + * @param enable_pin_feasibility_filter A flag to turn on/off the check for + * pin usage feasibility. + * @param feasible_block_array_size The largest number of feasible blocks + * that can be stored in a cluster. Used + * to allocate an internal structure. + * @param log_verbosity Controls how verbose the log messages will + * be within this class. + * + * TODO: A lot of these arguments are only used to allocate C-style arrays + * since the original author was avoiding dynamic allocations. It may + * be more space efficient (and cleaner) to make these dynamic arrays + * and not pass these arguments in. + */ + ClusterLegalizer(const AtomNetlist& atom_netlist, + const Prepacker& prepacker, + const std::vector& logical_block_types, + std::vector* lb_type_rr_graphs, + size_t num_models, + const std::vector& target_external_pin_util_str, + const t_pack_high_fanout_thresholds& high_fanout_thresholds, + ClusterLegalizationStrategy cluster_legalization_strategy, + bool enable_pin_feasibility_filter, + int feasible_block_array_size, + int log_verbosity); + + // This class allocates and deallocates memory within. This class should not + // be copied or moved to prevent it from double freeing / losing pointers. 
+ ClusterLegalizer(const ClusterLegalizer&) = delete; + ClusterLegalizer& operator=(const ClusterLegalizer&) = delete; + + /* + * @brief Start a new legalization cluster with the given molecule. + * + * @param molecule The seed molecule used to start the new cluster. + * @param cluster_type The type of the cluster to start. + * @param cluster_mode The mode of the new cluster for the given type. + * + * @return A pair for the status of the packing and the ID of the new + * cluster. If the new cluster could not be created, the pack + * status will return the reason and the ID would be invalid. + */ + std::tuple + start_new_cluster(t_pack_molecule* molecule, + t_logical_block_type_ptr cluster_type, + int cluster_mode); + + /* + * @brief Add an unclustered molecule to the given legalization cluster. + * + * The ClusterLegalizationStrategy (set either in the constructor or by the + * set_legalization_strategy method) decides what checks are + * performed when adding a molecule to the cluster. + * + * If the addition was unsuccessful (i.e. a check fails), the molecule will + * remain unclustered. + * + * @param molecule The molecule to add to the cluster. + * @param cluster_id The ID of the cluster to add the molecule to. + * + * @return The status of the pack (if the addition was successful and + * if not why). + */ + e_block_pack_status add_mol_to_cluster(t_pack_molecule* molecule, + LegalizationClusterId cluster_id); + + /* + * @brief Destroy the given cluster. + * + * This unclusters all molecules in the cluster so they can be re-clustered + * into different clusters. Should call the compress() method after destroying + * one or more clusters. + * + * @param cluster_id The ID of the cluster to destroy. + */ + void destroy_cluster(LegalizationClusterId cluster_id); + + /* + * @brief Compress the internal storage of clusters. Should be called + * eventually after one or more clusters are destroyed. + * + * Similar to the Netlist compress method.
Will invalidate all Legalization + * Cluster IDs. + * + * This method can be quite expensive, so it is a good idea to batch many + * cluster destructions and then compress at the end. + */ + void compress(); + + /* + * @brief A range of all cluster IDs in the legalizer. + * + * If the legalizer has been compressed (or no clusters have been destroyed) + * then all cluster IDs in this list will be valid and represent a non-empty + * legalization cluster. + */ + cluster_range clusters() const { + return vtr::make_range(legalization_cluster_ids_.begin(), legalization_cluster_ids_.end()); + } + + /* + * @brief Check that the given cluster is fully legal. + * + * This method runs an intra_lb_route on the given cluster. This ignores + * the cluster legalization strategy set by the user. This method will not + * correct the problematic molecules, it will only return true if the + * cluster is legal and false if it is not. + * + * @param cluster_id The ID of the cluster to fully legalize. + * + * @return True if the cluster is legal, false otherwise. + */ + bool check_cluster_legality(LegalizationClusterId cluster_id); + + /* + * @brief Cleans the cluster of unnecessary data, reducing the memory footprint. + * + * After this function is called, no more molecules can be added to the + * cluster. This method will ensure that the cluster has enough information + * to generate a clustered netlist from the legalized clusters. + * + * Specifically, this frees the pb stats (which is used by the clusterer + * to compute the gain) and the router data of the cluster. + * + * TODO: The pb stats should really not be calculated or stored in the + * cluster legalizer. + * + * @param cluster_id The ID of the cluster to clean. + */ + void clean_cluster(LegalizationClusterId cluster_id); + + /* + * @brief Verify that all atoms have been clustered into some cluster. + * + * This will not verify if all the clusters are fully legal.
+ */ + void verify(); + + /* + * @brief Finalize the clustering. Required for generating a Clustered + * Netlist. + * + * Before generating a Clustered Netlist, each cluster needs to allocate and + * load a pb_route. This method will generate a pb_route for each cluster + * and store it into the clusters' pb. + */ + void finalize(); + + /* + * @brief Resets the legalizer to its initial state. + * + * Destroys all clusters and resets the cluster placement stats. + */ + void reset(); + + /// @brief Gets the top-level pb of the given cluster. + inline t_pb* get_cluster_pb(LegalizationClusterId cluster_id) const { + VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + const LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + return cluster.pb; + } + + /// @brief Gets the logical block type of the given cluster. + inline t_logical_block_type_ptr get_cluster_type(LegalizationClusterId cluster_id) const { + VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + const LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + return cluster.type; + } + + /// @brief Gets the current partition region (the intersection of all + /// contained atoms) of the given cluster. + inline const PartitionRegion& get_cluster_pr(LegalizationClusterId cluster_id) const { + VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + const LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + return cluster.pr; + } + + /// @brief Gets the ID of the cluster that contains the given atom block. + inline LegalizationClusterId get_atom_cluster(AtomBlockId blk_id) const { + VTR_ASSERT_SAFE(blk_id.is_valid() && (size_t)blk_id < atom_cluster_.size()); + return atom_cluster_[blk_id]; + } + + /// @brief Gets the cluster placement stats of the given cluster. 
+ /// + /// The cluster placement stats are statistics used to monitor which atoms + /// have been physically clustered into the pb (more specifically what site + /// they will go). This can be used externally to the legalizer to detect + /// if an atom could physically go into a cluster (exists_free_primitive_for_atom_block). + /// + /// TODO: Releasing the whole stats can be dangerous. Ideally there should + /// just be a method to see if an atom could physically go in a cluster. + inline t_cluster_placement_stats* get_cluster_placement_stats(LegalizationClusterId cluster_id) const { + VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + return &(cluster_placement_stats_[get_cluster_type(cluster_id)->index]); + } + + /// @brief Returns true if the given atom block has been packed into a + /// cluster, false otherwise. + inline bool is_atom_clustered(AtomBlockId blk_id) const { + // Simply, if the atom is not in an invalid cluster, it has been clustered. + return get_atom_cluster(blk_id) != LegalizationClusterId::INVALID(); + } + + /// @brief Returns a reference to the target_external_pin_util object. This + /// allows the user to modify the external pin utilization if needed. + inline t_ext_pin_util_targets& get_target_external_pin_util() { + return target_external_pin_util_; + } + + /// @brief Gets the max size a cluster could physically be. + /// + /// This is the maximum number of primitives any cluster could ever have + /// in the architecture. + inline size_t get_max_cluster_size() const { + return max_cluster_size_; + } + + /* + * @brief Set the legalization strategy of the cluster legalizer. + * + * This allows the strategy of the cluster legalizer to change based on the + * needs of the user. For example, one can set the legalizer to use a more + * relaxed strategy to insert a batch of molecules in cheaply, saving the + * full legalization for the end (using check_cluster_legality).
+ * + * @param strategy The strategy to set the cluster legalizer to. + */ + inline void set_legalization_strategy(ClusterLegalizationStrategy strategy) { + cluster_legalization_strategy_ = strategy; + } + + /* + * @brief Set how verbose the log messages should be for the cluster legalizer. + * + * This allows the user to set the verbosity at different points for easier + * usability. + * + * Set the verbosity to 4 to see most of the log messages on how the + * molecules move through the legalizer. + * Set the verbosity to 5 to see all the log messages in the legalizer. + * + * @param verbosity The value to set the verbosity to. + */ + inline void set_log_verbosity(int verbosity) { + log_verbosity_ = verbosity; + } + + /// @brief Destructor of the class. Frees allocated data. + ~ClusterLegalizer(); + +private: + /// @brief A vector of the legalization cluster IDs. If any of them are + /// invalid, then that means that the cluster has been destroyed. + vtr::vector_map legalization_cluster_ids_; + + /// @brief Lookup table for which cluster each molecule is in. + std::unordered_map molecule_cluster_; + + /// @brief List of all legalization clusters. + vtr::vector_map legalization_clusters_; + + /// @brief A lookup-table for which cluster the given atom is packed into. + vtr::vector_map atom_cluster_; + + /// @brief Stores the NoC group ID of each atom block. Atom blocks that + /// belong to different NoC groups can't be clustered with each other + /// into the same clustered block. Under some optimization settings + /// to improve placement locality / NoC usage. Atoms with different + /// NoC group IDs belong to logic that is disjoint except through + /// NoC traffic. + vtr::vector atom_noc_grp_id_; + + /// @brief Stats keeper for placement information during packing/clustering. + /// TODO: This should be a vector. + /// FIXME: This keeps the stats for each cluster type. 
This is fine within + /// the clusterer, however it yields a limitation where two clusters + /// of the same type cannot be constructed at the same time. This + /// should be stored per cluster. + t_cluster_placement_stats* cluster_placement_stats_ = nullptr; + + /// @brief The maximum fractional utilization of cluster external + /// input/output pins during packing (between 0 and 1). + t_ext_pin_util_targets target_external_pin_util_; + + /// @brief The max size of any molecule. This is used to allocate a dynamic + /// array within the legalizer, and in its current form this is a bit + /// expensive to calculate from the prepacker. + size_t max_molecule_size_; + + /// @brief The max number of primitives a cluster could physically have. + /// This is used to allocate dynamic arrays. + size_t max_cluster_size_; + + /// @brief A vector of routing resource nodes within each logical block type + /// [0 .. num_logical_block_types-1] + /// TODO: This really should not be a pointer to a vector... I think this is + /// meant to be a vector of vectors... + std::vector* lb_type_rr_graphs_ = nullptr; + + /// @brief The total number of models (user + library) in the architecture. + /// Used to allocate space in dynamic data structures. + size_t num_models_; + + /// @brief The current legalization strategy of the cluster legalizer. + ClusterLegalizationStrategy cluster_legalization_strategy_; + + /// @brief Controls whether the pin counting feasibility filter is used + /// during clustering. When enabled the clustering engine counts the + /// number of available pins in groups/classes of mutually connected + /// pins within a cluster. These counts are used to quickly filter + /// out candidate primitives/atoms/molecules for which the cluster + /// has insufficient pins to route (without performing a full + /// routing). This reduces packing run-time. This matches the packer + /// option of the same name.
+ bool enable_pin_feasibility_filter_; + + /// @brief The max size of the priority queue for candidates that pass the + /// early filter legality test but not the more detailed routing + /// filter. This matches the packer option of the same name. + int feasible_block_array_size_; + + /// @brief Used to set the verbosity of log messages in the legalizer. Used + /// for debugging. When log_verbosity > 3, the legalizer will print + /// messages when a molecule is successful during legalization. When + /// log_verbosity is > 4, the legalizer will print when a molecule + /// fails a legality check. This parameter is also passed into the + /// intra-lb router. + int log_verbosity_; + + /// @brief The prepacker object that stores the molecules which will be + /// legalized into clusters. + const Prepacker& prepacker_; +}; + diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index 8fd0bcfa56f..39940410b40 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -1,11 +1,13 @@ #include "cluster_util.h" #include +#include #include "PreClusterTimingGraphResolver.h" #include "PreClusterDelayCalculator.h" #include "atom_netlist.h" -#include "cluster_router.h" +#include "cluster_legalizer.h" #include "cluster_placement.h" +#include "clustered_netlist.h" #include "concrete_timing_info.h" #include "output_clustering.h" #include "prepack.h" @@ -19,48 +21,8 @@ /* Global variables in clustering */ /**********************************/ -/* TODO: May want to check that all atom blocks are actually reached */ -static void check_cluster_atom_blocks(t_pb* pb, std::unordered_set& blocks_checked) { - int i, j; - const t_pb_type* pb_type; - bool has_child = false; - auto& atom_ctx = g_vpr_ctx.atom(); - - pb_type = pb->pb_graph_node->pb_type; - if (pb_type->num_modes == 0) { - /* primitive */ - auto blk_id = atom_ctx.lookup.pb_atom(pb); - if (blk_id) { - if (blocks_checked.count(blk_id)) { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "pb %s contains 
atom block %s but atom block is already contained in another pb.\n", - pb->name, atom_ctx.nlist.block_name(blk_id).c_str()); - } - blocks_checked.insert(blk_id); - if (pb != atom_ctx.lookup.atom_pb(blk_id)) { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "pb %s contains atom block %s but atom block does not link to pb.\n", - pb->name, atom_ctx.nlist.block_name(blk_id).c_str()); - } - } - } else { - /* this is a container pb, all container pbs must contain children */ - for (i = 0; i < pb_type->modes[pb->mode].num_pb_type_children; i++) { - for (j = 0; j < pb_type->modes[pb->mode].pb_type_children[i].num_pb; j++) { - if (pb->child_pbs[i] != nullptr) { - if (pb->child_pbs[i][j].name != nullptr) { - has_child = true; - check_cluster_atom_blocks(&pb->child_pbs[i][j], blocks_checked); - } - } - } - } - VTR_ASSERT(has_child); - } -} - /*Print the contents of each cluster to an echo file*/ -static void echo_clusters(char* filename) { +static void echo_clusters(char* filename, const ClusterLegalizer& cluster_legalizer) { FILE* fp; fp = vtr::fopen(filename, "w"); @@ -70,22 +32,21 @@ static void echo_clusters(char* filename) { fprintf(fp, "\n"); auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - std::map> cluster_atoms; + std::map> cluster_atoms; - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - cluster_atoms.insert({blk_id, std::vector()}); + for (LegalizationClusterId cluster_id : cluster_legalizer.clusters()) { + cluster_atoms.insert({cluster_id, std::vector()}); } for (auto atom_blk_id : atom_ctx.nlist.blocks()) { - ClusterBlockId clb_index = atom_ctx.lookup.atom_clb(atom_blk_id); + LegalizationClusterId cluster_id = cluster_legalizer.get_atom_cluster(atom_blk_id); - cluster_atoms[clb_index].push_back(atom_blk_id); + cluster_atoms[cluster_id].push_back(atom_blk_id); } for (auto& cluster_atom : cluster_atoms) { - const std::string& cluster_name = cluster_ctx.clb_nlist.block_name(cluster_atom.first); + const std::string& cluster_name = 
cluster_legalizer.get_cluster_pb(cluster_atom.first)->name; fprintf(fp, "Cluster %s Id: %zu \n", cluster_name.c_str(), size_t(cluster_atom.first)); fprintf(fp, "\tAtoms in cluster: \n"); @@ -98,12 +59,11 @@ static void echo_clusters(char* filename) { } fprintf(fp, "\nCluster Floorplanning Constraints:\n"); - const auto& floorplanning_ctx = g_vpr_ctx.floorplanning(); - for (ClusterBlockId clb_id : cluster_ctx.clb_nlist.blocks()) { - const std::vector& regions = floorplanning_ctx.cluster_constraints[clb_id].get_regions(); + for (LegalizationClusterId cluster_id : cluster_legalizer.clusters()) { + const std::vector& regions = cluster_legalizer.get_cluster_pr(cluster_id).get_regions(); if (!regions.empty()) { - fprintf(fp, "\nRegions in Cluster %zu:\n", size_t(clb_id)); + fprintf(fp, "\nRegions in Cluster %zu:\n", size_t(cluster_id)); for (const auto& region : regions) { print_region(fp, region); } @@ -113,81 +73,13 @@ static void echo_clusters(char* filename) { fclose(fp); } -/* TODO: Add more error checking! 
*/ -void check_clustering() { - std::unordered_set atoms_checked; - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - - if (cluster_ctx.clb_nlist.blocks().size() == 0) { - VTR_LOG_WARN("Packing produced no clustered blocks"); - } - - /* - * Check that each atom block connects to one physical primitive and that the primitive links up to the parent clb - */ - for (auto blk_id : atom_ctx.nlist.blocks()) { - //Each atom should be part of a pb - const t_pb* atom_pb = atom_ctx.lookup.atom_pb(blk_id); - if (!atom_pb) { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "Atom block %s is not mapped to a pb\n", - atom_ctx.nlist.block_name(blk_id).c_str()); - } - - //Check the reverse mapping is consistent - if (atom_ctx.lookup.pb_atom(atom_pb) != blk_id) { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "pb %s does not contain atom block %s but atom block %s maps to pb.\n", - atom_pb->name, - atom_ctx.nlist.block_name(blk_id).c_str(), - atom_ctx.nlist.block_name(blk_id).c_str()); - } - - VTR_ASSERT(atom_ctx.nlist.block_name(blk_id) == atom_pb->name); - - const t_pb* cur_pb = atom_pb; - while (cur_pb->parent_pb) { - cur_pb = cur_pb->parent_pb; - VTR_ASSERT(cur_pb->name); - } - - ClusterBlockId clb_index = atom_ctx.lookup.atom_clb(blk_id); - if (clb_index == ClusterBlockId::INVALID()) { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "Atom %s is not mapped to a CLB\n", - atom_ctx.nlist.block_name(blk_id).c_str()); - } - - if (cur_pb != cluster_ctx.clb_nlist.block_pb(clb_index)) { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "CLB %s does not match CLB contained by pb %s.\n", - cur_pb->name, atom_pb->name); - } - } - - /* Check that I do not have spurious links in children pbs */ - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - check_cluster_atom_blocks(cluster_ctx.clb_nlist.block_pb(blk_id), atoms_checked); - } - - for (auto blk_id : atom_ctx.nlist.blocks()) { - if (!atoms_checked.count(blk_id)) { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "Atom block %s not found in any cluster.\n", - 
atom_ctx.nlist.block_name(blk_id).c_str()); - } - } -} - -//calculate the initial timing at the start of packing stage void calc_init_packing_timing(const t_packer_opts& packer_opts, const t_analysis_opts& analysis_opts, const Prepacker& prepacker, std::shared_ptr& clustering_delay_calc, std::shared_ptr& timing_info, vtr::vector& atom_criticality) { - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); /* * Initialize the timing analyzer @@ -233,94 +125,35 @@ void calc_init_packing_timing(const t_packer_opts& packer_opts, } } -//Free the clustering data structures void free_clustering_data(const t_packer_opts& packer_opts, t_clustering_data& clustering_data) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) - free_intra_lb_nets(clustering_data.intra_lb_routing[blk_id]); - - clustering_data.intra_lb_routing.clear(); if (packer_opts.hill_climbing_flag) delete[] clustering_data.hill_climbing_inputs_avail; - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) - cluster_ctx.clb_nlist.remove_block(blk_id); - - cluster_ctx.clb_nlist = ClusteredNetlist(); - delete[] clustering_data.unclustered_list_head; delete[] clustering_data.memory_pool; } -//check the clustering and output it -void check_and_output_clustering(const t_packer_opts& packer_opts, +void check_and_output_clustering(ClusterLegalizer& cluster_legalizer, + const t_packer_opts& packer_opts, const std::unordered_set& is_clock, - const t_arch* arch, - const int& num_clb, - const vtr::vector*>& intra_lb_routing) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - VTR_ASSERT(num_clb == (int)cluster_ctx.clb_nlist.blocks().size()); - check_clustering(); + const t_arch* arch) { + cluster_legalizer.verify(); if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_CLUSTERS)) { - echo_clusters(getEchoFileName(E_ECHO_CLUSTERS)); - } - - output_clustering(intra_lb_routing, packer_opts.global_clocks, is_clock, 
arch->architecture_id, packer_opts.output_file.c_str(), false); - - VTR_ASSERT(cluster_ctx.clb_nlist.blocks().size() == intra_lb_routing.size()); -} - -void get_max_cluster_size_and_pb_depth(int& max_cluster_size, - int& max_pb_depth) { - auto& device_ctx = g_vpr_ctx.mutable_device(); - int cur_cluster_size, cur_pb_depth; - - for (const auto& type : device_ctx.logical_block_types) { - if (is_empty_type(&type)) - continue; - - cur_cluster_size = get_max_primitives_in_pb_type(type.pb_type); - cur_pb_depth = get_max_depth_of_pb_type(type.pb_type); - if (cur_cluster_size > max_cluster_size) { - max_cluster_size = cur_cluster_size; - } - if (cur_pb_depth > max_pb_depth) { - max_pb_depth = cur_pb_depth; - } + echo_clusters(getEchoFileName(E_ECHO_CLUSTERS), cluster_legalizer); } -} -bool check_cluster_legality(const int& verbosity, - const int& detailed_routing_stage, - t_lb_router_data* router_data) { - bool is_cluster_legal; - - if (detailed_routing_stage == (int)E_DETAILED_ROUTE_AT_END_ONLY) { - /* is_mode_conflict does not affect this stage. It is needed when trying to route the packed clusters. - * - * It holds a flag that is used to verify whether try_intra_lb_route ended in a mode conflict issue. 
- * If the value is TRUE the cluster has to be repacked, and its internal pb_graph_nodes will have more restrict choices - * for what regards the mode that has to be selected - */ - t_mode_selection_status mode_status; - is_cluster_legal = try_intra_lb_route(router_data, verbosity, &mode_status); - if (is_cluster_legal) { - VTR_LOGV(verbosity > 2, "\tPassed route at end.\n"); - } else { - VTR_LOGV(verbosity > 0, "Failed route at end, repack cluster trying detailed routing at each stage.\n"); - } - } else { - is_cluster_legal = true; - } - return is_cluster_legal; + output_clustering(&cluster_legalizer, + packer_opts.global_clocks, + is_clock, + arch->architecture_id, + packer_opts.output_file.c_str(), + false, /*skip_clustering*/ + true /*from_legalizer*/); } -/*print the header for the clustering progress table*/ void print_pack_status_header() { VTR_LOG("Starting Clustering - Clustering Progress: \n"); VTR_LOG("------------------- -------------------------- ---------\n"); @@ -328,14 +161,14 @@ void print_pack_status_header() { VTR_LOG("------------------- -------------------------- ---------\n"); } -/*incrementally print progress updates during clustering*/ void print_pack_status(int num_clb, int tot_num_molecules, int num_molecules_processed, int& mols_since_last_print, int device_width, int device_height, - AttractionInfo& attraction_groups) { + AttractionInfo& attraction_groups, + const ClusterLegalizer& cluster_legalizer) { //Print a packing update each time another 4% of molecules have been packed. const float print_frequency = 0.04; @@ -361,18 +194,13 @@ void print_pack_status(int num_clb, fflush(stdout); mols_since_last_print = 0; if (attraction_groups.num_attraction_groups() > 0) { - rebuild_attraction_groups(attraction_groups); + rebuild_attraction_groups(attraction_groups, cluster_legalizer); } } } -/* - * Periodically rebuild the attraction groups to reflect which atoms in them - * are still available for new clusters (i.e. 
remove the atoms that have already - * been packed from the attraction group). - */ -void rebuild_attraction_groups(AttractionInfo& attraction_groups) { - auto& atom_ctx = g_vpr_ctx.atom(); +void rebuild_attraction_groups(AttractionInfo& attraction_groups, + const ClusterLegalizer& cluster_legalizer) { for (int igroup = 0; igroup < attraction_groups.num_attraction_groups(); igroup++) { AttractGroupId group_id(igroup); @@ -380,8 +208,7 @@ void rebuild_attraction_groups(AttractionInfo& attraction_groups) { AttractionGroup new_att_group_info; for (AtomBlockId atom : group.group_atoms) { - //If the ClusterBlockId is anything other than invalid, the atom has been packed already - if (atom_ctx.lookup.atom_clb(atom) == ClusterBlockId::INVALID()) { + if (!cluster_legalizer.is_atom_clustered(atom)) { new_att_group_info.group_atoms.push_back(atom); } } @@ -390,9 +217,8 @@ void rebuild_attraction_groups(AttractionInfo& attraction_groups) { } } -/* Determine if atom block is in pb */ bool is_atom_blk_in_pb(const AtomBlockId blk_id, const t_pb* pb) { - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); const t_pb* cur_pb = atom_ctx.lookup.atom_pb(blk_id); while (cur_pb) { @@ -404,9 +230,6 @@ bool is_atom_blk_in_pb(const AtomBlockId blk_id, const t_pb* pb) { return false; } -/* Remove blk from list of feasible blocks sorted according to gain - * Useful for removing blocks that are repeatedly failing. 
If a block - * has been found to be illegal, we don't repeatedly consider it.*/ void remove_molecule_from_pb_stats_candidates(t_pack_molecule* molecule, t_pb* pb) { int molecule_index; @@ -432,7 +255,6 @@ void remove_molecule_from_pb_stats_candidates(t_pack_molecule* molecule, pb->pb_stats->num_feasible_blocks--; } -/* Add blk to list of feasible blocks sorted according to gain */ void add_molecule_to_pb_stats_candidates(t_pack_molecule* molecule, std::map& gain, t_pb* pb, @@ -502,8 +324,6 @@ void add_molecule_to_pb_stats_candidates(t_pack_molecule* molecule, /*****************************************/ void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, - t_cluster_placement_stats** cluster_placement_stats, - t_pb_graph_node*** primitives_list, const Prepacker& prepacker, t_clustering_data& clustering_data, std::unordered_map& net_output_feeds_driving_block_input, @@ -560,160 +380,14 @@ void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, } } } - - /* alloc and load cluster placement info */ - *cluster_placement_stats = alloc_and_load_cluster_placement_stats(); - - /* alloc array that will store primitives that a molecule gets placed to, - * primitive_list is referenced by index, for example a atom block in index 2 of a molecule matches to a primitive in index 2 in primitive_list - * this array must be the size of the biggest molecule - */ - size_t max_molecule_size = prepacker.get_max_molecule_size(); - *primitives_list = new t_pb_graph_node*[max_molecule_size]; - for (size_t i = 0; i < max_molecule_size; i++) - (*primitives_list)[i] = nullptr; } /*****************************************/ -void free_pb_stats_recursive(t_pb* pb) { - int i, j; - /* Releases all the memory used by clustering data structures. 
*/ - if (pb) { - if (pb->pb_graph_node != nullptr) { - if (!pb->pb_graph_node->is_primitive()) { - for (i = 0; i < pb->pb_graph_node->pb_type->modes[pb->mode].num_pb_type_children; i++) { - for (j = 0; j < pb->pb_graph_node->pb_type->modes[pb->mode].pb_type_children[i].num_pb; j++) { - if (pb->child_pbs && pb->child_pbs[i]) { - free_pb_stats_recursive(&pb->child_pbs[i][j]); - } - } - } - } - } - free_pb_stats(pb); - } -} - -bool primitive_feasible(const AtomBlockId blk_id, t_pb* cur_pb) { - const t_pb_type* cur_pb_type = cur_pb->pb_graph_node->pb_type; - - VTR_ASSERT(cur_pb_type->num_modes == 0); /* primitive */ - - auto& atom_ctx = g_vpr_ctx.atom(); - AtomBlockId cur_pb_blk_id = atom_ctx.lookup.pb_atom(cur_pb); - if (cur_pb_blk_id && cur_pb_blk_id != blk_id) { - /* This pb already has a different logical block */ - return false; - } - - if (cur_pb_type->class_type == MEMORY_CLASS) { - /* Memory class has additional feasibility requirements: - * - all siblings must share all nets, including open nets, with the exception of data nets */ - - /* find sibling if one exists */ - AtomBlockId sibling_memory_blk_id = find_memory_sibling(cur_pb); - - if (sibling_memory_blk_id) { - //There is a sibling, see if the current block is feasible with it - bool sibling_feasible = primitive_memory_sibling_feasible(blk_id, cur_pb_type, sibling_memory_blk_id); - if (!sibling_feasible) { - return false; - } - } - } - - //Generic feasibility check - return primitive_type_feasible(blk_id, cur_pb_type); -} - -bool primitive_memory_sibling_feasible(const AtomBlockId blk_id, const t_pb_type* cur_pb_type, const AtomBlockId sibling_blk_id) { - /* Check that the two atom blocks blk_id and sibling_blk_id (which should both be memory slices) - * are feasible, in the sence that they have precicely the same net connections (with the - * exception of nets in data port classes). 
- * - * Note that this routine does not check pin feasibility against the cur_pb_type; so - * primitive_type_feasible() should also be called on blk_id before concluding it is feasible. - */ - auto& atom_ctx = g_vpr_ctx.atom(); - VTR_ASSERT(cur_pb_type->class_type == MEMORY_CLASS); - - //First, identify the 'data' ports by looking at the cur_pb_type - std::unordered_set data_ports; - for (int iport = 0; iport < cur_pb_type->num_ports; ++iport) { - const char* port_class = cur_pb_type->ports[iport].port_class; - if (port_class && strstr(port_class, "data") == port_class) { - //The port_class starts with "data", so it is a data port - - //Record the port - data_ports.insert(cur_pb_type->ports[iport].model_port); - } - } - - //Now verify that all nets (except those connected to data ports) are equivalent - //between blk_id and sibling_blk_id - - //Since the atom netlist stores only in-use ports, we iterate over the model to ensure - //all ports are compared - const t_model* model = cur_pb_type->model; - for (t_model_ports* port : {model->inputs, model->outputs}) { - for (; port; port = port->next) { - if (data_ports.count(port)) { - //Don't check data ports - continue; - } - - //Note: VPR doesn't support multi-driven nets, so all outputs - //should be data ports, otherwise the siblings will both be - //driving the output net - - //Get the ports from each primitive - auto blk_port_id = atom_ctx.nlist.find_atom_port(blk_id, port); - auto sib_port_id = atom_ctx.nlist.find_atom_port(sibling_blk_id, port); - - //Check that all nets (including unconnected nets) match - for (int ipin = 0; ipin < port->size; ++ipin) { - //The nets are initialized as invalid (i.e. 
disconnected) - AtomNetId blk_net_id; - AtomNetId sib_net_id; - - //We can get the actual net provided the port exists - // - //Note that if the port did not exist, the net is left - //as invalid/disconneced - if (blk_port_id) { - blk_net_id = atom_ctx.nlist.port_net(blk_port_id, ipin); - } - if (sib_port_id) { - sib_net_id = atom_ctx.nlist.port_net(sib_port_id, ipin); - } - - //The sibling and block must have the same (possibly disconnected) - //net on this pin - if (blk_net_id != sib_net_id) { - //Nets do not match, not feasible - return false; - } - } - } - } - - return true; -} -/*****************************************/ t_pack_molecule* get_molecule_by_num_ext_inputs(const int ext_inps, const enum e_removal_policy remove_flag, t_cluster_placement_stats* cluster_placement_stats_ptr, t_molecule_link* unclustered_list_head) { - /* This routine returns an atom block which has not been clustered, has * - * no connection to the current cluster, satisfies the cluster * - * clock constraints, is a valid subblock inside the cluster, does not exceed the cluster subblock units available, - * and has ext_inps external inputs. If * - * there is no such atom block it returns ClusterBlockId::INVALID(). Remove_flag * - * controls whether or not blocks that have already been clustered * - * are removed from the unclustered_list data structures. NB: * - * to get a atom block regardless of clock constraints just set clocks_ * - * avail > 0. */ t_molecule_link *ptr, *prev_ptr; int i; @@ -759,12 +433,7 @@ t_pack_molecule* get_free_molecule_with_most_ext_inputs_for_cluster(t_pb* cur_pb t_cluster_placement_stats* cluster_placement_stats_ptr, t_molecule_link* unclustered_list_head, const int& unclustered_list_head_size) { - /* This routine is used to find new blocks for clustering when there are no feasible * - * blocks with any attraction to the current cluster (i.e. it finds * - * blocks which are unconnected from the current cluster). 
It returns * - * the atom block with the largest number of used inputs that satisfies the * - * clocking and number of inputs constraints. If no suitable atom block is * - * found, the routine returns ClusterBlockId::INVALID(). + /* * TODO: Analyze if this function is useful in more detail, also, should probably not include clock in input count */ @@ -790,684 +459,30 @@ t_pack_molecule* get_free_molecule_with_most_ext_inputs_for_cluster(t_pb* cur_pb } /*****************************************/ -void alloc_and_load_pb_stats(t_pb* pb, const int feasible_block_array_size) { - /* Call this routine when starting to fill up a new cluster. It resets * - * the gain vector, etc. */ - - pb->pb_stats = new t_pb_stats; - - /* If statement below is for speed. If nets are reasonably low-fanout, * - * only a relatively small number of blocks will be marked, and updating * - * only those atom block structures will be fastest. If almost all blocks * - * have been touched it should be faster to just run through them all * - * in order (less addressing and better cache locality). 
*/ - pb->pb_stats->input_pins_used = std::vector>(pb->pb_graph_node->num_input_pin_class); - pb->pb_stats->output_pins_used = std::vector>(pb->pb_graph_node->num_output_pin_class); - pb->pb_stats->lookahead_input_pins_used = std::vector>(pb->pb_graph_node->num_input_pin_class); - pb->pb_stats->lookahead_output_pins_used = std::vector>(pb->pb_graph_node->num_output_pin_class); - pb->pb_stats->num_feasible_blocks = NOT_VALID; - pb->pb_stats->feasible_blocks = new t_pack_molecule*[feasible_block_array_size]; - - for (int i = 0; i < feasible_block_array_size; i++) - pb->pb_stats->feasible_blocks[i] = nullptr; - - pb->pb_stats->tie_break_high_fanout_net = AtomNetId::INVALID(); - - pb->pb_stats->pulled_from_atom_groups = 0; - pb->pb_stats->num_att_group_atoms_used = 0; - - pb->pb_stats->gain.clear(); - pb->pb_stats->timinggain.clear(); - pb->pb_stats->connectiongain.clear(); - pb->pb_stats->sharinggain.clear(); - pb->pb_stats->hillgain.clear(); - pb->pb_stats->transitive_fanout_candidates.clear(); - - pb->pb_stats->num_pins_of_net_in_pb.clear(); - - pb->pb_stats->num_child_blocks_in_pb = 0; - - pb->pb_stats->explore_transitive_fanout = true; -} -/*****************************************/ - -/** - * Cleans up a pb after unsuccessful molecule packing - * - * Recursively frees pbs from a t_pb tree. The given root pb itself is not - * deleted. - * - * If a pb object has its children allocated then before freeing them the - * function checks if there is no atom that corresponds to any of them. The - * check is performed only for leaf (primitive) pbs. The function recurses for - * non-primitive pbs. - * - * The cleaning itself includes deleting all child pbs, resetting mode of the - * pb and also freeing its name. This prepares the pb for another round of - * molecule packing tryout. 
- */ -bool cleanup_pb(t_pb* pb) { - bool can_free = true; - - /* Recursively check if there are any children with already assigned atoms */ - if (pb->child_pbs != nullptr) { - const t_mode* mode = &pb->pb_graph_node->pb_type->modes[pb->mode]; - VTR_ASSERT(mode != nullptr); - - /* Check each mode */ - for (int i = 0; i < mode->num_pb_type_children; ++i) { - /* Check each child */ - if (pb->child_pbs[i] != nullptr) { - for (int j = 0; j < mode->pb_type_children[i].num_pb; ++j) { - t_pb* pb_child = &pb->child_pbs[i][j]; - t_pb_type* pb_type = pb_child->pb_graph_node->pb_type; - - /* Primitive, check occupancy */ - if (pb_type->num_modes == 0) { - if (pb_child->name != nullptr) { - can_free = false; - } - } - - /* Non-primitive, recurse */ - else { - if (!cleanup_pb(pb_child)) { - can_free = false; - } - } - } - } - } - - /* Free if can */ - if (can_free) { - for (int i = 0; i < mode->num_pb_type_children; ++i) { - if (pb->child_pbs[i] != nullptr) { - delete[] pb->child_pbs[i]; - } - } - - delete[] pb->child_pbs; - pb->child_pbs = nullptr; - pb->mode = 0; - - if (pb->name) { - free(pb->name); - pb->name = nullptr; - } - } - } - - return can_free; -} - -/** - * Performs legality checks to see whether the selected molecule can be - * packed into the current cluster. The legality checks are related to - * floorplanning, pin feasibility, and routing (if detailed route - * checking is enabled). The routine returns BLK_PASSED if the molecule - * can be packed in the cluster. If the block passes, the routine commits - * it to the current cluster and updates the appropriate data structures. - * Otherwise, it returns the appropriate failed pack status based on which - * legality check the molecule failed. 
- */ -e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_placement_stats_ptr, - t_pack_molecule* molecule, - t_pb_graph_node** primitives_list, - t_pb* pb, - int max_models, - int max_cluster_size, - ClusterBlockId clb_index, - int detailed_routing_stage, - t_lb_router_data* router_data, - int verbosity, - bool enable_pin_feasibility_filter, - int feasible_block_array_size, - t_ext_pin_util max_external_pin_util, - PartitionRegion& temp_cluster_pr, - NocGroupId& temp_noc_grp_id, - int force_site) { - t_pb* parent; - t_pb* cur_pb; - - const auto& atom_ctx = g_vpr_ctx.atom(); - auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); - parent = nullptr; - - const int molecule_size = get_array_size_of_molecule(molecule); - - if (verbosity > 3) { - AtomBlockId root_atom = molecule->atom_block_ids[molecule->root]; - VTR_LOG("\t\tTry pack molecule: '%s' (%s)", - atom_ctx.nlist.block_name(root_atom).c_str(), - atom_ctx.nlist.block_model(root_atom)->name); - VTR_LOGV(molecule->pack_pattern, - " molecule_type %s molecule_size %zu", - molecule->pack_pattern->name, - molecule->atom_block_ids.size()); - VTR_LOG("\n"); - } - - // if this cluster has a molecule placed in it that is part of a long chain - // (a chain that consists of more than one molecule), don't allow more long chain - // molecules to be placed in this cluster. To avoid possibly creating cluster level - // blocks that have incompatible placement constraints or form very long placement - // macros that limit placement flexibility. 
- if (cluster_placement_stats_ptr->has_long_chain && molecule->is_chain() && molecule->chain_info->is_long_chain) { - VTR_LOGV(verbosity > 4, "\t\t\tFAILED Placement Feasibility Filter: Only one long chain per cluster is allowed\n"); - //Record the failure of this molecule in the current pb stats - record_molecule_failure(molecule, pb); - return e_block_pack_status::BLK_FAILED_FEASIBLE; - } - - bool cluster_pr_update_check = false; - - //check if every atom in the molecule is legal in the cluster from a floorplanning perspective - for (int i_mol = 0; i_mol < molecule_size; i_mol++) { - //try to intersect with atom PartitionRegion if atom exists - if (molecule->atom_block_ids[i_mol]) { - bool cluster_pr_needs_update = false; - bool block_pack_floorplan_status = atom_cluster_floorplanning_check(molecule->atom_block_ids[i_mol], - clb_index, verbosity, - temp_cluster_pr, - cluster_pr_needs_update); - - if (!block_pack_floorplan_status) { - //Record the failure of this molecule in the current pb stats - record_molecule_failure(molecule, pb); - return e_block_pack_status::BLK_FAILED_FLOORPLANNING; - } - - if (cluster_pr_needs_update) { - cluster_pr_update_check = true; - } - } - } - - // check if all atoms in the molecule can be added to the cluster without NoC group conflicts - for (int i_mol = 0; i_mol < molecule_size; i_mol++) { - if (molecule->atom_block_ids[i_mol]) { - bool block_pack_noc_grp_status = atom_cluster_noc_group_check(molecule->atom_block_ids[i_mol], - clb_index, verbosity, - temp_noc_grp_id); - - if (!block_pack_noc_grp_status) { - //Record the failure of this molecule in the current pb stats - record_molecule_failure(molecule, pb); - return e_block_pack_status::BLK_FAILED_NOC_GROUP; - } - } - } - - e_block_pack_status block_pack_status = e_block_pack_status::BLK_STATUS_UNDEFINED; - - while (block_pack_status != e_block_pack_status::BLK_PASSED) { - if (get_next_primitive_list(cluster_placement_stats_ptr, molecule, - primitives_list, force_site)) { - 
block_pack_status = e_block_pack_status::BLK_PASSED; - - int failed_location = 0; - - for (int i_mol = 0; i_mol < molecule_size && block_pack_status == e_block_pack_status::BLK_PASSED; i_mol++) { - VTR_ASSERT((primitives_list[i_mol] == nullptr) == (!molecule->atom_block_ids[i_mol])); - failed_location = i_mol + 1; - // try place atom block if it exists - if (molecule->atom_block_ids[i_mol]) { - block_pack_status = try_place_atom_block_rec(primitives_list[i_mol], - molecule->atom_block_ids[i_mol], pb, &parent, - max_models, max_cluster_size, clb_index, - cluster_placement_stats_ptr, molecule, router_data, - verbosity, feasible_block_array_size); - } - } - - if (enable_pin_feasibility_filter && block_pack_status == e_block_pack_status::BLK_PASSED) { - /* Check if pin usage is feasible for the current packing assignment */ - reset_lookahead_pins_used(pb); - try_update_lookahead_pins_used(pb); - if (!check_lookahead_pins_used(pb, max_external_pin_util)) { - VTR_LOGV(verbosity > 4, "\t\t\tFAILED Pin Feasibility Filter\n"); - block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; - } - } - - if (block_pack_status == e_block_pack_status::BLK_PASSED) { - /* - * during the clustering step of `do_clustering`, `detailed_routing_stage` is incremented at each iteration until it a cluster - * is correctly generated or `detailed_routing_stage` assumes an invalid value (E_DETAILED_ROUTE_INVALID). - * depending on its value we have different behaviors: - * - E_DETAILED_ROUTE_AT_END_ONLY: Skip routing if heuristic is to route at the end of packing complex block. - * - E_DETAILED_ROUTE_FOR_EACH_ATOM: Try to route if heuristic is to route for every atom. If the clusterer arrives at this stage, - * it means that more checks have to be performed as the previous stage failed to generate a new cluster. - * - * mode_status is a data structure containing the status of the mode selection. 
Its members are: - * - bool is_mode_conflict - * - bool try_expand_all_modes - * - bool expand_all_modes - * - * is_mode_conflict affects this stage. Its value determines whether the cluster failed to pack after a mode conflict issue. - * It holds a flag that is used to verify whether try_intra_lb_route ended in a mode conflict issue. - * - * Until is_mode_conflict is set to FALSE by try_intra_lb_route, the loop re-iterates. If all the available modes are exhausted - * an error will be thrown during mode conflicts checks (this to prevent infinite loops). - * - * If the value is TRUE the cluster has to be re-routed, and its internal pb_graph_nodes will have more restrict choices - * for what regards the mode that has to be selected. - * - * is_mode_conflict is initially set to TRUE, and, unless a mode conflict is found, it is set to false in `try_intra_lb_route`. - * - * try_expand_all_modes is set if the node expansion failed to find a valid routing path. The clusterer tries to find another route - * by using all the modes during node expansion. - * - * expand_all_modes is used to enable the expansion of all the nodes using all the possible modes. 
- */ - t_mode_selection_status mode_status; - bool is_routed = false; - bool do_detailed_routing_stage = detailed_routing_stage == (int)E_DETAILED_ROUTE_FOR_EACH_ATOM; - if (do_detailed_routing_stage) { - do { - reset_intra_lb_route(router_data); - is_routed = try_intra_lb_route(router_data, verbosity, &mode_status); - } while (do_detailed_routing_stage && mode_status.is_mode_issue()); - } - - if (do_detailed_routing_stage && !is_routed) { - /* Cannot pack */ - VTR_LOGV(verbosity > 4, "\t\t\tFAILED Detailed Routing Legality\n"); - block_pack_status = e_block_pack_status::BLK_FAILED_ROUTE; - } else { - /* Pack successful, commit - * TODO: SW Engineering note - may want to update cluster stats here too instead of doing it outside - */ - VTR_ASSERT(block_pack_status == e_block_pack_status::BLK_PASSED); - if (molecule->is_chain()) { - /* Chained molecules often take up lots of area and are important, - * if a chain is packed in, want to rename logic block to match chain name */ - AtomBlockId chain_root_blk_id = molecule->atom_block_ids[molecule->pack_pattern->root_block->block_id]; - cur_pb = atom_ctx.lookup.atom_pb(chain_root_blk_id)->parent_pb; - while (cur_pb != nullptr) { - free(cur_pb->name); - cur_pb->name = vtr::strdup(atom_ctx.nlist.block_name(chain_root_blk_id).c_str()); - cur_pb = cur_pb->parent_pb; - } - // if this molecule is part of a chain, mark the cluster as having a long chain - // molecule. Also check if it's the first molecule in the chain to be packed. 
- // If so, update the chain id for this chain of molecules to make sure all - // molecules will be packed to the same chain id and can reach each other using - // the chain direct links between clusters - if (molecule->chain_info->is_long_chain) { - cluster_placement_stats_ptr->has_long_chain = true; - if (molecule->chain_info->chain_id == -1) { - update_molecule_chain_info(molecule, primitives_list[molecule->root]); - } - } - } - - //update cluster PartitionRegion if atom with floorplanning constraints was added - if (cluster_pr_update_check) { - floorplanning_ctx.cluster_constraints[clb_index] = temp_cluster_pr; - VTR_LOGV(verbosity > 2, "\nUpdated PartitionRegion of cluster %d\n", clb_index); - } - - for (int i = 0; i < molecule_size; i++) { - if (molecule->atom_block_ids[i]) { - /* invalidate all molecules that share atom block with current molecule */ - t_pack_molecule* cur_molecule = atom_ctx.prepacker.get_atom_molecule(molecule->atom_block_ids[i]); - cur_molecule->valid = false; - - commit_primitive(cluster_placement_stats_ptr, primitives_list[i]); - } - } - } - } - - if (block_pack_status != e_block_pack_status::BLK_PASSED) { - for (int i = 0; i < failed_location; i++) { - if (molecule->atom_block_ids[i]) { - remove_atom_from_target(router_data, molecule->atom_block_ids[i]); - } - } - for (int i = 0; i < failed_location; i++) { - if (molecule->atom_block_ids[i]) { - revert_place_atom_block(molecule->atom_block_ids[i], router_data); - } - } - - //Record the failure of this molecule in the current pb stats - record_molecule_failure(molecule, pb); - - /* Packing failed, but a part of the pb tree is still allocated and pbs have their modes set. - * Before trying to pack next molecule the unused pbs need to be freed and, the most important, - * their modes reset. This task is performed by the cleanup_pb() function below. 
*/ - cleanup_pb(pb); - - } else { - VTR_LOGV(verbosity > 3, "\t\tPASSED pack molecule\n"); - } - } else { - VTR_LOGV(verbosity > 3, "\t\tFAILED No candidate primitives available\n"); - block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; - break; /* no more candidate primitives available, this molecule will not pack, return fail */ - } - } - return block_pack_status; -} - -/* Record the failure of the molecule in this cluster in the current pb stats. - * If a molecule fails repeatedly, it's gain will be penalized if packing with - * attraction groups on. */ -void record_molecule_failure(t_pack_molecule* molecule, t_pb* pb) { - //Only have to record the failure for the first atom in the molecule. - //The convention when checking if a molecule has failed to pack in the cluster - //is to check whether the first atoms has been recorded as having failed - - auto got = pb->pb_stats->atom_failures.find(molecule->atom_block_ids[0]); - if (got == pb->pb_stats->atom_failures.end()) { - pb->pb_stats->atom_failures.insert({molecule->atom_block_ids[0], 1}); - } else { - got->second++; - } -} - -/** - * Try place atom block into current primitive location - */ - -enum e_block_pack_status try_place_atom_block_rec(const t_pb_graph_node* pb_graph_node, - const AtomBlockId blk_id, - t_pb* cb, - t_pb** parent, - const int max_models, - const int max_cluster_size, - const ClusterBlockId clb_index, - const t_cluster_placement_stats* cluster_placement_stats_ptr, - const t_pack_molecule* molecule, - t_lb_router_data* router_data, - int verbosity, - const int feasible_block_array_size) { - int i, j; - bool is_primitive; - enum e_block_pack_status block_pack_status; - - t_pb* my_parent; - t_pb *pb, *parent_pb; - const t_pb_type* pb_type; - - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - - my_parent = nullptr; - - block_pack_status = e_block_pack_status::BLK_PASSED; - - /* Discover parent */ - if (pb_graph_node->parent_pb_graph_node != cb->pb_graph_node) { - block_pack_status = 
try_place_atom_block_rec(pb_graph_node->parent_pb_graph_node, blk_id, cb, - &my_parent, max_models, max_cluster_size, clb_index, - cluster_placement_stats_ptr, molecule, router_data, - verbosity, feasible_block_array_size); - parent_pb = my_parent; - } else { - parent_pb = cb; - } - - /* Create siblings if siblings are not allocated */ - if (parent_pb->child_pbs == nullptr) { - atom_ctx.lookup.set_atom_pb(AtomBlockId::INVALID(), parent_pb); - - VTR_ASSERT(parent_pb->name == nullptr); - parent_pb->name = vtr::strdup(atom_ctx.nlist.block_name(blk_id).c_str()); - parent_pb->mode = pb_graph_node->pb_type->parent_mode->index; - set_reset_pb_modes(router_data, parent_pb, true); - const t_mode* mode = &parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode]; - parent_pb->child_pbs = new t_pb*[mode->num_pb_type_children]; - - for (i = 0; i < mode->num_pb_type_children; i++) { - parent_pb->child_pbs[i] = new t_pb[mode->pb_type_children[i].num_pb]; - - for (j = 0; j < mode->pb_type_children[i].num_pb; j++) { - parent_pb->child_pbs[i][j].parent_pb = parent_pb; - - atom_ctx.lookup.set_atom_pb(AtomBlockId::INVALID(), &parent_pb->child_pbs[i][j]); - - parent_pb->child_pbs[i][j].pb_graph_node = &(parent_pb->pb_graph_node->child_pb_graph_nodes[parent_pb->mode][i][j]); - } - } - } else { - /* if this is not the first child of this parent, must match existing parent mode */ - if (parent_pb->mode != pb_graph_node->pb_type->parent_mode->index) { - return e_block_pack_status::BLK_FAILED_FEASIBLE; - } - } - - const t_mode* mode = &parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode]; - for (i = 0; i < mode->num_pb_type_children; i++) { - if (pb_graph_node->pb_type == &mode->pb_type_children[i]) { - break; - } - } - VTR_ASSERT(i < mode->num_pb_type_children); - pb = &parent_pb->child_pbs[i][pb_graph_node->placement_index]; - *parent = pb; /* this pb is parent of it's child that called this function */ - VTR_ASSERT(pb->pb_graph_node == pb_graph_node); - if (pb->pb_stats == nullptr) 
{ - alloc_and_load_pb_stats(pb, feasible_block_array_size); - } - pb_type = pb_graph_node->pb_type; - - /* Any pb_type under an mode, which is disabled for packing, should not be considerd for mapping - * Early exit to flag failure - */ - if (true == pb_type->parent_mode->disable_packing) { - return e_block_pack_status::BLK_FAILED_FEASIBLE; - } - is_primitive = (pb_type->num_modes == 0); - - if (is_primitive) { - VTR_ASSERT(!atom_ctx.lookup.pb_atom(pb) - && atom_ctx.lookup.atom_pb(blk_id) == nullptr - && atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()); - /* try pack to location */ - VTR_ASSERT(pb->name == nullptr); - pb->name = vtr::strdup(atom_ctx.nlist.block_name(blk_id).c_str()); - - //Update the atom netlist mappings - atom_ctx.lookup.set_atom_clb(blk_id, clb_index); - atom_ctx.lookup.set_atom_pb(blk_id, pb); - - add_atom_as_target(router_data, blk_id); - if (!primitive_feasible(blk_id, pb)) { - /* failed location feasibility check, revert pack */ - block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; - } - - // if this block passed and is part of a chained molecule - if (block_pack_status == e_block_pack_status::BLK_PASSED && molecule->is_chain()) { - auto molecule_root_block = molecule->atom_block_ids[molecule->root]; - // if this is the root block of the chain molecule check its placmeent feasibility - if (blk_id == molecule_root_block) { - block_pack_status = check_chain_root_placement_feasibility(pb_graph_node, molecule, blk_id); - } - } - - VTR_LOGV(verbosity > 4 && block_pack_status == e_block_pack_status::BLK_PASSED, - "\t\t\tPlaced atom '%s' (%s) at %s\n", - atom_ctx.nlist.block_name(blk_id).c_str(), - atom_ctx.nlist.block_model(blk_id)->name, - pb->hierarchical_type_name().c_str()); - } - - if (block_pack_status != e_block_pack_status::BLK_PASSED) { - free(pb->name); - pb->name = nullptr; - } - return block_pack_status; -} - -/* - * Checks if the atom and cluster have compatible floorplanning constraints - * If the atom and 
cluster both have non-empty PartitionRegions, and the intersection - * of the PartitionRegions is empty, the atom cannot be packed in the cluster. - */ -bool atom_cluster_floorplanning_check(AtomBlockId blk_id, - ClusterBlockId clb_index, - int verbosity, - PartitionRegion& temp_cluster_pr, - bool& cluster_pr_needs_update) { - auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); - - /*check if the atom can go in the cluster by checking if the atom and cluster have intersecting PartitionRegions*/ - - //get partition that atom belongs to - PartitionId partid; - partid = floorplanning_ctx.constraints.get_atom_partition(blk_id); - - //if the atom does not belong to a partition, it can be put in the cluster - //regardless of what the cluster's PartitionRegion is because it has no constraints - if (partid == PartitionId::INVALID()) { - VTR_LOGV(verbosity > 3, - "\t\t\t Intersect: Atom block %d has no floorplanning constraints, passed for cluster %d \n", - blk_id, clb_index); - cluster_pr_needs_update = false; - return true; - } else { - //get pr of that partition - const PartitionRegion& atom_pr = floorplanning_ctx.constraints.get_partition_pr(partid); - - //intersect it with the pr of the current cluster - PartitionRegion cluster_pr = floorplanning_ctx.cluster_constraints[clb_index]; - - if (cluster_pr.empty()) { - temp_cluster_pr = atom_pr; - cluster_pr_needs_update = true; - VTR_LOGV(verbosity > 3, - "\t\t\t Intersect: Atom block %d has floorplanning constraints, passed cluster %d which has empty PR\n", - blk_id, clb_index); - return true; - } else { - //update cluster_pr with the intersection of the cluster's PartitionRegion - //and the atom's PartitionRegion - update_cluster_part_reg(cluster_pr, atom_pr); - } - - // At this point, cluster_pr is the intersection of atom_pr and the clusters current pr - if (cluster_pr.empty()) { - VTR_LOGV(verbosity > 3, - "\t\t\t Intersect: Atom block %d failed floorplanning check for cluster %d \n", - blk_id, clb_index); - 
cluster_pr_needs_update = false; - return false; - } else { - //update the cluster's PartitionRegion with the intersecting PartitionRegion - temp_cluster_pr = cluster_pr; - cluster_pr_needs_update = true; - VTR_LOGV(verbosity > 3, - "\t\t\t Intersect: Atom block %d passed cluster %d, cluster PR was updated with intersection result \n", - blk_id, clb_index); - return true; - } - } -} - -bool atom_cluster_noc_group_check(AtomBlockId blk_id, - ClusterBlockId clb_index, - int verbosity, - NocGroupId& temp_cluster_noc_grp_id) { - const auto& atom_noc_grp_ids = g_vpr_ctx.cl_helper().atom_noc_grp_id; - const NocGroupId atom_noc_grp_id = atom_noc_grp_ids.empty() ? NocGroupId::INVALID() : atom_noc_grp_ids[blk_id]; - - if (temp_cluster_noc_grp_id == NocGroupId::INVALID()) { - // the cluster does not have a NoC group - // assign the atom's NoC group to cluster - VTR_LOGV(verbosity > 3, - "\t\t\t NoC Group: Atom block %d passed cluster %d, cluster's NoC group was updated with the atom's group %d\n", - blk_id, clb_index, (size_t)atom_noc_grp_id); - temp_cluster_noc_grp_id = atom_noc_grp_id; - return true; - } else if (temp_cluster_noc_grp_id == atom_noc_grp_id) { - // the cluster has the same NoC group ID as the atom, - // so they are compatible - VTR_LOGV(verbosity > 3, - "\t\t\t NoC Group: Atom block %d passed cluster %d, cluster's NoC group was compatible with the atom's group %d\n", - blk_id, clb_index, (size_t)atom_noc_grp_id); - return true; - } else { - // the cluster belongs to a different NoC group than the atom's group, - // so they are incompatible - VTR_LOGV(verbosity > 3, - "\t\t\t NoC Group: Atom block %d failed NoC group check for cluster %d. 
Cluster's NoC group: %d, atom's NoC group: %d\n", - blk_id, clb_index, (size_t)temp_cluster_noc_grp_id, size_t(atom_noc_grp_id)); - return false; - } -} - -/* Revert trial atom block iblock and free up memory space accordingly - */ -void revert_place_atom_block(const AtomBlockId blk_id, t_lb_router_data* router_data) { - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - - //We cast away const here since we may free the pb, and it is - //being removed from the active mapping. - // - //In general most code works fine accessing cosnt t_pb*, - //which is why we store them as such in atom_ctx.lookup - t_pb* pb = const_cast(atom_ctx.lookup.atom_pb(blk_id)); - - if (pb != nullptr) { - /* When freeing molecules, the current block might already have been freed by a prior revert - * When this happens, no need to do anything beyond basic book keeping at the atom block - */ - - t_pb* next = pb->parent_pb; - revalid_molecules(pb); - free_pb(pb); - pb = next; - - while (pb != nullptr) { - /* If this is pb is created only for the purposes of holding new molecule, remove it - * Must check if cluster is already freed (which can be the case) - */ - next = pb->parent_pb; - - if (pb->child_pbs != nullptr && pb->pb_stats != nullptr - && pb->pb_stats->num_child_blocks_in_pb == 0) { - set_reset_pb_modes(router_data, pb, false); - if (next != nullptr) { - /* If the code gets here, then that means that placing the initial seed molecule - * failed, don't free the actual complex block itself as the seed needs to find - * another placement */ - revalid_molecules(pb); - free_pb(pb); - } - } - pb = next; - } - } - - //Update the atom netlist mapping - atom_ctx.lookup.set_atom_clb(blk_id, ClusterBlockId::INVALID()); - atom_ctx.lookup.set_atom_pb(blk_id, nullptr); -} - -void update_connection_gain_values(const AtomNetId net_id, const AtomBlockId clustered_blk_id, t_pb* cur_pb, enum e_net_relation_to_clustered_block net_relation_to_clustered_block) { +void update_connection_gain_values(const AtomNetId 
net_id, + const AtomBlockId clustered_blk_id, + t_pb* cur_pb, + const ClusterLegalizer& cluster_legalizer, + enum e_net_relation_to_clustered_block net_relation_to_clustered_block) { /*This function is called when the connectiongain values on the net net_id* *require updating. */ + const AtomContext& atom_ctx = g_vpr_ctx.atom(); int num_internal_connections, num_open_connections, num_stuck_connections; num_internal_connections = num_open_connections = num_stuck_connections = 0; - auto& atom_ctx = g_vpr_ctx.atom(); - ClusterBlockId clb_index = atom_ctx.lookup.atom_clb(clustered_blk_id); + LegalizationClusterId legalization_cluster_id = cluster_legalizer.get_atom_cluster(clustered_blk_id); /* may wish to speed things up by ignoring clock nets since they are high fanout */ for (auto pin_id : atom_ctx.nlist.net_pins(net_id)) { auto blk_id = atom_ctx.nlist.pin_block(pin_id); - if (atom_ctx.lookup.atom_clb(blk_id) == clb_index + if (cluster_legalizer.get_atom_cluster(blk_id) == legalization_cluster_id && is_atom_blk_in_pb(blk_id, atom_ctx.lookup.atom_pb(clustered_blk_id))) { num_internal_connections++; - } else if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + } else if (!cluster_legalizer.is_atom_clustered(blk_id)) { num_open_connections++; } else { num_stuck_connections++; @@ -1479,7 +494,7 @@ void update_connection_gain_values(const AtomNetId net_id, const AtomBlockId clu auto blk_id = atom_ctx.nlist.pin_block(pin_id); VTR_ASSERT(blk_id); - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + if (!cluster_legalizer.is_atom_clustered(blk_id)) { /* TODO: Gain function accurate only if net has one connection to block, * TODO: Should we handle case where net has multi-connection to block? 
* Gain computation is only off by a bit in this case */ @@ -1502,7 +517,7 @@ void update_connection_gain_values(const AtomNetId net_id, const AtomBlockId clu auto driver_pin_id = atom_ctx.nlist.net_driver(net_id); auto blk_id = atom_ctx.nlist.pin_block(driver_pin_id); - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + if (!cluster_legalizer.is_atom_clustered(blk_id)) { if (cur_pb->pb_stats->connectiongain.count(blk_id) == 0) { cur_pb->pb_stats->connectiongain[blk_id] = 0; } @@ -1514,53 +529,33 @@ void update_connection_gain_values(const AtomNetId net_id, const AtomBlockId clu } } -void try_fill_cluster(const t_packer_opts& packer_opts, +void try_fill_cluster(ClusterLegalizer& cluster_legalizer, + const Prepacker& prepacker, + const t_packer_opts& packer_opts, t_cluster_placement_stats* cur_cluster_placement_stats_ptr, t_pack_molecule*& prev_molecule, t_pack_molecule*& next_molecule, int& num_same_molecules, - t_pb_graph_node** primitives_list, t_cluster_progress_stats& cluster_stats, int num_clb, - const int num_models, - const int max_cluster_size, - const ClusterBlockId clb_index, - const int detailed_routing_stage, + const LegalizationClusterId legalization_cluster_id, AttractionInfo& attraction_groups, - vtr::vector>& clb_inter_blk_nets, + vtr::vector>& clb_inter_blk_nets, bool allow_unrelated_clustering, const int& high_fanout_threshold, const std::unordered_set& is_clock, const std::unordered_set& is_global, const std::shared_ptr& timing_info, - t_lb_router_data* router_data, - t_ext_pin_util target_ext_pin_util, - PartitionRegion& temp_cluster_pr, - NocGroupId& temp_noc_grp_id, e_block_pack_status& block_pack_status, t_molecule_link* unclustered_list_head, const int& unclustered_list_head_size, std::unordered_map& net_output_feeds_driving_block_input, std::map>& primitive_candidate_block_types) { - auto& atom_ctx = g_vpr_ctx.atom(); - auto& device_ctx = g_vpr_ctx.mutable_device(); - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - 
- block_pack_status = try_pack_molecule(cur_cluster_placement_stats_ptr, - next_molecule, - primitives_list, - cluster_ctx.clb_nlist.block_pb(clb_index), - num_models, - max_cluster_size, - clb_index, - detailed_routing_stage, - router_data, - packer_opts.pack_verbosity, - packer_opts.enable_pin_feasibility_filter, - packer_opts.feasible_block_array_size, - target_ext_pin_util, - temp_cluster_pr, - temp_noc_grp_id); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + const DeviceContext& device_ctx = g_vpr_ctx.device(); + + block_pack_status = cluster_legalizer.add_mol_to_cluster(next_molecule, + legalization_cluster_id); auto blk_id = next_molecule->atom_block_ids[next_molecule->root]; VTR_ASSERT(blk_id); @@ -1588,7 +583,7 @@ void try_fill_cluster(const t_packer_opts& packer_opts, } } - next_molecule = get_molecule_for_cluster(cluster_ctx.clb_nlist.block_pb(clb_index), + next_molecule = get_molecule_for_cluster(cluster_legalizer.get_cluster_pb(legalization_cluster_id), attraction_groups, allow_unrelated_clustering, packer_opts.prioritize_transitive_connectivity, @@ -1596,8 +591,11 @@ void try_fill_cluster(const t_packer_opts& packer_opts, packer_opts.feasible_block_array_size, &cluster_stats.num_unrelated_clustering_attempts, cur_cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, clb_inter_blk_nets, - clb_index, packer_opts.pack_verbosity, + legalization_cluster_id, + packer_opts.pack_verbosity, unclustered_list_head, unclustered_list_head_size, primitive_candidate_block_types); @@ -1625,9 +623,11 @@ void try_fill_cluster(const t_packer_opts& packer_opts, cluster_stats.mols_since_last_print, device_ctx.grid.width(), device_ctx.grid.height(), - attraction_groups); + attraction_groups, + cluster_legalizer); - update_cluster_stats(next_molecule, clb_index, + update_cluster_stats(next_molecule, + cluster_legalizer, is_clock, //Set of all clocks is_global, //Set of all global signals (currently clocks) packer_opts.global_clocks, packer_opts.alpha, 
packer_opts.beta, packer_opts.timing_driven, @@ -1641,7 +641,7 @@ void try_fill_cluster(const t_packer_opts& packer_opts, if (packer_opts.timing_driven) { cluster_stats.blocks_since_last_analysis++; /* historically, timing slacks were recomputed after X number of blocks were packed, but this doesn't significantly alter results so I (jluu) did not port the code */ } - next_molecule = get_molecule_for_cluster(cluster_ctx.clb_nlist.block_pb(clb_index), + next_molecule = get_molecule_for_cluster(cluster_legalizer.get_cluster_pb(legalization_cluster_id), attraction_groups, allow_unrelated_clustering, packer_opts.prioritize_transitive_connectivity, @@ -1649,8 +649,10 @@ void try_fill_cluster(const t_packer_opts& packer_opts, packer_opts.feasible_block_array_size, &cluster_stats.num_unrelated_clustering_attempts, cur_cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, clb_inter_blk_nets, - clb_index, + legalization_cluster_id, packer_opts.pack_verbosity, unclustered_list_head, unclustered_list_head_size, @@ -1661,78 +663,37 @@ void try_fill_cluster(const t_packer_opts& packer_opts, } } -t_pack_molecule* save_cluster_routing_and_pick_new_seed(const t_packer_opts& packer_opts, - const int& num_clb, - const std::vector& seed_atoms, - const int& num_blocks_hill_added, - vtr::vector*>& intra_lb_routing, - int& seedindex, - t_cluster_progress_stats& cluster_stats, - t_lb_router_data* router_data) { - t_pack_molecule* next_seed = nullptr; - - intra_lb_routing.push_back(router_data->saved_lb_nets); - VTR_ASSERT((int)intra_lb_routing.size() == num_clb); - router_data->saved_lb_nets = nullptr; - - //Pick a new seed - next_seed = get_highest_gain_seed_molecule(seedindex, seed_atoms); - - if (packer_opts.timing_driven) { - if (num_blocks_hill_added > 0) { - cluster_stats.blocks_since_last_analysis += num_blocks_hill_added; - } - } - return next_seed; -} - void store_cluster_info_and_free(const t_packer_opts& packer_opts, - const ClusterBlockId& clb_index, + const 
LegalizationClusterId legalization_cluster_id, const t_logical_block_type_ptr logic_block_type, const t_pb_type* le_pb_type, std::vector& le_count, - vtr::vector>& clb_inter_blk_nets) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - auto& atom_ctx = g_vpr_ctx.atom(); + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); /* store info that will be used later in packing from pb_stats and free the rest */ - t_pb_stats* pb_stats = cluster_ctx.clb_nlist.block_pb(clb_index)->pb_stats; + t_pb* cur_pb = cluster_legalizer.get_cluster_pb(legalization_cluster_id); + t_pb_stats* pb_stats = cur_pb->pb_stats; for (const AtomNetId mnet_id : pb_stats->marked_nets) { int external_terminals = atom_ctx.nlist.net_pins(mnet_id).size() - pb_stats->num_pins_of_net_in_pb[mnet_id]; /* Check if external terminals of net is within the fanout limit and that there exists external terminals */ if (external_terminals < packer_opts.transitive_fanout_threshold && external_terminals > 0) { - clb_inter_blk_nets[clb_index].push_back(mnet_id); + clb_inter_blk_nets[legalization_cluster_id].push_back(mnet_id); } } - auto cur_pb = cluster_ctx.clb_nlist.block_pb(clb_index); // update the data structure holding the LE counts update_le_count(cur_pb, logic_block_type, le_pb_type, le_count); //print clustering progress incrementally //print_pack_status(num_clb, num_molecules, num_molecules_processed, mols_since_last_print, device_ctx.grid.width(), device_ctx.grid.height()); - free_pb_stats_recursive(cur_pb); -} - -/* Free up data structures and requeue used molecules */ -void free_data_and_requeue_used_mols_if_illegal(const ClusterBlockId& clb_index, - const int& savedseedindex, - std::map& num_used_type_instances, - int& num_clb, - int& seedindex) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - num_used_type_instances[cluster_ctx.clb_nlist.block_type(clb_index)]--; - 
revalid_molecules(cluster_ctx.clb_nlist.block_pb(clb_index)); - cluster_ctx.clb_nlist.remove_block(clb_index); - cluster_ctx.clb_nlist.compress(); - num_clb--; - seedindex = savedseedindex; } /*****************************************/ void update_timing_gain_values(const AtomNetId net_id, t_pb* cur_pb, + const ClusterLegalizer& cluster_legalizer, enum e_net_relation_to_clustered_block net_relation_to_clustered_block, const SetupTimingInfo& timing_info, const std::unordered_set& is_global, @@ -1741,7 +702,7 @@ void update_timing_gain_values(const AtomNetId net_id, *net_id requires updating. */ float timinggain; - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); /* Check if this atom net lists its driving atom block twice. If so, avoid * * double counting this atom block by skipping the first (driving) pin. */ @@ -1753,7 +714,7 @@ void update_timing_gain_values(const AtomNetId net_id, && !is_global.count(net_id)) { for (auto pin_id : pins) { auto blk_id = atom_ctx.nlist.pin_block(pin_id); - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + if (!cluster_legalizer.is_atom_clustered(blk_id)) { timinggain = timing_info.setup_pin_criticality(pin_id); if (cur_pb->pb_stats->timinggain.count(blk_id) == 0) { @@ -1772,7 +733,7 @@ void update_timing_gain_values(const AtomNetId net_id, auto driver_pin = atom_ctx.nlist.net_driver(net_id); auto new_blk_id = atom_ctx.nlist.pin_block(driver_pin); - if (atom_ctx.lookup.atom_clb(new_blk_id) == ClusterBlockId::INVALID()) { + if (!cluster_legalizer.is_atom_clustered(new_blk_id)) { for (auto pin_id : atom_ctx.nlist.net_sinks(net_id)) { timinggain = timing_info.setup_pin_criticality(pin_id); @@ -1790,6 +751,7 @@ void update_timing_gain_values(const AtomNetId net_id, void mark_and_update_partial_gain(const AtomNetId net_id, enum e_gain_update gain_flag, const AtomBlockId clustered_blk_id, + const ClusterLegalizer& cluster_legalizer, bool timing_driven, bool connection_driven, enum 
e_net_relation_to_clustered_block net_relation_to_clustered_block, @@ -1797,15 +759,8 @@ void mark_and_update_partial_gain(const AtomNetId net_id, const std::unordered_set& is_global, const int high_fanout_net_threshold, std::unordered_map& net_output_feeds_driving_block_input) { - /* Updates the marked data structures, and if gain_flag is GAIN, * - * the gain when an atom block is added to a cluster. The * - * sharinggain is the number of inputs that a atom block shares with * - * blocks that are already in the cluster. Hillgain is the * - * reduction in number of pins-required by adding a atom block to the * - * cluster. The timinggain is the criticality of the most critical* - * atom net between this atom block and an atom block in the cluster. */ - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); t_pb* cur_pb = atom_ctx.lookup.atom_pb(clustered_blk_id)->parent_pb; cur_pb = get_top_level_pb(cur_pb); @@ -1845,7 +800,7 @@ void mark_and_update_partial_gain(const AtomNetId net_id, if (cur_pb->pb_stats->num_pins_of_net_in_pb.count(net_id) == 0) { for (auto pin_id : pins) { auto blk_id = atom_ctx.nlist.pin_block(pin_id); - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + if (!cluster_legalizer.is_atom_clustered(blk_id)) { if (cur_pb->pb_stats->sharinggain.count(blk_id) == 0) { cur_pb->pb_stats->marked_blocks.push_back(blk_id); cur_pb->pb_stats->sharinggain[blk_id] = 1; @@ -1860,11 +815,12 @@ void mark_and_update_partial_gain(const AtomNetId net_id, if (connection_driven) { update_connection_gain_values(net_id, clustered_blk_id, cur_pb, + cluster_legalizer, net_relation_to_clustered_block); } if (timing_driven) { - update_timing_gain_values(net_id, cur_pb, + update_timing_gain_values(net_id, cur_pb, cluster_legalizer, net_relation_to_clustered_block, timing_info, is_global, @@ -1879,10 +835,7 @@ void mark_and_update_partial_gain(const AtomNetId net_id, /*****************************************/ void 
update_total_gain(float alpha, float beta, bool timing_driven, bool connection_driven, t_pb* pb, AttractionInfo& attraction_groups) { - /*Updates the total gain array to reflect the desired tradeoff between* - *input sharing (sharinggain) and path_length minimization (timinggain) - *input each time a new molecule is added to the cluster.*/ - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); t_pb* cur_pb = pb; cur_pb = get_top_level_pb(cur_pb); @@ -1938,7 +891,7 @@ void update_total_gain(float alpha, float beta, bool timing_driven, bool connect /*****************************************/ void update_cluster_stats(const t_pack_molecule* molecule, - const ClusterBlockId clb_index, + const ClusterLegalizer& cluster_legalizer, const std::unordered_set& is_clock, const std::unordered_set& is_global, const bool global_clocks, @@ -1950,16 +903,12 @@ void update_cluster_stats(const t_pack_molecule* molecule, const SetupTimingInfo& timing_info, AttractionInfo& attraction_groups, std::unordered_map& net_output_feeds_driving_block_input) { - /* Routine that is called each time a new molecule is added to the cluster. - * Makes calls to update cluster stats such as the gain map for atoms, used pins, and clock structures, - * in order to reflect the new content of the cluster. - * Also keeps track of which attraction group the cluster belongs to. 
*/ int molecule_size; int iblock; t_pb *cur_pb, *cb; - auto& atom_ctx = g_vpr_ctx.mutable_atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); molecule_size = get_array_size_of_molecule(molecule); cb = nullptr; @@ -1969,9 +918,6 @@ void update_cluster_stats(const t_pack_molecule* molecule, continue; } - //Update atom netlist mapping - atom_ctx.lookup.set_atom_clb(blk_id, clb_index); - const t_pb* atom_pb = atom_ctx.lookup.atom_pb(blk_id); VTR_ASSERT(atom_pb); @@ -1986,7 +932,6 @@ void update_cluster_stats(const t_pack_molecule* molecule, cb = cur_pb; } cur_pb->pb_stats->num_feasible_blocks = NOT_VALID; - cur_pb->pb_stats->num_child_blocks_in_pb++; if (atom_grp_id != AttractGroupId::INVALID()) { /* TODO: Allow clusters to have more than one attraction group. */ @@ -2000,7 +945,7 @@ void update_cluster_stats(const t_pack_molecule* molecule, for (auto pin_id : atom_ctx.nlist.block_output_pins(blk_id)) { auto net_id = atom_ctx.nlist.pin_net(pin_id); if (!is_clock.count(net_id) || !global_clocks) { - mark_and_update_partial_gain(net_id, GAIN, blk_id, + mark_and_update_partial_gain(net_id, GAIN, blk_id, cluster_legalizer, timing_driven, connection_driven, OUTPUT, timing_info, @@ -2008,7 +953,7 @@ void update_cluster_stats(const t_pack_molecule* molecule, high_fanout_net_threshold, net_output_feeds_driving_block_input); } else { - mark_and_update_partial_gain(net_id, NO_GAIN, blk_id, + mark_and_update_partial_gain(net_id, NO_GAIN, blk_id, cluster_legalizer, timing_driven, connection_driven, OUTPUT, timing_info, @@ -2021,7 +966,7 @@ void update_cluster_stats(const t_pack_molecule* molecule, /* Next Inputs */ for (auto pin_id : atom_ctx.nlist.block_input_pins(blk_id)) { auto net_id = atom_ctx.nlist.pin_net(pin_id); - mark_and_update_partial_gain(net_id, GAIN, blk_id, + mark_and_update_partial_gain(net_id, GAIN, blk_id, cluster_legalizer, timing_driven, connection_driven, INPUT, timing_info, @@ -2034,14 +979,14 @@ void update_cluster_stats(const t_pack_molecule* 
molecule, for (auto pin_id : atom_ctx.nlist.block_clock_pins(blk_id)) { auto net_id = atom_ctx.nlist.pin_net(pin_id); if (global_clocks) { - mark_and_update_partial_gain(net_id, NO_GAIN, blk_id, + mark_and_update_partial_gain(net_id, NO_GAIN, blk_id, cluster_legalizer, timing_driven, connection_driven, INPUT, timing_info, is_global, high_fanout_net_threshold, net_output_feeds_driving_block_input); } else { - mark_and_update_partial_gain(net_id, GAIN, blk_id, + mark_and_update_partial_gain(net_id, GAIN, blk_id, cluster_legalizer, timing_driven, connection_driven, INPUT, timing_info, is_global, @@ -2052,8 +997,6 @@ void update_cluster_stats(const t_pack_molecule* molecule, update_total_gain(alpha, beta, timing_driven, connection_driven, atom_pb->parent_pb, attraction_groups); - - commit_lookahead_pins_used(cb); } // if this molecule came from the transitive fanout candidates remove it @@ -2063,38 +1006,20 @@ void update_cluster_stats(const t_pack_molecule* molecule, } } -void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, - t_pb_graph_node** primitives_list, - ClusterBlockId clb_index, +void start_new_cluster(ClusterLegalizer& cluster_legalizer, + LegalizationClusterId& legalization_cluster_id, t_pack_molecule* molecule, std::map& num_used_type_instances, const float target_device_utilization, - const int num_models, - const int max_cluster_size, const t_arch* arch, const std::string& device_layout_name, - std::vector* lb_type_rr_graphs, - t_lb_router_data** router_data, - const int detailed_routing_stage, - ClusteredNetlist* clb_nlist, const std::map>& primitive_candidate_block_types, int verbosity, - bool enable_pin_feasibility_filter, - bool balance_block_type_utilization, - const int feasible_block_array_size, - PartitionRegion& temp_cluster_pr, - NocGroupId& temp_noc_grp_id) { - /* Given a starting seed block, start_new_cluster determines the next cluster type to use - * It expands the FPGA if it cannot find a legal cluster for the atom 
block - */ + bool balance_block_type_utilization) { - auto& atom_ctx = g_vpr_ctx.atom(); - auto& device_ctx = g_vpr_ctx.mutable_device(); - auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); - - /*Cluster's PartitionRegion is empty initially, meaning it has no floorplanning constraints*/ - PartitionRegion empty_pr; - floorplanning_ctx.cluster_constraints.push_back(empty_pr); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + DeviceContext& mutable_device_ctx = g_vpr_ctx.mutable_device(); + const DeviceContext& device_ctx = g_vpr_ctx.mutable_device(); /* Allocate a dummy initial cluster and load a atom block as a seed and check if it is legal */ AtomBlockId root_atom = molecule->atom_block_ids[molecule->root]; @@ -2136,57 +1061,24 @@ void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, //Try packing into each candidate type bool success = false; + t_logical_block_type_ptr block_type; + LegalizationClusterId new_cluster_id; for (auto type : candidate_types) { - t_pb* pb = new t_pb; - pb->pb_graph_node = type->pb_graph_head; - alloc_and_load_pb_stats(pb, feasible_block_array_size); - pb->parent_pb = nullptr; - - *router_data = alloc_and_load_router_data(&lb_type_rr_graphs[type->index], type); - //Try packing into each mode e_block_pack_status pack_result = e_block_pack_status::BLK_STATUS_UNDEFINED; for (int j = 0; j < type->pb_graph_head->pb_type->num_modes && !success; j++) { - pb->mode = j; - - reset_cluster_placement_stats(&cluster_placement_stats[type->index]); - set_mode_cluster_placement_stats(pb->pb_graph_node, j); - - //Note that since we are starting a new cluster, we use FULL_EXTERNAL_PIN_UTIL, - //which allows all cluster pins to be used. This ensures that if we have a large - //molecule which would otherwise exceed the external pin utilization targets it - //can use the full set of cluster pins when selected as the seed block -- ensuring - //it is still implementable. 
- pack_result = try_pack_molecule(&cluster_placement_stats[type->index], - molecule, primitives_list, pb, - num_models, max_cluster_size, clb_index, - detailed_routing_stage, *router_data, - verbosity, - enable_pin_feasibility_filter, - feasible_block_array_size, - FULL_EXTERNAL_PIN_UTIL, - temp_cluster_pr, - temp_noc_grp_id); - + std::tie(pack_result, new_cluster_id) = cluster_legalizer.start_new_cluster(molecule, type, j); success = (pack_result == e_block_pack_status::BLK_PASSED); } if (success) { VTR_LOGV(verbosity > 2, "\tPASSED_SEED: Block Type %s\n", type->name); - //Once clustering succeeds, add it to the clb netlist - if (pb->name != nullptr) { - free(pb->name); - } - pb->name = vtr::strdup(root_atom_name.c_str()); - clb_index = clb_nlist->create_block(root_atom_name.c_str(), pb, type); + // If clustering succeeds return the new_cluster_id and type. + legalization_cluster_id = new_cluster_id; + block_type = type; break; } else { VTR_LOGV(verbosity > 2, "\tFAILED_SEED: Block Type %s\n", type->name); - //Free failed clustering and try again - free_router_data(*router_data); - free_pb(pb); - delete pb; - *router_data = nullptr; } } @@ -2209,7 +1101,6 @@ void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, VTR_ASSERT(success); //Successfully create cluster - auto block_type = clb_nlist->block_type(clb_index); num_used_type_instances[block_type]++; /* Expand FPGA size if needed */ @@ -2220,24 +1111,18 @@ void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, } if (num_used_type_instances[block_type] > num_instances) { - device_ctx.grid = create_device_grid(device_layout_name, arch->grid_layouts, num_used_type_instances, target_device_utilization); + mutable_device_ctx.grid = create_device_grid(device_layout_name, arch->grid_layouts, num_used_type_instances, target_device_utilization); } } -/* - * Get candidate molecule to pack into currently open cluster - * Molecule selection priority: - * 1. 
Find unpacked molecules based on criticality and strong connectedness (connected by low fanout nets) with current cluster - * 2. Find unpacked molecules based on transitive connections (eg. 2 hops away) with current cluster - * 3. Find unpacked molecules based on weak connectedness (connected by high fanout nets) with current cluster - * 4. Find unpacked molecules based on attraction group of the current cluster (if the cluster has an attraction group) - */ t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, AttractionInfo& attraction_groups, const enum e_gain_type gain_mode, t_cluster_placement_stats* cluster_placement_stats_ptr, - vtr::vector>& clb_inter_blk_nets, - const ClusterBlockId cluster_index, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets, + const LegalizationClusterId legalization_cluster_id, bool prioritize_transitive_connectivity, int transitive_fanout_threshold, const int feasible_block_array_size, @@ -2254,37 +1139,72 @@ t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, // 1. Find unpacked molecules based on criticality and strong connectedness (connected by low fanout nets) with current cluster if (cur_pb->pb_stats->num_feasible_blocks == NOT_VALID) { - add_cluster_molecule_candidates_by_connectivity_and_timing(cur_pb, cluster_placement_stats_ptr, feasible_block_array_size, attraction_groups); + add_cluster_molecule_candidates_by_connectivity_and_timing(cur_pb, + cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, + feasible_block_array_size, + attraction_groups); } if (prioritize_transitive_connectivity) { // 2. Find unpacked molecules based on transitive connections (eg. 
2 hops away) with current cluster if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->explore_transitive_fanout) { - add_cluster_molecule_candidates_by_transitive_connectivity(cur_pb, cluster_placement_stats_ptr, clb_inter_blk_nets, - cluster_index, transitive_fanout_threshold, feasible_block_array_size, attraction_groups); + add_cluster_molecule_candidates_by_transitive_connectivity(cur_pb, + cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, + clb_inter_blk_nets, + legalization_cluster_id, + transitive_fanout_threshold, + feasible_block_array_size, + attraction_groups); } // 3. Find unpacked molecules based on weak connectedness (connected by high fanout nets) with current cluster if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->tie_break_high_fanout_net) { - add_cluster_molecule_candidates_by_highfanout_connectivity(cur_pb, cluster_placement_stats_ptr, feasible_block_array_size, attraction_groups); + add_cluster_molecule_candidates_by_highfanout_connectivity(cur_pb, + cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, + feasible_block_array_size, + attraction_groups); } } else { //Reverse order // 3. Find unpacked molecules based on weak connectedness (connected by high fanout nets) with current cluster if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->tie_break_high_fanout_net) { - add_cluster_molecule_candidates_by_highfanout_connectivity(cur_pb, cluster_placement_stats_ptr, feasible_block_array_size, attraction_groups); + add_cluster_molecule_candidates_by_highfanout_connectivity(cur_pb, + cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, + feasible_block_array_size, + attraction_groups); } // 2. Find unpacked molecules based on transitive connections (eg. 
2 hops away) with current cluster if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->explore_transitive_fanout) { - add_cluster_molecule_candidates_by_transitive_connectivity(cur_pb, cluster_placement_stats_ptr, clb_inter_blk_nets, - cluster_index, transitive_fanout_threshold, feasible_block_array_size, attraction_groups); + add_cluster_molecule_candidates_by_transitive_connectivity(cur_pb, + cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, + clb_inter_blk_nets, + legalization_cluster_id, + transitive_fanout_threshold, + feasible_block_array_size, + attraction_groups); } } // 4. Find unpacked molecules based on attraction group of the current cluster (if the cluster has an attraction group) if (cur_pb->pb_stats->num_feasible_blocks == 0) { - add_cluster_molecule_candidates_by_attraction_group(cur_pb, cluster_placement_stats_ptr, attraction_groups, - feasible_block_array_size, cluster_index, primitive_candidate_block_types); + add_cluster_molecule_candidates_by_attraction_group(cur_pb, + cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, + attraction_groups, + feasible_block_array_size, + legalization_cluster_id, + primitive_candidate_block_types); } /* Grab highest gain molecule */ t_pack_molecule* molecule = nullptr; @@ -2299,9 +1219,10 @@ t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, return molecule; } -/* Add molecules with strong connectedness to the current cluster to the list of feasible blocks. 
*/ void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, const int feasible_block_array_size, AttractionInfo& attraction_groups) { VTR_ASSERT(cur_pb->pb_stats->num_feasible_blocks == NOT_VALID); @@ -2309,13 +1230,11 @@ void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur_pb, cur_pb->pb_stats->num_feasible_blocks = 0; cur_pb->pb_stats->explore_transitive_fanout = true; /* If no legal molecules found, enable exploration of molecules two hops away */ - auto& atom_ctx = g_vpr_ctx.atom(); - for (AtomBlockId blk_id : cur_pb->pb_stats->marked_blocks) { - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { - t_pack_molecule* molecule = atom_ctx.prepacker.get_atom_molecule(blk_id); + if (!cluster_legalizer.is_atom_clustered(blk_id)) { + t_pack_molecule* molecule = prepacker.get_atom_molecule(blk_id); if (molecule->valid) { - bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); + bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr, cluster_legalizer); if (success) { add_molecule_to_pb_stats_candidates(molecule, cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); @@ -2325,32 +1244,33 @@ void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur_pb, } } -/* Add molecules based on weak connectedness (connected by high fanout nets) with current cluster */ void add_cluster_molecule_candidates_by_highfanout_connectivity(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, const int feasible_block_array_size, AttractionInfo& attraction_groups) { /* Because the packer ignores high fanout nets when marking what blocks * to consider, use one of the ignored high fanout net to fill 
up lightly * related blocks */ + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; + reset_tried_but_unused_cluster_placements(cluster_placement_stats_ptr); AtomNetId net_id = cur_pb->pb_stats->tie_break_high_fanout_net; - auto& atom_ctx = g_vpr_ctx.atom(); - int count = 0; - for (auto pin_id : atom_ctx.nlist.net_pins(net_id)) { + for (auto pin_id : atom_nlist.net_pins(net_id)) { if (count >= AAPACK_MAX_HIGH_FANOUT_EXPLORE) { break; } - AtomBlockId blk_id = atom_ctx.nlist.pin_block(pin_id); + AtomBlockId blk_id = atom_nlist.pin_block(pin_id); - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { - t_pack_molecule* molecule = atom_ctx.prepacker.get_atom_molecule(blk_id); + if (!cluster_legalizer.is_atom_clustered(blk_id)) { + t_pack_molecule* molecule = prepacker.get_atom_molecule(blk_id); if (molecule->valid) { - bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); + bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr, cluster_legalizer); if (success) { add_molecule_to_pb_stats_candidates(molecule, cur_pb->pb_stats->gain, cur_pb, std::min(feasible_block_array_size, AAPACK_MAX_HIGH_FANOUT_EXPLORE), attraction_groups); @@ -2362,24 +1282,17 @@ void add_cluster_molecule_candidates_by_highfanout_connectivity(t_pb* cur_pb, cur_pb->pb_stats->tie_break_high_fanout_net = AtomNetId::INVALID(); /* Mark off that this high fanout net has been considered */ } -/* - * If the current cluster being packed has an attraction group associated with it - * (i.e. there are atoms in it that belong to an attraction group), this routine adds molecules - * from the associated attraction group to the list of feasible blocks for the cluster. - * Attraction groups can be very large, so we only add some randomly selected molecules for efficiency - * if the number of atoms in the group is greater than 500. 
Therefore, the molecules added to the candidates - * will vary each time you call this function. - */ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, AttractionInfo& attraction_groups, const int feasible_block_array_size, - ClusterBlockId clb_index, + LegalizationClusterId legalization_cluster_id, std::map>& primitive_candidate_block_types) { - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; - auto cluster_type = cluster_ctx.clb_nlist.block_type(clb_index); + auto cluster_type = cluster_legalizer.get_cluster_type(legalization_cluster_id); /* * For each cluster, we want to explore the attraction group molecules as potential @@ -2405,13 +1318,13 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, AttractionGroup& group = attraction_groups.get_attraction_group_info(grp_id); std::vector available_atoms; for (AtomBlockId atom_id : group.group_atoms) { - const auto& atom_model = atom_ctx.nlist.block_model(atom_id); + const auto& atom_model = atom_nlist.block_model(atom_id); auto itr = primitive_candidate_block_types.find(atom_model); VTR_ASSERT(itr != primitive_candidate_block_types.end()); std::vector& candidate_types = itr->second; //Only consider molecules that are unpacked and of the correct type - if (atom_ctx.lookup.atom_clb(atom_id) == ClusterBlockId::INVALID() + if (!cluster_legalizer.is_atom_clustered(atom_id) && std::find(candidate_types.begin(), candidate_types.end(), cluster_type) != candidate_types.end()) { available_atoms.push_back(atom_id); } @@ -2426,17 +1339,17 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, if (num_available_atoms < 500) { //for (AtomBlockId atom_id : group.group_atoms) { for (AtomBlockId atom_id : available_atoms) { - const auto& atom_model = 
atom_ctx.nlist.block_model(atom_id); + const auto& atom_model = atom_nlist.block_model(atom_id); auto itr = primitive_candidate_block_types.find(atom_model); VTR_ASSERT(itr != primitive_candidate_block_types.end()); std::vector& candidate_types = itr->second; //Only consider molecules that are unpacked and of the correct type - if (atom_ctx.lookup.atom_clb(atom_id) == ClusterBlockId::INVALID() + if (!cluster_legalizer.is_atom_clustered(atom_id) && std::find(candidate_types.begin(), candidate_types.end(), cluster_type) != candidate_types.end()) { - t_pack_molecule* molecule = atom_ctx.prepacker.get_atom_molecule(atom_id); + t_pack_molecule* molecule = prepacker.get_atom_molecule(atom_id); if (molecule->valid) { - bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); + bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr, cluster_legalizer); if (success) { add_molecule_to_pb_stats_candidates(molecule, cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); @@ -2458,17 +1371,19 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, //AtomBlockId blk_id = group.group_atoms[selected_atom]; AtomBlockId blk_id = available_atoms[selected_atom]; - const auto& atom_model = atom_ctx.nlist.block_model(blk_id); + const auto& atom_model = atom_nlist.block_model(blk_id); auto itr = primitive_candidate_block_types.find(atom_model); VTR_ASSERT(itr != primitive_candidate_block_types.end()); std::vector& candidate_types = itr->second; //Only consider molecules that are unpacked and of the correct type - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID() + if (!cluster_legalizer.is_atom_clustered(blk_id) && std::find(candidate_types.begin(), candidate_types.end(), cluster_type) != candidate_types.end()) { - t_pack_molecule* molecule = atom_ctx.prepacker.get_atom_molecule(blk_id); + t_pack_molecule* molecule = prepacker.get_atom_molecule(blk_id); 
if (molecule->valid) { - bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); + bool success = check_free_primitives_for_molecule_atoms(molecule, + cluster_placement_stats_ptr, + cluster_legalizer); if (success) { add_molecule_to_pb_stats_candidates(molecule, cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); @@ -2478,11 +1393,12 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, } } -/* Add molecules based on transitive connections (eg. 2 hops away) with current cluster*/ void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, - vtr::vector>& clb_inter_blk_nets, - const ClusterBlockId cluster_index, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets, + const LegalizationClusterId legalization_cluster_id, int transitive_fanout_threshold, const int feasible_block_array_size, AttractionInfo& attraction_groups) { @@ -2490,15 +1406,19 @@ void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur_pb, cur_pb->pb_stats->explore_transitive_fanout = false; /* First time finding transitive fanout candidates therefore alloc and load them */ - load_transitive_fanout_candidates(cluster_index, + load_transitive_fanout_candidates(legalization_cluster_id, cur_pb->pb_stats, + prepacker, + cluster_legalizer, clb_inter_blk_nets, transitive_fanout_threshold); /* Only consider candidates that pass a very simple legality check */ for (const auto& transitive_candidate : cur_pb->pb_stats->transitive_fanout_candidates) { t_pack_molecule* molecule = transitive_candidate.second; if (molecule->valid) { - bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); + bool success = check_free_primitives_for_molecule_atoms(molecule, + cluster_placement_stats_ptr, + cluster_legalizer); if (success) { 
add_molecule_to_pb_stats_candidates(molecule, cur_pb->pb_stats->gain, cur_pb, std::min(feasible_block_array_size, AAPACK_MAX_TRANSITIVE_EXPLORE), attraction_groups); @@ -2507,14 +1427,14 @@ void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur_pb, } } -/*Check whether a free primitive exists for each atom block in the molecule*/ -bool check_free_primitives_for_molecule_atoms(t_pack_molecule* molecule, t_cluster_placement_stats* cluster_placement_stats_ptr) { - auto& atom_ctx = g_vpr_ctx.atom(); +bool check_free_primitives_for_molecule_atoms(t_pack_molecule* molecule, + t_cluster_placement_stats* cluster_placement_stats_ptr, + const ClusterLegalizer& cluster_legalizer) { bool success = true; for (int i_atom = 0; i_atom < get_array_size_of_molecule(molecule); i_atom++) { if (molecule->atom_block_ids[i_atom]) { - VTR_ASSERT(atom_ctx.lookup.atom_clb(molecule->atom_block_ids[i_atom]) == ClusterBlockId::INVALID()); + VTR_ASSERT(!cluster_legalizer.is_atom_clustered(molecule->atom_block_ids[i_atom])); auto blk_id2 = molecule->atom_block_ids[i_atom]; if (!exists_free_primitive_for_atom_block(cluster_placement_stats_ptr, blk_id2)) { /* TODO (Jason Luu): debating whether to check if placement exists for molecule @@ -2537,15 +1457,17 @@ t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, const int feasible_block_array_size, int* num_unrelated_clustering_attempts, t_cluster_placement_stats* cluster_placement_stats_ptr, - vtr::vector>& clb_inter_blk_nets, - ClusterBlockId cluster_index, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets, + LegalizationClusterId legalization_cluster_id, int verbosity, t_molecule_link* unclustered_list_head, const int& unclustered_list_head_size, std::map>& primitive_candidate_block_types) { /* Finds the block with the greatest gain that satisfies the * input, clock and capacity constraints of a cluster that are - * passed in. 
If no suitable block is found it returns ClusterBlockId::INVALID(). + * passed in. If no suitable block is found it returns nullptr. */ VTR_ASSERT(cur_pb->is_root()); @@ -2553,8 +1475,9 @@ t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, /* If cannot pack into primitive, try packing into cluster */ auto best_molecule = get_highest_gain_molecule(cur_pb, attraction_groups, - NOT_HILL_CLIMBING, cluster_placement_stats_ptr, clb_inter_blk_nets, - cluster_index, prioritize_transitive_connectivity, + NOT_HILL_CLIMBING, cluster_placement_stats_ptr, + prepacker, cluster_legalizer, clb_inter_blk_nets, + legalization_cluster_id, prioritize_transitive_connectivity, transitive_fanout_threshold, feasible_block_array_size, primitive_candidate_block_types); /* If no blocks have any gain to the current cluster, the code above * @@ -2581,7 +1504,6 @@ t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, return best_molecule; } -//Calculates molecule statistics for a single molecule t_molecule_stats calc_molecule_stats(const t_pack_molecule* molecule, const AtomNetlist& atom_nlist) { t_molecule_stats molecule_stats; @@ -2652,14 +1574,15 @@ t_molecule_stats calc_molecule_stats(const t_pack_molecule* molecule, const Atom std::vector initialize_seed_atoms(const e_cluster_seed seed_type, const t_molecule_stats& max_molecule_stats, + const Prepacker& prepacker, const vtr::vector& atom_criticality) { - const AtomContext& atom_ctx = g_vpr_ctx.atom(); + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; //Put all atoms in seed list - std::vector seed_atoms(atom_ctx.nlist.blocks().begin(), atom_ctx.nlist.blocks().end()); + std::vector seed_atoms(atom_nlist.blocks().begin(), atom_nlist.blocks().end()); //Initially all gains are zero - vtr::vector atom_gains(atom_ctx.nlist.blocks().size(), 0.); + vtr::vector atom_gains(atom_nlist.blocks().size(), 0.); if (seed_type == e_cluster_seed::TIMING) { VTR_ASSERT(atom_gains.size() == atom_criticality.size()); @@ -2669,21 +1592,21 @@ 
std::vector initialize_seed_atoms(const e_cluster_seed seed_type, } else if (seed_type == e_cluster_seed::MAX_INPUTS) { //By number of used molecule input pins - for (auto blk : atom_ctx.nlist.blocks()) { - const t_pack_molecule* blk_mol = atom_ctx.prepacker.get_atom_molecule(blk); - const t_molecule_stats molecule_stats = calc_molecule_stats(blk_mol, atom_ctx.nlist); + for (auto blk : atom_nlist.blocks()) { + const t_pack_molecule* blk_mol = prepacker.get_atom_molecule(blk); + const t_molecule_stats molecule_stats = calc_molecule_stats(blk_mol, atom_nlist); atom_gains[blk] = molecule_stats.num_used_ext_inputs; } } else if (seed_type == e_cluster_seed::BLEND) { //By blended gain (criticality and inputs used) - for (auto blk : atom_ctx.nlist.blocks()) { + for (auto blk : atom_nlist.blocks()) { /* Score seed gain of each block as a weighted sum of timing criticality, * number of tightly coupled blocks connected to it, and number of external inputs */ float seed_blend_fac = 0.5; - const t_pack_molecule* blk_mol = atom_ctx.prepacker.get_atom_molecule(blk); - const t_molecule_stats molecule_stats = calc_molecule_stats(blk_mol, atom_ctx.nlist); + const t_pack_molecule* blk_mol = prepacker.get_atom_molecule(blk); + const t_molecule_stats molecule_stats = calc_molecule_stats(blk_mol, atom_nlist); VTR_ASSERT(max_molecule_stats.num_used_ext_inputs > 0); float blend_gain = (seed_blend_fac * atom_criticality[blk] @@ -2695,9 +1618,9 @@ std::vector initialize_seed_atoms(const e_cluster_seed seed_type, } else if (seed_type == e_cluster_seed::MAX_PINS || seed_type == e_cluster_seed::MAX_INPUT_PINS) { //By pins per molecule (i.e. 
available pins on primitives, not pins in use) - for (auto blk : atom_ctx.nlist.blocks()) { - const t_pack_molecule* mol = atom_ctx.prepacker.get_atom_molecule(blk); - const t_molecule_stats molecule_stats = calc_molecule_stats(mol, atom_ctx.nlist); + for (auto blk : atom_nlist.blocks()) { + const t_pack_molecule* mol = prepacker.get_atom_molecule(blk); + const t_molecule_stats molecule_stats = calc_molecule_stats(mol, atom_nlist); int molecule_pins = 0; if (seed_type == e_cluster_seed::MAX_PINS) { @@ -2713,9 +1636,9 @@ std::vector initialize_seed_atoms(const e_cluster_seed seed_type, } } else if (seed_type == e_cluster_seed::BLEND2) { - for (auto blk : atom_ctx.nlist.blocks()) { - const t_pack_molecule* mol = atom_ctx.prepacker.get_atom_molecule(blk); - const t_molecule_stats molecule_stats = calc_molecule_stats(mol, atom_ctx.nlist); + for (auto blk : atom_nlist.blocks()) { + const t_pack_molecule* mol = prepacker.get_atom_molecule(blk); + const t_molecule_stats molecule_stats = calc_molecule_stats(mol, atom_nlist); float pin_ratio = vtr::safe_ratio(molecule_stats.num_pins, max_molecule_stats.num_pins); float input_pin_ratio = vtr::safe_ratio(molecule_stats.num_input_pins, max_molecule_stats.num_input_pins); @@ -2773,17 +1696,18 @@ std::vector initialize_seed_atoms(const e_cluster_seed seed_type, return seed_atoms; } -t_pack_molecule* get_highest_gain_seed_molecule(int& seed_index, const std::vector& seed_atoms) { - auto& atom_ctx = g_vpr_ctx.atom(); - +t_pack_molecule* get_highest_gain_seed_molecule(int& seed_index, + const std::vector& seed_atoms, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer) { while (seed_index < static_cast(seed_atoms.size())) { AtomBlockId blk_id = seed_atoms[seed_index++]; // Check if the atom has already been assigned to a cluster - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + if (!cluster_legalizer.is_atom_clustered(blk_id)) { t_pack_molecule* best = nullptr; - t_pack_molecule* 
molecule = atom_ctx.prepacker.get_atom_molecule(blk_id); + t_pack_molecule* molecule = prepacker.get_atom_molecule(blk_id); if (molecule->valid) { if (best == nullptr || (best->base_gain) < (molecule->base_gain)) { best = molecule; @@ -2798,17 +1722,11 @@ t_pack_molecule* get_highest_gain_seed_molecule(int& seed_index, const std::vect return nullptr; } -/* get gain of packing molecule into current cluster - * gain is equal to: - * total_block_gain - * + molecule_base_gain*some_factor - * - introduced_input_nets_of_unrelated_blocks_pulled_in_by_molecule*some_other_factor - */ float get_molecule_gain(t_pack_molecule* molecule, std::map& blk_gain, AttractGroupId cluster_attraction_group_id, AttractionInfo& attraction_groups, int num_molecule_failures) { float gain; int i; int num_introduced_inputs_of_indirectly_related_block; - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); gain = 0; float attraction_group_penalty = 0.1; @@ -2861,388 +1779,32 @@ float get_molecule_gain(t_pack_molecule* molecule, std::map& return gain; } -/* Determine if speculatively packed cur_pb is pin feasible - * Runtime is actually not that bad for this. It's worst case O(k^2) where k is the - * number of pb_graph pins. Can use hash tables or make incremental if becomes an issue. 
- */ -void try_update_lookahead_pins_used(t_pb* cur_pb) { - int i, j; - const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; - - // run recursively till a leaf (primitive) pb block is reached - if (pb_type->num_modes > 0 && cur_pb->name != nullptr) { - if (cur_pb->child_pbs != nullptr) { - for (i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { - if (cur_pb->child_pbs[i] != nullptr) { - for (j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { - try_update_lookahead_pins_used(&cur_pb->child_pbs[i][j]); - } - } - } - } - } else { - // find if this child (primitive) pb block has an atom mapped to it, - // if yes compute and mark lookahead pins used for that pb block - auto& atom_ctx = g_vpr_ctx.atom(); - AtomBlockId blk_id = atom_ctx.lookup.pb_atom(cur_pb); - if (pb_type->blif_model != nullptr && blk_id) { - compute_and_mark_lookahead_pins_used(blk_id); - } - } -} - -/* Resets nets used at different pin classes for determining pin feasibility */ -void reset_lookahead_pins_used(t_pb* cur_pb) { - int i, j; - const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; - if (cur_pb->pb_stats == nullptr) { - return; /* No pins used, no need to continue */ - } - - if (pb_type->num_modes > 0 && cur_pb->name != nullptr) { - for (i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { - cur_pb->pb_stats->lookahead_input_pins_used[i].clear(); - } - - for (i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { - cur_pb->pb_stats->lookahead_output_pins_used[i].clear(); - } - - if (cur_pb->child_pbs != nullptr) { - for (i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { - if (cur_pb->child_pbs[i] != nullptr) { - for (j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { - reset_lookahead_pins_used(&cur_pb->child_pbs[i][j]); - } - } - } - } - } -} - -/* Determine if pins of speculatively packed pb are legal */ -void compute_and_mark_lookahead_pins_used(const AtomBlockId blk_id) { - 
auto& atom_ctx = g_vpr_ctx.atom(); - - const t_pb* cur_pb = atom_ctx.lookup.atom_pb(blk_id); - VTR_ASSERT(cur_pb != nullptr); - - /* Walk through inputs, outputs, and clocks marking pins off of the same class */ - for (auto pin_id : atom_ctx.nlist.block_pins(blk_id)) { - auto net_id = atom_ctx.nlist.pin_net(pin_id); - - const t_pb_graph_pin* pb_graph_pin = find_pb_graph_pin(atom_ctx.nlist, atom_ctx.lookup, pin_id); - compute_and_mark_lookahead_pins_used_for_pin(pb_graph_pin, cur_pb, net_id); - } -} - -/** - * Given a pin and its assigned net, mark all pin classes that are affected. - * Check if connecting this pin to it's driver pin or to all sink pins will - * require leaving a pb_block starting from the parent pb_block of the - * primitive till the root block (depth = 0). If leaving a pb_block is - * required add this net to the pin class (to increment the number of used - * pins from this class) that should be used to leave the pb_block. - */ -void compute_and_mark_lookahead_pins_used_for_pin(const t_pb_graph_pin* pb_graph_pin, const t_pb* primitive_pb, const AtomNetId net_id) { - auto& atom_ctx = g_vpr_ctx.atom(); - - // starting from the parent pb of the input primitive go up in the hierarchy till the root block - for (auto cur_pb = primitive_pb->parent_pb; cur_pb; cur_pb = cur_pb->parent_pb) { - const auto depth = cur_pb->pb_graph_node->pb_type->depth; - const auto pin_class = pb_graph_pin->parent_pin_class[depth]; - VTR_ASSERT(pin_class != OPEN); - - const auto driver_blk_id = atom_ctx.nlist.net_driver_block(net_id); - - // if this primitive pin is an input pin - if (pb_graph_pin->port->type == IN_PORT) { - /* find location of net driver if exist in clb, NULL otherwise */ - // find the driver of the input net connected to the pin being studied - const auto driver_pin_id = atom_ctx.nlist.net_driver(net_id); - // find the id of the atom occupying the input primitive_pb - const auto prim_blk_id = atom_ctx.lookup.pb_atom(primitive_pb); - // find the pb block 
occupied by the driving atom - const auto driver_pb = atom_ctx.lookup.atom_pb(driver_blk_id); - // pb_graph_pin driving net_id in the driver pb block - t_pb_graph_pin* output_pb_graph_pin = nullptr; - // if the driver block is in the same clb as the input primitive block - if (atom_ctx.lookup.atom_clb(driver_blk_id) == atom_ctx.lookup.atom_clb(prim_blk_id)) { - // get pb_graph_pin driving the given net - output_pb_graph_pin = get_driver_pb_graph_pin(driver_pb, driver_pin_id); - } - - bool is_reachable = false; - - // if the driver pin is within the cluster - if (output_pb_graph_pin) { - // find if the driver pin can reach the input pin of the primitive or not - const t_pb* check_pb = driver_pb; - while (check_pb && check_pb != cur_pb) { - check_pb = check_pb->parent_pb; - } - if (check_pb) { - for (int i = 0; i < output_pb_graph_pin->num_connectable_primitive_input_pins[depth]; i++) { - if (pb_graph_pin == output_pb_graph_pin->list_of_connectable_input_pin_ptrs[depth][i]) { - is_reachable = true; - break; - } - } - } - } - - // Must use an input pin to connect the driver to the input pin of the given primitive, either the - // driver atom is not contained in the cluster or is contained but cannot reach the primitive pin - if (!is_reachable) { - // add net to lookahead_input_pins_used if not already added - auto it = std::find(cur_pb->pb_stats->lookahead_input_pins_used[pin_class].begin(), - cur_pb->pb_stats->lookahead_input_pins_used[pin_class].end(), net_id); - if (it == cur_pb->pb_stats->lookahead_input_pins_used[pin_class].end()) { - cur_pb->pb_stats->lookahead_input_pins_used[pin_class].push_back(net_id); - } - } - } else { - VTR_ASSERT(pb_graph_pin->port->type == OUT_PORT); - /* - * Determine if this net (which is driven from within this cluster) leaves this cluster - * (and hence uses an output pin). 
- */ - - bool net_exits_cluster = true; - int num_net_sinks = static_cast(atom_ctx.nlist.net_sinks(net_id).size()); - - if (pb_graph_pin->num_connectable_primitive_input_pins[depth] >= num_net_sinks) { - //It is possible the net is completely absorbed in the cluster, - //since this pin could (potentially) drive all the net's sinks - - /* Important: This runtime penalty looks a lot scarier than it really is. - * For high fan-out nets, I at most look at the number of pins within the - * cluster which limits runtime. - * - * DO NOT REMOVE THIS INITIAL FILTER WITHOUT CAREFUL ANALYSIS ON RUNTIME!!! - * - * Key Observation: - * For LUT-based designs it is impossible for the average fanout to exceed - * the number of LUT inputs so it's usually around 4-5 (pigeon-hole argument, - * if the average fanout is greater than the number of LUT inputs, where do - * the extra connections go? Therefore, average fanout must be capped to a - * small constant where the constant is equal to the number of LUT inputs). - * The real danger to runtime is when the number of sinks of a net gets doubled - */ - - //Check if all the net sinks are, in fact, inside this cluster - bool all_sinks_in_cur_cluster = true; - ClusterBlockId driver_clb = atom_ctx.lookup.atom_clb(driver_blk_id); - for (auto pin_id : atom_ctx.nlist.net_sinks(net_id)) { - auto sink_blk_id = atom_ctx.nlist.pin_block(pin_id); - if (atom_ctx.lookup.atom_clb(sink_blk_id) != driver_clb) { - all_sinks_in_cur_cluster = false; - break; - } - } - - if (all_sinks_in_cur_cluster) { - //All the sinks are part of this cluster, so the net may be fully absorbed. - // - //Verify this, by counting the number of net sinks reachable from the driver pin. 
- //If the count equals the number of net sinks then the net is fully absorbed and - //the net does not exit the cluster - /* TODO: I should cache the absorbed outputs, once net is absorbed, - * net is forever absorbed, no point in rechecking every time */ - if (net_sinks_reachable_in_cluster(pb_graph_pin, depth, net_id)) { - //All the sinks are reachable inside the cluster - net_exits_cluster = false; - } - } - } - - if (net_exits_cluster) { - /* This output must exit this cluster */ - cur_pb->pb_stats->lookahead_output_pins_used[pin_class].push_back(net_id); - } - } - } -} - -int net_sinks_reachable_in_cluster(const t_pb_graph_pin* driver_pb_gpin, const int depth, const AtomNetId net_id) { - size_t num_reachable_sinks = 0; - auto& atom_ctx = g_vpr_ctx.atom(); - - //Record the sink pb graph pins we are looking for - std::unordered_set sink_pb_gpins; - for (const AtomPinId pin_id : atom_ctx.nlist.net_sinks(net_id)) { - const t_pb_graph_pin* sink_pb_gpin = find_pb_graph_pin(atom_ctx.nlist, atom_ctx.lookup, pin_id); - VTR_ASSERT(sink_pb_gpin); - - sink_pb_gpins.insert(sink_pb_gpin); - } - - //Count how many sink pins are reachable - for (int i_prim_pin = 0; i_prim_pin < driver_pb_gpin->num_connectable_primitive_input_pins[depth]; ++i_prim_pin) { - const t_pb_graph_pin* reachable_pb_gpin = driver_pb_gpin->list_of_connectable_input_pin_ptrs[depth][i_prim_pin]; - - if (sink_pb_gpins.count(reachable_pb_gpin)) { - ++num_reachable_sinks; - if (num_reachable_sinks == atom_ctx.nlist.net_sinks(net_id).size()) { - return true; - } - } - } - - return false; -} - -/** - * Returns the pb_graph_pin of the atom pin defined by the driver_pin_id in the driver_pb - */ -t_pb_graph_pin* get_driver_pb_graph_pin(const t_pb* driver_pb, const AtomPinId driver_pin_id) { - auto& atom_ctx = g_vpr_ctx.atom(); - const auto driver_pb_type = driver_pb->pb_graph_node->pb_type; - int output_port = 0; - // find the port of the pin driving the net as well as the port model - auto driver_port_id = 
atom_ctx.nlist.pin_port(driver_pin_id); - auto driver_model_port = atom_ctx.nlist.port_model(driver_port_id); - // find the port id of the port containing the driving pin in the driver_pb_type - for (int i = 0; i < driver_pb_type->num_ports; i++) { - auto& prim_port = driver_pb_type->ports[i]; - if (prim_port.type == OUT_PORT) { - if (prim_port.model_port == driver_model_port) { - // get the output pb_graph_pin driving this input net - return &(driver_pb->pb_graph_node->output_pins[output_port][atom_ctx.nlist.pin_port_bit(driver_pin_id)]); - } - output_port++; - } - } - // the pin should be found - VTR_ASSERT(false); - return nullptr; -} - -/* Check if the number of available inputs/outputs for a pin class is sufficient for speculatively packed blocks */ -bool check_lookahead_pins_used(t_pb* cur_pb, t_ext_pin_util max_external_pin_util) { - const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; - - if (pb_type->num_modes > 0 && cur_pb->name) { - for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { - size_t class_size = cur_pb->pb_graph_node->input_pin_class_size[i]; - - if (cur_pb->is_root()) { - // Scale the class size by the maximum external pin utilization factor - // Use ceil to avoid classes of size 1 from being scaled to zero - class_size = std::ceil(max_external_pin_util.input_pin_util * class_size); - // if the number of pins already used is larger than class size, then the number of - // cluster inputs already used should be our constraint. Why is this needed? This is - // needed since when packing the seed block the maximum external pin utilization is - // used as 1.0 allowing molecules that are using up to all the cluster inputs to be - // packed legally. Therefore, if the seed block is already using more inputs than - // the allowed maximum utilization, this should become the new maximum pin utilization. 
- class_size = std::max(class_size, cur_pb->pb_stats->input_pins_used[i].size()); - } - - if (cur_pb->pb_stats->lookahead_input_pins_used[i].size() > class_size) { - return false; - } - } - - for (int i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { - size_t class_size = cur_pb->pb_graph_node->output_pin_class_size[i]; - if (cur_pb->is_root()) { - // Scale the class size by the maximum external pin utilization factor - // Use ceil to avoid classes of size 1 from being scaled to zero - class_size = std::ceil(max_external_pin_util.output_pin_util * class_size); - // if the number of pins already used is larger than class size, then the number of - // cluster outputs already used should be our constraint. Why is this needed? This is - // needed since when packing the seed block the maximum external pin utilization is - // used as 1.0 allowing molecules that are using up to all the cluster inputs to be - // packed legally. Therefore, if the seed block is already using more inputs than - // the allowed maximum utilization, this should become the new maximum pin utilization. 
- class_size = std::max(class_size, cur_pb->pb_stats->output_pins_used[i].size()); - } - - if (cur_pb->pb_stats->lookahead_output_pins_used[i].size() > class_size) { - return false; - } - } - - if (cur_pb->child_pbs) { - for (int i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { - if (cur_pb->child_pbs[i]) { - for (int j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { - if (!check_lookahead_pins_used(&cur_pb->child_pbs[i][j], max_external_pin_util)) - return false; - } - } - } - } - } - - return true; -} - -/* Speculation successful, commit input/output pins used */ -void commit_lookahead_pins_used(t_pb* cur_pb) { - const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; - - if (pb_type->num_modes > 0 && cur_pb->name) { - for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { - VTR_ASSERT(cur_pb->pb_stats->lookahead_input_pins_used[i].size() <= (unsigned int)cur_pb->pb_graph_node->input_pin_class_size[i]); - for (size_t j = 0; j < cur_pb->pb_stats->lookahead_input_pins_used[i].size(); j++) { - VTR_ASSERT(cur_pb->pb_stats->lookahead_input_pins_used[i][j]); - cur_pb->pb_stats->input_pins_used[i].insert({j, cur_pb->pb_stats->lookahead_input_pins_used[i][j]}); - } - } - - for (int i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { - VTR_ASSERT(cur_pb->pb_stats->lookahead_output_pins_used[i].size() <= (unsigned int)cur_pb->pb_graph_node->output_pin_class_size[i]); - for (size_t j = 0; j < cur_pb->pb_stats->lookahead_output_pins_used[i].size(); j++) { - VTR_ASSERT(cur_pb->pb_stats->lookahead_output_pins_used[i][j]); - cur_pb->pb_stats->output_pins_used[i].insert({j, cur_pb->pb_stats->lookahead_output_pins_used[i][j]}); - } - } - - if (cur_pb->child_pbs) { - for (int i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { - if (cur_pb->child_pbs[i]) { - for (int j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { - 
commit_lookahead_pins_used(&cur_pb->child_pbs[i][j]); - } - } - } - } - } -} - -/** - * Score unclustered atoms that are two hops away from current cluster - * For example, consider a cluster that has a FF feeding an adder in another - * cluster. Since this FF is feeding an adder that is packed in another cluster - * this function should find other FFs that are feeding other inputs of this adder - * since they are two hops away from the FF packed in this cluster - */ -void load_transitive_fanout_candidates(ClusterBlockId clb_index, +void load_transitive_fanout_candidates(LegalizationClusterId legalization_cluster_id, t_pb_stats* pb_stats, - vtr::vector>& clb_inter_blk_nets, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets, int transitive_fanout_threshold) { - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; // iterate over all the nets that have pins in this cluster for (const auto net_id : pb_stats->marked_nets) { // only consider small nets to constrain runtime - if (int(atom_ctx.nlist.net_pins(net_id).size()) < transitive_fanout_threshold + 1) { + if (int(atom_nlist.net_pins(net_id).size()) < transitive_fanout_threshold + 1) { // iterate over all the pins of the net - for (const auto pin_id : atom_ctx.nlist.net_pins(net_id)) { - AtomBlockId atom_blk_id = atom_ctx.nlist.pin_block(pin_id); + for (const auto pin_id : atom_nlist.net_pins(net_id)) { + AtomBlockId atom_blk_id = atom_nlist.pin_block(pin_id); // get the transitive cluster - ClusterBlockId tclb = atom_ctx.lookup.atom_clb(atom_blk_id); + LegalizationClusterId tclb = cluster_legalizer.get_atom_cluster(atom_blk_id); // if the block connected to this pin is packed in another cluster - if (tclb != clb_index && tclb != ClusterBlockId::INVALID()) { + if (tclb != legalization_cluster_id && tclb != LegalizationClusterId::INVALID()) { // explore transitive nets from already packed cluster for (AtomNetId tnet : 
clb_inter_blk_nets[tclb]) { // iterate over all the pins of the net - for (AtomPinId tpin : atom_ctx.nlist.net_pins(tnet)) { - auto blk_id = atom_ctx.nlist.pin_block(tpin); + for (AtomPinId tpin : atom_nlist.net_pins(tnet)) { + auto blk_id = atom_nlist.pin_block(tpin); // This transitive atom is not packed, score and add - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + if (!cluster_legalizer.is_atom_clustered(blk_id)) { auto& transitive_fanout_candidates = pb_stats->transitive_fanout_candidates; if (pb_stats->gain.count(blk_id) == 0) { @@ -3250,7 +1812,7 @@ void load_transitive_fanout_candidates(ClusterBlockId clb_index, } else { pb_stats->gain[blk_id] += 0.001; } - t_pack_molecule* molecule = atom_ctx.prepacker.get_atom_molecule(blk_id); + t_pack_molecule* molecule = prepacker.get_atom_molecule(blk_id); if (molecule->valid) { transitive_fanout_candidates.insert({molecule->atom_block_ids[molecule->root], molecule}); } @@ -3265,9 +1827,8 @@ void load_transitive_fanout_candidates(ClusterBlockId clb_index, std::map> identify_primitive_candidate_block_types() { std::map> model_candidates; - auto& atom_ctx = g_vpr_ctx.atom(); - auto& atom_nlist = atom_ctx.nlist; - auto& device_ctx = g_vpr_ctx.device(); + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; + const DeviceContext& device_ctx = g_vpr_ctx.device(); std::set unique_models; // Find all logic models used in the netlist @@ -3295,7 +1856,7 @@ std::map> identify_primiti void print_seed_gains(const char* fname, const std::vector& seed_atoms, const vtr::vector& atom_gain, const vtr::vector& atom_criticality) { FILE* fp = vtr::fopen(fname, "w"); - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); //For prett formatting determine the maximum name length int max_name_len = strlen("atom_block_name"); @@ -3324,99 +1885,6 @@ void print_seed_gains(const char* fname, const std::vector& seed_at fclose(fp); } -/** - * This function takes a chain molecule, and the 
pb_graph_node that is chosen - * for packing the molecule's root block. Using the given root_primitive, this - * function will identify which chain id this molecule is being mapped to and - * will update the chain id value inside the chain info data structure of this - * molecule - */ -void update_molecule_chain_info(t_pack_molecule* chain_molecule, const t_pb_graph_node* root_primitive) { - VTR_ASSERT(chain_molecule->chain_info->chain_id == -1 && chain_molecule->chain_info->is_long_chain); - - auto chain_root_pins = chain_molecule->pack_pattern->chain_root_pins; - - // long chains should only be placed at the beginning of the chain - // Since for long chains the molecule size is already equal to the - // total number of adders in the cluster. Therefore, it should - // always be placed at the very first adder in this cluster. - for (size_t chainId = 0; chainId < chain_root_pins.size(); chainId++) { - if (chain_root_pins[chainId][0]->parent_node == root_primitive) { - chain_molecule->chain_info->chain_id = chainId; - chain_molecule->chain_info->first_packed_molecule = chain_molecule; - return; - } - } - - VTR_ASSERT(false); -} - -/** - * This function takes the root block of a chain molecule and a proposed - * placement primitive for this block. The function then checks if this - * chain root block has a placement constraint (such as being driven from - * outside the cluster) and returns the status of the placement accordingly. 
- */ -enum e_block_pack_status check_chain_root_placement_feasibility(const t_pb_graph_node* pb_graph_node, - const t_pack_molecule* molecule, - const AtomBlockId blk_id) { - enum e_block_pack_status block_pack_status = e_block_pack_status::BLK_PASSED; - auto& atom_ctx = g_vpr_ctx.atom(); - - bool is_long_chain = molecule->chain_info->is_long_chain; - - const auto& chain_root_pins = molecule->pack_pattern->chain_root_pins; - - t_model_ports* root_port = chain_root_pins[0][0]->port->model_port; - AtomNetId chain_net_id; - auto port_id = atom_ctx.nlist.find_atom_port(blk_id, root_port); - - if (port_id) { - chain_net_id = atom_ctx.nlist.port_net(port_id, chain_root_pins[0][0]->pin_number); - } - - // if this block is part of a long chain or it is driven by a cluster - // input pin we need to check the placement legality of this block - // Depending on the logic synthesis even small chains that can fit within one - // cluster might need to start at the top of the cluster as their input can be - // driven by a global gnd or vdd. Therefore even if this is not a long chain - // but its input pin is driven by a net, the placement legality is checked. - if (is_long_chain || chain_net_id) { - auto chain_id = molecule->chain_info->chain_id; - // if this chain has a chain id assigned to it (implies is_long_chain too) - if (chain_id != -1) { - // the chosen primitive should be a valid starting point for the chain - // long chains should only be placed at the top of the chain tieOff = 0 - if (pb_graph_node != chain_root_pins[chain_id][0]->parent_node) { - block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; - } - // the chain doesn't have an assigned chain_id yet - } else { - block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; - for (const auto& chain : chain_root_pins) { - for (auto tieOff : chain) { - // check if this chosen primitive is one of the possible - // starting points for this chain. 
- if (pb_graph_node == tieOff->parent_node) { - // this location matches with the one of the dedicated chain - // input from outside logic block, therefore it is feasible - block_pack_status = e_block_pack_status::BLK_PASSED; - break; - } - // long chains should only be placed at the top of the chain tieOff = 0 - if (is_long_chain) break; - } - } - } - } - - return block_pack_status; -} - -/** - * This function update the pb_type_count data structure by incrementing - * the number of used pb_types in the given packed cluster t_pb - */ size_t update_pb_type_count(const t_pb* pb, std::map& pb_type_count, size_t depth) { size_t max_depth = depth; @@ -3460,10 +1928,6 @@ void print_pb_type_count_recurr(t_pb_type* pb_type, size_t max_name_chars, size_ } } -/** - * This function identifies the logic block type which is - * defined by the block type which has a lut primitive - */ t_logical_block_type_ptr identify_logic_block_type(std::map>& primitive_candidate_block_types) { std::string lut_name = ".names"; @@ -3476,12 +1940,6 @@ t_logical_block_type_ptr identify_logic_block_type(std::mapLE) that has more than one instance within the cluster. 
- */ t_pb_type* identify_le_block_type(t_logical_block_type_ptr logic_block_type) { // if there is no CLB-like cluster, then there is no LE pb_block if (!logic_block_type) @@ -3506,9 +1964,6 @@ t_pb_type* identify_le_block_type(t_logical_block_type_ptr logic_block_type) { return nullptr; } -/** - * This function updates the le_count data structure from the given packed cluster - */ void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_type, const t_pb_type* le_pb_type, std::vector& le_count) { // if this cluster doesn't contain LEs or there // are no les in this architecture, ignore it @@ -3548,10 +2003,6 @@ void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_ } } -/** - * This function returns true if the given physical block has - * a primitive matching the given blif model and is used - */ bool pb_used_for_blif_model(const t_pb* pb, const std::string& blif_model_name) { auto pb_graph_node = pb->pb_graph_node; auto pb_type = pb_graph_node->pb_type; @@ -3579,9 +2030,6 @@ bool pb_used_for_blif_model(const t_pb* pb, const std::string& blif_model_name) return false; } -/** - * Print the LE count data strurture - */ void print_le_count(std::vector& le_count, const t_pb_type* le_pb_type) { VTR_LOG("\nLogic Element (%s) detailed count:\n", le_pb_type->name); VTR_LOG(" Total number of Logic Elements used : %d\n", le_count[0] + le_count[1] + le_count[2]); @@ -3590,11 +2038,6 @@ void print_le_count(std::vector& le_count, const t_pb_type* le_pb_type) { VTR_LOG(" LEs used for registers only : %d\n\n", le_count[2]); } -/** - * Given a pointer to a pb in a cluster, this routine returns - * a pointer to the top-level pb of the given pb. - * This is needed when updating the gain for a cluster. 
- */ t_pb* get_top_level_pb(t_pb* pb) { t_pb* top_level_pb = pb; @@ -3608,20 +2051,17 @@ t_pb* get_top_level_pb(t_pb* pb) { return top_level_pb; } -void init_clb_atoms_lookup(vtr::vector>& atoms_lookup) { - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - - atoms_lookup.resize(cluster_ctx.clb_nlist.blocks().size()); - - for (auto atom_blk_id : atom_ctx.nlist.blocks()) { +void init_clb_atoms_lookup(vtr::vector>& atoms_lookup, + const AtomContext& atom_ctx, + const ClusteredNetlist& clb_nlist) { + // Resize the atoms lookup to the number of clusters. + atoms_lookup.resize(clb_nlist.blocks().size()); + for (AtomBlockId atom_blk_id : atom_ctx.nlist.blocks()) { + // Get the CLB that this atom is packed into. ClusterBlockId clb_index = atom_ctx.lookup.atom_clb(atom_blk_id); - - /* if this data structure is being built alongside the clustered netlist */ - /* e.g. when ingesting and legalizing a flat placement solution, some atoms */ - /* may not yet be mapped to a valid clb_index */ - if (clb_index != ClusterBlockId::INVALID()) { - atoms_lookup[clb_index].insert(atom_blk_id); - } + // Every atom block should be in a cluster. + VTR_ASSERT_SAFE(clb_index.is_valid()); + // Insert this atom into the set of atoms of its cluster.
+ atoms_lookup[clb_index].insert(atom_blk_id); } } diff --git a/vpr/src/pack/cluster_util.h b/vpr/src/pack/cluster_util.h index 4f190645ff0..d25a3b1ab44 100644 --- a/vpr/src/pack/cluster_util.h +++ b/vpr/src/pack/cluster_util.h @@ -2,15 +2,18 @@ #define CLUSTER_UTIL_H #include +#include "cluster_legalizer.h" #include "pack_types.h" #include "vtr_vector.h" class AtomNetId; class ClusterBlockId; +class ClusteredNetlist; class PreClusterDelayCalculator; class Prepacker; class SetupTimingInfo; class t_pack_molecule; +struct AtomContext; /** * @file @@ -20,9 +23,6 @@ class t_pack_molecule; constexpr int AAPACK_MAX_HIGH_FANOUT_EXPLORE = 10; /* For high-fanout nets that are ignored, consider a maximum of this many sinks, must be less than packer_opts.feasible_block_array_size */ constexpr int AAPACK_MAX_TRANSITIVE_EXPLORE = 40; /* When investigating transitive fanout connections in packing, consider a maximum of this many molecules, must be less than packer_opts.feasible_block_array_size */ -//Constant allowing all cluster pins to be used -const t_ext_pin_util FULL_EXTERNAL_PIN_UTIL(1., 1.); - enum e_gain_update { GAIN, NO_GAIN @@ -45,12 +45,6 @@ enum e_net_relation_to_clustered_block { OUTPUT }; -enum e_detailed_routing_stages { - E_DETAILED_ROUTE_AT_END_ONLY = 0, - E_DETAILED_ROUTE_FOR_EACH_ATOM, - E_DETAILED_ROUTE_INVALID -}; - /* Linked list structure. Stores one integer (iblk). 
*/ struct t_molecule_link { t_pack_molecule* moleculeptr; @@ -79,7 +73,6 @@ struct t_cluster_progress_stats { /* Useful data structures for creating or modifying clusters */ struct t_clustering_data { - vtr::vector*> intra_lb_routing; int* hill_climbing_inputs_avail; /* Keeps a linked list of the unclustered blocks to speed up looking for * @@ -106,9 +99,9 @@ struct t_clustering_data { /* Clustering helper functions */ /***********************************/ -void check_clustering(); - -//calculate the initial timing at the start of packing stage +/* + * @brief Calculate the initial timing at the start of packing stage. + */ void calc_init_packing_timing(const t_packer_opts& packer_opts, const t_analysis_opts& analysis_opts, const Prepacker& prepacker, @@ -116,226 +109,171 @@ void calc_init_packing_timing(const t_packer_opts& packer_opts, std::shared_ptr& timing_info, vtr::vector& atom_criticality); -//free the clustering data structures +/* + * @brief Free the clustering data structures. + */ void free_clustering_data(const t_packer_opts& packer_opts, t_clustering_data& clustering_data); -//check clustering legality and output it -void check_and_output_clustering(const t_packer_opts& packer_opts, +/* + * @brief Check clustering legality and output it. + */ +void check_and_output_clustering(ClusterLegalizer& cluster_legalizer, + const t_packer_opts& packer_opts, const std::unordered_set& is_clock, - const t_arch* arch, - const int& num_clb, - const vtr::vector*>& intra_lb_routing); - -void get_max_cluster_size_and_pb_depth(int& max_cluster_size, - int& max_pb_depth); - -bool check_cluster_legality(const int& verbosity, - const int& detailed_routing_stage, - t_lb_router_data* router_data); + const t_arch* arch); +/* + * @brief Determine if atom block is in pb. + */ bool is_atom_blk_in_pb(const AtomBlockId blk_id, const t_pb* pb); +/* + * @brief Add blk to list of feasible blocks sorted according to gain. 
+ */ void add_molecule_to_pb_stats_candidates(t_pack_molecule* molecule, std::map& gain, t_pb* pb, int max_queue_size, AttractionInfo& attraction_groups); +/* + * @brief Remove blk from list of feasible blocks sorted according to gain. + * + * Useful for removing blocks that are repeatedly failing. If a block + * has been found to be illegal, we don't repeatedly consider it. + */ void remove_molecule_from_pb_stats_candidates(t_pack_molecule* molecule, t_pb* pb); - +/* + * @brief Allocates and inits the data structures used for clustering. + * + * This method initializes the list of molecules to pack, the clustering data, + * and the net info. + */ void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, - t_cluster_placement_stats** cluster_placement_stats, - t_pb_graph_node*** primitives_list, const Prepacker& prepacker, t_clustering_data& clustering_data, std::unordered_map& net_output_feeds_driving_block_input, int& unclustered_list_head_size, int num_molecules); -void free_pb_stats_recursive(t_pb* pb); - -void try_update_lookahead_pins_used(t_pb* cur_pb); - -void reset_lookahead_pins_used(t_pb* cur_pb); - -void compute_and_mark_lookahead_pins_used(const AtomBlockId blk_id); - -void compute_and_mark_lookahead_pins_used_for_pin(const t_pb_graph_pin* pb_graph_pin, - const t_pb* primitive_pb, - const AtomNetId net_id); - -void commit_lookahead_pins_used(t_pb* cur_pb); - -bool check_lookahead_pins_used(t_pb* cur_pb, t_ext_pin_util max_external_pin_util); - -bool primitive_feasible(const AtomBlockId blk_id, t_pb* cur_pb); - -bool primitive_memory_sibling_feasible(const AtomBlockId blk_id, const t_pb_type* cur_pb_type, const AtomBlockId sibling_memory_blk); - +/* + * @brief This routine returns an atom block which has not been clustered, has + * no connection to the current cluster, satisfies the cluster clock + * constraints, is a valid subblock inside the cluster, does not exceed + * the cluster subblock units available, and has ext_inps external 
inputs. + * Remove_flag controls whether or not blocks that have already been + * clustered are removed from the unclustered_list data structures. + * NB: to get a atom block regardless of clock constraints just set + * clocks_avail > 0. + */ t_pack_molecule* get_molecule_by_num_ext_inputs(const int ext_inps, const enum e_removal_policy remove_flag, t_cluster_placement_stats* cluster_placement_stats_ptr, t_molecule_link* unclustered_list_head); +/* @brief This routine is used to find new blocks for clustering when there are + * no feasible blocks with any attraction to the current cluster (i.e. + * it finds blocks which are unconnected from the current cluster). It + * returns the atom block with the largest number of used inputs that + * satisfies the clocking and number of inputs constraints. If no + * suitable atom block is found, the routine returns nullptr. + */ t_pack_molecule* get_free_molecule_with_most_ext_inputs_for_cluster(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, t_molecule_link* unclustered_list_head, const int& unclustered_list_head_size); +/* + * @brief Print the header for the clustering progress table. + */ void print_pack_status_header(); +/* + * @brief Incrementally print progress updates during clustering. 
+ */ void print_pack_status(int num_clb, int tot_num_molecules, int num_molecules_processed, int& mols_since_last_print, int device_width, int device_height, - AttractionInfo& attraction_groups); - -void rebuild_attraction_groups(AttractionInfo& attraction_groups); - -void record_molecule_failure(t_pack_molecule* molecule, t_pb* pb); - -e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_placement_stats_ptr, - t_pack_molecule* molecule, - t_pb_graph_node** primitives_list, - t_pb* pb, - int max_models, - int max_cluster_size, - ClusterBlockId clb_index, - int detailed_routing_stage, - t_lb_router_data* router_data, - int verbosity, - bool enable_pin_feasibility_filter, - int feasible_block_array_size, - t_ext_pin_util max_external_pin_util, - PartitionRegion& temp_cluster_pr, - NocGroupId& temp_noc_grp_id, - int force_site = -1); - -void try_fill_cluster(const t_packer_opts& packer_opts, + AttractionInfo& attraction_groups, + const ClusterLegalizer& cluster_legalizer); + +/* + * @brief Periodically rebuild the attraction groups to reflect which atoms in + * them are still available for new clusters (i.e. remove the atoms that + * have already been packed from the attraction group). + */ +void rebuild_attraction_groups(AttractionInfo& attraction_groups, + const ClusterLegalizer& cluster_legalizer); + +/* + * @brief Try to pack next_molecule into the given cluster. If this succeeds + * prepares the next_molecule with a new value to pack next iteration. + * + * This method will print the pack status and update the cluster stats. 
+ */ +void try_fill_cluster(ClusterLegalizer& cluster_legalizer, + const Prepacker& prepacker, + const t_packer_opts& packer_opts, t_cluster_placement_stats* cur_cluster_placement_stats_ptr, t_pack_molecule*& prev_molecule, t_pack_molecule*& next_molecule, int& num_same_molecules, - t_pb_graph_node** primitives_list, t_cluster_progress_stats& cluster_stats, int num_clb, - const int num_models, - const int max_cluster_size, - const ClusterBlockId clb_index, - const int detailed_routing_stage, + const LegalizationClusterId legalization_cluster_id, AttractionInfo& attraction_groups, - vtr::vector>& clb_inter_blk_nets, + vtr::vector>& clb_inter_blk_nets, bool allow_unrelated_clustering, const int& high_fanout_threshold, const std::unordered_set& is_clock, const std::unordered_set& is_global, const std::shared_ptr& timing_info, - t_lb_router_data* router_data, - t_ext_pin_util target_ext_pin_util, - PartitionRegion& temp_cluster_pr, - NocGroupId& temp_noc_grp_id, e_block_pack_status& block_pack_status, t_molecule_link* unclustered_list_head, const int& unclustered_list_head_size, std::unordered_map& net_output_feeds_driving_block_input, std::map>& primitive_candidate_block_types); -t_pack_molecule* save_cluster_routing_and_pick_new_seed(const t_packer_opts& packer_opts, - const int& num_clb, - const std::vector& seed_atoms, - const int& num_blocks_hill_added, - vtr::vector*>& intra_lb_routing, - int& seedindex, - t_cluster_progress_stats& cluster_stats, - t_lb_router_data* router_data); - void store_cluster_info_and_free(const t_packer_opts& packer_opts, - const ClusterBlockId& clb_index, + const LegalizationClusterId clb_index, const t_logical_block_type_ptr logic_block_type, const t_pb_type* le_pb_type, std::vector& le_count, - vtr::vector>& clb_inter_blk_nets); - -void free_data_and_requeue_used_mols_if_illegal(const ClusterBlockId& clb_index, - const int& savedseedindex, - std::map& num_used_type_instances, - int& num_clb, - int& seedindex); - -enum 
e_block_pack_status try_place_atom_block_rec(const t_pb_graph_node* pb_graph_node, - const AtomBlockId blk_id, - t_pb* cb, - t_pb** parent, - const int max_models, - const int max_cluster_size, - const ClusterBlockId clb_index, - const t_cluster_placement_stats* cluster_placement_stats_ptr, - const t_pack_molecule* molecule, - t_lb_router_data* router_data, - int verbosity, - const int feasible_block_array_size); - - -/** - * @brief Checks whether an atom block can be added to a clustered block - * without violating floorplanning constraints. It also updates the - * clustered block's floorplanning region by taking the intersection of - * its current region and the floorplanning region of the given atom block. - * - * @param blk_id A unique ID for the candidate atom block to be added to the growing cluster. - * @param clb_index A unique ID for the clustered block that the atom block wants to be added to. - * @param verbosity Controls the detail level of log information printed by this function. - * @param temp_cluster_pr The floorplanning regions of the clustered block. This function may - * update the given region. - * @param cluster_pr_needs_update Indicates whether the floorplanning region of the clustered block - * have updated. - * @return True if adding the given atom block to the clustered block does not violated any - * floorplanning constraints. - */ -bool atom_cluster_floorplanning_check(AtomBlockId blk_id, - ClusterBlockId clb_index, - int verbosity, - PartitionRegion& temp_cluster_pr, - bool& cluster_pr_needs_update); -/** - * @brief Checks if an atom block can be added to a clustered block without - * violating NoC group constraints. For passing this check, either both clustered - * and atom blocks must belong to the same NoC group, or at least one of them should - * not belong to any NoC group. 
If the atom block is associated with a NoC group while - * the clustered block does not belong to any NoC groups, the NoC group ID of the atom block - * is assigned to the clustered block when the atom is added to it. - * block - * - * @param blk_id A unique ID for the candidate atom block to be added to the growing cluster. - * @param clb_index A unique ID for the clustered block that the atom block wants to be added to. - * @param verbosity Controls the detail level of log information printed by this function. - * @param temp_cluster_noc_grp_id The NoC group ID of the clustered block. This function may update - * this ID. - * @return True if adding the atom block the cluster does not violate NoC group constraints. - */ -bool atom_cluster_noc_group_check(AtomBlockId blk_id, - ClusterBlockId clb_index, - int verbosity, - NocGroupId& temp_cluster_noc_grp_id); - -void revert_place_atom_block(const AtomBlockId blk_id, t_lb_router_data* router_data); + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets); -void update_connection_gain_values(const AtomNetId net_id, const AtomBlockId clustered_blk_id, t_pb* cur_pb, enum e_net_relation_to_clustered_block net_relation_to_clustered_block); +void update_connection_gain_values(const AtomNetId net_id, + const AtomBlockId clustered_blk_id, + t_pb* cur_pb, + const ClusterLegalizer& cluster_legalizer, + enum e_net_relation_to_clustered_block net_relation_to_clustered_block); void update_timing_gain_values(const AtomNetId net_id, t_pb* cur_pb, + const ClusterLegalizer& cluster_legalizer, enum e_net_relation_to_clustered_block net_relation_to_clustered_block, const SetupTimingInfo& timing_info, const std::unordered_set& is_global, std::unordered_map& net_output_feeds_driving_block_input); +/* + * @brief Updates the marked data structures, and if gain_flag is GAIN, the gain + * when an atom block is added to a cluster. 
The sharinggain is the + * number of inputs that an atom block shares with blocks that are already + * in the cluster. Hillgain is the reduction in number of pins-required + * by adding an atom block to the cluster. The timinggain is the + * criticality of the most critical atom net between this atom block and + * an atom block in the cluster. + */ void mark_and_update_partial_gain(const AtomNetId net_id, enum e_gain_update gain_flag, const AtomBlockId clustered_blk_id, + const ClusterLegalizer& cluster_legalizer, bool timing_driven, bool connection_driven, enum e_net_relation_to_clustered_block net_relation_to_clustered_block, @@ -344,10 +282,22 @@ void mark_and_update_partial_gain(const AtomNetId net_id, const int high_fanout_net_threshold, std::unordered_map& net_output_feeds_driving_block_input); +/* + * @brief Updates the total gain array to reflect the desired tradeoff between + * input sharing (sharinggain) and path_length minimization (timinggain) + * input each time a new molecule is added to the cluster. + */ void update_total_gain(float alpha, float beta, bool timing_driven, bool connection_driven, t_pb* pb, AttractionInfo& attraction_groups); +/* + * @brief Routine that is called each time a new molecule is added to the cluster. + * + * Makes calls to update cluster stats such as the gain map for atoms, used pins, + * and clock structures, in order to reflect the new content of the cluster. + * Also keeps track of which attraction group the cluster belongs to.
+ */ void update_cluster_stats(const t_pack_molecule* molecule, - const ClusterBlockId clb_index, + const ClusterLegalizer& cluster_legalizer, const std::unordered_set& is_clock, const std::unordered_set& is_global, const bool global_clocks, @@ -360,65 +310,112 @@ void update_cluster_stats(const t_pack_molecule* molecule, AttractionInfo& attraction_groups, std::unordered_map& net_output_feeds_driving_block_input); -void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, - t_pb_graph_node** primitives_list, - ClusterBlockId clb_index, +/* + * @brief Given a starting seed block, start_new_cluster determines the next + * cluster type to use. + * + * It expands the FPGA if it cannot find a legal cluster for the atom block + */ +void start_new_cluster(ClusterLegalizer& cluster_legalizer, + LegalizationClusterId& legalization_cluster_id, t_pack_molecule* molecule, std::map& num_used_type_instances, const float target_device_utilization, - const int num_models, - const int max_cluster_size, const t_arch* arch, const std::string& device_layout_name, - std::vector* lb_type_rr_graphs, - t_lb_router_data** router_data, - const int detailed_routing_stage, - ClusteredNetlist* clb_nlist, const std::map>& primitive_candidate_block_types, int verbosity, - bool enable_pin_feasibility_filter, - bool balance_block_type_utilization, - const int feasible_block_array_size, - PartitionRegion& temp_cluster_pr, - NocGroupId& temp_noc_grp_id); + bool balance_block_type_utilization); +/* + * @brief Get candidate molecule to pack into currently open cluster + * + * Molecule selection priority: + * 1. Find unpacked molecules based on criticality and strong connectedness + * (connected by low fanout nets) with current cluster. + * 2. Find unpacked molecules based on transitive connections (eg. 2 hops away) + * with current cluster. + * 3. Find unpacked molecules based on weak connectedness (connected by high + * fanout nets) with current cluster. + * 4. 
Find unpacked molecules based on attraction group of the current cluster + * (if the cluster has an attraction group). + */ t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, AttractionInfo& attraction_groups, const enum e_gain_type gain_mode, t_cluster_placement_stats* cluster_placement_stats_ptr, - vtr::vector>& clb_inter_blk_nets, - const ClusterBlockId cluster_index, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets, + const LegalizationClusterId cluster_index, bool prioritize_transitive_connectivity, int transitive_fanout_threshold, const int feasible_block_array_size, std::map>& primitive_candidate_block_types); +/* + * @brief Add molecules with strong connectedness to the current cluster to the + * list of feasible blocks. + */ void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, const int feasible_block_array_size, AttractionInfo& attraction_groups); +/* + * @brief Add molecules based on weak connectedness (connected by high fanout + * nets) with current cluster. + */ void add_cluster_molecule_candidates_by_highfanout_connectivity(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, const int feasible_block_array_size, AttractionInfo& attraction_groups); +/* + * @brief If the current cluster being packed has an attraction group associated + * with it (i.e. there are atoms in it that belong to an attraction group), + * this routine adds molecules from the associated attraction group to + * the list of feasible blocks for the cluster. + * + * Attraction groups can be very large, so we only add some randomly selected + * molecules for efficiency if the number of atoms in the group is greater than + * 500. 
Therefore, the molecules added to the candidates will vary each time you + * call this function. + */ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, AttractionInfo& attraction_groups, const int feasible_block_array_size, - ClusterBlockId clb_index, + LegalizationClusterId clb_index, std::map>& primitive_candidate_block_types); +/* + * @brief Add molecules based on transitive connections (eg. 2 hops away) with + * current cluster. + */ void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, - vtr::vector>& clb_inter_blk_nets, - const ClusterBlockId cluster_index, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets, + const LegalizationClusterId cluster_index, int transitive_fanout_threshold, const int feasible_block_array_size, AttractionInfo& attraction_groups); -bool check_free_primitives_for_molecule_atoms(t_pack_molecule* molecule, t_cluster_placement_stats* cluster_placement_stats_ptr); +/* + * @brief Check whether a free primitive exists for each atom block in the + * molecule. 
+ */ +bool check_free_primitives_for_molecule_atoms(t_pack_molecule* molecule, + t_cluster_placement_stats* cluster_placement_stats_ptr, + const ClusterLegalizer& cluster_legalizer); t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, AttractionInfo& attraction_groups, @@ -428,61 +425,112 @@ t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, const int feasible_block_array_size, int* num_unrelated_clustering_attempts, t_cluster_placement_stats* cluster_placement_stats_ptr, - vtr::vector>& clb_inter_blk_nets, - ClusterBlockId cluster_index, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets, + LegalizationClusterId cluster_index, int verbosity, t_molecule_link* unclustered_list_head, const int& unclustered_list_head_size, std::map>& primitive_candidate_block_types); +/* + * @brief Calculates molecule statistics for a single molecule. + */ t_molecule_stats calc_molecule_stats(const t_pack_molecule* molecule, const AtomNetlist& atom_nlist); std::vector initialize_seed_atoms(const e_cluster_seed seed_type, const t_molecule_stats& max_molecule_stats, + const Prepacker& prepacker, const vtr::vector& atom_criticality); -t_pack_molecule* get_highest_gain_seed_molecule(int& seed_index, const std::vector& seed_atoms); +t_pack_molecule* get_highest_gain_seed_molecule(int& seed_index, + const std::vector& seed_atoms, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer); +/* + * @brief Get gain of packing molecule into current cluster. 
+ * + * gain is equal to: + * total_block_gain + * + molecule_base_gain*some_factor + * - introduced_input_nets_of_unrelated_blocks_pulled_in_by_molecule*some_other_factor + */ float get_molecule_gain(t_pack_molecule* molecule, std::map& blk_gain, AttractGroupId cluster_attraction_group_id, AttractionInfo& attraction_groups, int num_molecule_failures); -int net_sinks_reachable_in_cluster(const t_pb_graph_pin* driver_pb_gpin, const int depth, const AtomNetId net_id); - void print_seed_gains(const char* fname, const std::vector& seed_atoms, const vtr::vector& atom_gain, const vtr::vector& atom_criticality); -void load_transitive_fanout_candidates(ClusterBlockId cluster_index, +/** + * @brief Score unclustered atoms that are two hops away from current cluster + * + * For example, consider a cluster that has a FF feeding an adder in another + * cluster. Since this FF is feeding an adder that is packed in another cluster + * this function should find other FFs that are feeding other inputs of this adder + * since they are two hops away from the FF packed in this cluster + */ +void load_transitive_fanout_candidates(LegalizationClusterId cluster_index, t_pb_stats* pb_stats, - vtr::vector>& clb_inter_blk_nets, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets, int transitive_fanout_threshold); std::map> identify_primitive_candidate_block_types(); -void update_molecule_chain_info(t_pack_molecule* chain_molecule, const t_pb_graph_node* root_primitive); - -enum e_block_pack_status check_chain_root_placement_feasibility(const t_pb_graph_node* pb_graph_node, - const t_pack_molecule* molecule, - const AtomBlockId blk_id); - -t_pb_graph_pin* get_driver_pb_graph_pin(const t_pb* driver_pb, const AtomPinId driver_pin_id); - +/** + * @brief This function update the pb_type_count data structure by incrementing + * the number of used pb_types in the given packed cluster t_pb + */ size_t update_pb_type_count(const t_pb* pb, 
std::map& pb_type_count, size_t depth); +/* + * @brief This function updates the le_count data structure from the given + * packed cluster. + */ void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_type, const t_pb_type* le_pb_type, std::vector& le_count); void print_pb_type_count_recurr(t_pb_type* type, size_t max_name_chars, size_t curr_depth, std::map& pb_type_count); +/* + * @brief This function identifies the logic block type which is defined by the + * block type which has a lut primitive. + */ t_logical_block_type_ptr identify_logic_block_type(std::map>& primitive_candidate_block_types); +/* + * @brief This function returns the pb_type that is similar to Logic Element (LE) + * in an FPGA. + * + * The LE is defined as a physical block that contains a LUT primitive and + * is found by searching a cluster type to find the first pb_type (from the top + * of the hierarchy clb->LE) that has more than one instance within the cluster. + */ t_pb_type* identify_le_block_type(t_logical_block_type_ptr logic_block_type); +/* + * @brief This function returns true if the given physical block has a primitive + * matching the given blif model and is used. + */ bool pb_used_for_blif_model(const t_pb* pb, const std::string& blif_model_name); +/* + * @brief Print the LE count data structure. + */ void print_le_count(std::vector& le_count, const t_pb_type* le_pb_type); +/* + * @brief Given a pointer to a pb in a cluster, this routine returns a pointer + * to the top-level pb of the given pb. + * + * This is needed when updating the gain for a cluster. + */ t_pb* get_top_level_pb(t_pb* pb); -bool cleanup_pb(t_pb* pb); - -void alloc_and_load_pb_stats(t_pb* pb, const int feasible_block_array_size); - -void init_clb_atoms_lookup(vtr::vector>& atoms_lookup); +/* + * @brief Load the mapping between clusters and their atoms.
+ */ +void init_clb_atoms_lookup(vtr::vector>& atoms_lookup, + const AtomContext& atom_ctx, + const ClusteredNetlist& clb_nlist); #endif diff --git a/vpr/src/pack/constraints_report.cpp b/vpr/src/pack/constraints_report.cpp index 5c53744fd5e..6b671331c74 100644 --- a/vpr/src/pack/constraints_report.cpp +++ b/vpr/src/pack/constraints_report.cpp @@ -1,9 +1,11 @@ #include "constraints_report.h" +#include "cluster_legalizer.h" +#include "globals.h" +#include "grid_tile_lookup.h" -bool floorplan_constraints_regions_overfull() { +bool floorplan_constraints_regions_overfull(const ClusterLegalizer& cluster_legalizer) { GridTileLookup grid_tiles; - auto& cluster_ctx = g_vpr_ctx.clustering(); auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); auto& device_ctx = g_vpr_ctx.device(); @@ -12,15 +14,13 @@ bool floorplan_constraints_regions_overfull() { // keep record of how many blocks of each type are assigned to each PartitionRegion std::unordered_map> pr_count_info; - for (const ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { - if (!is_cluster_constrained(blk_id)) { + for (LegalizationClusterId cluster_id : cluster_legalizer.clusters()) { + const PartitionRegion& pr = cluster_legalizer.get_cluster_pr(cluster_id); + if (pr.empty()) continue; - } - t_logical_block_type_ptr bt = cluster_ctx.clb_nlist.block_type(blk_id); - const PartitionRegion& pr = floorplanning_ctx.cluster_constraints[blk_id]; + t_logical_block_type_ptr bt = cluster_legalizer.get_cluster_type(cluster_id); auto got = pr_count_info.find(pr); - if (got == pr_count_info.end()) { std::vector block_type_counts(block_types.size(), 0); block_type_counts[bt->index]++; diff --git a/vpr/src/pack/constraints_report.h b/vpr/src/pack/constraints_report.h index 46af3fa83db..c10d1183238 100644 --- a/vpr/src/pack/constraints_report.h +++ b/vpr/src/pack/constraints_report.h @@ -5,9 +5,7 @@ #ifndef VPR_SRC_PACK_CONSTRAINTS_REPORT_H_ #define VPR_SRC_PACK_CONSTRAINTS_REPORT_H_ -#include "globals.h" -#include 
"grid_tile_lookup.h" -#include "place_constraints.h" +class ClusterLegalizer; /** * @brief Check if any constraint partition regions are overfull, @@ -27,6 +25,6 @@ * * @return True if there is at least one overfull partition. */ -bool floorplan_constraints_regions_overfull(); +bool floorplan_constraints_regions_overfull(const ClusterLegalizer& cluster_legalizer); #endif /* VPR_SRC_PACK_CONSTRAINTS_REPORT_H_ */ diff --git a/vpr/src/pack/noc_aware_cluster_util.cpp b/vpr/src/pack/noc_aware_cluster_util.cpp index 51319175c12..87f981605de 100644 --- a/vpr/src/pack/noc_aware_cluster_util.cpp +++ b/vpr/src/pack/noc_aware_cluster_util.cpp @@ -1,12 +1,12 @@ #include "noc_aware_cluster_util.h" +#include "atom_netlist.h" #include "globals.h" +#include "vpr_types.h" #include -std::vector find_noc_router_atoms() { - const auto& atom_ctx = g_vpr_ctx.atom(); - +std::vector find_noc_router_atoms(const AtomNetlist& atom_netlist) { // NoC router atoms are expected to have a specific blif model const std::string noc_router_blif_model_name = "noc_router_adapter_block"; @@ -14,8 +14,8 @@ std::vector find_noc_router_atoms() { std::vector noc_router_atoms; // iterate over all atoms and find those whose blif model matches - for (auto atom_id : atom_ctx.nlist.blocks()) { - const t_model* model = atom_ctx.nlist.block_model(atom_id); + for (auto atom_id : atom_netlist.blocks()) { + const t_model* model = atom_netlist.block_model(atom_id); if (noc_router_blif_model_name == model->name) { noc_router_atoms.push_back(atom_id); } @@ -24,10 +24,10 @@ std::vector find_noc_router_atoms() { return noc_router_atoms; } -void update_noc_reachability_partitions(const std::vector& noc_atoms) { - const auto& atom_ctx = g_vpr_ctx.atom(); - auto& cl_helper_ctx = g_vpr_ctx.mutable_cl_helper(); - const auto& high_fanout_thresholds = g_vpr_ctx.cl_helper().high_fanout_thresholds; +void update_noc_reachability_partitions(const std::vector& noc_atoms, + const AtomNetlist& atom_netlist, + const 
t_pack_high_fanout_thresholds& high_fanout_thresholds, + vtr::vector& atom_noc_grp_id) { const auto& grid = g_vpr_ctx.device().grid; t_logical_block_type_ptr logic_block_type = infer_logic_block_type(grid); @@ -35,11 +35,11 @@ void update_noc_reachability_partitions(const std::vector& noc_atom const size_t high_fanout_threshold = high_fanout_thresholds.get_threshold(logical_block_name); // get the total number of atoms - const size_t n_atoms = atom_ctx.nlist.blocks().size(); + const size_t n_atoms = atom_netlist.blocks().size(); vtr::vector atom_visited(n_atoms, false); - cl_helper_ctx.atom_noc_grp_id.resize(n_atoms, NocGroupId::INVALID()); + atom_noc_grp_id.resize(n_atoms, NocGroupId::INVALID()); int noc_grp_id_cnt = 0; @@ -68,24 +68,24 @@ void update_noc_reachability_partitions(const std::vector& noc_atom AtomBlockId current_atom = q.front(); q.pop(); - cl_helper_ctx.atom_noc_grp_id[current_atom] = noc_grp_id; + atom_noc_grp_id[current_atom] = noc_grp_id; - for(auto pin : atom_ctx.nlist.block_pins(current_atom)) { - AtomNetId net_id = atom_ctx.nlist.pin_net(pin); - size_t net_fanout = atom_ctx.nlist.net_sinks(net_id).size(); + for(auto pin : atom_netlist.block_pins(current_atom)) { + AtomNetId net_id = atom_netlist.pin_net(pin); + size_t net_fanout = atom_netlist.net_sinks(net_id).size(); if (net_fanout >= high_fanout_threshold) { continue; } - AtomBlockId driver_atom_id = atom_ctx.nlist.net_driver_block(net_id); + AtomBlockId driver_atom_id = atom_netlist.net_driver_block(net_id); if (!atom_visited[driver_atom_id]) { q.push(driver_atom_id); atom_visited[driver_atom_id] = true; } - for (auto sink_pin : atom_ctx.nlist.net_sinks(net_id)) { - AtomBlockId sink_atom_id = atom_ctx.nlist.pin_block(sink_pin); + for (auto sink_pin : atom_netlist.net_sinks(net_id)) { + AtomBlockId sink_atom_id = atom_netlist.pin_block(sink_pin); if (!atom_visited[sink_atom_id]) { q.push(sink_atom_id); atom_visited[sink_atom_id] = true; @@ -96,4 +96,4 @@ void 
update_noc_reachability_partitions(const std::vector& noc_atom } } -} \ No newline at end of file +} diff --git a/vpr/src/pack/noc_aware_cluster_util.h b/vpr/src/pack/noc_aware_cluster_util.h index abeb8d8ba95..6f930a21944 100644 --- a/vpr/src/pack/noc_aware_cluster_util.h +++ b/vpr/src/pack/noc_aware_cluster_util.h @@ -17,8 +17,12 @@ */ #include +#include "noc_data_types.h" +#include "vtr_vector.h" -#include "vpr_types.h" +class AtomNetlist; +class AtomBlockId; +class t_pack_high_fanout_thresholds; /** * @brief Iterates over all atom blocks and check whether @@ -26,7 +30,7 @@ * * @return The atom block IDs of the NoC router blocks in the netlist. */ -std::vector find_noc_router_atoms(); +std::vector find_noc_router_atoms(const AtomNetlist& atom_netlist); /** @@ -37,6 +41,9 @@ std::vector find_noc_router_atoms(); * * @param noc_atoms The atom block IDs of the NoC router blocks in the netlist. */ -void update_noc_reachability_partitions(const std::vector& noc_atoms); +void update_noc_reachability_partitions(const std::vector& noc_atoms, + const AtomNetlist& atom_netlist, + const t_pack_high_fanout_thresholds& high_fanout_threshold, + vtr::vector& atom_noc_grp_id); #endif diff --git a/vpr/src/pack/output_clustering.cpp b/vpr/src/pack/output_clustering.cpp index cee87ad51a1..c659837c5fb 100644 --- a/vpr/src/pack/output_clustering.cpp +++ b/vpr/src/pack/output_clustering.cpp @@ -8,10 +8,13 @@ #include #include +#include "cluster_legalizer.h" +#include "clustered_netlist.h" +#include "physical_types.h" +#include "prepack.h" +#include "vpr_context.h" #include "vtr_assert.h" #include "vtr_log.h" -#include "vtr_digest.h" -#include "vtr_memory.h" #include "vpr_types.h" #include "vpr_error.h" @@ -20,11 +23,8 @@ #include "globals.h" #include "atom_netlist.h" -#include "pack_types.h" -#include "cluster_router.h" #include "pb_type_graph.h" #include "output_clustering.h" -#include "read_xml_arch_file.h" #include "vpr_utils.h" #include "pack.h" @@ -36,49 +36,84 @@ static void 
print_clustering_stats(char* block_name, int num_block_type, float n /**************** Subroutine definitions ************************************/ -/* Prints out one cluster (clb). Both the external pins and the * - * internal connections are printed out. */ -static void print_stats() { - int ipin; - unsigned int itype; - int total_nets_absorbed; - std::unordered_map nets_absorbed; - - int *num_clb_types, *num_clb_inputs_used, *num_clb_outputs_used; - - auto& device_ctx = g_vpr_ctx.device(); - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - - num_clb_types = num_clb_inputs_used = num_clb_outputs_used = nullptr; - - num_clb_types = new int[device_ctx.logical_block_types.size()]; - num_clb_inputs_used = new int[device_ctx.logical_block_types.size()]; - num_clb_outputs_used = new int[device_ctx.logical_block_types.size()]; - - for (int i = 0; i < (int)device_ctx.logical_block_types.size(); i++) { - num_clb_types[i] = 0; - num_clb_inputs_used[i] = 0; - num_clb_outputs_used[i] = 0; +static void count_clb_inputs_and_outputs_from_pb_route(const t_pb* pb, + t_logical_block_type_ptr logical_block, + int ipin, + e_pin_type pin_type, + std::unordered_map& nets_absorbed, + int num_clb_inputs_used[], + int num_clb_outputs_used[]) { + VTR_ASSERT_DEBUG(!pb->pb_route.empty()); + int pb_graph_pin_id = get_pb_graph_node_pin_from_pb_graph_node(pb->pb_graph_node, ipin)->pin_count_in_cluster; + + if (pb->pb_route.count(pb_graph_pin_id)) { + //Pin used + AtomNetId atom_net_id = pb->pb_route[pb_graph_pin_id].atom_net_id; + if (atom_net_id) { + nets_absorbed[atom_net_id] = false; + if (pin_type == RECEIVER) { + num_clb_inputs_used[logical_block->index]++; + } else if (pin_type == DRIVER) { + num_clb_outputs_used[logical_block->index]++; + } + } } +} - for (auto net_id : atom_ctx.nlist.nets()) { - nets_absorbed[net_id] = true; +static void count_stats_from_legalizer(const ClusterLegalizer& cluster_legalizer, + std::unordered_map& nets_absorbed, + int 
num_clb_types[], + int num_clb_inputs_used[], + int num_clb_outputs_used[]) { + for (LegalizationClusterId cluster_id : cluster_legalizer.clusters()) { + t_logical_block_type_ptr logical_block = cluster_legalizer.get_cluster_type(cluster_id); + t_physical_tile_type_ptr physical_tile = pick_physical_type(logical_block); + for (int ipin = 0; ipin < logical_block->pb_type->num_pins; ipin++) { + int physical_pin = get_physical_pin(physical_tile, logical_block, ipin); + e_pin_type pin_type = get_pin_type_from_pin_physical_num(physical_tile, physical_pin); + + const t_pb* pb = cluster_legalizer.get_cluster_pb(cluster_id); + if (pb->pb_route.empty()) + continue; + count_clb_inputs_and_outputs_from_pb_route(pb, + logical_block, + ipin, + pin_type, + nets_absorbed, + num_clb_inputs_used, + num_clb_outputs_used); + } + num_clb_types[logical_block->index]++; } - /* Counters used only for statistics purposes. */ +} - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - auto logical_block = cluster_ctx.clb_nlist.block_type(blk_id); - auto physical_tile = pick_physical_type(logical_block); - for (ipin = 0; ipin < logical_block->pb_type->num_pins; ipin++) { +static void count_stats_from_netlist(std::unordered_map& nets_absorbed, + int num_clb_types[], + int num_clb_inputs_used[], + int num_clb_outputs_used[]) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + const ClusteredNetlist& clb_nlist = g_vpr_ctx.clustering().clb_nlist; + + for (ClusterBlockId blk_id : clb_nlist.blocks()) { + t_logical_block_type_ptr logical_block = clb_nlist.block_type(blk_id); + t_physical_tile_type_ptr physical_tile = pick_physical_type(logical_block); + for (int ipin = 0; ipin < logical_block->pb_type->num_pins; ipin++) { int physical_pin = get_physical_pin(physical_tile, logical_block, ipin); - auto pin_type = get_pin_type_from_pin_physical_num(physical_tile, physical_pin); - - if (cluster_ctx.clb_nlist.block_pb(blk_id)->pb_route.empty()) { - ClusterNetId clb_net_id = 
cluster_ctx.clb_nlist.block_net(blk_id, ipin); + e_pin_type pin_type = get_pin_type_from_pin_physical_num(physical_tile, physical_pin); + + if (!clb_nlist.block_pb(blk_id)->pb_route.empty()) { + count_clb_inputs_and_outputs_from_pb_route(clb_nlist.block_pb(blk_id), + logical_block, + ipin, + pin_type, + nets_absorbed, + num_clb_inputs_used, + num_clb_outputs_used); + } else { + ClusterNetId clb_net_id = clb_nlist.block_net(blk_id, ipin); if (clb_net_id != ClusterNetId::INVALID()) { - auto net_id = atom_ctx.lookup.atom_net(clb_net_id); + AtomNetId net_id = atom_ctx.lookup.atom_net(clb_net_id); VTR_ASSERT(net_id); nets_absorbed[net_id] = false; @@ -88,30 +123,45 @@ static void print_stats() { num_clb_outputs_used[logical_block->index]++; } } - } else { - int pb_graph_pin_id = get_pb_graph_node_pin_from_block_pin(blk_id, ipin)->pin_count_in_cluster; - - const t_pb* pb = cluster_ctx.clb_nlist.block_pb(blk_id); - if (pb->pb_route.count(pb_graph_pin_id)) { - //Pin used - auto atom_net_id = pb->pb_route[pb_graph_pin_id].atom_net_id; - if (atom_net_id) { - nets_absorbed[atom_net_id] = false; - if (pin_type == RECEIVER) { - num_clb_inputs_used[logical_block->index]++; - } else if (pin_type == DRIVER) { - num_clb_outputs_used[logical_block->index]++; - } - } - } } } num_clb_types[logical_block->index]++; } +} + +/* Prints out one cluster (clb). Both the external pins and the * + * internal connections are printed out. 
*/ +static void print_stats(const ClusterLegalizer* cluster_legalizer_ptr, bool from_legalizer) { + const DeviceContext& device_ctx = g_vpr_ctx.device(); + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; + + int* num_clb_types = new int[device_ctx.logical_block_types.size()]; + int* num_clb_inputs_used = new int[device_ctx.logical_block_types.size()]; + int* num_clb_outputs_used = new int[device_ctx.logical_block_types.size()]; + + for (size_t i = 0; i < device_ctx.logical_block_types.size(); i++) { + num_clb_types[i] = 0; + num_clb_inputs_used[i] = 0; + num_clb_outputs_used[i] = 0; + } + + std::unordered_map nets_absorbed; + for (AtomNetId net_id : atom_nlist.nets()) { + nets_absorbed[net_id] = true; + } + + /* Counters used only for statistics purposes. */ + if (from_legalizer) { + VTR_ASSERT(cluster_legalizer_ptr != nullptr); + count_stats_from_legalizer(*cluster_legalizer_ptr, nets_absorbed, num_clb_types, num_clb_inputs_used, num_clb_outputs_used); + } else { + VTR_ASSERT(cluster_legalizer_ptr == nullptr); + count_stats_from_netlist(nets_absorbed, num_clb_types, num_clb_inputs_used, num_clb_outputs_used); + } print_clustering_stats_header(); - for (itype = 0; itype < device_ctx.logical_block_types.size(); itype++) { + for (unsigned int itype = 0; itype < device_ctx.logical_block_types.size(); itype++) { if (num_clb_types[itype] == 0) { print_clustering_stats(device_ctx.logical_block_types[itype].name, num_clb_types[itype], 0.0, 0.0); } else { @@ -121,14 +171,14 @@ static void print_stats() { } } - total_nets_absorbed = 0; - for (auto net_id : atom_ctx.nlist.nets()) { + int total_nets_absorbed = 0; + for (AtomNetId net_id : atom_nlist.nets()) { if (nets_absorbed[net_id] == true) { total_nets_absorbed++; } } VTR_LOG("Absorbed logical nets %d out of %d nets, %d nets not absorbed.\n", - total_nets_absorbed, (int)atom_ctx.nlist.nets().size(), (int)atom_ctx.nlist.nets().size() - total_nets_absorbed); + total_nets_absorbed, (int)atom_nlist.nets().size(), 
(int)atom_nlist.nets().size() - total_nets_absorbed); delete[] num_clb_types; delete[] num_clb_inputs_used; delete[] num_clb_outputs_used; @@ -162,12 +212,12 @@ static const char* clustering_xml_net_text(AtomNetId net_id) { /* This routine prints out the atom_ctx.nlist net name (or open). * net_num is the index of the atom_ctx.nlist net to be printed */ + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; if (!net_id) { return "open"; } else { - auto& atom_ctx = g_vpr_ctx.atom(); - return atom_ctx.nlist.net_name(net_id).c_str(); + return atom_nlist.net_name(net_id).c_str(); } } @@ -218,7 +268,7 @@ static std::string clustering_xml_interconnect_text(t_logical_block_type_ptr typ * cannot simply be marked open as that would lose the routing information. Instead, a block must be * output that reflects the routing resources used. This function handles both cases. */ -static void clustering_xml_open_block(pugi::xml_node parent_node, t_logical_block_type_ptr type, const IntraLbPbPinLookup& pb_graph_pin_lookup_from_index_by_type, t_pb_graph_node* pb_graph_node, int pb_index, bool is_used, const t_pb_routes& pb_route) { +static void clustering_xml_open_block(pugi::xml_node& parent_node, t_logical_block_type_ptr type, const IntraLbPbPinLookup& pb_graph_pin_lookup_from_index_by_type, t_pb_graph_node* pb_graph_node, int pb_index, bool is_used, const t_pb_routes& pb_route) { int i, j, k, m; const t_pb_type *pb_type, *child_pb_type; t_mode* mode = nullptr; @@ -362,7 +412,7 @@ static void clustering_xml_open_block(pugi::xml_node parent_node, t_logical_bloc } /* outputs a block that is used (i.e. 
has configuration) and all of its child blocks */ -static void clustering_xml_block(pugi::xml_node parent_node, t_logical_block_type_ptr type, const IntraLbPbPinLookup& pb_graph_pin_lookup_from_index_by_type, t_pb* pb, int pb_index, const t_pb_routes& pb_route) { +static void clustering_xml_block(pugi::xml_node& parent_node, t_logical_block_type_ptr type, const IntraLbPbPinLookup& pb_graph_pin_lookup_from_index_by_type, t_pb* pb, int pb_index, const t_pb_routes& pb_route) { int i, j, k, m; const t_pb_type *pb_type, *child_pb_type; t_pb_graph_node* pb_graph_node; @@ -559,20 +609,42 @@ static void clustering_xml_block(pugi::xml_node parent_node, t_logical_block_typ } } +static void clustering_xml_blocks_from_legalizer(pugi::xml_node& block_node, + const IntraLbPbPinLookup& pb_graph_pin_lookup_from_index_by_type, + ClusterLegalizer& cluster_legalizer) { + // Finalize the cluster legalization by ensuring that each cluster pb has + // its pb_route calculated. + cluster_legalizer.finalize(); + for (LegalizationClusterId cluster_id : cluster_legalizer.clusters()) { + clustering_xml_block(block_node, + cluster_legalizer.get_cluster_type(cluster_id), + pb_graph_pin_lookup_from_index_by_type, + cluster_legalizer.get_cluster_pb(cluster_id), + size_t(cluster_id), + cluster_legalizer.get_cluster_pb(cluster_id)->pb_route); + } +} + +static void clustering_xml_blocks_from_netlist(pugi::xml_node& block_node, + const IntraLbPbPinLookup& pb_graph_pin_lookup_from_index_by_type) { + const ClusteredNetlist& clb_nlist = g_vpr_ctx.clustering().clb_nlist; + for (auto blk_id : clb_nlist.blocks()) { + /* TODO: Must do check that total CLB pins match top-level pb pins, perhaps check this earlier? */ + clustering_xml_block(block_node, + clb_nlist.block_type(blk_id), + pb_graph_pin_lookup_from_index_by_type, + clb_nlist.block_pb(blk_id), + size_t(blk_id), + clb_nlist.block_pb(blk_id)->pb_route); + } +} + /* This routine dumps out the output netlist in a format suitable for * * input to vpr. 
This routine also dumps out the internal structure of * * the cluster, in essentially a graph based format. */ -void output_clustering(const vtr::vector*>& intra_lb_routing, bool global_clocks, const std::unordered_set& is_clock, const std::string& architecture_id, const char* out_fname, bool skip_clustering) { - auto& device_ctx = g_vpr_ctx.device(); - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - if (!intra_lb_routing.empty()) { - VTR_ASSERT(intra_lb_routing.size() == cluster_ctx.clb_nlist.blocks().size()); - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - cluster_ctx.clb_nlist.block_pb(blk_id)->pb_route = alloc_and_load_pb_route(intra_lb_routing[blk_id], cluster_ctx.clb_nlist.block_pb(blk_id)->pb_graph_node); - } - } +void output_clustering(ClusterLegalizer* cluster_legalizer_ptr, bool global_clocks, const std::unordered_set& is_clock, const std::string& architecture_id, const char* out_fname, bool skip_clustering, bool from_legalizer) { + const DeviceContext& device_ctx = g_vpr_ctx.device(); + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; IntraLbPbPinLookup pb_graph_pin_lookup_from_index_by_type(device_ctx.logical_block_types); @@ -582,26 +654,26 @@ void output_clustering(const vtr::vector inputs; std::vector outputs; - for (auto blk_id : atom_ctx.nlist.blocks()) { - auto type = atom_ctx.nlist.block_type(blk_id); + for (auto blk_id : atom_nlist.blocks()) { + auto type = atom_nlist.block_type(blk_id); switch (type) { case AtomBlockType::INPAD: if (skip_clustering) { VTR_ASSERT(0); } - inputs.push_back(atom_ctx.nlist.block_name(blk_id)); + inputs.push_back(atom_nlist.block_name(blk_id)); break; case AtomBlockType::OUTPAD: if (skip_clustering) { VTR_ASSERT(0); } - outputs.push_back(atom_ctx.nlist.block_name(blk_id)); + outputs.push_back(atom_nlist.block_name(blk_id)); break; case AtomBlockType::BLOCK: @@ -612,7 +684,7 @@ void output_clustering(const vtr::vector clocks; - for (auto net_id : 
atom_ctx.nlist.nets()) { + for (auto net_id : atom_nlist.nets()) { if (is_clock.count(net_id)) { - clocks.push_back(atom_ctx.nlist.net_name(net_id)); + clocks.push_back(atom_nlist.net_name(net_id)); } } @@ -631,25 +703,22 @@ void output_clustering(const vtr::vectorpb_route); + if (from_legalizer) { + VTR_ASSERT(cluster_legalizer_ptr != nullptr); + clustering_xml_blocks_from_legalizer(block_node, pb_graph_pin_lookup_from_index_by_type, *cluster_legalizer_ptr); + } else { + VTR_ASSERT(cluster_legalizer_ptr == nullptr); + clustering_xml_blocks_from_netlist(block_node, pb_graph_pin_lookup_from_index_by_type); } } out_xml.save_file(out_fname); - print_stats(); - - if (!intra_lb_routing.empty()) { - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - cluster_ctx.clb_nlist.block_pb(blk_id)->pb_route.clear(); - } - } + print_stats(cluster_legalizer_ptr, from_legalizer); } /******************************************************************** - * An useful API to output packing results to a XML file + * A useful API to output packing results to a XML file * This function is a wrapper for the function output_clustering() * but remove all the requirements on input data structures that * have to be built with other APIs @@ -660,13 +729,15 @@ void output_clustering(const vtr::vector*> intra_lb_routing_placeholder; std::unordered_set is_clock = alloc_and_load_is_clock(); - output_clustering(intra_lb_routing_placeholder, + // Since the cluster legalizer is not being used to output the clustering + // (from_legalizer is false), passing in nullptr. 
+ output_clustering(nullptr, global_clocks, is_clock, architecture_id, out_fname, - false); + false, /*skip_clustering*/ + false /*from_legalizer*/); } diff --git a/vpr/src/pack/output_clustering.h b/vpr/src/pack/output_clustering.h index 509690e4934..92d734248d1 100644 --- a/vpr/src/pack/output_clustering.h +++ b/vpr/src/pack/output_clustering.h @@ -1,12 +1,31 @@ #ifndef OUTPUT_CLUSTERING_H #define OUTPUT_CLUSTERING_H -#include + #include -#include "vpr_types.h" -#include "pack_types.h" +#include + +class AtomNetId; +class ClusterLegalizer; -void output_clustering(const vtr::vector*>& intra_lb_routing, bool global_clocks, const std::unordered_set& is_clock, const std::string& architecture_id, const char* out_fname, bool skip_clustering); +/// @brief Output the clustering, given by the ClusterLegalizer or a clustered +/// netlist, to a clustered netlist file. +/// +/// The clustering can be output from the following sources: +/// 1) From the clustering +/// 2) From another clustered netlist +/// If from_legalizer is true, the ClusterLegalizer will be used to generate the +/// clustered netlist. If from_legalizer is false, the clustered netlist currently +/// in the global scope will be used. 
+void output_clustering(ClusterLegalizer* cluster_legalizer_ptr, + bool global_clocks, + const std::unordered_set& is_clock, + const std::string& architecture_id, + const char* out_fname, + bool skip_clustering, + bool from_legalizer); -void write_packing_results_to_xml(const bool& global_clocks, const std::string& architecture_id, const char* out_fname); +void write_packing_results_to_xml(const bool& global_clocks, + const std::string& architecture_id, + const char* out_fname); #endif diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index e8c68ea2244..a4a1dcc09ee 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -1,22 +1,17 @@ #include +#include "SetupGrid.h" +#include "cluster.h" +#include "cluster_legalizer.h" +#include "cluster_util.h" +#include "globals.h" +#include "pack.h" +#include "prepack.h" #include "vpr_context.h" -#include "vtr_assert.h" -#include "vtr_log.h" - #include "vpr_error.h" #include "vpr_types.h" - -#include "globals.h" -#include "prepack.h" -#include "pack_types.h" -#include "pack.h" -#include "cluster.h" -#include "SetupGrid.h" -#include "noc_aware_cluster_util.h" - -/* #define DUMP_PB_GRAPH 1 */ -/* #define DUMP_BLIF_INPUT 1 */ +#include "vtr_assert.h" +#include "vtr_log.h" static bool try_size_device_grid(const t_arch& arch, const std::map& num_type_instances, @@ -38,9 +33,7 @@ bool try_pack(t_packer_opts* packer_opts, const t_model* library_models, float interc_delay, std::vector* lb_type_rr_graphs) { - AtomContext& atom_mutable_ctx = g_vpr_ctx.mutable_atom(); const AtomContext& atom_ctx = g_vpr_ctx.atom(); - ClusteringHelperContext& helper_ctx = g_vpr_ctx.mutable_cl_helper(); const DeviceContext& device_ctx = g_vpr_ctx.device(); std::unordered_set is_clock, is_global; @@ -48,8 +41,7 @@ bool try_pack(t_packer_opts* packer_opts, VTR_LOG("Begin packing '%s'.\n", packer_opts->circuit_file_name.c_str()); /* determine number of models in the architecture */ - helper_ctx.num_models = count_models(user_models); - 
helper_ctx.num_models += count_models(library_models); + size_t num_models = count_models(user_models) + count_models(library_models); is_clock = alloc_and_load_is_clock(); is_global.insert(is_clock.begin(), is_clock.end()); @@ -71,8 +63,11 @@ bool try_pack(t_packer_opts* packer_opts, atom_ctx.nlist.blocks().size(), atom_ctx.nlist.nets().size(), num_p_inputs, num_p_outputs); // Run the prepacker, packing the atoms into molecules. + // The Prepacker object performs prepacking and stores the pack molecules. + // As long as the molecules are used, this object must persist. VTR_LOG("Begin prepacking.\n"); - atom_mutable_ctx.prepacker.init(atom_ctx.nlist, device_ctx.logical_block_types); + Prepacker prepacker; + prepacker.init(atom_ctx.nlist, device_ctx.logical_block_types); /* We keep attraction groups off in the first iteration, and * only turn on in later iterations if some floorplan regions turn out to be overfull. @@ -86,11 +81,11 @@ bool try_pack(t_packer_opts* packer_opts, VTR_LOG("Using inter-cluster delay: %g\n", packer_opts->inter_cluster_net_delay); } - helper_ctx.target_external_pin_util = t_ext_pin_util_targets(packer_opts->target_external_pin_util); - helper_ctx.high_fanout_thresholds = t_pack_high_fanout_thresholds(packer_opts->high_fanout_threshold); - - VTR_LOG("Packing with pin utilization targets: %s\n", helper_ctx.target_external_pin_util.to_string().c_str()); - VTR_LOG("Packing with high fanout thresholds: %s\n", helper_ctx.high_fanout_thresholds.to_string().c_str()); + // During clustering, a block is related to un-clustered primitives with nets. + // This relation has three types: low fanout, high fanout, and transitive + // high_fanout_thresholds stores the threshold for nets to a block type to + // be considered high fanout. 
+ t_pack_high_fanout_thresholds high_fanout_thresholds(packer_opts->high_fanout_threshold); bool allow_unrelated_clustering = false; if (packer_opts->allow_unrelated_clustering == e_unrelated_clustering::ON) { @@ -109,30 +104,46 @@ bool try_pack(t_packer_opts* packer_opts, int pack_iteration = 1; bool floorplan_regions_overfull = false; - // find all NoC router atoms - auto noc_atoms = find_noc_router_atoms(); - update_noc_reachability_partitions(noc_atoms); + // Initialize the cluster legalizer. + ClusterLegalizer cluster_legalizer(atom_ctx.nlist, + prepacker, + device_ctx.logical_block_types, + lb_type_rr_graphs, + num_models, + packer_opts->target_external_pin_util, + high_fanout_thresholds, + ClusterLegalizationStrategy::SKIP_INTRA_LB_ROUTE, + packer_opts->enable_pin_feasibility_filter, + packer_opts->feasible_block_array_size, + packer_opts->pack_verbosity); + + VTR_LOG("Packing with pin utilization targets: %s\n", cluster_legalizer.get_target_external_pin_util().to_string().c_str()); + VTR_LOG("Packing with high fanout thresholds: %s\n", high_fanout_thresholds.to_string().c_str()); while (true) { free_clustering_data(*packer_opts, clustering_data); + //Cluster the netlist - helper_ctx.num_used_type_instances = do_clustering( - *packer_opts, - *analysis_opts, - arch, - atom_mutable_ctx.prepacker, - is_clock, - is_global, - allow_unrelated_clustering, - balance_block_type_util, - lb_type_rr_graphs, - attraction_groups, - floorplan_regions_overfull, - clustering_data); + // num_used_type_instances: A map used to save the number of used + // instances from each logical block type. 
+ std::map num_used_type_instances; + num_used_type_instances = do_clustering(*packer_opts, + *analysis_opts, + arch, + prepacker, + cluster_legalizer, + is_clock, + is_global, + allow_unrelated_clustering, + balance_block_type_util, + attraction_groups, + floorplan_regions_overfull, + high_fanout_thresholds, + clustering_data); //Try to size/find a device - bool fits_on_device = try_size_device_grid(*arch, helper_ctx.num_used_type_instances, packer_opts->target_device_utilization, packer_opts->device_layout); + bool fits_on_device = try_size_device_grid(*arch, num_used_type_instances, packer_opts->target_device_utilization, packer_opts->device_layout); /* We use this bool to determine the cause for the clustering not being dense enough. If the clustering * is not dense enough and there are floorplan constraints, it is presumed that the constraints are the cause @@ -187,7 +198,13 @@ bool try_pack(t_packer_opts* packer_opts, VTR_LOG("Pack iteration is %d\n", pack_iteration); attraction_groups.set_att_group_pulls(4); t_ext_pin_util pin_util(1.0, 1.0); - helper_ctx.target_external_pin_util.set_block_pin_util("clb", pin_util); + // TODO: This line assumes the logic block name is "clb" which + // may not be the case. This may need to be investigated. + // Probably we should do this update of ext_pin_util for + // all types that were overused. Or if that is hard, just + // do it for all block types. Doing it only for a clb + // string is dangerous -VB. 
+ cluster_legalizer.get_target_external_pin_util().set_block_pin_util("clb", pin_util); } } else { //Unable to pack densely enough: Give Up @@ -201,8 +218,8 @@ bool try_pack(t_packer_opts* packer_opts, std::string resource_reqs; std::string resource_avail; auto& grid = g_vpr_ctx.device().grid; - for (auto iter = helper_ctx.num_used_type_instances.begin(); iter != helper_ctx.num_used_type_instances.end(); ++iter) { - if (iter != helper_ctx.num_used_type_instances.begin()) { + for (auto iter = num_used_type_instances.begin(); iter != num_used_type_instances.end(); ++iter) { + if (iter != num_used_type_instances.begin()) { resource_reqs += ", "; resource_avail += ", "; } @@ -230,8 +247,8 @@ bool try_pack(t_packer_opts* packer_opts, g_vpr_ctx.mutable_floorplanning().cluster_constraints.clear(); //attraction_groups.reset_attraction_groups(); - free_cluster_placement_stats(helper_ctx.cluster_placement_stats); - delete[] helper_ctx.primitives_list; + // Reset the cluster legalizer for re-clustering. 
+ cluster_legalizer.reset(); ++pack_iteration; } @@ -252,7 +269,7 @@ bool try_pack(t_packer_opts* packer_opts, /******************** End **************************/ //check clustering and output it - check_and_output_clustering(*packer_opts, is_clock, arch, helper_ctx.total_clb_num, clustering_data.intra_lb_routing); + check_and_output_clustering(cluster_legalizer, *packer_opts, is_clock, arch); // Free Data Structures free_clustering_data(*packer_opts, clustering_data); diff --git a/vpr/src/pack/pack.h b/vpr/src/pack/pack.h index 0115d2c859a..842feb0aacd 100644 --- a/vpr/src/pack/pack.h +++ b/vpr/src/pack/pack.h @@ -1,11 +1,15 @@ #ifndef PACK_H #define PACK_H -#include #include -#include "vpr_types.h" +#include class AtomNetId; +struct t_analysis_opts; +struct t_arch; +struct t_lb_type_rr_node; +struct t_model; +struct t_packer_opts; bool try_pack(t_packer_opts* packer_opts, const t_analysis_opts* analysis_opts, diff --git a/vpr/src/pack/re_cluster.cpp b/vpr/src/pack/re_cluster.cpp deleted file mode 100644 index a1b48d0e083..00000000000 --- a/vpr/src/pack/re_cluster.cpp +++ /dev/null @@ -1,276 +0,0 @@ -#include "re_cluster.h" -#include "re_cluster_util.h" -#include "initial_placement.h" -#include "cluster_placement.h" -#include "cluster_router.h" - -bool move_mol_to_new_cluster(t_pack_molecule* molecule, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data) { - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - auto& device_ctx = g_vpr_ctx.device(); - - bool is_removed, is_created; - ClusterBlockId old_clb = atom_to_cluster(molecule->atom_block_ids[molecule->root]); - int molecule_size = get_array_size_of_molecule(molecule); - - NocGroupId temp_noc_grp_id = NocGroupId::INVALID(); - PartitionRegion temp_cluster_pr; - t_lb_router_data* old_router_data = nullptr; - t_lb_router_data* router_data = nullptr; - - //Check that there is a place for a new cluster of the same type - 
t_logical_block_type_ptr block_type = cluster_ctx.clb_nlist.block_type(old_clb); - int block_mode = cluster_ctx.clb_nlist.block_pb(old_clb)->mode; - - unsigned int num_instances = 0; - for (auto equivalent_tile : block_type->equivalent_tiles) { - num_instances += device_ctx.grid.num_instances(equivalent_tile, -1); - } - - if (helper_ctx.num_used_type_instances[block_type] == num_instances) { - VTR_LOGV(verbosity > 4, "The utilization of block_type %s is 100%. No space for new clusters\n", block_type->name); - VTR_LOGV(verbosity > 4, "Atom %d move aborted\n", molecule->atom_block_ids[molecule->root]); - return false; - } - - //remove the molecule from its current cluster - std::unordered_set& old_clb_atoms = cluster_to_mutable_atoms(old_clb); - if (old_clb_atoms.size() == 1) { - VTR_LOGV(verbosity > 4, "Atom: %zu move failed. This is the last atom in its cluster.\n"); - return false; - } - remove_mol_from_cluster(molecule, molecule_size, old_clb, old_clb_atoms, false, old_router_data); - - //check old cluster legality after removing the molecule - is_removed = is_cluster_legal(old_router_data); - - //if the cluster is legal, commit the molecule removal. Otherwise, abort the move - if (is_removed) { - commit_mol_removal(molecule, molecule_size, old_clb, during_packing, old_router_data, clustering_data); - } else { - VTR_LOGV(verbosity > 4, "Atom: %zu move failed. Can't remove it from the old cluster\n", molecule->atom_block_ids[molecule->root]); - return false; - } - - //Create new cluster of the same type and mode. 
- ClusterBlockId new_clb(helper_ctx.total_clb_num); - is_created = start_new_cluster_for_mol(molecule, - block_type, - block_mode, - helper_ctx.feasible_block_array_size, - helper_ctx.enable_pin_feasibility_filter, - new_clb, - during_packing, - verbosity, - clustering_data, - &router_data, - temp_cluster_pr, - temp_noc_grp_id); - - //Commit or revert the move - if (is_created) { - commit_mol_move(old_clb, new_clb, during_packing, true); - VTR_LOGV(verbosity > 4, "Atom:%zu is moved to a new cluster\n", molecule->atom_block_ids[molecule->root]); - } else { - revert_mol_move(old_clb, molecule, old_router_data, during_packing, clustering_data); - VTR_LOGV(verbosity > 4, "Atom:%zu move failed. Can't start a new cluster of the same type and mode\n", molecule->atom_block_ids[molecule->root]); - } - - free_router_data(old_router_data); - old_router_data = nullptr; - - //If the move is done after packing not during it, some fixes need to be done on the - //clustered netlist - if (is_created && !during_packing) { - fix_clustered_netlist(molecule, molecule_size, old_clb, new_clb); - } - - return (is_created); -} - -bool move_mol_to_existing_cluster(t_pack_molecule* molecule, - const ClusterBlockId& new_clb, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data) { - //define local variables - bool is_removed, is_added; - AtomBlockId root_atom_id = molecule->atom_block_ids[molecule->root]; - int molecule_size = get_array_size_of_molecule(molecule); - t_lb_router_data* old_router_data = nullptr; - std::unordered_set& new_clb_atoms = cluster_to_mutable_atoms(new_clb); - ClusterBlockId old_clb = atom_to_cluster(root_atom_id); - - //check old and new clusters compatibility - bool is_compatible = check_type_and_mode_compatibility(old_clb, new_clb, verbosity); - if (!is_compatible) - return false; - - //remove the molecule from its current cluster - std::unordered_set& old_clb_atoms = cluster_to_mutable_atoms(old_clb); - if (old_clb_atoms.size() == 1) { - 
VTR_LOGV(verbosity > 4, "Atom: %zu move failed. This is the last atom in its cluster.\n"); - return false; - } - remove_mol_from_cluster(molecule, molecule_size, old_clb, old_clb_atoms, false, old_router_data); - - //check old cluster legality after removing the molecule - is_removed = is_cluster_legal(old_router_data); - - //if the cluster is legal, commit the molecule removal. Otherwise, abort the move - if (is_removed) { - commit_mol_removal(molecule, molecule_size, old_clb, during_packing, old_router_data, clustering_data); - } else { - VTR_LOGV(verbosity > 4, "Atom: %zu move failed. Can't remove it from the old cluster\n", root_atom_id); - return false; - } - - //Add the atom to the new cluster - t_lb_router_data* new_router_data = nullptr; - is_added = pack_mol_in_existing_cluster(molecule, molecule_size, new_clb, new_clb_atoms, during_packing, clustering_data, new_router_data); - - //Commit or revert the move - if (is_added) { - commit_mol_move(old_clb, new_clb, during_packing, false); - VTR_LOGV(verbosity > 4, "Atom:%zu is moved to a new cluster\n", molecule->atom_block_ids[molecule->root]); - } else { - revert_mol_move(old_clb, molecule, old_router_data, during_packing, clustering_data); - VTR_LOGV(verbosity > 4, "Atom:%zu move failed. 
Can't start a new cluster of the same type and mode\n", molecule->atom_block_ids[molecule->root]); - } - - free_router_data(old_router_data); - old_router_data = nullptr; - - //If the move is done after packing not during it, some fixes need to be done on the - //clustered netlist - if (is_added && !during_packing) { - fix_clustered_netlist(molecule, molecule_size, old_clb, new_clb); - } - - return (is_added); -} - -#if 1 -bool swap_two_molecules(t_pack_molecule* molecule_1, - t_pack_molecule* molecule_2, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - //define local variables - PartitionRegion temp_cluster_pr_1, temp_cluster_pr_2; - - bool mol_1_success, mol_2_success; - - AtomBlockId root_1_atom_id = molecule_1->atom_block_ids[molecule_1->root]; - AtomBlockId root_2_atom_id = molecule_2->atom_block_ids[molecule_2->root]; - - int molecule_1_size = get_array_size_of_molecule(molecule_1); - int molecule_2_size = get_array_size_of_molecule(molecule_2); - - //Check that the 2 clusters are the same type - ClusterBlockId clb_1 = atom_to_cluster(root_1_atom_id); - ClusterBlockId clb_2 = atom_to_cluster(root_2_atom_id); - - if (clb_1 == clb_2) { - VTR_LOGV(verbosity > 4, "Swap failed. Both atoms are already in the same cluster.\n"); - return false; - } - //Check that the old and new clusters are of the same type - bool is_compitable = check_type_and_mode_compatibility(clb_1, clb_2, verbosity); - if (!is_compitable) - return false; - - t_lb_router_data* old_1_router_data = nullptr; - t_lb_router_data* old_2_router_data = nullptr; - - //save the atoms of the 2 clusters - std::unordered_set& clb_1_atoms = cluster_to_mutable_atoms(clb_1); - std::unordered_set& clb_2_atoms = cluster_to_mutable_atoms(clb_2); - - if (clb_1_atoms.size() == 1 || clb_2_atoms.size() == 1) { - VTR_LOGV(verbosity > 4, "Atom: %zu, %zu swap failed. 
This is the last atom in its cluster.\n", - molecule_1->atom_block_ids[molecule_1->root], - molecule_2->atom_block_ids[molecule_2->root]); - return false; - } - - t_pb* clb_pb_1 = cluster_ctx.clb_nlist.block_pb(clb_1); - std::string clb_pb_1_name = static_cast(clb_pb_1->name); - t_pb* clb_pb_2 = cluster_ctx.clb_nlist.block_pb(clb_2); - std::string clb_pb_2_name = static_cast(clb_pb_2->name); - - //remove the molecule from its current cluster - remove_mol_from_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, false, old_1_router_data); - commit_mol_removal(molecule_1, molecule_1_size, clb_1, during_packing, old_1_router_data, clustering_data); - - remove_mol_from_cluster(molecule_2, molecule_2_size, clb_2, clb_2_atoms, false, old_2_router_data); - commit_mol_removal(molecule_2, molecule_2_size, clb_2, during_packing, old_2_router_data, clustering_data); - - //Add the atom to the new cluster - mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_2, clb_2_atoms, during_packing, clustering_data, old_2_router_data); - if (!mol_1_success) { - mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, during_packing, clustering_data, old_1_router_data); - mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_2, clb_2_atoms, during_packing, clustering_data, old_2_router_data); - - VTR_ASSERT(mol_1_success && mol_2_success); - free_router_data(old_1_router_data); - free_router_data(old_2_router_data); - old_1_router_data = nullptr; - old_2_router_data = nullptr; - - free(clb_pb_1->name); - cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); - free(clb_pb_2->name); - cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); - - return false; - } - - mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_1, clb_1_atoms, during_packing, clustering_data, old_1_router_data); - if (!mol_2_success) { - 
remove_mol_from_cluster(molecule_1, molecule_1_size, clb_2, clb_2_atoms, true, old_2_router_data); - commit_mol_removal(molecule_1, molecule_1_size, clb_2, during_packing, old_2_router_data, clustering_data); - mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, during_packing, clustering_data, old_1_router_data); - mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_2, clb_2_atoms, during_packing, clustering_data, old_2_router_data); - - VTR_ASSERT(mol_1_success && mol_2_success); - free_router_data(old_1_router_data); - free_router_data(old_2_router_data); - old_1_router_data = nullptr; - old_2_router_data = nullptr; - - free(clb_pb_1->name); - cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); - free(clb_pb_2->name); - cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); - - return false; - } - - //commit the move if succeeded or revert if failed - VTR_ASSERT(mol_1_success && mol_2_success); - - //If the move is done after packing not during it, some fixes need to be done on the clustered netlist - if (!during_packing) { - fix_clustered_netlist(molecule_1, molecule_1_size, clb_1, clb_2); - fix_clustered_netlist(molecule_2, molecule_2_size, clb_2, clb_1); - } - - free_router_data(old_1_router_data); - free_router_data(old_2_router_data); - old_1_router_data = nullptr; - old_2_router_data = nullptr; - - free(clb_pb_1->name); - cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); - free(clb_pb_2->name); - cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); - - return true; -} -#endif diff --git a/vpr/src/pack/re_cluster.h b/vpr/src/pack/re_cluster.h deleted file mode 100644 index 5ca2489aac4..00000000000 --- a/vpr/src/pack/re_cluster.h +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef RE_CLUSTER_H -#define RE_CLUSTER_H -/** - * @file This file includes an API function that updates clustering 
after its done - * - * To optimize the clustering decisions, this file provides an API that can open up already - * packed clusters and change them. The functions in this API can be used in 2 locations: - * - During packing after the clusterer is done - * - During placement after the initial placement is done - * - */ - -#include "pack_types.h" -#include "clustered_netlist_utils.h" -#include "cluster_util.h" - -/** - * @brief This function moves a molecule out of its cluster and creates a new cluster for it - * - * This function can be called from 2 spots in the vpr flow. - * - First, during packing to optimize the initial clustered netlist - * (during_packing variable should be true.) - * - Second, during placement (during_packing variable should be false). In this case, the clustered - * netlist is updated. - */ -bool move_mol_to_new_cluster(t_pack_molecule* molecule, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data); - -/** - * @brief This function moves a molecule out of its cluster to another cluster that already exists. - * - * This function can be called from 2 spots in the vpr flow. - * - First, during packing to optimize the initial clustered netlist - * (during_packing variable should be true.) - * - Second, during placement (during_packing variable should be false). In this case, the clustered - * netlist is updated. - */ -bool move_mol_to_existing_cluster(t_pack_molecule* molecule, - const ClusterBlockId& new_clb, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data); - -/** - * @brief This function swap two molecules between two different clusters. - * - * This function can be called from 2 spots in the vpr flow. - * - First, during packing to optimize the initial clustered netlist - * (during_packing variable should be true.) - * - Second, during placement (during_packing variable should be false). In this case, the clustered - * netlist is updated. 
- */ -bool swap_two_molecules(t_pack_molecule* molecule_1, - t_pack_molecule* molecule_2, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data); -#endif diff --git a/vpr/src/pack/re_cluster_util.cpp b/vpr/src/pack/re_cluster_util.cpp deleted file mode 100644 index 46eb04955a5..00000000000 --- a/vpr/src/pack/re_cluster_util.cpp +++ /dev/null @@ -1,764 +0,0 @@ -#include "re_cluster_util.h" -#include "clustered_netlist_utils.h" -#include "cluster_util.h" -#include "cluster_router.h" -#include "cluster_placement.h" -#include "place_macro.h" -#include "initial_placement.h" -#include "read_netlist.h" - -// The name suffix of the new block (if exists) -// This suffix is useful in preventing duplicate high-level cluster block names -const char* name_suffix = "_m"; - -/******************* Static Functions ********************/ -static void load_atom_index_for_pb_pin(t_pb_routes& pb_route, int ipin); -static void load_internal_to_block_net_nums(const t_logical_block_type_ptr type, t_pb_routes& pb_route); -static void fix_atom_pin_mapping(const AtomBlockId blk); - -static void fix_cluster_pins_after_moving(const ClusterBlockId clb_index); - -static std::pair check_net_absorption(AtomNetId atom_net_id, - ClusterBlockId new_clb, - ClusterBlockId old_clb, - ClusterPinId& cluster_pin_id); - -static void fix_cluster_port_after_moving(const ClusterBlockId clb_index); - -static void fix_cluster_net_after_moving(const t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId old_clb, - ClusterBlockId new_clb); - -static void rebuild_cluster_placement_stats(ClusterBlockId clb_index, - const std::unordered_set& clb_atoms); - -static void update_cluster_pb_stats(const t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId clb_index, - bool is_added); - -/***************** API functions ***********************/ -ClusterBlockId atom_to_cluster(AtomBlockId atom) { - auto& atom_ctx = g_vpr_ctx.atom(); - return (atom_ctx.lookup.atom_clb(atom)); -} - 
-const std::unordered_set& cluster_to_atoms(ClusterBlockId cluster) { - const auto& atoms = cluster_to_mutable_atoms(cluster); - - return atoms; -} - -void remove_mol_from_cluster(const t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId& old_clb, - std::unordered_set& old_clb_atoms, - bool router_data_ready, - t_lb_router_data*& router_data) { - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - - for (int i_atom = 0; i_atom < molecule_size; i_atom++) { - if (molecule->atom_block_ids[i_atom]) { - auto it = old_clb_atoms.find(molecule->atom_block_ids[i_atom]); - if (it != old_clb_atoms.end()) - old_clb_atoms.erase(molecule->atom_block_ids[i_atom]); - } - } - - //re-build router_data structure for this cluster - if (!router_data_ready) - router_data = lb_load_router_data(helper_ctx.lb_type_rr_graphs, old_clb, old_clb_atoms); - - update_cluster_pb_stats(molecule, molecule_size, old_clb, false); -} - -void commit_mol_move(ClusterBlockId old_clb, - ClusterBlockId new_clb, - bool during_packing, - bool new_clb_created) { - auto& device_ctx = g_vpr_ctx.device(); - - //place the new cluster if this function called during placement (after the initial placement is done) - if (!during_packing && new_clb_created) { - int imacro; - g_vpr_ctx.mutable_placement().mutable_block_locs().resize(g_vpr_ctx.placement().block_locs().size() + 1); - get_imacro_from_iblk(&imacro, old_clb, g_vpr_ctx.placement().pl_macros); - set_imacro_for_iblk(&imacro, new_clb); - place_one_block(new_clb, device_ctx.pad_loc_type, nullptr, nullptr, g_vpr_ctx.mutable_placement().mutable_blk_loc_registry()); - } -} - -t_lb_router_data* lb_load_router_data(std::vector* lb_type_rr_graphs, - ClusterBlockId clb_index, - const std::unordered_set& clb_atoms) { - //build data structures used by intra-logic block router - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& atom_ctx = g_vpr_ctx.atom(); - auto block_type = cluster_ctx.clb_nlist.block_type(clb_index); - t_lb_router_data* router_data = 
alloc_and_load_router_data(&lb_type_rr_graphs[block_type->index], block_type); - - for (auto atom_id : clb_atoms) { - add_atom_as_target(router_data, atom_id); - const t_pb* pb = atom_ctx.lookup.atom_pb(atom_id); - while (pb) { - set_reset_pb_modes(router_data, pb, true); - pb = pb->parent_pb; - } - } - return (router_data); -} - -bool start_new_cluster_for_mol(t_pack_molecule* molecule, - const t_logical_block_type_ptr type, - const int mode, - const int feasible_block_array_size, - bool enable_pin_feasibility_filter, - ClusterBlockId clb_index, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data, - t_lb_router_data** router_data, - PartitionRegion& temp_cluster_pr, - NocGroupId& temp_cluster_noc_grp_id, - enum e_detailed_routing_stages detailed_routing_stage, - int force_site) { - auto& atom_ctx = g_vpr_ctx.atom(); - auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - /* Cluster's PartitionRegion is empty initially, meaning it has no floorplanning constraints */ - PartitionRegion empty_pr; - floorplanning_ctx.cluster_constraints.push_back(empty_pr); - - /* Allocate a dummy initial cluster and load a atom block as a seed and check if it is legal */ - AtomBlockId root_atom = molecule->atom_block_ids[molecule->root]; - const std::string& root_atom_name = atom_ctx.nlist.block_name(root_atom); - //const t_model* root_model = atom_ctx.nlist.block_model(root_atom); - - t_pb* pb = new t_pb; - pb->pb_graph_node = type->pb_graph_head; - alloc_and_load_pb_stats(pb, feasible_block_array_size); - pb->parent_pb = nullptr; - - *router_data = alloc_and_load_router_data(&(helper_ctx.lb_type_rr_graphs[type->index]), type); - - e_block_pack_status pack_result = e_block_pack_status::BLK_STATUS_UNDEFINED; - pb->mode = mode; - t_cluster_placement_stats* cluster_placement_stats = &(helper_ctx.cluster_placement_stats[type->index]); - 
reset_cluster_placement_stats(cluster_placement_stats); - set_mode_cluster_placement_stats(pb->pb_graph_node, mode); - - pack_result = try_pack_molecule(cluster_placement_stats, - molecule, - helper_ctx.primitives_list, - pb, - helper_ctx.num_models, - helper_ctx.max_cluster_size, - clb_index, - detailed_routing_stage, - *router_data, - 0, - enable_pin_feasibility_filter, - 0, - FULL_EXTERNAL_PIN_UTIL, - temp_cluster_pr, - temp_cluster_noc_grp_id, - force_site); - - // If clustering succeeds, add it to the clb netlist - if (pack_result == e_block_pack_status::BLK_PASSED) { - VTR_LOGV(verbosity > 4, "\tPASSED_SEED: Block Type %s\n", type->name); - //Once clustering succeeds, add it to the clb netlist - if (pb->name != nullptr) { - free(pb->name); - } - std::string new_name = root_atom_name + name_suffix; - pb->name = vtr::strdup(new_name.c_str()); - clb_index = cluster_ctx.clb_nlist.create_block(new_name.c_str(), pb, type); - helper_ctx.total_clb_num++; - int molecule_size = get_array_size_of_molecule(molecule); - update_cluster_pb_stats(molecule, molecule_size, clb_index, true); - - // Update the clb-->atoms lookup table - helper_ctx.atoms_lookup.resize(helper_ctx.total_clb_num); - for (int i_atom = 0; i_atom < molecule_size; ++i_atom) { - if (molecule->atom_block_ids[i_atom]) { - helper_ctx.atoms_lookup[clb_index].insert(molecule->atom_block_ids[i_atom]); - } - } - - //If you are still in packing, update the clustering data. Otherwise, update the clustered netlist. 
- if (during_packing) { - clustering_data.intra_lb_routing.push_back((*router_data)->saved_lb_nets); - (*router_data)->saved_lb_nets = nullptr; - } else { - cluster_ctx.clb_nlist.block_pb(clb_index)->pb_route = alloc_and_load_pb_route((*router_data)->saved_lb_nets, cluster_ctx.clb_nlist.block_pb(clb_index)->pb_graph_node); - } - } else { - free_pb(pb); - delete pb; - } - - //Free failed clustering - free_router_data(*router_data); - *router_data = nullptr; - - return (pack_result == e_block_pack_status::BLK_PASSED); -} - -bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId new_clb, - std::unordered_set& new_clb_atoms, - bool during_packing, - t_clustering_data& clustering_data, - t_lb_router_data*& router_data, - enum e_detailed_routing_stages detailed_routing_stage, - bool enable_pin_feasibility_filter, - int force_site) { - - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - NocGroupId temp_cluster_noc_grp_id; - PartitionRegion temp_cluster_pr; - e_block_pack_status pack_result = e_block_pack_status::BLK_STATUS_UNDEFINED; - t_ext_pin_util target_ext_pin_util = helper_ctx.target_external_pin_util.get_pin_util(cluster_ctx.clb_nlist.block_type(new_clb)->name); - t_logical_block_type_ptr block_type = cluster_ctx.clb_nlist.block_type(new_clb); - t_pb* temp_pb = cluster_ctx.clb_nlist.block_pb(new_clb); - - //re-build cluster placement stats - rebuild_cluster_placement_stats(new_clb, new_clb_atoms); - if (!check_free_primitives_for_molecule_atoms(molecule, &(helper_ctx.cluster_placement_stats[block_type->index]))) - return false; - - //re-build router_data structure for this cluster - router_data = lb_load_router_data(helper_ctx.lb_type_rr_graphs, new_clb, new_clb_atoms); - - pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[block_type->index]), - molecule, - helper_ctx.primitives_list, - temp_pb, - helper_ctx.num_models, - 
helper_ctx.max_cluster_size, - new_clb, - detailed_routing_stage, - router_data, - 0, - enable_pin_feasibility_filter, - //false, - helper_ctx.feasible_block_array_size, - target_ext_pin_util, - temp_cluster_pr, - temp_cluster_noc_grp_id, - force_site); - - // If clustering succeeds, add it to the clb netlist - if (pack_result == e_block_pack_status::BLK_PASSED) { - //If you are still in packing, update the clustering data. Otherwise, update the clustered netlist. - if (during_packing) { - free_intra_lb_nets(clustering_data.intra_lb_routing[new_clb]); - clustering_data.intra_lb_routing[new_clb] = router_data->saved_lb_nets; - router_data->saved_lb_nets = nullptr; - } else { - cluster_ctx.clb_nlist.block_pb(new_clb)->pb_route.clear(); - cluster_ctx.clb_nlist.block_pb(new_clb)->pb_route = alloc_and_load_pb_route(router_data->saved_lb_nets, cluster_ctx.clb_nlist.block_pb(new_clb)->pb_graph_node); - } - - for (int i_atom = 0; i_atom < molecule_size; i_atom++) { - if (molecule->atom_block_ids[i_atom]) { - new_clb_atoms.insert(molecule->atom_block_ids[i_atom]); - } - } - update_cluster_pb_stats(molecule, molecule_size, new_clb, true); - } - - //Free clustering router data - free_router_data(router_data); - router_data = nullptr; - - return (pack_result == e_block_pack_status::BLK_PASSED); -} - -void fix_clustered_netlist(t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId old_clb, - ClusterBlockId new_clb) { - fix_cluster_port_after_moving(new_clb); - fix_cluster_net_after_moving(molecule, molecule_size, old_clb, new_clb); -} - -void revert_mol_move(ClusterBlockId old_clb, - t_pack_molecule* molecule, - t_lb_router_data*& old_router_data, - bool during_packing, - t_clustering_data& clustering_data) { - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - NocGroupId temp_cluster_noc_grp_id_original; - PartitionRegion temp_cluster_pr_original; - e_block_pack_status pack_result = 
try_pack_molecule(&(helper_ctx.cluster_placement_stats[cluster_ctx.clb_nlist.block_type(old_clb)->index]), - molecule, - helper_ctx.primitives_list, - cluster_ctx.clb_nlist.block_pb(old_clb), - helper_ctx.num_models, - helper_ctx.max_cluster_size, - old_clb, - E_DETAILED_ROUTE_FOR_EACH_ATOM, - old_router_data, - 0, - helper_ctx.enable_pin_feasibility_filter, - helper_ctx.feasible_block_array_size, - helper_ctx.target_external_pin_util.get_pin_util(cluster_ctx.clb_nlist.block_type(old_clb)->name), - temp_cluster_pr_original, - temp_cluster_noc_grp_id_original); - - VTR_ASSERT(pack_result == e_block_pack_status::BLK_PASSED); - //If you are still in packing, update the clustering data. Otherwise, update the clustered netlist. - if (during_packing) { - free_intra_lb_nets(clustering_data.intra_lb_routing[old_clb]); - clustering_data.intra_lb_routing[old_clb] = old_router_data->saved_lb_nets; - old_router_data->saved_lb_nets = nullptr; - } else { - cluster_ctx.clb_nlist.block_pb(old_clb)->pb_route.clear(); - cluster_ctx.clb_nlist.block_pb(old_clb)->pb_route = alloc_and_load_pb_route(old_router_data->saved_lb_nets, cluster_ctx.clb_nlist.block_pb(old_clb)->pb_graph_node); - } - - free_router_data(old_router_data); - old_router_data = nullptr; -} -/*******************************************/ -/************ static functions *************/ -/*******************************************/ - -static void fix_cluster_net_after_moving(const t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId old_clb, - ClusterBlockId new_clb) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - - AtomNetId atom_net_id; - ClusterPinId cluster_pin; - - //remove all old cluster pin from their nets - ClusterNetId cur_clb_net; - for (auto& old_clb_pin : cluster_ctx.clb_nlist.block_pins(old_clb)) { - cur_clb_net = cluster_ctx.clb_nlist.pin_net(old_clb_pin); - cluster_ctx.clb_nlist.remove_net_pin(cur_clb_net, old_clb_pin); - } - - //delete 
cluster nets that are no longer used - for (int i_atom = 0; i_atom < molecule_size; i_atom++) { - if (molecule->atom_block_ids[i_atom]) { - for (auto atom_pin : atom_ctx.nlist.block_pins(molecule->atom_block_ids[i_atom])) { - atom_net_id = atom_ctx.nlist.pin_net(atom_pin); - auto [previously_absorbed, now_absorbed] = check_net_absorption(atom_net_id, new_clb, old_clb, cluster_pin); - - if (!previously_absorbed && now_absorbed) { - cur_clb_net = cluster_ctx.clb_nlist.pin_net(cluster_pin); - cluster_ctx.clb_nlist.remove_net(cur_clb_net); - } - } - } - } - - //Fix cluster pin for old and new clbs - fix_cluster_pins_after_moving(old_clb); - fix_cluster_pins_after_moving(new_clb); - - for (AtomBlockId atom_blk : cluster_to_atoms(old_clb)) - fix_atom_pin_mapping(atom_blk); - - for (AtomBlockId atom_blk : cluster_to_atoms(new_clb)) - fix_atom_pin_mapping(atom_blk); - - cluster_ctx.clb_nlist.remove_and_compress(); - load_internal_to_block_net_nums(cluster_ctx.clb_nlist.block_type(old_clb), cluster_ctx.clb_nlist.block_pb(old_clb)->pb_route); - load_internal_to_block_net_nums(cluster_ctx.clb_nlist.block_type(new_clb), cluster_ctx.clb_nlist.block_pb(new_clb)->pb_route); -} - -static void fix_cluster_port_after_moving(const ClusterBlockId clb_index) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - const t_pb* pb = cluster_ctx.clb_nlist.block_pb(clb_index); - - while (!pb->is_root()) { - pb = pb->parent_pb; - } - - size_t num_old_ports = cluster_ctx.clb_nlist.block_ports(clb_index).size(); - const t_pb_type* pb_type = pb->pb_graph_node->pb_type; - - for (size_t port = num_old_ports; port < (unsigned)pb_type->num_ports; port++) { - if (pb_type->ports[port].is_clock && pb_type->ports[port].type == IN_PORT) { - cluster_ctx.clb_nlist.create_port(clb_index, pb_type->ports[port].name, pb_type->ports[port].num_pins, PortType::CLOCK); - } else if (!pb_type->ports[port].is_clock && pb_type->ports[port].type == IN_PORT) { - cluster_ctx.clb_nlist.create_port(clb_index, 
pb_type->ports[port].name, pb_type->ports[port].num_pins, PortType::INPUT); - } else { - VTR_ASSERT(pb_type->ports[port].type == OUT_PORT); - cluster_ctx.clb_nlist.create_port(clb_index, pb_type->ports[port].name, pb_type->ports[port].num_pins, PortType::OUTPUT); - } - } - - num_old_ports = cluster_ctx.clb_nlist.block_ports(clb_index).size(); -} - -static void fix_cluster_pins_after_moving(const ClusterBlockId clb_index) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - - const t_pb* pb = cluster_ctx.clb_nlist.block_pb(clb_index); - t_pb_graph_pin* pb_graph_pin; - AtomNetId atom_net_id; - ClusterNetId clb_net_id; - - t_logical_block_type_ptr block_type = cluster_ctx.clb_nlist.block_type(clb_index); - - int num_input_ports = pb->pb_graph_node->num_input_ports; - int num_output_ports = pb->pb_graph_node->num_output_ports; - int num_clock_ports = pb->pb_graph_node->num_clock_ports; - - int iport, ipb_pin, ipin, rr_node_index; - - ipin = 0; - // iterating over input ports - for (iport = 0; iport < num_input_ports; iport++) { - ClusterPortId input_port_id = cluster_ctx.clb_nlist.find_port(clb_index, block_type->pb_type->ports[iport].name); - // iterating over physical block pins of each input port - for (ipb_pin = 0; ipb_pin < pb->pb_graph_node->num_input_pins[iport]; ipb_pin++) { - pb_graph_pin = &pb->pb_graph_node->input_pins[iport][ipb_pin]; - rr_node_index = pb_graph_pin->pin_count_in_cluster; - - VTR_ASSERT(pb_graph_pin->pin_count_in_cluster == ipin); - if (pb->pb_route.count(rr_node_index)) { - atom_net_id = pb->pb_route[rr_node_index].atom_net_id; - if (atom_net_id) { - clb_net_id = cluster_ctx.clb_nlist.create_net(atom_ctx.nlist.net_name(atom_net_id)); - atom_ctx.lookup.set_atom_clb_net(atom_net_id, clb_net_id); - ClusterPinId cur_pin_id = cluster_ctx.clb_nlist.find_pin(input_port_id, (BitIndex)ipb_pin); - if (!cur_pin_id) - cluster_ctx.clb_nlist.create_pin(input_port_id, (BitIndex)ipb_pin, clb_net_id, 
PinType::SINK, ipin); - else - cluster_ctx.clb_nlist.set_pin_net(cur_pin_id, PinType::SINK, clb_net_id); - } - cluster_ctx.clb_nlist.block_pb(clb_index)->pb_route[rr_node_index].pb_graph_pin = pb_graph_pin; - } - ipin++; - } - } - - // iterating over output ports - for (iport = 0; iport < num_output_ports; iport++) { - ClusterPortId output_port_id = cluster_ctx.clb_nlist.find_port(clb_index, block_type->pb_type->ports[num_input_ports + iport].name); - // iterating over physical block pins of each output port - for (ipb_pin = 0; ipb_pin < pb->pb_graph_node->num_output_pins[iport]; ipb_pin++) { - pb_graph_pin = &pb->pb_graph_node->output_pins[iport][ipb_pin]; - rr_node_index = pb_graph_pin->pin_count_in_cluster; - - VTR_ASSERT(pb_graph_pin->pin_count_in_cluster == ipin); - if (pb->pb_route.count(rr_node_index)) { - atom_net_id = pb->pb_route[rr_node_index].atom_net_id; - if (atom_net_id) { - clb_net_id = cluster_ctx.clb_nlist.create_net(atom_ctx.nlist.net_name(atom_net_id)); - atom_ctx.lookup.set_atom_clb_net(atom_net_id, clb_net_id); - ClusterPinId cur_pin_id = cluster_ctx.clb_nlist.find_pin(output_port_id, (BitIndex)ipb_pin); - AtomPinId atom_net_driver = atom_ctx.nlist.net_driver(atom_net_id); - bool driver_is_constant = atom_ctx.nlist.pin_is_constant(atom_net_driver); - if (!cur_pin_id) - cluster_ctx.clb_nlist.create_pin(output_port_id, (BitIndex)ipb_pin, clb_net_id, PinType::DRIVER, ipin, driver_is_constant); - else { - cluster_ctx.clb_nlist.set_pin_net(cur_pin_id, PinType::DRIVER, clb_net_id); - cluster_ctx.clb_nlist.set_pin_is_constant(cur_pin_id, driver_is_constant); - } - VTR_ASSERT(cluster_ctx.clb_nlist.net_is_constant(clb_net_id) == driver_is_constant); - } - cluster_ctx.clb_nlist.block_pb(clb_index)->pb_route[rr_node_index].pb_graph_pin = pb_graph_pin; - } - ipin++; - } - } - - // iterating over clock ports - for (iport = 0; iport < num_clock_ports; iport++) { - ClusterPortId clock_port_id = cluster_ctx.clb_nlist.find_port(clb_index, 
block_type->pb_type->ports[num_input_ports + num_output_ports + iport].name); - // iterating over physical block pins of each clock port - for (ipb_pin = 0; ipb_pin < pb->pb_graph_node->num_clock_pins[iport]; ipb_pin++) { - pb_graph_pin = &pb->pb_graph_node->clock_pins[iport][ipb_pin]; - rr_node_index = pb_graph_pin->pin_count_in_cluster; - - VTR_ASSERT(pb_graph_pin->pin_count_in_cluster == ipin); - if (pb->pb_route.count(rr_node_index)) { - atom_net_id = pb->pb_route[rr_node_index].atom_net_id; - if (atom_net_id) { - clb_net_id = cluster_ctx.clb_nlist.create_net(atom_ctx.nlist.net_name(atom_net_id)); - atom_ctx.lookup.set_atom_clb_net(atom_net_id, clb_net_id); - ClusterPinId cur_pin_id = cluster_ctx.clb_nlist.find_pin(clock_port_id, (BitIndex)ipb_pin); - if (!cur_pin_id) - cluster_ctx.clb_nlist.create_pin(clock_port_id, (BitIndex)ipb_pin, clb_net_id, PinType::SINK, ipin); - else - cluster_ctx.clb_nlist.set_pin_net(cur_pin_id, PinType::SINK, clb_net_id); - } - cluster_ctx.clb_nlist.block_pb(clb_index)->pb_route[rr_node_index].pb_graph_pin = pb_graph_pin; - } - ipin++; - } - } -} - -static std::pair check_net_absorption(const AtomNetId atom_net_id, - const ClusterBlockId new_clb, - const ClusterBlockId old_clb, - ClusterPinId& cluster_pin_id) { - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - - AtomBlockId atom_block_id; - ClusterBlockId clb_index; - - ClusterNetId clb_net_id = atom_ctx.lookup.clb_net(atom_net_id); - - bool previously_absorbed; - if (clb_net_id == ClusterNetId::INVALID()) - previously_absorbed = true; - else { - previously_absorbed = false; - for (auto& cluster_pin : cluster_ctx.clb_nlist.net_pins(clb_net_id)) { - if (cluster_pin && cluster_ctx.clb_nlist.pin_block(cluster_pin) == old_clb) { - cluster_pin_id = cluster_pin; - break; - } - } - } - - //iterate over net pins and check their cluster - bool now_absorbed = true; - for (auto& net_pin : atom_ctx.nlist.net_pins(atom_net_id)) { - atom_block_id = 
atom_ctx.nlist.pin_block(net_pin); - clb_index = atom_ctx.lookup.atom_clb(atom_block_id); - - if (clb_index != new_clb) { - now_absorbed = false; - break; - } - } - - return {previously_absorbed, now_absorbed}; -} - -static void fix_atom_pin_mapping(const AtomBlockId blk) { - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - - const t_pb* pb = atom_ctx.lookup.atom_pb(blk); - VTR_ASSERT_MSG(pb, "Atom block must have a matching PB"); - - const t_pb_graph_node* gnode = pb->pb_graph_node; - VTR_ASSERT_MSG(gnode->pb_type->model == atom_ctx.nlist.block_model(blk), - "Atom block PB must match BLIF model"); - - for (int iport = 0; iport < gnode->num_input_ports; ++iport) { - if (gnode->num_input_pins[iport] <= 0) continue; - - const AtomPortId port = atom_ctx.nlist.find_atom_port(blk, gnode->input_pins[iport][0].port->model_port); - if (!port) continue; - - for (int ipin = 0; ipin < gnode->num_input_pins[iport]; ++ipin) { - const t_pb_graph_pin* gpin = &gnode->input_pins[iport][ipin]; - VTR_ASSERT(gpin); - - set_atom_pin_mapping(cluster_ctx.clb_nlist, blk, port, gpin); - } - } - - for (int iport = 0; iport < gnode->num_output_ports; ++iport) { - if (gnode->num_output_pins[iport] <= 0) continue; - - const AtomPortId port = atom_ctx.nlist.find_atom_port(blk, gnode->output_pins[iport][0].port->model_port); - if (!port) continue; - - for (int ipin = 0; ipin < gnode->num_output_pins[iport]; ++ipin) { - const t_pb_graph_pin* gpin = &gnode->output_pins[iport][ipin]; - VTR_ASSERT(gpin); - - set_atom_pin_mapping(cluster_ctx.clb_nlist, blk, port, gpin); - } - } - - for (int iport = 0; iport < gnode->num_clock_ports; ++iport) { - if (gnode->num_clock_pins[iport] <= 0) continue; - - const AtomPortId port = atom_ctx.nlist.find_atom_port(blk, gnode->clock_pins[iport][0].port->model_port); - if (!port) continue; - - for (int ipin = 0; ipin < gnode->num_clock_pins[iport]; ++ipin) { - const t_pb_graph_pin* gpin = &gnode->clock_pins[iport][ipin]; - 
VTR_ASSERT(gpin); - - set_atom_pin_mapping(cluster_ctx.clb_nlist, blk, port, gpin); - } - } -} - -static void load_internal_to_block_net_nums(const t_logical_block_type_ptr type, t_pb_routes& pb_route) { - int num_pins = type->pb_graph_head->total_pb_pins; - - for (int ipb_pin = 0; ipb_pin < num_pins; ipb_pin++) { - if (!pb_route.count(ipb_pin)) continue; - - if (pb_route[ipb_pin].driver_pb_pin_id != OPEN) { - load_atom_index_for_pb_pin(pb_route, ipb_pin); - } - } -} - -static void load_atom_index_for_pb_pin(t_pb_routes& pb_route, int ipin) { - int driver = pb_route[ipin].driver_pb_pin_id; - - VTR_ASSERT(driver != OPEN); - //VTR_ASSERT(!pb_route[ipin].atom_net_id); - - if (!pb_route[driver].atom_net_id) { - load_atom_index_for_pb_pin(pb_route, driver); - } - - //Store the net coming from the driver - pb_route[ipin].atom_net_id = pb_route[driver].atom_net_id; - - //Store ourselves with the driver - pb_route[driver].sink_pb_pin_ids.push_back(ipin); -} - -#if 0 -static bool count_children_pbs(const t_pb* pb) { - if (pb == nullptr) - return 0; - - for (int i = 0; i < pb->get_num_child_types(); i++) { - for (int j = 0; j < pb->get_num_children_of_type(i); j++) { - if (pb->child_pbs[i] != nullptr && pb->child_pbs[i][j].name != nullptr) { - return true; - } - } - } - return false; -} -#endif - -static void rebuild_cluster_placement_stats(ClusterBlockId clb_index, - const std::unordered_set& clb_atoms) { - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& atom_ctx = g_vpr_ctx.atom(); - - t_cluster_placement_stats* cluster_placement_stats = &(helper_ctx.cluster_placement_stats[cluster_ctx.clb_nlist.block_type(clb_index)->index]); - reset_cluster_placement_stats(cluster_placement_stats); - set_mode_cluster_placement_stats(cluster_ctx.clb_nlist.block_pb(clb_index)->pb_graph_node, cluster_ctx.clb_nlist.block_pb(clb_index)->mode); - - for (AtomBlockId atom : clb_atoms) { - const t_pb* atom_pb = atom_ctx.lookup.atom_pb(atom); 
- commit_primitive(cluster_placement_stats, atom_pb->pb_graph_node); - } -} - -bool is_cluster_legal(t_lb_router_data*& router_data) { - return (check_cluster_legality(0, E_DETAILED_ROUTE_AT_END_ONLY, router_data)); -} - -void commit_mol_removal(const t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId old_clb, - bool during_packing, - t_lb_router_data*& router_data, - t_clustering_data& clustering_data) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - for (int i_atom = 0; i_atom < molecule_size; i_atom++) { - if (molecule->atom_block_ids[i_atom]) { - revert_place_atom_block(molecule->atom_block_ids[i_atom], router_data); - } - } - - cleanup_pb(cluster_ctx.clb_nlist.block_pb(old_clb)); - - //If you are still in packing, update the clustering data. Otherwise, update the clustered netlist. - if (during_packing) { - free_intra_lb_nets(clustering_data.intra_lb_routing[old_clb]); - clustering_data.intra_lb_routing[old_clb] = router_data->saved_lb_nets; - router_data->saved_lb_nets = nullptr; - } else { - cluster_ctx.clb_nlist.block_pb(old_clb)->pb_route.clear(); - cluster_ctx.clb_nlist.block_pb(old_clb)->pb_route = alloc_and_load_pb_route(router_data->saved_lb_nets, - cluster_ctx.clb_nlist.block_pb(old_clb)->pb_graph_node); - } -} - -bool check_type_and_mode_compatibility(ClusterBlockId old_clb, - ClusterBlockId new_clb, - int verbosity) { - auto& cluster_ctx = g_vpr_ctx.clustering(); - - //Check that the old and new clusters are the same type - if (cluster_ctx.clb_nlist.block_type(old_clb) != cluster_ctx.clb_nlist.block_type(new_clb)) { - VTR_LOGV(verbosity > 4, "Move aborted. New and old cluster blocks are not of the same type"); - return false; - } - - //Check that the old and new clusters are the mode - if (cluster_ctx.clb_nlist.block_pb(old_clb)->mode != cluster_ctx.clb_nlist.block_pb(new_clb)->mode) { - VTR_LOGV(verbosity > 4, "Move aborted. 
New and old cluster blocks are not of the same mode"); - return false; - } - - return true; -} - -static void update_cluster_pb_stats(const t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId clb_index, - bool is_added) { - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - t_pb* cur_pb; - - for (int iblock = 0; iblock < molecule_size; iblock++) { - auto blk_id = molecule->atom_block_ids[iblock]; - if (!blk_id) { - continue; - } - - //Update atom netlist mapping - if (is_added) { - atom_ctx.lookup.set_atom_clb(blk_id, clb_index); - } else { - atom_ctx.lookup.set_atom_clb(blk_id, ClusterBlockId::INVALID()); - } - - const t_pb* atom_pb = atom_ctx.lookup.atom_pb(blk_id); - VTR_ASSERT(atom_pb); - - cur_pb = atom_pb->parent_pb; - - while (cur_pb) { - /* reset list of feasible blocks */ - cur_pb->pb_stats->num_feasible_blocks = NOT_VALID; - if (is_added) - cur_pb->pb_stats->num_child_blocks_in_pb++; - else - cur_pb->pb_stats->num_child_blocks_in_pb--; - - cur_pb = cur_pb->parent_pb; - } - } -} - -std::unordered_set& cluster_to_mutable_atoms(ClusterBlockId cluster) { - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - - //If the lookup is not built yet, build it first - if (helper_ctx.atoms_lookup.empty()) - init_clb_atoms_lookup(helper_ctx.atoms_lookup); - - return helper_ctx.atoms_lookup[cluster]; -} \ No newline at end of file diff --git a/vpr/src/pack/re_cluster_util.h b/vpr/src/pack/re_cluster_util.h deleted file mode 100644 index bc940dca881..00000000000 --- a/vpr/src/pack/re_cluster_util.h +++ /dev/null @@ -1,212 +0,0 @@ -#ifndef RE_CLUSTER_UTIL_H -#define RE_CLUSTER_UTIL_H - -#include "clustered_netlist_fwd.h" -#include "clustered_netlist_utils.h" -#include "atom_netlist_fwd.h" -#include "globals.h" -#include "pack_types.h" -#include "cluster_util.h" -/** - * @file - * @brief This files defines some helper functions for the re-clustering API - * - * Re-clustering API is used to move atoms between clusters after the cluster is done. 
- * This can be very used in iteratively improve the packed solution after the initial clustering is done. - * It can also be used during placement to allow fine-grained moves that can move a BLE or a single FF/LUT. - * - * Note: Some of the helper functions defined here might be useful in different places in VPR. - */ - -/** - * @brief A function that returns the block ID in the clustered netlist - * from its ID in the atom netlist. - */ -ClusterBlockId atom_to_cluster(AtomBlockId atom); - -/** - * @brief A function that return a list of atoms in a cluster - * @note This function can be called only after cluster/packing is done or - * the clustered netlist is created. - * @return Atoms in the given cluster. The returned set is immutable. - */ -const std::unordered_set& cluster_to_atoms(ClusterBlockId cluster); - -/** - * @brief A function that return a list of atoms in a cluster - * @note This function can be called only after cluster/packing is done or - * the clustered netlist is created. - * @return Atoms in the given cluster. The returned set is mutable. - */ -std::unordered_set& cluster_to_mutable_atoms(ClusterBlockId cluster); - -/** - * @brief A function that loads the intra-cluster router data of one cluster - */ -t_lb_router_data* lb_load_router_data(std::vector* lb_type_rr_graphs, - ClusterBlockId clb_index, - const std::unordered_set& clb_atoms); - -/** - * @brief A function that removes a molecule from a cluster and checks legality of - * the old cluster. - * - * It returns true if the removal is done and the old cluster is legal. - * It aborts the removal and returns false if the removal will make the old cluster - * illegal. - * - * This function updates the intra-logic block router data structure (router_data) and - * remove all the atoms of the molecule from old_clb_atoms vector. - * - * @param old_clb: The original cluster of this molecule - * @param old_clb_atoms: A vector containing the list of atoms in the old cluster of the molecule. 
- * It will be updated in the function to remove the atoms of molecule from it. - * @param router_data: returns the intra logic block router data. - */ -void remove_mol_from_cluster(const t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId& old_clb, - std::unordered_set& old_clb_atoms, - bool router_data_ready, - t_lb_router_data*& router_data); - -/** - * @brief A function that starts a new cluster for one specific molecule - * - * It places the molecule in a specific type and mode that should be passed by - * the higher level routine. - * - * @param type: the cluster block type needed - * @param mode: the mode of the new cluster - * @param clb_index: the cluster block Id of the newly created cluster block - * @param during_packing: true if this function is called during packing, false if it is called during placement - * @param clustering_data: A data structure containing helper data for the clustering process - * (is updated if this function is called during packing, especially intra_lb_routing data member). - * @param router_data: returns the intra logic block router data. - * @param temp_cluster_pr: returns the partition region of the new cluster. - * @param temp_cluster_noc_grp_id returns the NoC group ID of the new cluster - * @param detailed_routing_stage: options are E_DETAILED_ROUTE_FOR_EACH_ATOM (default) and E_DETAILED_ROUTE_AT_END_ONLY. - * This argument specifies whether or not to run an intra-cluster routing-based legality - * check after adding the molecule to the cluster; default is the more conservative option. - * This argument is passed down to try_pack_mol; if E_DETAILED_ROUTE_AT_END_ONLY is passed, - * the function does not run a detailed intra-cluster routing-based legality check. - * If many molecules will be added to a cluster, this option enables use of a single - * routing check on the completed cluster (vs many incremental checks). 
- * @param force_site: optional user-specified primitive site on which to place the molecule; this is passed to - * try_pack_molecule and then to get_next_primitive_site. If a force_site argument is provided, - * the molecule is either placed on the specified site or fails to add to the cluster. - * If the force_site argument is set to its default value (-1), vpr selects an available site. - */ -bool start_new_cluster_for_mol(t_pack_molecule* molecule, - t_logical_block_type_ptr type, - int mode, - int feasible_block_array_size, - bool enable_pin_feasibility_filter, - ClusterBlockId clb_index, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data, - t_lb_router_data** router_data, - PartitionRegion& temp_cluster_pr, - NocGroupId& temp_cluster_noc_grp_id, - enum e_detailed_routing_stages detailed_routing_stage = E_DETAILED_ROUTE_FOR_EACH_ATOM, - int force_site = -1); - -/** - * @brief A function that packs a molecule into an existing cluster - * - * @param clb_index: the cluster block Id of the new cluster that we need to pack the molecule in. - * @param: clb_atoms: A vector containing the list of atoms in the new cluster block before adding the molecule. - * @param during_packing: true if this function is called during packing, false if it is called during placement - * @param is_swap: true if this function is called during swapping two molecules. False if the called during a single molecule move - * @param clustering_data: A data structure containing helper data for the clustering process - * (is updated if this function is called during packing, especially intra_lb_routing data member). - * @param router_data: returns the intra logic block router data. - * @param temp_cluster_noc_grp_id returns the NoC group ID of the new cluster - * @param detailed_routing_stage: options are E_DETAILED_ROUTE_FOR_EACH_ATOM (default) and E_DETAILED_ROUTE_AT_END_ONLY. 
- * This argument specifies whether or not to run an intra-cluster routing-based legality - * check after adding the molecule to the cluster; default is the more conservative option. - * This argument is passed down to try_pack_mol; if E_DETAILED_ROUTE_AT_END_ONLY is passed, - * the function does not run a detailed intra-cluster routing-based legality check. - * If many molecules will be added to a cluster, this option enables use of a single - * routing check on the completed cluster (vs many incremental checks). - * @param enable_pin_feasibility_filter: do a pin couting based legality check (before or in place of intra-cluster routing check). - * @param force_site: optional user-specified primitive site on which to place the molecule; this is passed to - * try_pack_molecule and then to get_next_primitive_site. If a force_site argument is provided, - * the molecule is either placed on the specified site or fails to add to the cluster. - * If the force_site argument is set to its default value (-1), vpr selects an available site. - */ -bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId new_clb, - std::unordered_set& new_clb_atoms, - bool during_packing, - t_clustering_data& clustering_data, - t_lb_router_data*& router_data, - enum e_detailed_routing_stages detailed_routing_stage = E_DETAILED_ROUTE_FOR_EACH_ATOM, - bool enable_pin_feasibility_filter = true, - int force_site = -1); - -/** - * @brief A function that fix the clustered netlist if the move is performed - * after the packing is done and clustered netlist is built - * - * If you are changing clustering after packing is done, you need to update the clustered netlist by - * deleting the newly absorbed nets and creating nets for the atom nets that become unabsorbed. It also - * fixes the cluster ports for both the old and new clusters. 
- */ -void fix_clustered_netlist(t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId old_clb, - ClusterBlockId new_clb); - -/** - * @brief A function that commits the molecule move if it is legal - * - * @params during_packing: true if this function is called during packing, false if it is called during placement - * @params new_clb_created: true if the move is creating a new cluster (e.g. move_mol_to_new_cluster) - */ -void commit_mol_move(ClusterBlockId old_clb, - ClusterBlockId new_clb, - bool during_packing, - bool new_clb_created); - -/** - * @brief A function that reverts the molecule move if it is illegal - * - * @params during_packing: true if this function is called during packing, false if it is called during placement - * @params new_clb_created: true if the move is creating a new cluster (e.g. move_mol_to_new_cluster) - * @params - */ -void revert_mol_move(ClusterBlockId old_clb, - t_pack_molecule* molecule, - t_lb_router_data*& old_router_data, - bool during_packing, - t_clustering_data& clustering_data); - -/** - * @brief A function that checks the legality of a cluster by running the intra-cluster routing - */ -bool is_cluster_legal(t_lb_router_data*& router_data); - -/** - * @brief A function that commits the molecule removal if it is legal - * - * @params during_packing: true if this function is called during packing, false if it is called during placement - * @params new_clb_created: true if the move is creating a new cluster (e.g. 
move_mol_to_new_cluster) - */ -void commit_mol_removal(const t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId old_clb, - bool during_packing, - t_lb_router_data*& router_data, - t_clustering_data& clustering_data); - -/** - * @brief A function that check that two clusters are of the same type and in the same mode of operation - */ -bool check_type_and_mode_compatibility(ClusterBlockId old_clb, - ClusterBlockId new_clb, - int verbosity); - -#endif diff --git a/vpr/src/place/place_constraints.cpp b/vpr/src/place/place_constraints.cpp index 22b15f5a04f..94af4721026 100644 --- a/vpr/src/place/place_constraints.cpp +++ b/vpr/src/place/place_constraints.cpp @@ -11,7 +11,7 @@ #include "globals.h" #include "place_constraints.h" #include "place_util.h" -#include "re_cluster_util.h" +#include "vpr_context.h" int check_placement_floorplanning(const vtr::vector_map& block_locs) { int error = 0; @@ -221,12 +221,12 @@ bool cluster_floorplanning_legal(ClusterBlockId blk_id, const t_pl_loc& loc) { void load_cluster_constraints() { auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); - auto& cluster_ctx = g_vpr_ctx.clustering(); + const ClusteringContext& cluster_ctx = g_vpr_ctx.clustering(); floorplanning_ctx.cluster_constraints.resize(cluster_ctx.clb_nlist.blocks().size()); for (auto cluster_id : cluster_ctx.clb_nlist.blocks()) { - const std::unordered_set& atoms = cluster_to_atoms(cluster_id); + const std::unordered_set& atoms = cluster_ctx.atoms_lookup[cluster_id]; PartitionRegion empty_pr; floorplanning_ctx.cluster_constraints[cluster_id] = empty_pr; diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp index b72b78cdaf1..480919ae9b6 100644 --- a/vpr/src/route/route_common.cpp +++ b/vpr/src/route/route_common.cpp @@ -7,7 +7,6 @@ #include "route_common.h" #include "route_export.h" #include "rr_graph.h" -#include "re_cluster_util.h" /* The numbering relation between the channels and clbs is: * * * diff --git 
a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp index e4ae8e996e5..98620b56009 100644 --- a/vpr/src/util/vpr_utils.cpp +++ b/vpr/src/util/vpr_utils.cpp @@ -4,6 +4,9 @@ #include #include +#include "pack_types.h" +#include "prepack.h" +#include "vpr_context.h" #include "vtr_assert.h" #include "vtr_log.h" #include "vtr_memory.h" @@ -17,7 +20,6 @@ #include "cluster_placement.h" #include "device_grid.h" #include "user_route_constraints.h" -#include "re_cluster_util.h" #include "placer_state.h" #include "grid_block.h" @@ -1124,14 +1126,11 @@ const t_pb_graph_pin* find_pb_graph_pin(const AtomNetlist& netlist, const AtomLo return get_pb_graph_node_pin_from_model_port_pin(model_port, ipin, pb_gnode); } -t_pb_graph_pin* get_pb_graph_node_pin_from_block_pin(ClusterBlockId iblock, int ipin) { +t_pb_graph_pin* get_pb_graph_node_pin_from_pb_graph_node(t_pb_graph_node* pb_graph_node, + int ipin) { int i, count; - const t_pb_type* pb_type; - t_pb_graph_node* pb_graph_node; - auto& cluster_ctx = g_vpr_ctx.clustering(); - pb_graph_node = cluster_ctx.clb_nlist.block_pb(iblock)->pb_graph_node; - pb_type = pb_graph_node->pb_type; + const t_pb_type* pb_type = pb_graph_node->pb_type; /* If this is post-placed, then the ipin may have been shuffled up by the z * num_pins, * bring it back down to 0..num_pins-1 range for easier analysis */ @@ -1169,6 +1168,13 @@ t_pb_graph_pin* get_pb_graph_node_pin_from_block_pin(ClusterBlockId iblock, int return nullptr; } +t_pb_graph_pin* get_pb_graph_node_pin_from_block_pin(ClusterBlockId iblock, int ipin) { + auto& cluster_ctx = g_vpr_ctx.clustering(); + + t_pb_graph_node* pb_graph_node = cluster_ctx.clb_nlist.block_pb(iblock)->pb_graph_node; + return get_pb_graph_node_pin_from_pb_graph_node(pb_graph_node, ipin); +} + const t_port* find_pb_graph_port(const t_pb_graph_node* pb_gnode, const std::string& port_name) { const t_pb_graph_pin* gpin = find_pb_graph_pin(pb_gnode, port_name, 0); @@ -1363,12 +1369,13 @@ std::tuple 
get_cluster_internal_class_pairs(const AtomLookup& atom_lookup, ClusterBlockId cluster_block_id) { + const ClusteringContext& cluster_ctx = g_vpr_ctx.clustering(); std::vector class_num_vec; auto [physical_tile, sub_tile, rel_cap, logical_block] = get_cluster_blk_physical_spec(cluster_block_id); class_num_vec.reserve(physical_tile->primitive_class_inf.size()); - const auto& cluster_atoms = cluster_to_atoms(cluster_block_id); + const auto& cluster_atoms = cluster_ctx.atoms_lookup[cluster_block_id]; for (AtomBlockId atom_blk_id : cluster_atoms) { auto atom_pb_graph_node = atom_lookup.atom_pb_graph_node(atom_blk_id); auto class_range = get_pb_graph_node_class_physical_range(physical_tile, @@ -1532,7 +1539,7 @@ void free_pb(t_pb* pb) { free_pb_stats(pb); } -void revalid_molecules(const t_pb* pb) { +void revalid_molecules(const t_pb* pb, const Prepacker& prepacker) { const t_pb_type* pb_type = pb->pb_graph_node->pb_type; if (pb_type->blif_model == nullptr) { @@ -1540,7 +1547,7 @@ void revalid_molecules(const t_pb* pb) { for (int i = 0; i < pb_type->modes[mode].num_pb_type_children && pb->child_pbs != nullptr; i++) { for (int j = 0; j < pb_type->modes[mode].pb_type_children[i].num_pb && pb->child_pbs[i] != nullptr; j++) { if (pb->child_pbs[i][j].name != nullptr || pb->child_pbs[i][j].child_pbs != nullptr) { - revalid_molecules(&pb->child_pbs[i][j]); + revalid_molecules(&pb->child_pbs[i][j], prepacker); } } } @@ -1556,7 +1563,7 @@ void revalid_molecules(const t_pb* pb) { atom_ctx.lookup.set_atom_clb(blk_id, ClusterBlockId::INVALID()); atom_ctx.lookup.set_atom_pb(blk_id, nullptr); - t_pack_molecule* cur_molecule = atom_ctx.prepacker.get_atom_molecule(blk_id); + t_pack_molecule* cur_molecule = prepacker.get_atom_molecule(blk_id); if (cur_molecule->valid == false) { int i; for (i = 0; i < get_array_size_of_molecule(cur_molecule); i++) { diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h index 24da4489b6b..9f08dcc0d2b 100644 --- a/vpr/src/util/vpr_utils.h +++ 
b/vpr/src/util/vpr_utils.h @@ -17,6 +17,7 @@ class DeviceGrid; class PlacerState; +class Prepacker; const t_model* find_model(const t_model* models, const std::string& name, bool required = true); const t_model_ports* find_model_port(const t_model* model, const std::string& name, bool required = true); @@ -192,6 +193,9 @@ int get_max_nets_in_pb_type(const t_pb_type* pb_type); bool primitive_type_feasible(AtomBlockId blk_id, const t_pb_type* cur_pb_type); t_pb_graph_pin* get_pb_graph_node_pin_from_model_port_pin(const t_model_ports* model_port, const int model_pin, const t_pb_graph_node* pb_graph_node); const t_pb_graph_pin* find_pb_graph_pin(const AtomNetlist& netlist, const AtomLookup& netlist_lookup, const AtomPinId pin_id); +/// @brief Gets the pb_graph_node pin at the given pin index for the given +/// pb_graph_node. +t_pb_graph_pin* get_pb_graph_node_pin_from_pb_graph_node(t_pb_graph_node* pb_graph_node, int ipin); t_pb_graph_pin* get_pb_graph_node_pin_from_block_pin(ClusterBlockId iblock, int ipin); vtr::vector alloc_and_load_pin_id_to_pb_mapping(); void free_pin_id_to_pb_mapping(vtr::vector& pin_id_to_pb_mapping); @@ -218,7 +222,7 @@ void parse_direct_pin_name(char* src_string, int line, int* start_pin_index, int void free_pb_stats(t_pb* pb); void free_pb(t_pb* pb); -void revalid_molecules(const t_pb* pb); +void revalid_molecules(const t_pb* pb, const Prepacker& prepacker); void print_switch_usage(); void print_usage_by_wire_length(); diff --git a/vpr/test/test_connection_router.cpp b/vpr/test/test_connection_router.cpp index 1bc208bf3ba..c2ac5329a26 100644 --- a/vpr/test/test_connection_router.cpp +++ b/vpr/test/test_connection_router.cpp @@ -191,9 +191,6 @@ TEST_CASE("connection_router", "[vpr]") { free_routing_structs(); vpr_free_all(arch, vpr_setup); - - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - atom_ctx.prepacker.reset(); } } // namespace diff --git a/vpr/test/test_post_verilog.cpp b/vpr/test/test_post_verilog.cpp index be4bd45f045..a8344fa79d4 
100644 --- a/vpr/test/test_post_verilog.cpp +++ b/vpr/test/test_post_verilog.cpp @@ -35,10 +35,6 @@ void do_vpr_flow(const char* input_unc_opt, const char* output_unc_opt) { free_routing_structs(); vpr_free_all(arch, vpr_setup); - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - - atom_ctx.prepacker.reset(); - REQUIRE(flow_succeeded == true); } diff --git a/vpr/test/test_vpr.cpp b/vpr/test/test_vpr.cpp index da0b4c8b21c..0e92311b5c2 100644 --- a/vpr/test/test_vpr.cpp +++ b/vpr/test/test_vpr.cpp @@ -169,9 +169,6 @@ TEST_CASE("read_rr_graph_metadata", "[vpr]") { echo_file_name, false); vpr_free_all(arch, vpr_setup); - - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - atom_ctx.prepacker.reset(); } REQUIRE(src_inode != -1); @@ -233,9 +230,6 @@ TEST_CASE("read_rr_graph_metadata", "[vpr]") { CHECK_THAT(value->as_string().get(&arch.strings), Equals("test edge")); } vpr_free_all(arch, vpr_setup); - - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - atom_ctx.prepacker.reset(); } } // namespace