From 3b10f53fc07ad224d677db1800965d52658ef5b9 Mon Sep 17 00:00:00 2001
From: Nikolai Maas
Date: Mon, 9 Sep 2024 17:58:59 +0200
Subject: [PATCH] three phase coarsening for hypergraphs (naive)

---
 mt-kahypar/io/command_line_options.cpp  |   8 +-
 .../two_hop_clustering.h                |  54 ++++----
 .../rating_degree_similarity_policy.h   | 123 +++++++++++-------
 mt-kahypar/partition/context.h          |   3 +-
 4 files changed, 115 insertions(+), 73 deletions(-)

diff --git a/mt-kahypar/io/command_line_options.cpp b/mt-kahypar/io/command_line_options.cpp
index c0081cb1e..2436975ab 100644
--- a/mt-kahypar/io/command_line_options.cpp
+++ b/mt-kahypar/io/command_line_options.cpp
@@ -336,7 +336,7 @@ namespace mt_kahypar {
                     "")->default_value(4),
             "Two-hop coarsening: maximum number of degree one nodes in one cluster.")
             ("c-sim-incident-weight-scaling",
-             po::value<int32_t>(&context.coarsening.rating.incident_weight_scaling_constant)->value_name(
+             po::value<uint32_t>(&context.coarsening.rating.incident_weight_scaling_constant)->value_name(
                     "")->default_value(0),
             "Scales how incident weight is computed when determining similarity thresholds.")
             ("c-sim-preserve-nodes-scaling-factor",
@@ -350,7 +350,11 @@ namespace mt_kahypar {
             ("c-sim-acceptance-limit-bound",
             po::value<double>(&context.coarsening.rating.acceptance_limit_bound)->value_name(
                     "")->default_value(1.0),
-            "Lower bound for similarity acceptance limit (nodes with at most this difference are always accepted).");
+            "Lower bound for similarity acceptance limit (nodes with at most this difference are always accepted).")
+            ("c-two-hop-degree-threshold",
+             po::value<size_t>(&context.coarsening.two_hop_degree_threshold)->value_name(
+                    "")->default_value(100),
+            "If set, then vertices with more adjacent pins than the provided threshold are ignored during two-hop coarsening.");
 
   return options;
 }

diff --git a/mt-kahypar/partition/coarsening/multilevel/clustering_algorithms/two_hop_clustering.h b/mt-kahypar/partition/coarsening/multilevel/clustering_algorithms/two_hop_clustering.h
index 1988fa6f7..1fc680f4e 100644
--- a/mt-kahypar/partition/coarsening/multilevel/clustering_algorithms/two_hop_clustering.h
+++ b/mt-kahypar/partition/coarsening/multilevel/clustering_algorithms/two_hop_clustering.h
@@ -45,10 +45,6 @@ namespace mt_kahypar {
 class TwoHopClustering {
   using IncidenceMap = ds::SparseMap<HypernodeID, float>;
-  // degree threshold where it is extremely unlikely that two-hop coarsening is applicable
-  static constexpr HyperedgeID HIGH_DEGREE_THRESHOLD = 500;
-  static constexpr HypernodeID kInvalidHypernode = std::numeric_limits<HypernodeID>::max();
-
   struct MatchingEntry {
     HypernodeID key;
     HypernodeID hn;
   };
@@ -81,18 +77,24 @@ class TwoHopClustering {
                          int pass_nr = 0) {
     _degree_one_map.reserve_for_estimated_number_of_insertions(cc.currentNumNodes() / 3);
 
-    auto fill_incidence_map_for_node = [&](IncidenceMap& incidence_map, const HypernodeID hn) {
+    auto fill_incidence_map_for_node = [&](IncidenceMap& incidence_map, const HypernodeID hn, bool& too_many_accesses) {
       // TODO: can we do this more efficiently for graphs?
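+      // Aggregate, for every neighboring cluster of hn, the connection strength contributed by
+      // each incident edge e, namely edgeWeight(e) / (|e| - 1) per other pin of e.
+      // The scan is budgeted: if handling an edge would push the number of visited pins above
+      // two_hop_degree_threshold, too_many_accesses is set and the caller skips this node.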
+      size_t num_accesses = 0;
       HyperedgeWeight incident_weight_sum = 0;
       for (const HyperedgeID& he : hg.incidentEdges(hn)) {
-        incident_weight_sum += hg.edgeWeight(he);
+        if (num_accesses + hg.edgeSize(he) > _context.coarsening.two_hop_degree_threshold) {
+          too_many_accesses = true;
+          break;
+        }
         for (const HypernodeID& pin: hg.pins(he)) {
           if (pin != hn) {
             HypernodeID target_cluster = cc.clusterID(pin);
             ASSERT(target_cluster != cc.clusterID(hn));  // holds since we only consider unmatched nodes
             incidence_map[target_cluster] += static_cast<float>(hg.edgeWeight(he)) / (hg.edgeSize(he) - 1);
+            ++num_accesses;
           }
         }
+        incident_weight_sum += hg.edgeWeight(he);
       }
       return incident_weight_sum;
     };
@@ -103,27 +105,33 @@ class TwoHopClustering {
     tbb::parallel_for(ID(0), hg.initialNumNodes(), [&](const HypernodeID id) {
       ASSERT(id < node_mapping.size());
       const HypernodeID hn = node_mapping[id];
-      if (hg.nodeIsEnabled(hn) && cc.vertexIsUnmatched(hn) && hg.nodeDegree(hn) <= HIGH_DEGREE_THRESHOLD) {
+      if (hg.nodeIsEnabled(hn) && cc.vertexIsUnmatched(hn)
+          && hg.nodeWeight(hn) <= _context.coarsening.max_allowed_node_weight / 2
+          && hg.nodeDegree(hn) <= _context.coarsening.two_hop_degree_threshold) {
         IncidenceMap& incidence_map = _local_incidence_map.local();
-        const HyperedgeWeight incident_weight_sum = fill_incidence_map_for_node(incidence_map, hn);
-
-        const float required_connectivity = required_similarity * incident_weight_sum;
-        float max_connectivity = 0;
-        HypernodeID best_target = kInvalidHypernode;
-        for (const auto& [target_cluster, connectivity]: incidence_map) {
-          if (connectivity >= required_connectivity && connectivity > max_connectivity) {
-            max_connectivity = connectivity;
-            best_target = target_cluster;
-            if (required_similarity >= 0.5) {
-              // in this case, this already must be the maximum
-              break;
+        incidence_map.clear();
+
+        bool too_many_accesses = false;
+        const HyperedgeWeight incident_weight_sum = fill_incidence_map_for_node(incidence_map, hn, too_many_accesses);
+
+        if (!too_many_accesses) {
+          const float required_connectivity = required_similarity * incident_weight_sum;
+          float max_connectivity = 0;
+          HypernodeID best_target = kInvalidHypernode;
+          for (const auto& [target_cluster, connectivity]: incidence_map) {
+            if (connectivity >= required_connectivity && connectivity > max_connectivity) {
+              max_connectivity = connectivity;
+              best_target = target_cluster;
+              if (required_similarity >= 0.5) {
+                // in this case, this already must be the maximum
+                break;
+              }
             }
           }
+          if (best_target != kInvalidHypernode) {
+            _degree_one_map.insert(best_target, MatchingEntry{best_target, hn});
+          }
         }
-        if (best_target != kInvalidHypernode) {
-          _degree_one_map.insert(best_target, MatchingEntry{best_target, hn});
-        }
-        incidence_map.clear();
       }
     });
 
diff --git a/mt-kahypar/partition/coarsening/policies/rating_degree_similarity_policy.h b/mt-kahypar/partition/coarsening/policies/rating_degree_similarity_policy.h
index 6bf276c2e..3810cb02b 100644
--- a/mt-kahypar/partition/coarsening/policies/rating_degree_similarity_policy.h
+++ b/mt-kahypar/partition/coarsening/policies/rating_degree_similarity_policy.h
@@ -81,7 +81,7 @@ namespace {
   }
 
   void insert(float edge_contribution, HypernodeWeight weight) {
-    ASSERT(weight > 0);
+    ASSERT(weight >= 0);
     if (edge_contribution <= 0) {
       _data[0].edge_weight_contribution += edge_contribution;
       _data[0].node_weight += weight;
@@ -132,14 +132,15 @@ namespace {
 
 class PreserveRebalancingNodesPolicy final : public kahypar::meta::PolicyBase {
+  using IncidenceMap = ds::SparseMap<HypernodeID, float>;  // this is prototypical and will almost certainly be removed
 
   static constexpr bool debug = false;
 
  public:
   explicit PreserveRebalancingNodesPolicy():
-    _incident_weight(), _acceptance_limit() {}
+    _incident_weight(), _acceptance_limit(), _local_incidence_map(0) {}
 
   explicit PreserveRebalancingNodesPolicy(const HypernodeID num_nodes):
-    _incident_weight(num_nodes, 0), _acceptance_limit(num_nodes, 0) {}
+    _incident_weight(num_nodes, 0), _acceptance_limit(num_nodes, 0), _local_incidence_map(num_nodes) {}
 
   PreserveRebalancingNodesPolicy(const PreserveRebalancingNodesPolicy&) = delete;
   PreserveRebalancingNodesPolicy(PreserveRebalancingNodesPolicy&&) = delete;
@@ -151,12 +152,13 @@ class PreserveRebalancingNodesPolicy final : public kahypar::meta::PolicyBase {
     ASSERT(_incident_weight.size() >= hypergraph.initialNumNodes()
            && _acceptance_limit.size() >= hypergraph.initialNumNodes());
 
-    auto scaled_edge_weight = [&](const HyperedgeID he) {
+    auto edge_weight_scaling = [&](const HyperedgeID he) {
       if constexpr (Hypergraph::is_graph) {
-        return hypergraph.edgeWeight(he);
+        return 1.0;
+      } else if (hypergraph.edgeSize(he) <= context.coarsening.rating.incident_weight_scaling_constant) {
+        return 1.0;
       } else {
-        return static_cast<float>(hypergraph.edgeWeight(he)) /
-               (hypergraph.edgeSize(he) + context.coarsening.rating.incident_weight_scaling_constant);
+        return context.coarsening.rating.incident_weight_scaling_constant / static_cast<double>(hypergraph.edgeSize(he));
       }
     };
 
@@ -166,30 +168,29 @@ class PreserveRebalancingNodesPolicy final : public kahypar::meta::PolicyBase {
       // TODO(maas): save the total incident weight in the hypergraph data structure?
       double incident_weight_sum = 0;
       for (const HyperedgeID& he : hypergraph.incidentEdges(hn)) {
-        incident_weight_sum += scaled_edge_weight(he);
+        incident_weight_sum += edge_weight_scaling(he) * hypergraph.edgeWeight(he);
       }
       _incident_weight[hn] = incident_weight_sum;
     });
     timer.stop_timer("compute_incident_weight");
 
     timer.start_timer("compute_similarity_metric", "Compute Similarity Metric");
-    if constexpr (Hypergraph::is_graph) {
-      // TODO: We are ignoring edges between neighbors here - the result is thus only approximate.
-      // This could be acceptable, though
+    // TODO: We are ignoring edges between neighbors here - the result is thus only approximate.
+    // This could be acceptable, though
+    const HypernodeWeight max_summed_weight = std::ceil(context.coarsening.rating.preserve_nodes_relative_weight_limit
+                                                        * hypergraph.totalWeight());
+    hypergraph.doParallelForAllNodes([&](const HypernodeID hn) {
+      GroupedIncidenceData incidence_data;
+      const double ratio_of_u = _incident_weight[hn] / std::max(hypergraph.nodeWeight(hn), 1);
       // Step 1: Collect contributed edge weights and node weights of neighbors in into sorted aggregates
       // (effectively a semi-sorting)
       // TODO: should this rather be relative to the maximum cluster weight?
-      const HypernodeWeight max_summed_weight = std::ceil(context.coarsening.rating.preserve_nodes_relative_weight_limit
-                                                          * hypergraph.totalWeight());
-      hypergraph.doParallelForAllNodes([&](const HypernodeID hn) {
-        GroupedIncidenceData incidence_data;
-        const double ratio_of_u = _incident_weight[hn] / std::max(hypergraph.nodeWeight(hn), 1);
-        // TODO: this needs to be implemented differently for hypergraphs
+      if constexpr (Hypergraph::is_graph) {
         size_t num_accesses = 0;
         for (const HyperedgeID& he : hypergraph.incidentEdges(hn)) {
           HypernodeID v = hypergraph.edgeTarget(he);
-          float edge_contribution = _incident_weight[v] - 2 * scaled_edge_weight(he);
+          float edge_contribution = _incident_weight[v] - 2 * hypergraph.edgeWeight(he);
           HypernodeWeight weight = hypergraph.nodeWeight(v);
           if (weight == 0 || edge_contribution / weight < ratio_of_u) {
             incidence_data.insert(edge_contribution, weight);
@@ -200,39 +201,64 @@ class PreserveRebalancingNodesPolicy final : public kahypar::meta::PolicyBase {
             break;
           }
         }
-
-        // Step 2: Iterate through aggregated neighbor values in sorted order and determine minimum
-        const auto& list = incidence_data.inner();
-        double summed_contribution = _incident_weight[hn];
-        HypernodeWeight summed_weight = std::max(hypergraph.nodeWeight(hn), 1);
-        double min_value = summed_contribution / summed_weight;
-        for (size_t i = 0; i < list.size() && summed_weight <= max_summed_weight; ++i) {
-          const NeighborData& neighbor = list[i];
-          if (summed_weight + neighbor.node_weight > max_summed_weight) {
-            double fraction_of_last = static_cast<double>(max_summed_weight - summed_weight) / neighbor.node_weight;
-            summed_contribution += fraction_of_last * neighbor.edge_weight_contribution;
-            summed_weight = max_summed_weight;
-          } else {
-            summed_contribution += neighbor.edge_weight_contribution;
-            summed_weight += neighbor.node_weight;
+      } else {
+        // this is probably quite slow and will be replaced with a bloom-filter based approach
+        size_t num_accesses = 0;
+        IncidenceMap& incidence_map = _local_incidence_map.local();
+        incidence_map.clear();
+        for (const HyperedgeID& he : hypergraph.incidentEdges(hn)) {
+          HypernodeID edge_size = hypergraph.edgeSize(he);
+          if (edge_size < context.partition.ignore_hyperedge_size_threshold) {
+            if (num_accesses + edge_size > context.coarsening.vertex_degree_sampling_threshold) {
+              break;
+            }
+            for (const HypernodeID& pin: hypergraph.pins(he)) {
+              if (pin != hn) {
+                incidence_map[pin] += edge_weight_scaling(he) * static_cast<float>(hypergraph.edgeWeight(he)) / (edge_size - 1);
+                ++num_accesses;
+              }
+            }
           }
-          if (summed_contribution / summed_weight <= min_value) {
-            min_value = summed_contribution / summed_weight;
-          } else {
-            break;
+        }
+
+        for (const auto& [neighbor, connectivity]: incidence_map) {
+          float edge_contribution = _incident_weight[neighbor] - 2 * connectivity;
+          HypernodeWeight weight = hypergraph.nodeWeight(neighbor);
+          if (weight == 0 || edge_contribution / weight < ratio_of_u) {
+            incidence_data.insert(edge_contribution, weight);
           }
         }
+      }
 
-        // Step 3: Compute acceptance limit of v from minimum
-        _acceptance_limit[hn] = std::min(
-          context.coarsening.rating.preserve_nodes_scaling_factor * min_value,
-          context.coarsening.rating.acceptance_limit_bound * _incident_weight[hn] / std::max(hypergraph.nodeWeight(hn), 1));
-        DBG << V(hn) << V(_acceptance_limit[hn]) << V(_incident_weight[hn])
-          << V(hypergraph.nodeWeight(hn)) << V(hypergraph.nodeDegree(hn));
-      });
-    } else {
-      ERR("not supported");
-    }
+      // Step 2: Iterate through aggregated neighbor values in sorted order and determine minimum
+      const auto& list = incidence_data.inner();
+      double summed_contribution = _incident_weight[hn];
+      HypernodeWeight summed_weight = std::max(hypergraph.nodeWeight(hn), 1);
+      double min_value = summed_contribution / summed_weight;
+      for (size_t i = 0; i < list.size() && summed_weight <= max_summed_weight; ++i) {
+        const NeighborData& neighbor = list[i];
+        if (summed_weight + neighbor.node_weight > max_summed_weight) {
+          double fraction_of_last = static_cast<double>(max_summed_weight - summed_weight) / neighbor.node_weight;
+          summed_contribution += fraction_of_last * neighbor.edge_weight_contribution;
+          summed_weight = max_summed_weight;
+        } else {
+          summed_contribution += neighbor.edge_weight_contribution;
+          summed_weight += neighbor.node_weight;
+        }
+        if (summed_contribution / summed_weight <= min_value) {
+          min_value = summed_contribution / summed_weight;
+        } else {
+          break;
+        }
+      }
+
+      // Step 3: Compute acceptance limit of v from minimum
+      _acceptance_limit[hn] = std::min(
+        context.coarsening.rating.preserve_nodes_scaling_factor * min_value,
+        context.coarsening.rating.acceptance_limit_bound * _incident_weight[hn] / std::max(hypergraph.nodeWeight(hn), 1));
+      DBG << V(hn) << V(_acceptance_limit[hn]) << V(_incident_weight[hn])
+        << V(hypergraph.nodeWeight(hn)) << V(hypergraph.nodeDegree(hn));
+    });
     timer.stop_timer("compute_similarity_metric");
   }
 
@@ -258,10 +284,13 @@ class PreserveRebalancingNodesPolicy final : public kahypar::meta::PolicyBase {
   }
 
  private:
+  const Context* _context;  // TODO: currently must be a pointer so we can default-initialize..
   // ! incident weight (scaled with hyperedge size) for all nodes
   parallel::scalable_vector<float> _incident_weight;
   // ! pre-computed metric which is used to determine whether a contraction is accepted
   parallel::scalable_vector<float> _acceptance_limit;
+  // ! Tracks connectivity to all neighbors in case of hypergraphs
+  tbb::enumerable_thread_specific<IncidenceMap> _local_incidence_map;
 };
 
diff --git a/mt-kahypar/partition/context.h b/mt-kahypar/partition/context.h
index 21853279c..3add7a10e 100644
--- a/mt-kahypar/partition/context.h
+++ b/mt-kahypar/partition/context.h
@@ -109,7 +109,7 @@ struct RatingParameters {
   DegreeSimilarityPolicy degree_similarity_policy = DegreeSimilarityPolicy::UNDEFINED;
 
   // Similarity policy
-  int32_t incident_weight_scaling_constant = 0;
+  uint32_t incident_weight_scaling_constant = 1;
   double preserve_nodes_scaling_factor = 0.25;
   double preserve_nodes_relative_weight_limit = 0.001;
   double acceptance_limit_bound = 1.0;
@@ -134,6 +134,7 @@ struct CoarseningParameters {
   double twin_required_similarity = 0.75;
   double twin_reduced_required_similarity = 0.4;
   HypernodeID degree_one_node_cluster_size = 4;
+  size_t two_hop_degree_threshold = 100;
 
   // Those will be determined dynamically
   HypernodeWeight max_allowed_node_weight = 0;
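
For readers unfamiliar with the access-budget pattern this patch introduces in fill_incidence_map_for_node (and reuses in the hypergraph branch of PreserveRebalancingNodesPolicy), the standalone sketch below mirrors that logic under simplified assumptions. It is illustrative only: SimpleHypergraph, fillIncidenceMap and all types in it are hypothetical stand-ins, not mt-kahypar data structures or APIs.

// Illustrative sketch only -- not part of the patch. Mirrors the budgeted incidence-map fill
// used for two-hop coarsening, with simplified stand-in types.
#include <cstdint>
#include <cstdio>
#include <unordered_map>
#include <vector>

using NodeID = uint32_t;
using EdgeWeight = int32_t;

struct SimpleHypergraph {
  std::vector<std::vector<NodeID>> pins;            // pins[e] = vertices of hyperedge e
  std::vector<EdgeWeight> edge_weight;              // edge_weight[e] = weight of hyperedge e
  std::vector<std::vector<size_t>> incident_edges;  // incident_edges[v] = hyperedges containing v
  std::vector<NodeID> cluster;                      // cluster[v] = current cluster of v
};

// Aggregates, for every neighboring cluster of hn, the connection strength
// sum over shared hyperedges e of edge_weight(e) / (|e| - 1). Returns false
// (and the caller leaves hn unmatched) once more pins than degree_threshold
// would be touched -- the role of c-two-hop-degree-threshold in the patch.
bool fillIncidenceMap(const SimpleHypergraph& hg, NodeID hn, size_t degree_threshold,
                      std::unordered_map<NodeID, double>& connectivity) {
  size_t num_accesses = 0;
  for (size_t e : hg.incident_edges[hn]) {
    const std::vector<NodeID>& edge_pins = hg.pins[e];
    if (num_accesses + edge_pins.size() > degree_threshold) {
      return false;  // too many accesses: skip hn during two-hop coarsening
    }
    for (NodeID pin : edge_pins) {
      if (pin != hn) {
        connectivity[hg.cluster[pin]] += static_cast<double>(hg.edge_weight[e]) / (edge_pins.size() - 1);
        ++num_accesses;
      }
    }
  }
  return true;
}

int main() {
  // Toy instance: hyperedges {0,1,2} and {1,3} with unit weight, every vertex in its own cluster.
  SimpleHypergraph hg;
  hg.pins = {{0, 1, 2}, {1, 3}};
  hg.edge_weight = {1, 1};
  hg.incident_edges = {{0}, {0, 1}, {0}, {1}};
  hg.cluster = {0, 1, 2, 3};

  std::unordered_map<NodeID, double> connectivity;
  if (fillIncidenceMap(hg, /*hn=*/1, /*degree_threshold=*/100, connectivity)) {
    for (const auto& [cluster, weight] : connectivity) {
      std::printf("cluster %u: %.2f\n", cluster, weight);  // expected: 0 -> 0.50, 2 -> 0.50, 3 -> 1.00
    }
  }
  return 0;
}

With the option registered in command_line_options.cpp above, the same budget is configurable on the command line as --c-two-hop-degree-threshold (default 100); it also replaces the former hard-coded HIGH_DEGREE_THRESHOLD = 500 as the degree cutoff for two-hop matching.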