three phase coarsening for hypergraphs (naive)
N-Maas committed Sep 9, 2024
1 parent b315892 commit 3b10f53
Showing 4 changed files with 115 additions and 73 deletions.
8 changes: 6 additions & 2 deletions mt-kahypar/io/command_line_options.cpp
@@ -336,7 +336,7 @@ namespace mt_kahypar {
"<int>")->default_value(4),
"Two-hop coarsening: maximum number of degree one nodes in one cluster.")
("c-sim-incident-weight-scaling",
po::value<int32_t>(&context.coarsening.rating.incident_weight_scaling_constant)->value_name(
po::value<uint32_t>(&context.coarsening.rating.incident_weight_scaling_constant)->value_name(
"<int>")->default_value(0),
"Scales how incident weight is computed when determining similarity thresholds.")
("c-sim-preserve-nodes-scaling-factor",
@@ -350,7 +350,11 @@ namespace mt_kahypar {
("c-sim-acceptance-limit-bound",
po::value<double>(&context.coarsening.rating.acceptance_limit_bound)->value_name(
"<double>")->default_value(1.0),
"Lower bound for similarity acceptance limit (nodes with at most this difference are always accepted).");
"Lower bound for similarity acceptance limit (nodes with at most this difference are always accepted).")
("c-two-hop-degree-threshold",
po::value<size_t>(&context.coarsening.two_hop_degree_threshold)->value_name(
"<size_t>")->default_value(100),
"If set, then vertices with more adjacent pins than the provided threshold are ignored during two-hop coarsening.");
return options;
}
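For readers unfamiliar with the boost::program_options pattern used above, here is a minimal, self-contained sketch of how a size_t option with a default value is registered and parsed. The struct and option names are illustrative only; apart from the program_options calls, nothing here is mt-kahypar code.

// Minimal, stand-alone illustration of the option-registration pattern used above.
// All names are made up for the example; only the boost::program_options calls mirror the diff.
#include <boost/program_options.hpp>
#include <cstddef>
#include <iostream>

namespace po = boost::program_options;

struct ToyCoarseningParams {
  std::size_t two_hop_degree_threshold = 0;
};

int main(int argc, char* argv[]) {
  ToyCoarseningParams params;
  po::options_description options("Coarsening Options");
  options.add_options()
      ("c-two-hop-degree-threshold",
       po::value<std::size_t>(&params.two_hop_degree_threshold)->value_name("<size_t>")->default_value(100),
       "Vertices with more adjacent pins than this threshold are ignored during two-hop coarsening.");

  po::variables_map vm;
  po::store(po::parse_command_line(argc, argv, options), vm);
  po::notify(vm);  // writes the parsed (or default) value into params

  std::cout << "two-hop degree threshold: " << params.two_hop_degree_threshold << std::endl;
  return 0;
}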

@@ -45,10 +45,6 @@ namespace mt_kahypar {
class TwoHopClustering {
using IncidenceMap = ds::SparseMap<HypernodeID, float>;

// degree threshold where it is extremely unlikely that two-hop coarsening is applicable
static constexpr HyperedgeID HIGH_DEGREE_THRESHOLD = 500;
static constexpr HypernodeID kInvalidHypernode = std::numeric_limits<HypernodeID>::max();

struct MatchingEntry {
HypernodeID key;
HypernodeID hn;
@@ -81,18 +77,24 @@ class TwoHopClustering {
int pass_nr = 0) {
_degree_one_map.reserve_for_estimated_number_of_insertions(cc.currentNumNodes() / 3);

auto fill_incidence_map_for_node = [&](IncidenceMap& incidence_map, const HypernodeID hn) {
auto fill_incidence_map_for_node = [&](IncidenceMap& incidence_map, const HypernodeID hn, bool& too_many_accesses) {
// TODO: can we do this more efficiently for graphs?
size_t num_accesses = 0;
HyperedgeWeight incident_weight_sum = 0;
for (const HyperedgeID& he : hg.incidentEdges(hn)) {
incident_weight_sum += hg.edgeWeight(he);
if (num_accesses + hg.edgeSize(he) > _context.coarsening.two_hop_degree_threshold) {
too_many_accesses = true;
break;
}
for (const HypernodeID& pin: hg.pins(he)) {
if (pin != hn) {
HypernodeID target_cluster = cc.clusterID(pin);
ASSERT(target_cluster != cc.clusterID(hn)); // holds since we only consider unmatched nodes
incidence_map[target_cluster] += static_cast<double>(hg.edgeWeight(he)) / (hg.edgeSize(he) - 1);
++num_accesses;
}
}
incident_weight_sum += hg.edgeWeight(he);
}
return incident_weight_sum;
};
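As a rough stand-alone sketch of what fill_incidence_map_for_node computes (assuming a simplified hyperedge representation; all types and names below are illustrative, not the mt-kahypar API): each incident hyperedge e distributes w(e) / (|e| - 1) of its weight to the cluster of every other pin, and the scan is abandoned once the number of pin accesses would exceed the configured two-hop degree threshold.

// Illustrative stand-alone version of the connectivity rating with an access cap.
// The flat Hyperedge layout and all names are assumptions for the example only.
#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <vector>

using NodeID = uint32_t;
using ClusterID = uint32_t;

struct Hyperedge {
  int64_t weight;
  std::vector<NodeID> pins;
};

// Returns the summed incident weight, or -1 if the access cap was exceeded.
// connectivity[c] accumulates w(e) / (|e| - 1) for every pin of e that lies in cluster c.
int64_t rateTwoHopConnectivity(NodeID u,
                               const std::vector<Hyperedge>& incident_edges,
                               const std::vector<ClusterID>& cluster_of,
                               std::size_t degree_threshold,
                               std::unordered_map<ClusterID, double>& connectivity) {
  std::size_t num_accesses = 0;
  int64_t incident_weight_sum = 0;
  for (const Hyperedge& e : incident_edges) {
    if (num_accesses + e.pins.size() > degree_threshold) {
      return -1;  // too many accesses: the node is skipped by two-hop coarsening
    }
    for (NodeID pin : e.pins) {
      if (pin != u) {
        connectivity[cluster_of[pin]] += static_cast<double>(e.weight) / (e.pins.size() - 1);
        ++num_accesses;
      }
    }
    incident_weight_sum += e.weight;
  }
  return incident_weight_sum;
}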
@@ -103,27 +105,33 @@ class TwoHopClustering {
tbb::parallel_for(ID(0), hg.initialNumNodes(), [&](const HypernodeID id) {
ASSERT(id < node_mapping.size());
const HypernodeID hn = node_mapping[id];
if (hg.nodeIsEnabled(hn) && cc.vertexIsUnmatched(hn) && hg.nodeDegree(hn) <= HIGH_DEGREE_THRESHOLD) {
if (hg.nodeIsEnabled(hn) && cc.vertexIsUnmatched(hn)
&& hg.nodeWeight(hn) <= _context.coarsening.max_allowed_node_weight / 2
&& hg.nodeDegree(hn) <= _context.coarsening.two_hop_degree_threshold) {
IncidenceMap& incidence_map = _local_incidence_map.local();
const HyperedgeWeight incident_weight_sum = fill_incidence_map_for_node(incidence_map, hn);

const float required_connectivity = required_similarity * incident_weight_sum;
float max_connectivity = 0;
HypernodeID best_target = kInvalidHypernode;
for (const auto& [target_cluster, connectivity]: incidence_map) {
if (connectivity >= required_connectivity && connectivity > max_connectivity) {
max_connectivity = connectivity;
best_target = target_cluster;
if (required_similarity >= 0.5) {
// in this case, this already must be the maximum
break;
incidence_map.clear();

bool too_many_accesses = false;
const HyperedgeWeight incident_weight_sum = fill_incidence_map_for_node(incidence_map, hn, too_many_accesses);

if (!too_many_accesses) {
const float required_connectivity = required_similarity * incident_weight_sum;
float max_connectivity = 0;
HypernodeID best_target = kInvalidHypernode;
for (const auto& [target_cluster, connectivity]: incidence_map) {
if (connectivity >= required_connectivity && connectivity > max_connectivity) {
max_connectivity = connectivity;
best_target = target_cluster;
if (required_similarity >= 0.5) {
// with a threshold of at least 0.5, at most one cluster can reach the required connectivity, so this must already be the maximum
break;
}
}
}
if (best_target != kInvalidHypernode) {
_degree_one_map.insert(best_target, MatchingEntry{best_target, hn});
}
}
if (best_target != kInvalidHypernode) {
_degree_one_map.insert(best_target, MatchingEntry{best_target, hn});
}
incidence_map.clear();
}
});

@@ -81,7 +81,7 @@ namespace {
}

void insert(float edge_contribution, HypernodeWeight weight) {
ASSERT(weight > 0);
ASSERT(weight >= 0);
if (edge_contribution <= 0) {
_data[0].edge_weight_contribution += edge_contribution;
_data[0].node_weight += weight;
@@ -132,14 +132,15 @@ namespace {


class PreserveRebalancingNodesPolicy final : public kahypar::meta::PolicyBase {
using IncidenceMap = ds::SparseMap<HypernodeID, float>; // this is prototypical and will almost certainly be removed
static constexpr bool debug = false;

public:
explicit PreserveRebalancingNodesPolicy():
_incident_weight(), _acceptance_limit() {}
_incident_weight(), _acceptance_limit(), _local_incidence_map(0) {}

explicit PreserveRebalancingNodesPolicy(const HypernodeID num_nodes):
_incident_weight(num_nodes, 0), _acceptance_limit(num_nodes, 0) {}
_incident_weight(num_nodes, 0), _acceptance_limit(num_nodes, 0), _local_incidence_map(num_nodes) {}

PreserveRebalancingNodesPolicy(const PreserveRebalancingNodesPolicy&) = delete;
PreserveRebalancingNodesPolicy(PreserveRebalancingNodesPolicy&&) = delete;
@@ -151,12 +152,13 @@ class PreserveRebalancingNodesPolicy final : public kahypar::meta::PolicyBase {
ASSERT(_incident_weight.size() >= hypergraph.initialNumNodes()
&& _acceptance_limit.size() >= hypergraph.initialNumNodes());

auto scaled_edge_weight = [&](const HyperedgeID he) {
auto edge_weight_scaling = [&](const HyperedgeID he) {
if constexpr (Hypergraph::is_graph) {
return hypergraph.edgeWeight(he);
return 1.0;
} else if (hypergraph.edgeSize(he) <= context.coarsening.rating.incident_weight_scaling_constant) {
return 1.0;
} else {
return static_cast<double>(hypergraph.edgeWeight(he)) /
(hypergraph.edgeSize(he) + context.coarsening.rating.incident_weight_scaling_constant);
return context.coarsening.rating.incident_weight_scaling_constant / static_cast<double>(hypergraph.edgeSize(he));
}
};

@@ -166,30 +168,29 @@ class PreserveRebalancingNodesPolicy final : public kahypar::meta::PolicyBase {
// TODO(maas): save the total incident weight in the hypergraph data structure?
double incident_weight_sum = 0;
for (const HyperedgeID& he : hypergraph.incidentEdges(hn)) {
incident_weight_sum += scaled_edge_weight(he);
incident_weight_sum += edge_weight_scaling(he) * hypergraph.edgeWeight(he);
}
_incident_weight[hn] = incident_weight_sum;
});
timer.stop_timer("compute_incident_weight");

timer.start_timer("compute_similarity_metric", "Compute Similarity Metric");
if constexpr (Hypergraph::is_graph) {
// TODO: We are ignoring edges between neighbors here - the result is thus only approximate.
// This could be acceptable, though
// TODO: We are ignoring edges between neighbors here - the result is thus only approximate.
// This could be acceptable, though
const HypernodeWeight max_summed_weight = std::ceil(context.coarsening.rating.preserve_nodes_relative_weight_limit
* hypergraph.totalWeight());
hypergraph.doParallelForAllNodes([&](const HypernodeID hn) {
GroupedIncidenceData incidence_data;
const double ratio_of_u = _incident_weight[hn] / std::max(hypergraph.nodeWeight(hn), 1);

// Step 1: Collect contributed edge weights and node weights of neighbors into sorted aggregates
// (effectively a semi-sorting)
// TODO: should this rather be relative to the maximum cluster weight?
const HypernodeWeight max_summed_weight = std::ceil(context.coarsening.rating.preserve_nodes_relative_weight_limit
* hypergraph.totalWeight());
hypergraph.doParallelForAllNodes([&](const HypernodeID hn) {
GroupedIncidenceData incidence_data;
const double ratio_of_u = _incident_weight[hn] / std::max(hypergraph.nodeWeight(hn), 1);
// TODO: this needs to be implemented differently for hypergraphs
if constexpr (Hypergraph::is_graph) {
size_t num_accesses = 0;
for (const HyperedgeID& he : hypergraph.incidentEdges(hn)) {
HypernodeID v = hypergraph.edgeTarget(he);
float edge_contribution = _incident_weight[v] - 2 * scaled_edge_weight(he);
float edge_contribution = _incident_weight[v] - 2 * hypergraph.edgeWeight(he);
HypernodeWeight weight = hypergraph.nodeWeight(v);
if (weight == 0 || edge_contribution / weight < ratio_of_u) {
incidence_data.insert(edge_contribution, weight);
@@ -200,39 +201,64 @@ class PreserveRebalancingNodesPolicy final : public kahypar::meta::PolicyBase {
break;
}
}

// Step 2: Iterate through aggregated neighbor values in sorted order and determine minimum
const auto& list = incidence_data.inner();
double summed_contribution = _incident_weight[hn];
HypernodeWeight summed_weight = std::max(hypergraph.nodeWeight(hn), 1);
double min_value = summed_contribution / summed_weight;
for (size_t i = 0; i < list.size() && summed_weight <= max_summed_weight; ++i) {
const NeighborData& neighbor = list[i];
if (summed_weight + neighbor.node_weight > max_summed_weight) {
double fraction_of_last = static_cast<double>(max_summed_weight - summed_weight) / neighbor.node_weight;
summed_contribution += fraction_of_last * neighbor.edge_weight_contribution;
summed_weight = max_summed_weight;
} else {
summed_contribution += neighbor.edge_weight_contribution;
summed_weight += neighbor.node_weight;
} else {
// this is probably quite slow and will be replaced with a bloom-filter based approach
size_t num_accesses = 0;
IncidenceMap& incidence_map = _local_incidence_map.local();
incidence_map.clear();
for (const HyperedgeID& he : hypergraph.incidentEdges(hn)) {
HypernodeID edge_size = hypergraph.edgeSize(he);
if (edge_size < context.partition.ignore_hyperedge_size_threshold) {
if (num_accesses + edge_size > context.coarsening.vertex_degree_sampling_threshold) {
break;
}
for (const HypernodeID& pin: hypergraph.pins(he)) {
if (pin != hn) {
incidence_map[pin] += edge_weight_scaling(he) * static_cast<double>(hypergraph.edgeWeight(he)) / (edge_size - 1);
++num_accesses;
}
}
}
if (summed_contribution / summed_weight <= min_value) {
min_value = summed_contribution / summed_weight;
} else {
break;
}

for (const auto& [neighbor, connectivity]: incidence_map) {
float edge_contribution = _incident_weight[neighbor] - 2 * connectivity;
HypernodeWeight weight = hypergraph.nodeWeight(neighbor);
if (weight == 0 || edge_contribution / weight < ratio_of_u) {
incidence_data.insert(edge_contribution, weight);
}
}
}

// Step 3: Compute acceptance limit of v from minimum
_acceptance_limit[hn] = std::min(
context.coarsening.rating.preserve_nodes_scaling_factor * min_value,
context.coarsening.rating.acceptance_limit_bound * _incident_weight[hn] / std::max(hypergraph.nodeWeight(hn), 1));
DBG << V(hn) << V(_acceptance_limit[hn]) << V(_incident_weight[hn])
<< V(hypergraph.nodeWeight(hn)) << V(hypergraph.nodeDegree(hn));
});
} else {
ERR("not supported");
}
// Step 2: Iterate through aggregated neighbor values in sorted order and determine minimum
const auto& list = incidence_data.inner();
double summed_contribution = _incident_weight[hn];
HypernodeWeight summed_weight = std::max(hypergraph.nodeWeight(hn), 1);
double min_value = summed_contribution / summed_weight;
for (size_t i = 0; i < list.size() && summed_weight <= max_summed_weight; ++i) {
const NeighborData& neighbor = list[i];
if (summed_weight + neighbor.node_weight > max_summed_weight) {
double fraction_of_last = static_cast<double>(max_summed_weight - summed_weight) / neighbor.node_weight;
summed_contribution += fraction_of_last * neighbor.edge_weight_contribution;
summed_weight = max_summed_weight;
} else {
summed_contribution += neighbor.edge_weight_contribution;
summed_weight += neighbor.node_weight;
}
if (summed_contribution / summed_weight <= min_value) {
min_value = summed_contribution / summed_weight;
} else {
break;
}
}

// Step 3: Compute acceptance limit of v from minimum
_acceptance_limit[hn] = std::min(
context.coarsening.rating.preserve_nodes_scaling_factor * min_value,
context.coarsening.rating.acceptance_limit_bound * _incident_weight[hn] / std::max(hypergraph.nodeWeight(hn), 1));
DBG << V(hn) << V(_acceptance_limit[hn]) << V(_incident_weight[hn])
<< V(hypergraph.nodeWeight(hn)) << V(hypergraph.nodeDegree(hn));
});
timer.stop_timer("compute_similarity_metric");
}
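The prefix scan in Step 2 and the limit in Step 3 can be summarized by the following stand-alone sketch. It assumes the neighbor aggregates arrive in the order produced by GroupedIncidenceData (increasing contribution-to-weight ratio); the function and parameter names are illustrative, and only the two context parameters mirror fields that appear in the diff.

// Illustrative version of steps 2 and 3: find the minimum contribution/weight ratio over
// growing neighborhoods (capped at max_summed_weight), then derive the acceptance limit.
#include <algorithm>
#include <cstddef>
#include <vector>

struct NeighborBucket {
  double edge_weight_contribution;
  int node_weight;
};

double acceptanceLimit(double incident_weight_of_u, int node_weight_of_u,
                       const std::vector<NeighborBucket>& sorted_buckets,
                       int max_summed_weight,
                       double scaling_factor,       // cf. preserve_nodes_scaling_factor
                       double acceptance_bound) {   // cf. acceptance_limit_bound
  double summed_contribution = incident_weight_of_u;
  int summed_weight = std::max(node_weight_of_u, 1);
  double min_value = summed_contribution / summed_weight;
  for (std::size_t i = 0; i < sorted_buckets.size() && summed_weight <= max_summed_weight; ++i) {
    const NeighborBucket& b = sorted_buckets[i];
    if (summed_weight + b.node_weight > max_summed_weight) {
      // only a fraction of the last bucket fits into the weight budget
      const double fraction = static_cast<double>(max_summed_weight - summed_weight) / b.node_weight;
      summed_contribution += fraction * b.edge_weight_contribution;
      summed_weight = max_summed_weight;
    } else {
      summed_contribution += b.edge_weight_contribution;
      summed_weight += b.node_weight;
    }
    if (summed_contribution / summed_weight <= min_value) {
      min_value = summed_contribution / summed_weight;
    } else {
      break;  // buckets are processed in order of increasing ratio, so the minimum will not improve
    }
  }
  return std::min(scaling_factor * min_value,
                  acceptance_bound * incident_weight_of_u / std::max(node_weight_of_u, 1));
}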

Expand All @@ -258,10 +284,13 @@ class PreserveRebalancingNodesPolicy final : public kahypar::meta::PolicyBase {
}

private:
const Context* _context; // TODO: currently must be a pointer so we can default-initialize..
// ! incident weight (scaled with hyperedge size) for all nodes
parallel::scalable_vector<float> _incident_weight;
// ! pre-computed metric which is used to determine whether a contraction is accepted
parallel::scalable_vector<float> _acceptance_limit;
// ! Tracks connectivity to all neighbors in case of hypergraphs
tbb::enumerable_thread_specific<IncidenceMap> _local_incidence_map;
};
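Since _local_incidence_map introduces the tbb::enumerable_thread_specific scratch-map pattern into this policy, a minimal usage sketch of that pattern follows (toy types, not mt-kahypar code): each worker thread lazily obtains its own map via .local() and clears it before reuse, avoiding an allocation per node.

// Minimal illustration of the thread-local scratch-map pattern used by _local_incidence_map.
#include <tbb/enumerable_thread_specific.h>
#include <tbb/parallel_for.h>
#include <cstdio>
#include <unordered_map>

int main() {
  const int num_nodes = 1000;
  tbb::enumerable_thread_specific<std::unordered_map<int, double>> local_map;

  tbb::parallel_for(0, num_nodes, [&](int node) {
    auto& map = local_map.local();  // created once per thread, reused across iterations
    map.clear();                    // reset the scratch state for this node
    map[node % 7] += 1.0;           // stand-in for accumulating connectivity to neighbor clusters
  });

  std::printf("threads that created a local map: %zu\n", local_map.size());
  return 0;
}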


3 changes: 2 additions & 1 deletion mt-kahypar/partition/context.h
@@ -109,7 +109,7 @@ struct RatingParameters {
DegreeSimilarityPolicy degree_similarity_policy = DegreeSimilarityPolicy::UNDEFINED;

// Similarity policy
int32_t incident_weight_scaling_constant = 0;
uint32_t incident_weight_scaling_constant = 1;
double preserve_nodes_scaling_factor = 0.25;
double preserve_nodes_relative_weight_limit = 0.001;
double acceptance_limit_bound = 1.0;
@@ -134,6 +134,7 @@ struct CoarseningParameters {
double twin_required_similarity = 0.75;
double twin_reduced_required_similarity = 0.4;
HypernodeID degree_one_node_cluster_size = 4;
size_t two_hop_degree_threshold = 100;

// Those will be determined dynamically
HypernodeWeight max_allowed_node_weight = 0;
