Skip to content

Commit

Permalink
Merge pull request #75 from KIwabuchi/feature/dnnd_new_api
Browse files Browse the repository at this point in the history
(DNND) Bugfixes in New API
  • Loading branch information
KIwabuchi authored Sep 4, 2024
2 parents 2dac3c6 + 335fa85 commit a68d15f
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 27 deletions.
2 changes: 1 addition & 1 deletion examples/dnnd_advanced_example.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ using id_t = uint32_t;
using dist_t = double;

// Point Type
using point_type = saltatlas::feature_vector<float>;
using point_type = saltatlas::pm_feature_vector<float>;

// Custom distance function
// The distance function should have the signature as follows:
Expand Down
25 changes: 23 additions & 2 deletions examples/dnnd_simple_custom_point_example.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
/// mpirun -n 2 ./example/dnnd_simple_custom_point_example

#include <iostream>
#include <random>
#include <vector>

#include <ygm/comm.hpp>

#include <saltatlas/dnnd/dnnd_simple.hpp>
Expand All @@ -39,16 +39,37 @@ dist_t distance_func(const graph_point& a, const graph_point& b) {
return a.data.size() + b.data.size();
}

// Randomly generate a point, just for demonstration
/// \brief Randomly generates a point, just for demonstration.
/// The point holds between 1 and 10 rows in `data`; each row holds between
/// 1 and 10 values drawn uniformly from [0.0, 1.0).
/// \return The generated point.
graph_point gen_point() {
  graph_point p;
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<double> value_dist(0.0, 1.0);

  const std::size_t num_vertices = gen() % 10 + 1;
  p.data.resize(num_vertices);
  for (auto& row : p.data) {
    const std::size_t degree = gen() % 10 + 1;
    // resize(), not reserve(): reserve() only allocates capacity and leaves
    // size() at 0, so indexing the elements afterwards is undefined behavior.
    row.resize(degree);
    for (auto& value : row) {
      value = value_dist(gen);
    }
  }
  return p;
}

int main(int argc, char** argv) {
ygm::comm comm(&argc, &argv);

saltatlas::dnnd<id_t, graph_point, dist_t> g(distance_func, comm);

// Add points
{
// Assuming ids and points are stored in vectors
std::vector<id_t> ids;
std::vector<graph_point> points;
// Assuming ids and points are stored in vectors
for (size_t i = 0; i < 10; ++i) {
ids.push_back(i + 10 * comm.rank());
points.push_back(gen_point());
}
g.add_points(ids.begin(), ids.end(), points.begin(), points.end());
}

Expand Down
2 changes: 0 additions & 2 deletions include/saltatlas/dnnd/data_reader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ void read_points_helper(

ygm::ygm_ptr<point_store<id_t, point_t, H, E, pstore_alloc>> ptr_point_store(
&local_point_store);
local_point_store.reset();
comm.cf_barrier();

// Reads points
Expand Down Expand Up @@ -132,7 +131,6 @@ void read_points_with_id_helper(
const std::function<int(const id_t &id)> &point_partitioner,
ygm::comm &comm, const bool verbose) {
const auto range = partial_range(file_names.size(), comm.rank(), comm.size());
local_point_store.reset();
static auto &ref_point_store = local_point_store;
comm.cf_barrier();

Expand Down
39 changes: 30 additions & 9 deletions include/saltatlas/dnnd/dnnd_advanced.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,12 @@ struct open_read_only_t {};
/// \tparam Point Point type.
/// \tparam Distance Distance type.
template <typename Id = uint64_t,
typename Point = saltatlas::feature_vector<double>,
typename Point = saltatlas::pm_feature_vector<double>,
typename Distance = double>
class dnnd {
private:
using self_type = dnnd<Id, Point, Distance>;

public:
/// \brief Point ID type.
using id_type = Id;
Expand Down Expand Up @@ -183,6 +186,7 @@ class dnnd {
}

/// \brief Add points to the internal point store.
/// All ranks must call this function.
/// \tparam id_iterator Iterator type for point IDs.
/// \tparam point_iterator Iterator type for points.
/// \param ids_begin Iterator to the beginning of point IDs.
Expand All @@ -192,16 +196,24 @@ class dnnd {
template <typename id_iterator, typename point_iterator>
void add_points(id_iterator ids_begin, id_iterator ids_end,
point_iterator points_begin, point_iterator points_end) {
assert(m_pstore);
m_pstore->reserve(std::distance(ids_begin, ids_end));
for (auto id = ids_begin; id != ids_end; ++id) {
assert(points_begin != points_end);
(*m_pstore)[*id] = *points_begin;
++points_begin;
auto receiver = [](auto, auto this_ptr, const id_t id,
const auto& sent_point) {
if (this_ptr->m_pstore.contains(id)) {
std::cerr << "Duplicate ID " << id << std::endl;
MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
}
this_ptr->m_pstore[id] = sent_point;
};

for (; ids_begin != ids_end; ++ids_begin, ++points_begin) {
const auto dst = priv_get_point_partitioner()(*ids_begin);
m_comm.async(dst, receiver, m_this, *ids_begin, *points_begin);
}
m_comm.barrier();
}

/// \brief Load points from files and add to the internal point store.
/// All ranks must call this function.
/// \tparam paths_iterator Iterator type for file paths.
/// \param paths_begin Iterator to the beginning of file paths.
/// \param paths_end Iterator to the end of file paths.
Expand All @@ -226,6 +238,7 @@ class dnnd {
}

/// \brief Load points from files and add to the internal point store.
/// All ranks must call this function.
/// This function assumes that there is one point per line.
/// \tparam paths_iterator Iterator type for file paths.
/// \param paths_begin Iterator to the beginning of file paths.
Expand Down Expand Up @@ -253,6 +266,7 @@ class dnnd {
}

/// \brief Build a KNNG.
/// All ranks must call this function.
/// \param k Number of neighbors per point.
/// \param rho Rho parameter in NN-Descent.
/// \param delta Delta parameter in NN-Descent.
Expand All @@ -276,6 +290,7 @@ class dnnd {
}

/// \brief Build a KNNG.
/// All ranks must call this function.
/// \param k Number of neighbors per point.
/// \param initial_index Initial index.
/// \param rho Rho parameter in NN-Descent.
Expand All @@ -301,6 +316,7 @@ class dnnd {
}

/// \brief Build a KNNG.
/// All ranks must call this function.
/// \param k Number of neighbors per point.
/// \param initial_index Initial index.
/// \param rho Rho parameter in NN-Descent.
Expand All @@ -327,6 +343,7 @@ class dnnd {
}

/// \brief Update the KNNG.
/// All ranks must call this function.
void update(const std::size_t index_id, distance_function_type dfunc,
const int k, const double rho = 0.8, const double delta = 0.001) {
typename nn_kernel_type::option option{.k = k,
Expand All @@ -345,6 +362,7 @@ class dnnd {

/// \brief Apply optimizations to an already constructed KNNG aiming at
/// improving the query quality and performance.
/// All ranks must call this function.
/// \param make_index_undirected If true, make the index undirected.
/// \param make_index_undirected If true, make the graph undirected.
/// \param pruning_degree_multiplier
Expand Down Expand Up @@ -374,6 +392,7 @@ class dnnd {
/// \brief Query nearest neighbors of given points.
/// This function assumes that the query points are already distributed.
/// Query results are returned to the MPI rank that submitted the queries.
/// All ranks must call this function.
/// \tparam query_iterator Iterator type for query points.
/// \param queries_begin Iterator to the beginning of query points.
/// \param queries_end Iterator to the end of query points.
Expand Down Expand Up @@ -411,6 +430,7 @@ class dnnd {
/// \brief Query nearest neighbors of given points.
/// This function runs queries on multiple indices in such a way that the
/// indices are merged before the queries are run.
/// All ranks must call this function.
template <typename index_id_iterator, typename query_iterator>
neighbor_store_type query(index_id_iterator index_ids_begin,
index_id_iterator index_ids_end,
Expand Down Expand Up @@ -504,8 +524,9 @@ class dnnd {
bool m_verbose;
std::unique_ptr<metall::utility::metall_mpi_adaptor> m_metall{nullptr};
point_store_type* m_pstore{nullptr};
knn_index_container* m_knn_index_list{nullptr};
size_container* m_index_k_list{nullptr};
knn_index_container* m_knn_index_list{nullptr};
size_container* m_index_k_list{nullptr};
ygm::ygm_ptr<self_type> m_this{this};
};

} // namespace saltatlas
41 changes: 30 additions & 11 deletions include/saltatlas/dnnd/dnnd_simple.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ template <typename Id = uint64_t,
typename Point = saltatlas::feature_vector<double>,
typename Distance = double>
class dnnd {
private:
using self_type = dnnd<Id, Point, Distance>;

public:
/// \brief Point ID type.
using id_type = Id;
Expand Down Expand Up @@ -111,6 +114,7 @@ class dnnd {
m_verbose(verbose) {}

/// \brief Add points to the internal point store.
/// All ranks must call this function, even those that add no points.
/// \tparam id_iterator Iterator type for point IDs.
/// \tparam point_iterator Iterator type for points.
/// \param ids_begin Iterator to the beginning of point IDs.
Expand All @@ -120,14 +124,24 @@ class dnnd {
template <typename id_iterator, typename point_iterator>
void add_points(id_iterator ids_begin, id_iterator ids_end,
point_iterator points_begin, point_iterator points_end) {
m_pstore.reserve(std::distance(ids_begin, ids_end));
for (auto id = ids_begin; id != ids_end; ++id) {
m_pstore[*id] = *points_begin;
++points_begin;
auto receiver = [](auto, auto this_ptr, const id_t id,
const auto& sent_point) {
if (this_ptr->m_pstore.contains(id)) {
std::cerr << "Duplicate ID " << id << std::endl;
MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
}
this_ptr->m_pstore[id] = sent_point;
};

for (; ids_begin != ids_end; ++ids_begin, ++points_begin) {
const auto dst = priv_get_point_partitioner()(*ids_begin);
m_comm.async(dst, receiver, m_this, *ids_begin, *points_begin);
}
m_comm.barrier();
}

/// \brief Load points from files and add to the internal point store.
/// All ranks must call this function, even those that load no points.
/// \tparam paths_iterator Iterator type for file paths.
/// \param paths_begin Iterator to the beginning of file paths.
/// \param paths_end Iterator to the end of file paths.
Expand All @@ -150,6 +164,7 @@ class dnnd {

/// \brief Load points from files and add to the internal point store.
/// This function assumes that there is one point per line.
/// All ranks must call this function, even those that load no points.
/// \tparam paths_iterator Iterator type for file paths.
/// \param paths_begin Iterator to the beginning of file paths.
/// \param paths_end Iterator to the end of file paths.
Expand All @@ -176,6 +191,7 @@ class dnnd {
}

/// \brief Build a KNNG.
/// All ranks must call this function.
/// \param k Number of neighbors per point.
/// \param rho Rho parameter in NN-Descent.
/// \param delta Delta parameter in NN-Descent.
Expand All @@ -197,6 +213,7 @@ class dnnd {

/// \brief Apply optimizations to an already constructed KNNG aiming at
/// improving the query quality and performance.
/// All ranks must call this function.
/// \param make_index_undirected If true, make the index undirected.
/// \param make_index_undirected If true, make the graph undirected.
/// \param pruning_degree_multiplier
Expand All @@ -220,6 +237,7 @@ class dnnd {
/// \brief Query nearest neighbors of given points.
/// This function assumes that the query points are already distributed.
/// Query results are returned to the MPI rank that submitted the queries.
/// All ranks must call this function.
/// \tparam query_iterator Iterator type for query points.
/// \param queries_begin Iterator to the beginning of query points.
/// \param queries_end Iterator to the end of query points.
Expand Down Expand Up @@ -310,13 +328,14 @@ class dnnd {
return [size](const id_type& id) { return id % size; };
};

distance_function_type m_distance_func;
ygm::comm& m_comm;
uint64_t m_rnd_seed;
point_store_type m_pstore;
knn_index_type m_knn_index{};
std::size_t m_index_k{0};
bool m_verbose;
distance_function_type m_distance_func;
ygm::comm& m_comm;
uint64_t m_rnd_seed;
point_store_type m_pstore;
knn_index_type m_knn_index{};
std::size_t m_index_k{0};
bool m_verbose;
ygm::ygm_ptr<self_type> m_this{this};
};

} // namespace saltatlas
4 changes: 2 additions & 2 deletions include/saltatlas/point_store.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@ class point_store {

private:
using point_table_type = container::unordered_map<
id_type, PointType, hasher, equal_to,
id_type, point_type, hasher, equal_to,
dndetail::other_scoped_allocator<allocator_type,
std::pair<const id_type, PointType>>>;
std::pair<const id_type, point_type>>>;

public:
using iterator = typename point_table_type::iterator;
Expand Down

0 comments on commit a68d15f

Please sign in to comment.