diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 99037f9..ddd72c4 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -37,16 +37,17 @@ function(add_saltatlas_dnnd_example_feature_type example_name type_name) PRIVATE "SALTATLAS_DNND_EXAMPLE_FEATURE_ELEMENT_TYPE=${type_name}") endfunction() -add_saltatlas_example(dnnd_simple_example) -add_saltatlas_example(dnnd_simple_custom_distance_example) -add_saltatlas_example(dnnd_simple_custom_point_example) +add_saltatlas_example(dnnd_simple) +add_saltatlas_example(dnnd_simple_custom_distance) +add_saltatlas_example(dnnd_simple_custom_point) +add_saltatlas_example(dnnd_levenshtein) add_saltatlas_dnnd_example_feature_type(dnnd_bench float) add_saltatlas_dnnd_example_feature_type(dnnd_bench uint8_t) add_saltatlas_dnnd_example_feature_type(dnnd_bench double) if (SALTATLAS_USE_METALL) - add_saltatlas_example(dnnd_advanced_example) + add_saltatlas_example(dnnd_advanced) endif () add_subdirectory(datasets) diff --git a/examples/dnnd_advanced_example.cpp b/examples/dnnd_advanced.cpp similarity index 100% rename from examples/dnnd_advanced_example.cpp rename to examples/dnnd_advanced.cpp diff --git a/examples/dnnd_custom_point_example.cpp b/examples/dnnd_custom_point_example.cpp deleted file mode 100644 index b0ed535..0000000 --- a/examples/dnnd_custom_point_example.cpp +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright 2020-2022 Lawrence Livermore National Security, LLC and other -// saltatlas Project Developers. See the top-level COPYRIGHT file for details. 
-// -// SPDX-License-Identifier: MIT - -// Usage: -// cd saltatlas/build -// mpirun -n 2 ./examples/dnnd_custom_point_example - -#include -#include -#include -#include - -#include -#include - -using id_type = uint32_t; -using point_type = std::unordered_map>; -using distance_type = uint32_t; -using dnnd_type = saltatlas::dnnd; - -int main(int argc, char **argv) { - ygm::comm comm(&argc, &argv); - - int index_k{2}; - int query_k{4}; - double r{0.8}; - double delta{0.001}; - double epsilon{0.1}; - double mu{0.0}; - bool exchange_reverse_neighbors{true}; - bool make_index_undirected{true}; - double pruning_degree_multiplier{0.0}; // No pruning - std::size_t batch_size{1ULL << 31}; - bool verbose{true}; - - auto distance_func = [](const point_type &a, - const point_type &b) -> distance_type { - auto sum0 = 0.0; - auto sum1 = 0.0; - for (const auto &[key, value] : a) { - for (const auto &v : value) { - sum0 += v; - } - } - for (const auto &[key, value] : b) { - for (const auto &v : value) { - sum1 += v; - } - } - return std::abs(sum0 - sum1); - }; - - dnnd_type dnnd(distance_func, comm, std::random_device{}(), verbose); - comm.cf_barrier(); - - comm.cout0() << "<>" << std::endl; - { - auto &point_store = dnnd.get_point_store(); - // Init points - for (int i = 0; i < 10; i++) { - const id_type id = i * comm.size() + comm.rank(); - auto &point = point_store[id]; - const auto size = std::rand() % 10 + 1; - for (int j = 0; j < size; j++) { - point[j].push_back(std::rand() % 100); - } - } - } - - comm.cout0() << "<>" << std::endl; - dnnd.construct_index(index_k, r, delta, exchange_reverse_neighbors, - batch_size); - - comm.cout0() << "\n<>" << std::endl; - dnnd.optimize_index(make_index_undirected, pruning_degree_multiplier); - - comm.cout0() << "\n<>" << std::endl; - std::vector queries; - for (int i = 0; i < 10; i++) { - dnnd_type::point_type query; - const auto size = std::rand() % 10 + 1; - for (int j = 0; j < size; j++) { - query[j].push_back(std::rand() % 100); - } - 
queries.push_back(query); - } - - comm.cout0() << "Executing queries" << std::endl; - const auto query_results = - dnnd.query_batch(queries, query_k, epsilon, mu, batch_size); - - return 0; -} diff --git a/examples/dnnd_example.cpp b/examples/dnnd_example.cpp deleted file mode 100644 index f7e4dea..0000000 --- a/examples/dnnd_example.cpp +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright 2020-2024 Lawrence Livermore National Security, LLC and other -// saltatlas Project Developers. See the top-level COPYRIGHT file for details. -// -// SPDX-License-Identifier: MIT - -// Usage: -// cd saltatlas/build -// mpirun -n 2 ./examples/dnnd_example - -#include -#include -#include - -#include "dnnd_example_common.hpp" - -int main(int argc, char **argv) { - ygm::comm comm(&argc, &argv); - - int index_k{2}; - int query_k{4}; - double r{0.8}; - double delta{0.001}; - double epsilon{0.1}; - double mu{0.0}; - bool exchange_reverse_neighbors{true}; - bool make_index_undirected{true}; - double pruning_degree_multiplier{0.0}; // No pruning - bool remove_long_paths{false}; - std::size_t batch_size{1ULL << 29}; - std::string distance_name{"l2"}; - std::vector point_file_paths{ - "./examples/datasets/point_5-4.txt"}; - std::string query_file_path{"./examples/datasets/query_5-4.txt"}; - std::string ground_truth_file_path{ - "./examples/datasets/ground-truth_5-4.txt"}; - std::string point_file_format{"wsv"}; - std::string query_result_file_path{"query-results"}; - bool verbose{true}; - - dnnd_type dnnd(distance_name, comm, std::random_device{}(), verbose); - comm.cf_barrier(); - - comm.cout0() << "<>" << std::endl; - saltatlas::read_points(point_file_paths, point_file_format, verbose, - dnnd.get_point_partitioner(), dnnd.get_point_store(), - comm); - - comm.cout0() << "<>" << std::endl; - dnnd.construct_index(index_k, r, delta, exchange_reverse_neighbors, - batch_size); - - comm.cout0() << "\n<>" << std::endl; - dnnd.optimize_index(make_index_undirected, pruning_degree_multiplier, - 
remove_long_paths); - - comm.cout0() << "\n<>" << std::endl; - std::vector queries; - saltatlas::read_query(query_file_path, queries, comm); - - comm.cout0() << "Executing queries" << std::endl; - const auto query_results = - dnnd.query_batch(queries, query_k, epsilon, mu, batch_size); - - comm.cout0() << "\nRecall scores" << std::endl; - show_query_recall_score(query_results, ground_truth_file_path, comm); - - comm.cout0() << "\nDump query results to " << query_result_file_path - << std::endl; - saltatlas::utility::gather_and_dump_neighbors(query_results, - query_result_file_path, comm); - - return 0; -} diff --git a/examples/dnnd_levenshtein.cpp b/examples/dnnd_levenshtein.cpp new file mode 100644 index 0000000..94d6ac2 --- /dev/null +++ b/examples/dnnd_levenshtein.cpp @@ -0,0 +1,61 @@ +// Copyright 2020-2024 Lawrence Livermore National Security, LLC and other +// saltatlas Project Developers. See the top-level COPYRIGHT file for details. +// +// SPDX-License-Identifier: MIT + +// Usage: +// cd saltatlas/build +// mpirun -n 2 ./examples/dnnd_levenshtein + +#include +#include +#include + +#include +#include + +#include +#include + +using id_type = uint32_t; +using dist_t = float; +using point_type = saltatlas::feature_vector; + +int main(int argc, char **argv) { + ygm::comm comm(&argc, &argv); + + std::vector point_file_paths{ + "../examples/datasets/point_string.txt"}; + std::filesystem::path query_file_path{ + "../examples/datasets/query_string.txt"}; + std::filesystem::path ground_truth_file_path{ + "../examples/datasets/ground-truth_string.txt"}; + std::filesystem::path query_result_file_path{"query-results"}; + + saltatlas::dnnd g( + saltatlas::distance::id::levenshtein, comm); + + comm.cout0() << "<>" << std::endl; + g.load_points(point_file_paths.begin(), point_file_paths.end(), "wsv"); + + comm.cout0() << "<>" << std::endl; + int index_k{2}; + g.build(index_k); + + comm.cout0() << "<>" << std::endl; + bool make_graph_undirected = true; +
g.optimize(make_graph_undirected); + + comm.cout0() << "<>" << std::endl; + int num_to_search{3}; + std::vector queries; + saltatlas::read_query(query_file_path, queries, comm); + + const auto results = g.query(queries.begin(), queries.end(), num_to_search); + comm.cout0() << "Dumping query results to " << query_result_file_path + << std::endl; + saltatlas::utility::gather_and_dump_neighbors(results, query_result_file_path, + comm); + + return 0; +} diff --git a/examples/dnnd_pm_const_example.cpp b/examples/dnnd_pm_const_example.cpp deleted file mode 100644 index 1c15f33..0000000 --- a/examples/dnnd_pm_const_example.cpp +++ /dev/null @@ -1,310 +0,0 @@ -// Copyright 2020-2024 Lawrence Livermore National Security, LLC and other -// saltatlas Project Developers. See the top-level COPYRIGHT file for details. -// -// SPDX-License-Identifier: MIT - -#include -#include -#include -#include - -#include "dnnd_example_common.hpp" - -struct option_t { - int index_k{0}; - double r{0.8}; - double delta{0.001}; - bool exchange_reverse_neighbors{true}; - std::size_t batch_size{1ULL << 29}; - std::string distance_name; - std::vector point_file_names; - std::string point_file_format; - std::string dnnd_init_index_path; - std::string dhnsw_init_index_path; - bool settled_init_index{false}; - std::string datastore_path; - std::string datastore_transfer_path; - std::string index_dump_prefix; - bool donot_store_dataset{false}; - bool dump_index_with_distance{false}; - bool verbose{false}; -}; - -bool parse_options(int, char **, option_t &, bool &); -template -void usage(std::string_view, cout_type &); -void show_options(const option_t &, ygm::comm &); - -int main(int argc, char **argv) { - ygm::comm comm(&argc, &argv); - show_config(comm); - - option_t opt; - bool help{false}; - if (!parse_options(argc, argv, opt, help)) { - comm.cerr0() << "Invalid option" << std::endl; - usage(argv[0], comm.cerr0()); - return 0; - } - if (help) { - usage(argv[0], comm.cout0()); - return 0; - } - 
show_options(opt, comm); - - { - dnnd_pm_type dnnd(dnnd_pm_type::create, opt.datastore_path, - opt.distance_name, comm, std::random_device{}(), - opt.verbose); - - comm.cout0() << "\n<>" << std::endl; - { - // Gather file paths if directories are given by the user - const auto paths = - saltatlas::utility::find_file_paths(opt.point_file_names); - - ygm::timer point_read_timer; - saltatlas::read_points(paths, opt.point_file_format, opt.verbose, - dnnd.get_point_partitioner(), - dnnd.get_point_store(), comm); - comm.cout0() << "\nReading points took (s)\t" - << point_read_timer.elapsed() << std::endl; - comm.cout0() << "#of points\t" - << comm.all_reduce_sum(dnnd.get_point_store().size()) - << std::endl; - comm.cout0() << "Feature dimensions\t" - << dnnd.get_point_store().begin()->second.size() - << std::endl; - } - - comm.cout0() << "\n<>" << std::endl; - ygm::timer const_timer; - if (!opt.dnnd_init_index_path.empty()) { - dnnd_pm_type init_dnnd(dnnd_pm_type::open_read_only, - opt.dnnd_init_index_path, comm, opt.verbose); - dnnd.construct_index(opt.index_k, opt.r, opt.delta, - opt.exchange_reverse_neighbors, opt.batch_size, - init_dnnd.get_knn_index(), opt.settled_init_index); - } else if (!opt.dhnsw_init_index_path.empty()) { - std::unordered_map> init_neighbors; - comm.cout0() << "Read DHNS index" << std::endl; - ygm::timer read_timer; - saltatlas::read_dhnsw_index( - std::vector{opt.dhnsw_init_index_path}, opt.verbose, - dnnd.get_point_partitioner(), init_neighbors, comm); - comm.cout0() << "\nReading index took (s)\t" << read_timer.elapsed() - << std::endl; - - dnnd.construct_index(opt.index_k, opt.r, opt.delta, - opt.exchange_reverse_neighbors, opt.batch_size, - init_neighbors, opt.settled_init_index); - } else { - dnnd.construct_index(opt.index_k, opt.r, opt.delta, - opt.exchange_reverse_neighbors, opt.batch_size); - } - comm.cout0() << "\nIndex construction took (s)\t" << const_timer.elapsed() - << std::endl; - - if (opt.donot_store_dataset) { - comm.cout0() 
<< "\nDestroying dataset from the data store." << std::endl; - dnnd.destroy_dataset(); - } - } - comm.cf_barrier(); - comm.cout0() << "\nClosed Metall." << std::endl; - - if (!opt.datastore_transfer_path.empty()) { - comm.cout0() << "\nTransferring index data store " << opt.datastore_path - << " to " << opt.datastore_transfer_path << std::endl; - if (!dnnd_pm_type::copy(opt.datastore_path, opt.datastore_transfer_path)) { - comm.cerr0() << "\nFailed to transfer index." << std::endl; - MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); - } - comm.cout0() << "Finished transfer." << std::endl; - } - - if (!opt.index_dump_prefix.empty()) { - comm.cout0() << "\nDumping index to " << opt.index_dump_prefix << std::endl; - // Reopen dnnd in read-only mode - dnnd_pm_type dnnd(dnnd_pm_type::open_read_only, opt.datastore_path, comm, - opt.verbose); - if (!dnnd.dump_index(opt.index_dump_prefix, opt.dump_index_with_distance)) { - comm.cerr0() << "\nFailed to dump index." << std::endl; - MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); - } - comm.cf_barrier(); - comm.cout0() << "Finished dumping." 
<< std::endl; - } - - return 0; -} - -inline bool parse_options(int argc, char **argv, option_t &option, bool &help) { - option.distance_name.clear(); - option.point_file_names.clear(); - option.point_file_format.clear(); - option.dnnd_init_index_path.clear(); - option.dhnsw_init_index_path.clear(); - option.settled_init_index = false; - option.datastore_path.clear(); - option.datastore_transfer_path.clear(); - option.index_dump_prefix.clear(); - help = false; - - int n; - while ((n = ::getopt(argc, argv, "k:r:d:z:x:f:p:I:H:Seb:D:RMvh")) != -1) { - switch (n) { - case 'k': - option.index_k = std::stoi(optarg); - break; - - case 'r': - option.r = std::stod(optarg); - break; - - case 'd': - option.delta = std::stod(optarg); - break; - - case 'e': - option.exchange_reverse_neighbors = true; - break; - - case 'f': - option.distance_name = optarg; - break; - - case 'z': - option.datastore_path = optarg; - break; - - case 'x': - option.datastore_transfer_path = optarg; - break; - - case 'p': - option.point_file_format = optarg; - break; - - case 'I': - option.dnnd_init_index_path = optarg; - break; - - case 'H': - option.dhnsw_init_index_path = optarg; - break; - - case 'S': - option.settled_init_index = true; - break; - - case 'b': - option.batch_size = std::stoul(optarg); - break; - - case 'D': - option.index_dump_prefix = optarg; - break; - - case 'R': - option.donot_store_dataset = true; - break; - - case 'M': - option.dump_index_with_distance = true; - break; - - case 'v': - option.verbose = true; - break; - - case 'h': - help = true; - return true; - - default: - return false; - } - } - - for (int index = optind; index < argc; index++) { - option.point_file_names.emplace_back(argv[index]); - } - - if (option.datastore_path.empty() || option.distance_name.empty() || - option.point_file_format.empty() || option.point_file_names.empty()) { - return false; - } - - if (!option.dnnd_init_index_path.empty() && - !option.dhnsw_init_index_path.empty()) { - return false; - } 
- - return true; -} - -template -void usage(std::string_view exe_name, cout_type &cout) { - cout << "Usage: mpirun -n [#of processes] " << exe_name - << " [options (see below)] [list of input point files (or directories " - "that contain input files) (required)]" - << std::endl; - - cout - << "Options:" - << "\n\t-z [string, required] Path to store constructed index." - << "\n\t-f [string, required] Distance metric name:" - << "\n\t\t'l2' (L2 distance), " - "'sql2' (squared L2, faster one), " - "'cosine' (cosine similarity), " - "'altcosine' (alternative faster cosine similarity), " - "'jaccard' (Jaccard index), " - "'altjaccard' (alternative faster Jaccard index), " - "or 'levenshtein' (Levenshtein distance)." - << "\n\t-p [string, required] Format of input point files:" - << "\n\t\t'wsv' (whitespace-separated values w/o ID)," - << "\n\t\t'wsv-id' (WSV format and the first column is point ID)," - << "\n\t\t'csv' (comma-separated values w/o ID)," - << "\n\t\tor 'csv-id' (CSV format and the first column is point ID)." - << "\n\t-k [int] Number of neighbors to have for each point in the index." - << "\n\t-r [double] Sample rate parameter (ρ) in NN-Descent." - << "\n\t-d [double] Precision parameter (δ) in NN-Descent." - << "\n\t-e If specified, generate reverse neighbors globally during the " - "index construction." - << "\n" - << "\n\t-I [string] Path to an existing DNND data for initializing the " - "new index." - << "\n\t-H [string] Path to an existing HNSW index directory for" - " initializing the new index." - << "\n\t-S If specified, consider the initial neighbors settled ones and " - "mark them as 'old' ones." - << "\n\t-x [string] If specified, transfer index to this path at the end." - << "\n\t-b [long int] Batch size for the index construction (0 is the " - "full batch mode)." - << "\n\t-D [string] If specified, dump the k-NN index to files starting " - "with this prefix (one file per process). 
A line starts from the " - "corresponding source ID followed by the list of neighbor IDs." - << "\n\t-M If specified, dump the k-NN index with distances." << "\n" - << "\n\t-R If specified, do not store the dataset with the index." << "\n" - << "\n\t-v If specified, turn on the verbose mode." - << "\n\t-h Show this menu." << std::endl; -} - -void show_options(const option_t &opt, ygm::comm &comm) { - comm.cout0() << "\nOptions:" << "\nDatastore path\t" << opt.datastore_path - << "\nDistance metric name\t" << opt.distance_name - << "\nPoint file format\t" << opt.point_file_format << "\nk\t" - << opt.index_k << "\nr\t" << opt.r << "\ndelta\t" << opt.delta - << "\nExchange reverse neighbors\t" - << opt.exchange_reverse_neighbors << "\nBatch size\t" - << opt.batch_size << "\nDNND init index path\t" - << opt.dnnd_init_index_path << "\nDHNSW init index path\t" - << opt.dhnsw_init_index_path << "\nSettled init index\t" - << opt.settled_init_index << "\nDatastore transfer path\t" - << opt.datastore_transfer_path - << "\nk-NN index dump file prefix\t" << opt.index_dump_prefix - << "\nDump index with distance\t" << opt.dump_index_with_distance - << "\nDon't store dataset\t" << opt.donot_store_dataset - << "\nVerbose\t" << opt.verbose << std::endl; -} \ No newline at end of file diff --git a/examples/dnnd_pm_optimize_example.cpp b/examples/dnnd_pm_optimize_example.cpp deleted file mode 100644 index 5ad08c7..0000000 --- a/examples/dnnd_pm_optimize_example.cpp +++ /dev/null @@ -1,205 +0,0 @@ -// Copyright 2020-2022 Lawrence Livermore National Security, LLC and other -// saltatlas Project Developers. See the top-level COPYRIGHT file for details. 
-// -// SPDX-License-Identifier: MIT - -#include -#include -#include -#include - -#include -#include - -#include "dnnd_example_common.hpp" - -struct option_t { - std::string original_datastore_path; - std::string datastore_path; - std::string datastore_transfer_path; - bool make_index_undirected{true}; - double pruning_degree_multiplier{0.0}; // no pruning by default - bool remove_long_paths{false}; - std::size_t batch_size{1ULL << 28}; - std::string index_dump_prefix; - bool dump_index_with_distance{false}; - bool verbose{true}; -}; - -bool parse_options(int, char **, option_t &, bool &); -template -void usage(std::string_view, cout_type &); -void show_options(const option_t &, ygm::comm &); - -int main(int argc, char **argv) { - ygm::comm comm(&argc, &argv); - show_config(comm); - - option_t opt; - bool help{true}; - - if (!parse_options(argc, argv, opt, help)) { - comm.cerr0() << "Invalid option" << std::endl; - usage(argv[0], comm.cerr0()); - return 0; - } - if (help) { - usage(argv[0], comm.cout0()); - return 0; - } - - show_options(opt, comm); - - if (!opt.original_datastore_path.empty()) { - if (dnnd_pm_type::copy(opt.original_datastore_path, opt.datastore_path)) { - comm.cout0() << "\nTransferred index from " << opt.original_datastore_path - << " to " << opt.datastore_path << std::endl; - } else { - comm.cerr0() << "Failed to transfer index." << std::endl; - MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); - } - } - - { - dnnd_pm_type dnnd(dnnd_pm_type::open, opt.datastore_path, comm, - opt.verbose); - comm.cout0() << "\n<>" << std::endl; - ygm::timer optimization_timer; - dnnd.optimize_index(opt.make_index_undirected, - opt.pruning_degree_multiplier, opt.remove_long_paths); - comm.cout0() << "\nIndex optimization took (s)\t" - << optimization_timer.elapsed() << std::endl; - } - comm.cout0() << "\nThe index is ready for query." 
<< std::endl; - - if (!opt.datastore_transfer_path.empty()) { - comm.cout0() << "\nTransferring index data store " << opt.datastore_path - << " to " << opt.datastore_transfer_path << std::endl; - if (!dnnd_pm_type::copy(opt.datastore_path, opt.datastore_transfer_path)) { - comm.cerr0() << "\nFailed to transfer index." << std::endl; - MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); - } - } - - if (!opt.index_dump_prefix.empty()) { - comm.cout0() << "\nDumping index to " << opt.index_dump_prefix << std::endl; - // Reopen dnnd in read-only mode - dnnd_pm_type dnnd(dnnd_pm_type::open_read_only, opt.datastore_path, comm, - opt.verbose); - if (!dnnd.dump_index(opt.index_dump_prefix, opt.dump_index_with_distance)) { - comm.cerr0() << "\nFailed to dump index." << std::endl; - MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); - } - comm.cf_barrier(); - comm.cout0() << "Finished dumping." << std::endl; - } - - return 0; -} - -bool parse_options(int argc, char **argv, option_t &opt, bool &help) { - opt.original_datastore_path.clear(); - opt.datastore_path.clear(); - opt.datastore_transfer_path.clear(); - opt.index_dump_prefix.clear(); - help = false; - - int n; - while ((n = ::getopt(argc, argv, "i:z:x:um:lb:D:Mvh")) != -1) { - switch (n) { - case 'i': - opt.original_datastore_path = optarg; - break; - - case 'z': - opt.datastore_path = optarg; - break; - - case 'x': - opt.datastore_transfer_path = optarg; - break; - - case 'u': - opt.make_index_undirected = true; - break; - - case 'm': - opt.pruning_degree_multiplier = std::stod(optarg); - break; - - case 'l': - opt.remove_long_paths = true; - break; - - case 'b': - opt.batch_size = std::stoull(optarg); - break; - - case 'D': - opt.index_dump_prefix = optarg; - break; - - case 'M': - opt.dump_index_with_distance = true; - break; - - case 'v': - opt.verbose = true; - break; - - case 'h': - help = true; - return true; - - default: - std::cerr << "Invalid option" << std::endl; - std::abort(); - } - } - - if (opt.datastore_path.empty()) { - 
return false; - } - - return true; -} - -template -void usage(std::string_view exe_name, cout_type &cout) { - cout << "Usage: mpirun -n [#of processes] " << exe_name - << " [options (see below)]" << std::endl; - - cout << "Options:" - << "\n\t-z [string, required] Path to an index to modify." - << "\n\t-u If specified, make the index undirected." - << "\n\t-m [double] Pruning degree multiplier (m) in PyNNDescent." - << "\n\t\tCut every points' neighbors more than 'k' x 'm'" - "No pruning if <= 0." - << "\n\t-l If specified, remove long paths as proposed by PyNNDescent." - << "\n\t-i [string] If specified, transfer an already constructed index " - "from this path to path 'z' at the beginning." - << "\n\t-x [string] If specified, transfer the index to this path at " - "the end." - << "\n\t-b [long int] Batch size (0 is the full batch mode)." - << "\n\t-D [string] If specified, dump the k-NN index to files starting " - "with this prefix (one file per process). A line starts from the " - "corresponding source ID followed by the list of neighbor IDs." - << "\n\t-M If specified, dump the k-NN index with distances." - << "\n" - << "\n\t-v If specified, turn on the verbose mode." - << "\n\t-h Show this menu." 
<< std::endl; -} - -void show_options(const option_t &opt, ygm::comm &comm) { - comm.cout0() << "\nOptions:" - << "\nOriginal datastore path\t" << opt.original_datastore_path - << "\nDatastore path\t" << opt.datastore_path - << "\nMake index undirected\t" << opt.make_index_undirected - << "\nPruning degree multiplier\t" - << opt.pruning_degree_multiplier << "\nRemove long paths\t" - << opt.remove_long_paths << "\nBatch size\t" << opt.batch_size - << "\nDatastore transfer path\t" << opt.datastore_transfer_path - << "\nk-NN index dump file prefix\t" << opt.index_dump_prefix - << "\nDump index with distance\t" << opt.dump_index_with_distance - << "\nVerbose\t" << opt.verbose << std::endl; -} \ No newline at end of file diff --git a/examples/dnnd_pm_query_example.cpp b/examples/dnnd_pm_query_example.cpp deleted file mode 100644 index f54e0e6..0000000 --- a/examples/dnnd_pm_query_example.cpp +++ /dev/null @@ -1,190 +0,0 @@ -// Copyright 2020-2022 Lawrence Livermore National Security, LLC and other -// saltatlas Project Developers. See the top-level COPYRIGHT file for details. 
-// -// SPDX-License-Identifier: MIT - -#include -#include -#include -#include - -#include "dnnd_example_common.hpp" - -struct option_t { - std::string datastore_path; - std::string original_datastore_path; - int query_k{4}; - double epsilon{0.1}; - double mu{0.0}; - std::size_t batch_size{0}; - std::string query_file_path; - std::string ground_truth_file_path; - std::string query_result_file_path; - bool verbose{true}; -}; - -bool parse_options(int, char **, option_t &, bool &); -template -void usage(std::string_view, cout_type &); -void show_options(const option_t &, ygm::comm &); - -int main(int argc, char **argv) { - ygm::comm comm(&argc, &argv); - show_config(comm); - - bool help{false}; - option_t opt; - if (!parse_options(argc, argv, opt, help)) { - comm.cerr0() << "Invalid option" << std::endl; - usage(argv[0], comm.cerr0()); - return 0; - } - if (help) { - usage(argv[0], comm.cout0()); - return 0; - } - show_options(opt, comm); - - if (!opt.original_datastore_path.empty()) { - if (dnnd_pm_type::copy(opt.original_datastore_path, opt.datastore_path)) { - comm.cout0() << "\nTransferred index." << std::endl; - } else { - comm.cerr0() << "Failed to transfer index." 
<< std::endl; - MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); - } - } - - { - comm.cout0() << "\n<>" << std::endl; - dnnd_pm_type dnnd(dnnd_pm_type::open_read_only, opt.datastore_path, comm, - opt.verbose); - - std::vector queries; - saltatlas::read_query(opt.query_file_path, queries, comm); - - comm.cout0() << "Executing queries" << std::endl; - ygm::timer step_timer; - const auto query_results = dnnd.query_batch( - queries, opt.query_k, opt.epsilon, opt.mu, opt.batch_size); - comm.cf_barrier(); - comm.cout0() << "\nProcessing queries took (s)\t" << step_timer.elapsed() - << std::endl; - - if (!opt.ground_truth_file_path.empty()) { - show_query_recall_score(query_results, opt.ground_truth_file_path, comm); - show_query_recall_score_with_only_distance( - query_results, opt.ground_truth_file_path, comm); - show_query_recall_score_with_distance_ties( - query_results, opt.ground_truth_file_path, comm); - } - - if (!opt.query_result_file_path.empty()) { - saltatlas::utility::gather_and_dump_neighbors( - query_results, opt.query_result_file_path, comm); - } - } -END_BLOCK: - comm.cf_barrier(); - - return 0; -} - -inline bool parse_options(int argc, char **argv, option_t &opt, bool &help) { - opt.datastore_path.clear(); - opt.original_datastore_path.clear(); - opt.query_file_path.clear(); - opt.ground_truth_file_path.clear(); - opt.query_result_file_path.clear(); - - int n; - while ((n = ::getopt(argc, argv, "b:q:n:g:o:z:x:e:m:vh")) != -1) { - switch (n) { - case 'b': - opt.batch_size = std::stoul(optarg); - break; - - case 'q': - opt.query_file_path = optarg; - break; - - case 'n': - opt.query_k = std::stoi(optarg); - break; - - case 'g': - opt.ground_truth_file_path = optarg; - break; - - case 'z': - opt.datastore_path = optarg; - break; - - case 'x': - opt.original_datastore_path = optarg; - break; - - case 'o': - opt.query_result_file_path = optarg; - break; - - case 'v': - opt.verbose = true; - break; - - case 'e': - opt.epsilon = std::stold(optarg); - break; - - case 
'm': - opt.mu = std::stold(optarg); - break; - - case 'h': - help = true; - return true; - - default: - std::cerr << "Invalid option" << std::endl; - std::abort(); - } - } - - if (opt.datastore_path.empty() || opt.query_file_path.empty()) { - return false; - } - - return true; -} - -template -void usage(std::string_view exe_name, cout_type &cout) { - cout << "Usage: mpirun -n [#of processes] " << exe_name - << " [options (see below)]" << std::endl; - - cout << "Options:" - << "\n\t-z [string, required] Path to an index." - << "\n\t-q [string, required] Path to a query file." - << "\n\t-n [int, required] Number of nearest neighbors to find for each " - "query point." - << "\n\t-e [double] Epsilon parameter in PyNNDescent." - << "\n\t-o [string] Path to store query results." - << "\n\t-g [string] Path to a query ground truth file." - << "\n\t-x [string] If specified, transfer an already constructed index " - "from this path to path 'z' at the beginning." - << "\n\t-b [long int] Batch size for query (0 is the full batch mode)." - << "\n\t-v If specified, turn on the verbose mode." - << "\n\t-h Show this menu." 
<< std::endl; -} - -void show_options(const option_t &opt, ygm::comm &comm) { - comm.cout0() << "Options:" - << "\nOriginal datastore path\t" << opt.original_datastore_path - << "\nDatastore path\t" << opt.datastore_path - << "\nQuery file path\t" << opt.query_file_path - << "\nQuery n (#of neighbors to search)\t" << opt.query_k - << "\nEpsilon\t" << opt.epsilon << "\nMu\t" << opt.mu - << "\nBatch size\t" << opt.batch_size - << "\nGround truth file path\t" << opt.ground_truth_file_path - << "\nQuery result file path\t" << opt.query_result_file_path - << "\nVerbose\t" << opt.verbose << std::endl; -} \ No newline at end of file diff --git a/examples/dnnd_simple_example.cpp b/examples/dnnd_simple.cpp similarity index 100% rename from examples/dnnd_simple_example.cpp rename to examples/dnnd_simple.cpp diff --git a/examples/dnnd_simple_custom_distance_example.cpp b/examples/dnnd_simple_custom_distance.cpp similarity index 100% rename from examples/dnnd_simple_custom_distance_example.cpp rename to examples/dnnd_simple_custom_distance.cpp diff --git a/examples/dnnd_simple_custom_point_example.cpp b/examples/dnnd_simple_custom_point.cpp similarity index 100% rename from examples/dnnd_simple_custom_point_example.cpp rename to examples/dnnd_simple_custom_point.cpp diff --git a/examples/dnnd_string_example.cpp b/examples/dnnd_string_example.cpp deleted file mode 100644 index 475b5ba..0000000 --- a/examples/dnnd_string_example.cpp +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright 2020-2024 Lawrence Livermore National Security, LLC and other -// saltatlas Project Developers. See the top-level COPYRIGHT file for details. 
-// -// SPDX-License-Identifier: MIT - -// Usage: -// cd saltatlas/build -// mpirun -n 2 ./examples/dnnd_example - -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include - -// #include "dnnd_example_common.hpp" - -using id_type = uint32_t; -using feature_element_type = char; -using distance_type = float; -using feature_vector_type = saltatlas::feature_vector; - -using dnnd_type = saltatlas::dnnd; -using neighbor_type = typename dnnd_type::neighbor_type; - -using dnnd_pm_type = - saltatlas::dnnd_pm; -using pm_neighbor_type = typename dnnd_pm_type::neighbor_type; - -int main(int argc, char **argv) { - ygm::comm comm(&argc, &argv); - - int index_k{2}; - int query_k{3}; - double r{0.8}; - double delta{0.001}; - bool exchange_reverse_neighbors{true}; - bool make_index_undirected{true}; - double pruning_degree_multiplier{1.5}; - bool remove_long_paths{false}; - double epsilon{0.1}; - double mu{0.2}; - std::size_t batch_size{0}; - std::string distance_name{"levenshtein"}; - std::vector point_file_paths{ - "./examples/datasets/point_string.txt"}; - std::string query_file_path{"./examples/datasets/query_string.txt"}; - std::string ground_truth_file_path{ - "./examples/datasets/ground-truth_string.txt"}; - std::string point_file_format{"str"}; - std::string query_result_file_path{"query-results"}; - bool verbose{true}; - - dnnd_type dnnd(distance_name, comm, std::random_device{}(), verbose); - comm.cf_barrier(); - - comm.cout0() << "<>" << std::endl; - saltatlas::read_points(point_file_paths, point_file_format, verbose, - dnnd.get_point_partitioner(), dnnd.get_point_store(), - comm); - - comm.cout0() << "<>" << std::endl; - dnnd.construct_index(index_k, r, delta, exchange_reverse_neighbors, - batch_size); - - comm.cout0() << "\n<>" << std::endl; - dnnd.optimize_index(make_index_undirected, pruning_degree_multiplier, - remove_long_paths); - - comm.cout0() << "\n<>" << std::endl; - dnnd_pm_type::query_store_type queries; - 
saltatlas::read_query(query_file_path, queries, comm); - - comm.cout0() << "Executing queries" << std::endl; - const auto query_results = - dnnd.query_batch(queries, query_k, epsilon, mu, batch_size); - - comm.cout0() << "Dump query results" << std::endl; - saltatlas::utility::gather_and_dump_neighbors(query_results, - query_result_file_path, comm); - - return 0; -}