From 26ae203c5100c7cbcc3ffc99904e130b6c0b0191 Mon Sep 17 00:00:00 2001 From: Keita Iwabuchi Date: Sat, 10 Sep 2022 00:26:42 -0700 Subject: [PATCH 01/10] (wip) [skip ci] Adding external/app doc --- docs/readthedocs/external/list.md | 33 ++++++++++++++++ docs/readthedocs/external/ripples.md | 56 ++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 docs/readthedocs/external/list.md create mode 100644 docs/readthedocs/external/ripples.md diff --git a/docs/readthedocs/external/list.md b/docs/readthedocs/external/list.md new file mode 100644 index 00000000..8b06d8c8 --- /dev/null +++ b/docs/readthedocs/external/list.md @@ -0,0 +1,33 @@ +# Open Source Projects Using Metall + +## miniVite + +MiniVite is benchmark in the ECP ExaGraph suite that implements a single phase of the Louvain method for community detection. + +miniVite has a mode that uses Metall to store a graph into persistent memory. + +The miniVite version that works with Metall comes with a CMake file. + +For building and running miniVite with Metall see details [here](https://github.com/ECP-ExaGraph/miniVite/tree/metallds2#minivite--metall-and-umap). + +## Ripples + +Ripples is a software framework to study the Influence Maximization problem. + +See [detail](./ripples.md). + + +## HavoqGT + +HavoqGT (Highly Asynchronous Visitor Queue Graph Toolkit) is a framework for expressing asynchronous vertex-centric graph algorithms. + +All graph data is allocated by Metall. + +https://github.com/LLNL/havoqgt + +## saltatlas + +saltatlas DNND is a distributed NNDescent application. +saltatlas DNND leverages Metall to store k-NN index, which requires a heavy construction time. 
+ +https://github.com/LLNL/saltatlas \ No newline at end of file diff --git a/docs/readthedocs/external/ripples.md b/docs/readthedocs/external/ripples.md new file mode 100644 index 00000000..c6c81d86 --- /dev/null +++ b/docs/readthedocs/external/ripples.md @@ -0,0 +1,56 @@ +# Ripples & Metall + +## Introduction + +[Ripples](https://github.com/pnnl/ripples) is a software framework to study the Influence Maximization problem. + +Here, we describe how to build and run Ripples with Metall. +The following instructions are tested with [Ripples v2.1](https://github.com/pnnl/ripples/releases/tag/v2.1). + +## Build Example + +Tested Environment + +- Linux +- Python 3.7 +- GCC 10 +- CMake 3.23 + + +```shell +git clone git@github.com:pnnl/ripples.git +cd ripples +git checkout da08b3e759642a93556f081169c61607354ecd3e + +# Set up Python environment, if not available +# For example: +pip install --user pipenv +pip install --user conan +# If needed: +# export PATH="$HOME/.local/bin:$PATH" + +pipenv --three +pipenv install +pipenv shell + +# Install dependencies +conan create conan/waf-generator user/stable +conan create conan/trng user/stable +# if the line above does not work, +# conan create conan/trng libtrng/4.22@user/stable +conan create conan/metall user/stable + +# Enable the Metall mode +conan install --install-folder build . -o metal=True + +# Enable the Metall configure and build Ripples +./waf configure --enable-metall build_release +``` + +## Run + +```shell +./build/release/tools/imm --input-graph test-data/karate.tsv --seed-set-size 8 --diffusion-model LT --epsilon 0.8 +``` + +See details [Ripples README](https://github.com/pnnl/ripples). 
\ No newline at end of file From 479a4b41ef1d97eb0d3322770408c22b7eedd7a3 Mon Sep 17 00:00:00 2001 From: Keita Iwabuchi Date: Sat, 10 Sep 2022 00:31:52 -0700 Subject: [PATCH 02/10] (wip) [skip ci] Adding external/app doc --- mkdocs.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mkdocs.yml b/mkdocs.yml index 7af06802..b89319df 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -29,5 +29,8 @@ nav: - 'Snapshot': 'detail/snapshot.md' - 'Example': 'detail/example.md' - 'Internal Architecture': 'detail/internal_architecture.md' +- 'External': + - 'Open Source Projects Using Metall': 'external/index.md' + - 'Use Ripples with Metall': 'external/ripples.md' - 'ABOUT': - 'License and Notice': 'about/license_notice.md' From 19bd5897be0b24325981188977d9ce4b8b05b92d Mon Sep 17 00:00:00 2001 From: Keita Iwabuchi Date: Sat, 10 Sep 2022 00:33:26 -0700 Subject: [PATCH 03/10] (wip) [skip ci] Adding external/app doc --- mkdocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs.yml b/mkdocs.yml index b89319df..7d856e3a 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -30,7 +30,7 @@ nav: - 'Example': 'detail/example.md' - 'Internal Architecture': 'detail/internal_architecture.md' - 'External': - - 'Open Source Projects Using Metall': 'external/index.md' + - 'Open Source Projects Using Metall': 'external/list.md' - 'Use Ripples with Metall': 'external/ripples.md' - 'ABOUT': - 'License and Notice': 'about/license_notice.md' From 62b1b9b72a3167a5212f4b33baf6e2f5ba598df4 Mon Sep 17 00:00:00 2001 From: Keita Iwabuchi Date: Tue, 13 Sep 2022 08:24:48 -0700 Subject: [PATCH 04/10] (wip) [skip ci] Adding external/app doc --- docs/readthedocs/external/list.md | 21 ++++++++++----------- docs/readthedocs/external/ripples.md | 13 ++++++++----- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/docs/readthedocs/external/list.md b/docs/readthedocs/external/list.md index 8b06d8c8..d13285cd 100644 --- a/docs/readthedocs/external/list.md +++ 
b/docs/readthedocs/external/list.md @@ -1,27 +1,26 @@ # Open Source Projects Using Metall -## miniVite +## Collaboration Work with the ECP ExaGraph Project +### miniVite -MiniVite is benchmark in the ECP ExaGraph suite that implements a single phase of the Louvain method for community detection. +miniVite is a proxy app that implements a single phase of Louvain method in distributed memory for graph community detection. -miniVite has a mode that uses Metall to store a graph into persistent memory. +miniVite has a mode that uses Metall to store a graph in persistent memory to reuse the data and reduce the overall analytics workload. -The miniVite version that works with Metall comes with a CMake file. +For building and running miniVite with Metall, +see the details located [here](https://github.com/ECP-ExaGraph/miniVite/tree/metallds2#minivite--metall-and-umap). -For building and running miniVite with Metall see details [here](https://github.com/ECP-ExaGraph/miniVite/tree/metallds2#minivite--metall-and-umap). +### Ripples -## Ripples - -Ripples is a software framework to study the Influence Maximization problem. - -See [detail](./ripples.md). +Ripples is a software framework to study the Influence Maximization problem developed at Pacific Northwest National Laboratory. +To build Riplles with Metall, see the details located [here](./ripples.md). ## HavoqGT HavoqGT (Highly Asynchronous Visitor Queue Graph Toolkit) is a framework for expressing asynchronous vertex-centric graph algorithms. -All graph data is allocated by Metall. +Same as MiniVite, HavoqGT uses Metall to store a graph in persistent memory to reuse the data and reduce the overall analytics workload. 
https://github.com/LLNL/havoqgt diff --git a/docs/readthedocs/external/ripples.md b/docs/readthedocs/external/ripples.md index c6c81d86..dcb9f2cb 100644 --- a/docs/readthedocs/external/ripples.md +++ b/docs/readthedocs/external/ripples.md @@ -4,18 +4,21 @@ [Ripples](https://github.com/pnnl/ripples) is a software framework to study the Influence Maximization problem. +Ripples has a mode that uses Metall to allocate its intermediate data, which requires a large amount of memory, in storage (file system) so that +it can handle large-scale problems, exceeding DRAM capacity. + Here, we describe how to build and run Ripples with Metall. -The following instructions are tested with [Ripples v2.1](https://github.com/pnnl/ripples/releases/tag/v2.1). ## Build Example -Tested Environment +Tested Environment: - Linux - Python 3.7 -- GCC 10 +- GCC 10 (GCC >= 8.1 is required) - CMake 3.23 +The following instructions are tested with the latest version of Ripples at the time of writing (commit ID: da08b3e759642a93556f081169c61607354ecd3e). ```shell git clone git@github.com:pnnl/ripples.git @@ -43,11 +46,11 @@ conan create conan/metall user/stable # Enable the Metall mode conan install --install-folder build . 
-o metal=True -# Enable the Metall configure and build Ripples +# Build ./waf configure --enable-metall build_release ``` -## Run +## Run Example ```shell ./build/release/tools/imm --input-graph test-data/karate.tsv --seed-set-size 8 --diffusion-model LT --epsilon 0.8 From f040d0b8b1a08bec618698894d933ae0cbca7b3f Mon Sep 17 00:00:00 2001 From: Keita Iwabuchi Date: Tue, 13 Sep 2022 08:35:24 -0700 Subject: [PATCH 05/10] (wip) [skip ci] Adding external/app doc --- docs/readthedocs/about/publication.md | 31 +- .../basics/compile_time_options.md | 6 +- docs/readthedocs/css/custom.css | 2 +- docs/readthedocs/detail/api.md | 8 +- docs/readthedocs/detail/example.md | 2 +- .../detail/internal_architecture.md | 2 +- docs/readthedocs/external/list.md | 15 +- .../readthedocs/external/ripples-metall.patch | 741 ++++++++++++++++++ docs/readthedocs/external/ripples.md | 66 +- docs/readthedocs/index.md | 22 +- example/README.md | 2 +- mkdocs.yml | 1 + 12 files changed, 836 insertions(+), 62 deletions(-) create mode 100644 docs/readthedocs/external/ripples-metall.patch diff --git a/docs/readthedocs/about/publication.md b/docs/readthedocs/about/publication.md index b8412f3b..e1b8e0e8 100644 --- a/docs/readthedocs/about/publication.md +++ b/docs/readthedocs/about/publication.md @@ -1,17 +1,26 @@ # Publication -## Metall: A Persistent Memory Allocator Enabling Graph Processing +## Metall: A persistent memory allocator for data-centric analytics + +``` +Keita Iwabuchi, Karim Youssef, Kaushik Velusamy, Maya Gokhale, Roger Pearce, +Metall: A persistent memory allocator for data-centric analytics, +Parallel Computing, 2022, 102905, ISSN 0167-8191, https://doi.org/10.1016/j.parco.2022.102905. 
+``` + +- [Parallel Computing](https://www.sciencedirect.com/science/article/abs/pii/S0167819122000114) (journal) -[Available PDF](https://www.osti.gov/servlets/purl/1576900) +- [arXiv](https://arxiv.org/abs/2108.07223) (preprint) + +## Metall: A Persistent Memory Allocator Enabling Graph Processing -A page in IEEE Xplore is [here](https://ieeexplore.ieee.org/document/8945094) ```text -@INPROCEEDINGS{8945094, -author={K. {Iwabuchi} and L. {Lebanoff} and M. {Gokhale} and R. {Pearce}}, -booktitle={2019 IEEE/ACM 9th Workshop on Irregular Applications: Architectures and Algorithms (IA3)}, -title={Metall: A Persistent Memory Allocator Enabling Graph Processing}, -year={2019}, -pages={39-44}, -doi={10.1109/IA349570.2019.00012}, -month={Nov},} +K. Iwabuchi, L. Lebanoff, M. Gokhale and R. Pearce, +"Metall: A Persistent Memory Allocator Enabling Graph Processing," +2019 IEEE/ACM 9th Workshop on Irregular Applications: Architectures and Algorithms (IA3), 2019, +pp. 39-44, doi: 10.1109/IA349570.2019.00012. 
``` + +- [Available PDF](https://www.osti.gov/servlets/purl/1576900) + +- A page in IEEE Xplore is [here](https://ieeexplore.ieee.org/document/8945094) diff --git a/docs/readthedocs/basics/compile_time_options.md b/docs/readthedocs/basics/compile_time_options.md index 609337cc..f30fdc98 100644 --- a/docs/readthedocs/basics/compile_time_options.md +++ b/docs/readthedocs/basics/compile_time_options.md @@ -1,6 +1,6 @@ # Compile-time Options -There are some compile-time options (macro) as follows to configure the behavior of Metall: +There are some compile-time options (C/C++ macro) as follows to configure the behavior of Metall: - METALL_DISABLE_FREE_FILE_SPACE @@ -10,13 +10,13 @@ There are some compile-time options (macro) as follows to configure the behavior - METALL_DEFAULT_VM_RESERVE_SIZE=*bytes* - The default virtual memory reserve size - An internally defined value is used if 0 is specified - - Wll be rounded up to a multiple of the page size internally + - Will be rounded up to a multiple of the system page size (e.g., 4 KB) internally - METALL_INITIAL_SEGMENT_SIZE=*bytes* - The initial segment size - Use the internally defined value if 0 is specified - - Wll be rounded up to a multiple of the page size internally + - Will be rounded up to a multiple of the system page size internally - METALL_FREE_SMALL_OBJECT_SIZE_HINT=*bytes* diff --git a/docs/readthedocs/css/custom.css b/docs/readthedocs/css/custom.css index e45fcc89..f938ea8c 100644 --- a/docs/readthedocs/css/custom.css +++ b/docs/readthedocs/css/custom.css @@ -8,4 +8,4 @@ img[src$="metall_overview.png"] { margin-right: auto; max-width: 70%; height: auto; -} \ No newline at end of file +} diff --git a/docs/readthedocs/detail/api.md b/docs/readthedocs/detail/api.md index c7e8f071..4caddd88 100644 --- a/docs/readthedocs/detail/api.md +++ b/docs/readthedocs/detail/api.md @@ -13,6 +13,8 @@ In multi-process environment, Metall assumes each process allocates its own Meta ## Main APIs in Metall +Here, we list 
Metall's main APIs. + ```C++ // The main header file #include @@ -118,11 +120,11 @@ static bool metall::get_description(const char *dir_path, std::string *descripti Example programs are located in [example](https://github.com/LLNL/metall/tree/master/example). -## Generate API document using Doxygen +## Full API document -A Doxygen configuration file is [here](https://github.com/LLNL/metall/tree/master/docs/Doxyfile.in). +The full API document is available [here](https://software.llnl.gov/metall/api/). -To generate API document: +To generate the full API document locally using Doxygen: ```bash cd metall diff --git a/docs/readthedocs/detail/example.md b/docs/readthedocs/detail/example.md index 93e66d35..d5d82914 100644 --- a/docs/readthedocs/detail/example.md +++ b/docs/readthedocs/detail/example.md @@ -2,4 +2,4 @@ Example programs are located in [example](https://github.com/LLNL/metall/tree/master/example) -To build the examples see [build source files in Metall](../advanced_build/example_test_bench.md) \ No newline at end of file +To build the examples see [build source files in Metall](../advanced_build/cmake.md) \ No newline at end of file diff --git a/docs/readthedocs/detail/internal_architecture.md b/docs/readthedocs/detail/internal_architecture.md index b12143a6..da73bc48 100644 --- a/docs/readthedocs/detail/internal_architecture.md +++ b/docs/readthedocs/detail/internal_architecture.md @@ -26,7 +26,7 @@ Objects larger than the half chunk size (*large objects*) use a single or multip By default, Metall frees DRAM and file space by chunk, that is, small object deallocations do not free physical memory immediately, whereas large object deallocations do. Metall also has a mode that tries to free the corresponding space when an object equal or larger than *N* bytes is deallocated, -where N is set by the compile time option *METALL_FREE_SMALL_OBJECT_SIZE_HINT=N* (see [Build and Install](../getting_started/build_and_install.md)). 
+where N is set by the compile time option (macro) *METALL_FREE_SMALL_OBJECT_SIZE_HINT=N* (see [Build and Install](../basics/compile_time_options.md)). ### Internal Allocation Size Same as other major heap memory allocators, Metall rounds up a small object to the nearest internal allocation size. diff --git a/docs/readthedocs/external/list.md b/docs/readthedocs/external/list.md index d13285cd..ec91cf13 100644 --- a/docs/readthedocs/external/list.md +++ b/docs/readthedocs/external/list.md @@ -14,19 +14,18 @@ see the details located [here](https://github.com/ECP-ExaGraph/miniVite/tree/met Ripples is a software framework to study the Influence Maximization problem developed at Pacific Northwest National Laboratory. -To build Riplles with Metall, see the details located [here](./ripples.md). +To build Ripples with Metall, see the details located [here](./ripples.md). ## HavoqGT -HavoqGT (Highly Asynchronous Visitor Queue Graph Toolkit) is a framework for expressing asynchronous vertex-centric graph algorithms. +[HavoqGT](https://github.com/LLNL/havoqgt) (Highly Asynchronous Visitor Queue Graph Toolkit) is a framework for expressing asynchronous vertex-centric graph algorithms. Same as MiniVite, HavoqGT uses Metall to store a graph in persistent memory to reuse the data and reduce the overall analytics workload. -https://github.com/LLNL/havoqgt +## saltatlas (DNND) -## saltatlas +[saltatlas](https://github.com/LLNL/saltatlas) is a distributed approximate k-nearest neighbors framework. +saltatlas contains a distributed NNDescent algorithm implementation (DNND). +DNND is designed to work with Metall to store its main data structure, which requires a heavy construction time, in persistent memory to avoid redundant data ingestion tasks. -saltatlas DNND is a distributed NNDescent application. -saltatlas DNND leverages Metall to store k-NN index, which requires a heavy construction time. 
- -https://github.com/LLNL/saltatlas \ No newline at end of file +To use saltatlas DNND with Metall, see its [README](https://github.com/LLNL/saltatlas). diff --git a/docs/readthedocs/external/ripples-metall.patch b/docs/readthedocs/external/ripples-metall.patch new file mode 100644 index 00000000..a11cd016 --- /dev/null +++ b/docs/readthedocs/external/ripples-metall.patch @@ -0,0 +1,741 @@ +diff --git a/conan/trng/conanfile.py b/conan/trng/conanfile.py +index c5feaeb..adbdc81 100644 +--- a/conan/trng/conanfile.py ++++ b/conan/trng/conanfile.py +@@ -5,6 +5,7 @@ from conans import ConanFile, CMake, tools + class LibtrngConan(ConanFile): + name = "libtrng" + license = "BSD" ++ version = "4.22" + author = "Heiko Bauke" + url = "https://www.numbercrunch.de/trng/" + description = "Tina's Random Number Generator Library" +@@ -13,15 +14,18 @@ class LibtrngConan(ConanFile): + options = {"shared": [True, False]} + default_options = "shared=True" + generators = "cmake" +- +- def source(self): +- tools.download('https://github.com/rabauke/trng4/archive/refs/tags/v' + self.version + '.tar.gz', 'trng-' + self.version + '.tar.gz') +- tools.unzip('trng-' + self.version + '.tar.gz') +- return 'trng4-' + self.version ++ scm = { ++ "type" : "git", ++ "url" : "https://github.com/mminutoli/trng4.git", ++ "subfolder" : "trng", ++ "revision" : "basic_hip_support" ++ } + + def build(self): + cmake = CMake(self) +- cmake.configure(source_folder='trng4-' + self.version) ++ cmake.definitions['TRNG_ENABLE_EXAMPLES'] = False ++ cmake.definitions['TRNG_ENABLE_TESTS'] = False ++ cmake.configure(source_folder='trng') + cmake.parallel = False + cmake.build() + cmake.install() +diff --git a/conanfile.py b/conanfile.py +index 6b7196a..ab00183 100644 +--- a/conanfile.py ++++ b/conanfile.py +@@ -15,7 +15,8 @@ class RipplesConan(ConanFile): + self.options['spdlog'].shared = True + + def requirements(self): +- self.requires('spdlog/1.9.2') ++ self.requires('fmt/9.1.0') ++ self.requires('spdlog/1.10.0') 
+ self.requires('nlohmann_json/3.9.1') + self.requires('catch2/2.13.3') + self.requires('cli11/2.1.1') +diff --git a/include/ripples/configuration.h b/include/ripples/configuration.h +index a1c2b2f..3b260e6 100644 +--- a/include/ripples/configuration.h ++++ b/include/ripples/configuration.h +@@ -55,6 +55,7 @@ namespace ripples { + //! input graphs. + struct GraphInputConfiguration { + std::string IFileName{""}; //!< The input file name ++ std::string metall_dir{"/tmp/graph"}; //!< Where is the metall directory? + bool weighted{false}; //!< is Graph weighted? + bool undirected{false}; //!< is Graph undirected? + bool disable_renumbering{false}; //!< trust the input to be clean. +@@ -72,6 +73,9 @@ struct GraphInputConfiguration { + "The input file with the edge-list.") + ->group("Input Options") + ->required(); ++ app.add_flag("--metall-store-dir", metall_dir, ++ "Directory to store metall graph data.") ++ ->group("Input Options"); + app.add_flag("--reload-binary", reload, "Reload a graph from binary input") + ->group("Input Options"); + app.add_flag("-u,--undirected", undirected, "The input graph is undirected") +diff --git a/include/ripples/generate_rrr_sets.h b/include/ripples/generate_rrr_sets.h +index df44175..919b063 100644 +--- a/include/ripples/generate_rrr_sets.h ++++ b/include/ripples/generate_rrr_sets.h +@@ -61,9 +61,10 @@ + + #ifdef ENABLE_MEMKIND + #include "memkind_allocator.h" ++#include "pmem_allocator.h" + #endif + +-#ifdef ENABLE_METALL ++#ifdef ENABLE_METALL_RRRSETS + #include "metall/metall.hpp" + #include "metall/container/vector.hpp" + #endif +@@ -72,13 +73,13 @@ namespace ripples { + + #if defined ENABLE_MEMKIND + template +-using RRRsetAllocator = libmemkind::static_kind::allocator; +-#elif defined ENABLE_METALL ++using RRRsetAllocator = libmemkind::pmem::allocator; ++#elif defined ENABLE_METALL_RRRSETS + template + using RRRsetAllocator = metall::manager::allocator_type; + + metall::manager &metall_manager_instance() { +- static 
metall::manager manager(metall::create_only, "/tmp/ripples"); ++ static metall::manager manager(metall::create_only, "/dev/shm/metall_tmp"); + return manager; + } + +@@ -90,7 +91,7 @@ using RRRsetAllocator = std::allocator; + //! \brief The Random Reverse Reachability Sets type + template + using RRRset = +-#ifdef ENABLE_METALL ++#ifdef ENABLE_METALL_RRRSETS + metall::container::vector>; + #else +diff --git a/include/ripples/graph.h b/include/ripples/graph.h +index d34ecbe..2a027bb 100644 +--- a/include/ripples/graph.h ++++ b/include/ripples/graph.h +@@ -51,6 +51,12 @@ + + #include + ++#if defined ENABLE_METALL ++#include ++#include ++#include "metall/metall.hpp" ++#endif ++ + namespace ripples { + + //! \brief Forward Direction Graph loading policy. +@@ -195,7 +201,8 @@ struct WeightedDestination : public Destination { + //! of the original data. + template , +- typename DirectionPolicy = ForwardDirection> ++ typename DirectionPolicy = ForwardDirection, ++ typename allocator_t = std::allocator> + class Graph { + public: + //! The size type. +@@ -226,22 +233,26 @@ class Graph { + edge_type *end_; + }; + +- //! Empty Graph Constructor. +- Graph() ++ //! Allocator Graph Constructor. 
++ Graph(allocator_t allocator = allocator_t()) + : numNodes(0), + numEdges(0), + index(nullptr), + edges(nullptr), +- idMap(), +- reverseMap() {} ++ graph_allocator(allocator), ++ idMap(allocator), ++ reverseMap(allocator) {} + + Graph(const Graph &O) + : numNodes(O.numNodes), + numEdges(O.numEdges), + idMap(O.idMap), +- reverseMap(O.reverseMap) { +- edges = new edge_type[numEdges]; +- index = new edge_type *[numNodes + 1]; ++ reverseMap(O.reverseMap), ++ graph_allocator(O.graph_allocator) { ++ auto edge_allocator = edge_allocator_t(graph_allocator); ++ edges = edge_allocator.allocate(numEdges); ++ auto index_allocator = index_allocator_t(graph_allocator); ++ index = index_allocator.allocate(numNodes + 1); + #pragma omp parallel for + for (size_t i = 0; i < numEdges; ++i) { + edges[i] = O.edges[i]; +@@ -249,8 +260,8 @@ class Graph { + + #pragma omp parallel for + for (size_t i = 0; i < numNodes + 1; ++i) { +- index[i] = edges + (reinterpret_cast(O.index[i]) - +- reinterpret_cast(O.index)); ++ index[i] = std::addressof(edges[0]) + (O.index[i] - ++ O.index[0]); + } + } + +@@ -260,8 +271,10 @@ class Graph { + idMap = O.idMap; + reverseMap = O.reverseMap; + +- edges = new edge_type[numEdges]; +- index = new edge_type *[numNodes + 1]; ++ auto edge_allocator = edge_allocator_t(graph_allocator); ++ edges = edge_allocator.allocate(numEdges); ++ auto index_allocator = index_allocator_t(graph_allocator); ++ index = index_allocator.allocate(numNodes + 1); + #pragma omp parallel for + for (size_t i = 0; i < numEdges; ++i) { + edges[i] = O.edges[i]; +@@ -269,8 +282,8 @@ class Graph { + + #pragma omp parallel for + for (size_t i = 0; i < numNodes + 1; ++i) { +- index[i] = edges + (reinterpret_cast(O.index[i]) - +- reinterpret_cast(O.index)); ++ index[i] = std::addressof(edges[0]) + (O.index[i] - ++ O.index[0]); + } + } + +@@ -281,6 +294,7 @@ class Graph { + numEdges(O.numEdges), + index(O.index), + edges(O.edges), ++ graph_allocator(O.graph_allocator), + 
idMap(std::move(O.idMap)), + reverseMap(std::move(O.reverseMap)) { + O.numNodes = 0; +@@ -295,8 +309,17 @@ class Graph { + Graph &operator=(Graph &&O) { + if (this == &O) return *this; + +- delete[] index; +- delete[] edges; ++ // delete[] index; ++ // delete[] edges; ++ ++ if(index){ ++ auto index_allocator = index_allocator_t(graph_allocator); ++ index_allocator.deallocate(index, numNodes + 1); ++ } ++ if(edges){ ++ auto edge_allocator = edge_allocator_t(graph_allocator); ++ edge_allocator.deallocate(edges, numEdges); ++ } + + numNodes = O.numNodes; + numEdges = O.numEdges; +@@ -313,16 +336,6 @@ class Graph { + return *this; + } + +- //! Reload from binary constructor. +- //! +- //! \tparam FStream The type of the input stream. +- //! +- //! \param FS The binary stream containing the graph dump. +- template +- Graph(FStream &FS) { +- load_binary(FS); +- } +- + //! \brief Constructor. + //! + //! Build a Graph from a sequence of edges. The vertex identifiers are +@@ -335,7 +348,10 @@ class Graph { + //! \param begin The start of the edge list. + //! \param end The end of the edge list. + template +- Graph(EdgeIterator begin, EdgeIterator end, bool renumbering) { ++ Graph(EdgeIterator begin, EdgeIterator end, bool renumbering, allocator_t allocator = allocator_t()) ++ : graph_allocator(allocator), ++ idMap(allocator), ++ reverseMap(allocator){ + for (auto itr = begin; itr != end; ++itr) { + idMap[itr->source]; + idMap[itr->destination]; +@@ -344,12 +360,14 @@ class Graph { + size_t num_nodes = renumbering ? 
idMap.size() : idMap.rbegin()->first + 1; + size_t num_edges = std::distance(begin, end); + +- index = new edge_type *[num_nodes + 1]; +- edges = new edge_type[num_edges]; ++ auto index_allocator = index_allocator_t(graph_allocator); ++ index = index_allocator.allocate(num_nodes + 1); ++ auto edge_allocator = edge_allocator_t(graph_allocator); ++ edges = edge_allocator.allocate(num_edges); + + #pragma omp parallel for + for (size_t i = 0; i < num_nodes + 1; ++i) { +- index[i] = edges; ++ index[i] = std::addressof(edges[0]); + } + + #pragma omp parallel for +@@ -379,10 +397,10 @@ class Graph { + } + + for (size_t i = 1; i <= num_nodes; ++i) { +- index[i] += index[i - 1] - edges; ++ index[i] += index[i - 1] - std::addressof(edges[0]); + } + +- std::vector ptrEdge(index, index + num_nodes); ++ std::vector ptrEdge(std::addressof(index[0]), std::addressof(index[0]) + num_nodes); + for (auto itr = begin; itr != end; ++itr) { + *ptrEdge[DirectionPolicy::Source(itr, idMap)] = + edge_type::template Create(itr, idMap); +@@ -392,8 +410,26 @@ class Graph { + + //! \brief Destuctor. + ~Graph() { +- if (index) delete[] index; +- if (edges) delete[] edges; ++ if(index){ ++ auto index_allocator = index_allocator_t(graph_allocator); ++ index_allocator.deallocate(index, numNodes + 1); ++ } ++ if(edges){ ++ auto edge_allocator = edge_allocator_t(graph_allocator); ++ edge_allocator.deallocate(edges, numEdges); ++ } ++ // if (index) delete[] index; ++ // if (edges) delete[] edges; ++ } ++ ++ void recalculate_addresses() const { ++ if(index){ ++ #pragma omp parallel for ++ for (size_t i = 0; i < numNodes + 1; ++i) { ++ index[i] = std::addressof(edges[0]) + (index[i] - ++ index[0]); ++ } ++ } + } + + //! Returns the out-degree of a vertex. +@@ -496,45 +532,49 @@ class Graph { + using transposed_direction = + typename std::conditional, + ForwardDirection>::type; +- using transposed_type = Graph; ++ using transposed_type = Graph; + + friend transposed_type; + + public: + //! 
Get the transposed graph. + //! \return the transposed graph. +- transposed_type get_transpose() const { ++ transposed_type get_transpose(allocator_t allocator = allocator_t()) const { + using out_dest_type = typename transposed_type::edge_type; +- transposed_type G; ++ transposed_type G(allocator); + G.numEdges = numEdges; + G.numNodes = numNodes; + G.reverseMap = reverseMap; + G.idMap = idMap; +- G.index = new out_dest_type *[numNodes + 1]; +- G.edges = new out_dest_type[numEdges]; ++ using index_transposed_allocator_t = typename std::allocator_traits::template rebind_alloc; ++ auto index_allocator = index_transposed_allocator_t(G.graph_allocator); ++ G.index = index_allocator.allocate(numNodes + 1); ++ using edge_transposed_allocator_t = typename std::allocator_traits::template rebind_alloc; ++ auto edge_allocator = edge_transposed_allocator_t(G.graph_allocator); ++ G.edges = edge_allocator.allocate(numEdges); + + #pragma omp parallel for +- for (auto itr = G.index; itr < G.index + numNodes + 1; ++itr) { ++ for (auto itr = std::addressof(G.index[0]); itr < std::addressof(G.index[0]) + numNodes + 1; ++itr) { + *itr = nullptr; + } + + #pragma omp parallel for +- for (auto itr = G.edges; itr < G.edges + numEdges; ++itr) { ++ for (auto itr = std::addressof(G.edges[0]); itr < std::addressof(G.edges[0]) + numEdges; ++itr) { + *itr = out_dest_type(); + } + +- std::for_each(edges, edges + numEdges, ++ std::for_each(std::addressof(edges[0]), std::addressof(edges[0]) + numEdges, + [&](const edge_type &d) { ++G.index[d.vertex + 1]; }); + +- G.index[0] = G.edges; +- std::partial_sum(G.index, G.index + numNodes + 1, G.index, ++ G.index[0] = std::addressof(G.edges[0]); ++ std::partial_sum(std::addressof(G.index[0]), std::addressof(G.index[0]) + numNodes + 1, std::addressof(G.index[0]), + [](out_dest_type *a, out_dest_type *b) -> out_dest_type * { + size_t sum = reinterpret_cast(a) + + reinterpret_cast(b); + return reinterpret_cast(sum); + }); + +- std::vector 
destPointers(G.index, G.index + numNodes); ++ std::vector destPointers(std::addressof(G.index[0]), std::addressof(G.index[0]) + numNodes); + + for (vertex_type v = 0; v < numNodes; ++v) { + for (auto u : neighbors(v)) { +@@ -546,11 +586,14 @@ class Graph { + return G; + } + +- edge_type **csr_index() const { return index; } ++ edge_type **csr_index() const { ++ return std::addressof(index[0]); ++ } + +- edge_type *csr_edges() const { return edges; } ++ edge_type *csr_edges() const { ++ return std::addressof(edges[0]); ++ } + +- private: + template + void load_binary(FStream &FS) { + if (!FS.is_open()) throw "Bad things happened!!!"; +@@ -570,8 +613,10 @@ class Graph { + + for (VertexTy i = 0; i < numNodes; ++i) idMap[reverseMap[i]] = i; + +- index = new edge_type *[numNodes + 1]; +- edges = new edge_type[numEdges]; ++ auto index_allocator = index_allocator_t(graph_allocator); ++ index = index_allocator.allocate(numNodes + 1); ++ auto edge_allocator = edge_allocator_t(graph_allocator); ++ edges = edge_allocator.allocate(numEdges); + + #pragma omp parallel for + for (size_t i = 0; i < numNodes + 1; ++i) { +@@ -583,25 +628,51 @@ class Graph { + edges[i] = edge_type(); + } + +- FS.read(reinterpret_cast(index), ++ FS.read(reinterpret_cast(std::addressof(index[0])), + (numNodes + 1) * sizeof(ptrdiff_t)); + + sequence_of::load(index, index + numNodes + 1, index); + + std::transform(index, index + numNodes + 1, index, + [=](edge_type *v) -> edge_type * { +- return reinterpret_cast(v) + edges; ++ return reinterpret_cast(v) + std::addressof(edges[0]); + }); + +- FS.read(reinterpret_cast(edges), numEdges * sizeof(edge_type)); ++ FS.read(reinterpret_cast(std::addressof(edges[0])), numEdges * sizeof(edge_type)); + sequence_of::load(edges, edges + numEdges, edges); + } + +- edge_type **index; +- edge_type *edges; +- +- std::map idMap; +- std::vector reverseMap; ++ private: ++ // Allocator and pointer types for the indices array ++ using index_allocator_t = typename 
std::allocator_traits::template rebind_alloc; ++ using index_pointer_t = typename std::allocator_traits::pointer; ++ index_pointer_t index; ++ ++ ++ // Allocator and pointer types for the edges array ++ using edge_allocator_t = typename std::allocator_traits::template rebind_alloc; ++ using edge_pointer_t = typename std::allocator_traits::pointer; ++ edge_pointer_t edges; ++ ++ allocator_t graph_allocator; ++ ++ // Allocator and vector types for the indices array ++ using reverse_map_allocator_t = typename std::allocator_traits::template rebind_alloc; ++ #if defined ENABLE_METALL ++ using reverse_map_vector_t = boost::container::vector; ++ #else ++ using reverse_map_vector_t = std::vector; ++ #endif ++ ++ using idmap_allocator_t = typename std::allocator_traits::template rebind_alloc>; ++ #if defined ENABLE_METALL ++ using idmap_t = boost::container::map, idmap_allocator_t>; ++ #else ++ using idmap_t = std::map, idmap_allocator_t>; ++ #endif ++ ++ idmap_t idMap; ++ reverse_map_vector_t reverseMap; + + size_t numNodes; + size_t numEdges; +diff --git a/include/ripples/imm.h b/include/ripples/imm.h +index 8030582..32c5e54 100644 +--- a/include/ripples/imm.h ++++ b/include/ripples/imm.h +@@ -180,8 +180,8 @@ auto Sampling(const GraphTy &G, const ConfTy &CFG, double l, + + double LB = 0; + #if defined ENABLE_MEMKIND +- RRRsetAllocator allocator(libmemkind::kinds::DAX_KMEM_PREFERRED); +- #elif defined ENABLE_METALL ++ RRRsetAllocator allocator("/mnt/bb/reeceneff/memkind_tmp", 0); ++ #elif defined ENABLE_METALL_RRRSETS + RRRsetAllocator allocator = metall_manager_instance().get_allocator(); + #else + RRRsetAllocator allocator; +@@ -266,10 +266,10 @@ auto Sampling(const GraphTy &G, const ConfTy &CFG, double l, + + double LB = 0; + #if defined ENABLE_MEMKIND +- RRRsetAllocator allocator(libmemkind::kinds::DAX_KMEM_PREFERRED); +- #elif defined ENABLE_METALL ++ RRRsetAllocator allocator("/mnt/bb/reeceneff/memkind_tmp", 0); ++ #elif defined ENABLE_METALL_RRRSETS + 
RRRsetAllocator allocator = metall_manager_instance().get_allocator(); +-#else ++ #else + RRRsetAllocator allocator; + #endif + std::vector> RR; +@@ -410,7 +410,6 @@ auto IMM(const GraphTy &G, const ConfTy &CFG, double l, GeneratorTy &gen, + auto R = + Sampling(G, CFG, l, gen, record, std::forward(model_tag), + std::forward(ex_tag)); +- + #if CUDA_PROFILE + auto logst = spdlog::stdout_color_st("IMM-profile"); + std::vector rrr_sizes; +@@ -428,7 +427,6 @@ auto IMM(const GraphTy &G, const ConfTy &CFG, double l, GeneratorTy &gen, + FindMostInfluentialSet(G, CFG, R, record, gen.isGpuEnabled(), + std::forward(ex_tag)); + auto end = std::chrono::high_resolution_clock::now(); +- + record.FindMostInfluentialSet = end - start; + + start = std::chrono::high_resolution_clock::now(); +diff --git a/include/ripples/loaders.h b/include/ripples/loaders.h +index 4f67661..a33cc55 100644 +--- a/include/ripples/loaders.h ++++ b/include/ripples/loaders.h +@@ -232,21 +232,22 @@ std::vector loadEdgeList(const Configuration &CFG, PRNG &weightGen) { + } + + namespace { +-template +-GraphTy loadGraph_helper(ConfTy &CFG, PrngTy &PRNG) { +- GraphTy G; ++template > ++GraphTy loadGraph_helper(ConfTy &CFG, PrngTy &PRNG, allocator_t allocator = allocator_t()) { ++ GraphTy G(allocator); + + if (!CFG.reload) { + using vertex_type = typename GraphTy::vertex_type; + using weight_type = typename GraphTy::edge_type::edge_weight; + using edge_type = ripples::Edge; + auto edgeList = ripples::loadEdgeList(CFG, PRNG); +- GraphTy tmpG(edgeList.begin(), edgeList.end(), !CFG.disable_renumbering); ++ GraphTy tmpG(edgeList.begin(), edgeList.end(), !CFG.disable_renumbering, allocator); + G = std::move(tmpG); + } else { + std::ifstream binaryDump(CFG.IFileName, std::ios::binary); +- GraphTy tmpG(binaryDump); +- G = std::move(tmpG); ++ // GraphTy tmpG(binaryDump, allocator); ++ // G = std::move(tmpG); ++ G.load_binary(binaryDump); + } + + return G; +@@ -262,22 +263,22 @@ GraphTy loadGraph_helper(ConfTy &CFG, 
PrngTy &PRNG) { + //! \param CFG The configuration object. + //! \param PRNG The parallel random number generator. + //! \return The GraphTy graph loaded from the input file. +-template +-GraphTy loadGraph(ConfTy &CFG, PrngTy &PRNG) { +- GraphTy G; ++template > ++GraphTy loadGraph(ConfTy &CFG, PrngTy &PRNG, allocator_t allocator = allocator_t()) { ++ GraphTy G(allocator); + if (CFG.distribution == "uniform") { + WeightGenerator> gen( + PRNG, CFG.scale_factor); +- G = loadGraph_helper(CFG, gen); ++ G = loadGraph_helper(CFG, gen, allocator); + } else if (CFG.distribution == "normal") { + WeightGenerator> gen( + PRNG, + trng::truncated_normal_dist(CFG.mean, CFG.variance, 0.0, 1.0), + CFG.scale_factor); +- G = loadGraph_helper(CFG, gen); ++ G = loadGraph_helper(CFG, gen, allocator); + } else if (CFG.distribution == "const") { + auto gen = [&]() -> float { return CFG.mean; }; +- G = loadGraph_helper(CFG, gen); ++ G = loadGraph_helper(CFG, gen, allocator); + } else { + throw std::domain_error("Unsupported distribution"); + } +diff --git a/include/ripples/mpi/imm.h b/include/ripples/mpi/imm.h +index 80bbe3a..b64b438 100644 +--- a/include/ripples/mpi/imm.h ++++ b/include/ripples/mpi/imm.h +@@ -151,7 +151,7 @@ auto Sampling(const GraphTy &G, const ConfTy &CFG, double l, + double LB = 0; + #if defined ENABLE_MEMKIND + RRRsetAllocator allocator("/pmem1", 0); +- #elif defined ENABLE_METALL ++ #elif defined ENABLE_METALL_RRRSETS + RRRsetAllocator allocator = metall_manager_instance().get_allocator(); + #else + RRRsetAllocator allocator; +diff --git a/tools/imm.cc b/tools/imm.cc +index e835c76..870821c 100644 +--- a/tools/imm.cc ++++ b/tools/imm.cc +@@ -39,7 +39,6 @@ + // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ // + //===----------------------------------------------------------------------===// +- + #include + #include + #include +@@ -154,14 +153,45 @@ int main(int argc, char **argv) { + weightGen.split(2, 0); + + using dest_type = ripples::WeightedDestination; ++ #if defined ENABLE_METALL ++ using GraphFwd = ++ ripples::Graph, metall::manager::allocator_type>; ++ using GraphBwd = ++ ripples::Graph, metall::manager::allocator_type>; ++ #else + using GraphFwd = + ripples::Graph>; + using GraphBwd = + ripples::Graph>; +- console->info("Loading..."); ++ #endif ++console->info("Loading..."); ++auto loading_start = std::chrono::high_resolution_clock::now(); ++#if defined ENABLE_METALL ++bool exists = metall::manager::consistent(CFG.metall_dir.c_str()); ++metall::manager manager = (exists ? ++ metall::manager(metall::open_only, CFG.metall_dir.c_str()) ++ : metall::manager(metall::create_only, CFG.metall_dir.c_str())); ++ GraphBwd *Gr; ++ if(exists){ ++ console->info("Previously existing graph exists! 
Loading..."); ++ Gr = manager.find("graph").first; ++ // Gr->recalculate_addresses(); ++ } ++ else{ ++ console->info("Creating new metall directory..."); ++ GraphFwd Gf = ripples::loadGraph(CFG, weightGen, manager.get_allocator()); ++ Gr = manager.construct("graph")(Gf.get_transpose(manager.get_allocator())); ++ } ++ ++ GraphBwd &G(*Gr); ++ //GraphBwd G(Gr[0]); ++#else + GraphFwd Gf = ripples::loadGraph(CFG, weightGen); + GraphBwd G = Gf.get_transpose(); ++#endif ++ auto loading_end = std::chrono::high_resolution_clock::now(); + console->info("Loading Done!"); ++ spdlog::get("console")->info("Loading took {} s", (double)std::chrono::duration_cast(loading_end - loading_start).count() / 1000.0); + console->info("Number of Nodes : {}", G.num_nodes()); + console->info("Number of Edges : {}", G.num_edges()); + +@@ -182,8 +212,8 @@ int main(int argc, char **argv) { + decltype(R.Total) real_total; + if (CFG.diffusionModel == "IC") { + ripples::StreamingRRRGenerator< +- decltype(G), decltype(generator), +- typename ripples::RRRsets::iterator, ++ GraphBwd, decltype(generator), ++ typename ripples::RRRsets::iterator, + ripples::independent_cascade_tag> + se(G, generator, R, workers - gpu_workers, gpu_workers, + CFG.worker_to_gpu); +@@ -195,8 +225,8 @@ int main(int argc, char **argv) { + real_total = end - start; + } else if (CFG.diffusionModel == "LT") { + ripples::StreamingRRRGenerator< +- decltype(G), decltype(generator), +- typename ripples::RRRsets::iterator, ++ GraphBwd, decltype(generator), ++ typename ripples::RRRsets::iterator, + ripples::linear_threshold_tag> + se(G, generator, R, workers - gpu_workers, gpu_workers, + CFG.worker_to_gpu); +diff --git a/tools/mpi-imm.cc b/tools/mpi-imm.cc +index 60091f8..0ddb121 100644 +--- a/tools/mpi-imm.cc ++++ b/tools/mpi-imm.cc +@@ -134,13 +134,40 @@ int main(int argc, char *argv[]) { + weightGen.split(2, 0); + + using edge_type = ripples::WeightedDestination; ++ #if defined ENABLE_METALL ++ using GraphFwd = ++ ripples::Graph, 
metall::manager::allocator_type>; ++ using GraphBwd = ++ ripples::Graph, metall::manager::allocator_type>; ++ #else + using GraphFwd = + ripples::Graph>; + using GraphBwd = + ripples::Graph>; +- console->info("Loading..."); +- GraphFwd Gf = ripples::loadGraph(CFG, weightGen); ++ #endif ++console->info("Loading..."); ++#if defined ENABLE_METALL ++bool exists = metall::manager::consistent(CFG.metall_dir.c_str()); ++metall::manager manager = (exists ? ++ metall::manager(metall::open_only, CFG.metall_dir.c_str()) ++ : metall::manager(metall::create_only, CFG.metall_dir.c_str())); ++ GraphBwd *Gr; ++ if(exists){ ++ console->info("Previously existing graph exists! Loading..."); ++ Gr = manager.find("graph").first; ++ // Gr->recalculate_addresses(); ++ } ++ else{ ++ console->info("Creating new metall directory..."); ++ GraphFwd Gf = ripples::loadGraph(CFG, weightGen, manager.get_allocator()); ++ Gr = manager.construct("graph")(Gf.get_transpose(manager.get_allocator())); ++ } ++ GraphBwd G(Gr[0]); ++#else ++ std::allocator = GraphAllocator; ++ GraphFwd Gf = ripples::loadGraph(CFG, weightGen, GraphAllocator); + GraphBwd G = Gf.get_transpose(); ++#endif + console->info("Loading Done!"); + console->info("Number of Nodes : {}", G.num_nodes()); + console->info("Number of Edges : {}", G.num_edges()); +diff --git a/tools/wscript b/tools/wscript +index 5be422d..862e61b 100644 +--- a/tools/wscript ++++ b/tools/wscript +@@ -68,7 +68,7 @@ def build(bld): + cuda_acc_cxx_flags += ['-DENABLE_MEMKIND=1'] + + if bld.env.ENABLE_METALL: +- cuda_acc_tools_deps += ['metall', 'boost'] ++ cuda_acc_tools_deps += ['metall', 'boost', 'libstdc++fs'] + cuda_acc_cxx_flags += ['-DENABLE_METALL=1'] + + bld(features='cxx cxxprogram', source='imm.cc', target='imm', diff --git a/docs/readthedocs/external/ripples.md b/docs/readthedocs/external/ripples.md index dcb9f2cb..a359a10f 100644 --- a/docs/readthedocs/external/ripples.md +++ b/docs/readthedocs/external/ripples.md @@ -2,58 +2,94 @@ ## Introduction 
-[Ripples](https://github.com/pnnl/ripples) is a software framework to study the Influence Maximization problem. +This page describes technical details about the ongoing collaboration with the [Ripples](https://github.com/pnnl/ripples), a software framework to study the Influence Maximization problem. -Ripples has a mode that uses Metall to allocate its intermediate data, which requires a large amount of memory, in storage (file system) so that -it can handle large-scale problems, exceeding DRAM capacity. +Ripples needs to perform multiple data construction steps. However, those steps account for a large amount of time in real workloads. -Here, we describe how to build and run Ripples with Metall. +Ripples can persistently store a portion of the constructed data structures using the standard file I/O operation. +However, it stores only simple data structures (e.g., std::vector) because it causes additional overheads in terms of coding and performance to assemble and disassemble complex data structures (e.g., std::map). + +Metall allows applications to store the C++ containers easily. It also has multiple features for large-scale data management, such as the snapshot, parallel data copy, and user-level mmap implementation support for better data locality. + +This collaboration is aiming at leveraging Metall in Ripples' actual workload. To investigate how Metall can improve Ripples', we have integrated Metall into Ripples. + +### Integrating Metall into Ripples + +Ripples uses STL containers for its internal data structures. +Thanks to Metall's rich C++ API (which was mainly inherited from the Boost.Interprocess library), +we were able to integrate Metall into Ripples following the C++ standard syntax. + +Specifically, we changed the original code so that internal data structures can accept a custom memory allocator instead of the default one. +On the other hand, we did not have to modify the core parts of graph construction or analytics code. 
+ +Such reasonable code modification exhibits Metall's high adaptability. ## Build Example +Here we describe how to build Ripples with Metall to persistently store Ripples graph data. + Tested Environment: - Linux - Python 3.7 -- GCC 10 (GCC >= 8.1 is required) +- GCC 10 - CMake 3.23 The following instructions are tested with the latest version of Ripples at the time of writing (commit ID: da08b3e759642a93556f081169c61607354ecd3e). ```shell -git clone git@github.com:pnnl/ripples.git -cd ripples -git checkout da08b3e759642a93556f081169c61607354ecd3e - # Set up Python environment, if not available # For example: pip install --user pipenv pip install --user conan -# If needed: +# If needed, configure PATH, for example: # export PATH="$HOME/.local/bin:$PATH" +# Get source code +git clone git@github.com:pnnl/ripples.git +cd ripples +git checkout da08b3e759642a93556f081169c61607354ecd3e + +# Apply the patch file (download from the link under this code block) +git apply ripples-metall.patch + pipenv --three pipenv install pipenv shell +# Create a conan profile +conan profile new default --detect +conan profile update settings.compiler.libcxx=libstdc++11 default +conan profile update env.CC=$(which gcc) default +conan profile update env.CXX=$(which g++) default + # Install dependencies conan create conan/waf-generator user/stable conan create conan/trng user/stable -# if the line above does not work, -# conan create conan/trng libtrng/4.22@user/stable conan create conan/metall user/stable # Enable the Metall mode -conan install --install-folder build . -o metal=True +conan install --install-folder build . --build fmt -o metal=True # Build ./waf configure --enable-metall build_release ``` +Download a patch file from here [ripples-metall.patch](./ripples-metall.patch). 
+ ## Run Example ```shell -./build/release/tools/imm --input-graph test-data/karate.tsv --seed-set-size 8 --diffusion-model LT --epsilon 0.8 +./build/release/tools/imm -i test-data/karate.tsv -e 0.5 -k 100 -d LT --parallel --metall-store-dir=/mnt/ssd/graph ``` -See details [Ripples README](https://github.com/pnnl/ripples). \ No newline at end of file +- Reads edge data from test-data/karate.tsv +- Stores the constructed graph in /mnt/ssd/graph + +## Allocate RRRSets Using Metall + +Ripples + Metall has another mode that allocates intermediate data (called RRRSets) using Metall. + +To enable the mode, define `ENABLE_METALL_RRRSETS` macro (e.g., insert `#define ENABLE_METALL_RRRSETS` at the beginning of `tools/imm.cc`). + +RRRSet (intermediate data) is allocated in `/dev/shm` (tmpfs) by default. To change the location, modify line 82 `include/ripples/generate_rrr_sets.h` and re-build the program (`./waf configure --enable-metall build_release`). \ No newline at end of file diff --git a/docs/readthedocs/index.md b/docs/readthedocs/index.md index f465ab91..95b9135e 100644 --- a/docs/readthedocs/index.md +++ b/docs/readthedocs/index.md @@ -1,3 +1,7 @@ +# Metall: Persistent Memory Allocator for Data-Centric Analytics + +This Read the Docs page describes Metall (open-source library available [here]((https://github.com/LLNL/metall))). + ## Overview Metall is a persistent memory allocator designed to provide developers with an API to allocate custom C++ data structures in both block-storage and @@ -17,21 +21,3 @@ and provides persistent memory snapshotting (versioning) capabilities. Example programs that use Metall are listed [here](detail/example.md). ![Metall Overview](./img/metall_overview.png) - - -## Publication - -[Metall: A Persistent Memory Allocator Enabling Graph Processing](https://www.osti.gov/servlets/purl/1576900) - -```text -@INPROCEEDINGS{8945094, -author={K. {Iwabuchi} and L. {Lebanoff} and M. {Gokhale} and R. 
{Pearce}}, -booktitle={2019 IEEE/ACM 9th Workshop on Irregular Applications: Architectures and Algorithms (IA3)}, -title={Metall: A Persistent Memory Allocator Enabling Graph Processing}, -year={2019}, -pages={39-44}, -doi={10.1109/IA349570.2019.00012}, -month={Nov},} -``` - -IEEE Xplore [page](https://ieeexplore.ieee.org/document/8945094) \ No newline at end of file diff --git a/example/README.md b/example/README.md index 10912a71..c4d9a24b 100644 --- a/example/README.md +++ b/example/README.md @@ -1,4 +1,4 @@ -# List of Examples +****# List of Examples To build examples see a [page](https://metall.readthedocs.io/en/latest/advanced_build/cmake/) hosted on Read the Docs. diff --git a/mkdocs.yml b/mkdocs.yml index 7d856e3a..632147ad 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -33,4 +33,5 @@ nav: - 'Open Source Projects Using Metall': 'external/list.md' - 'Use Ripples with Metall': 'external/ripples.md' - 'ABOUT': + - 'Publication': 'about/publication.md' - 'License and Notice': 'about/license_notice.md' From 8c8e4b68ec2ab80b43c3f451cbc756c034215b3c Mon Sep 17 00:00:00 2001 From: Keita Iwabuchi Date: Mon, 3 Oct 2022 14:16:34 -0700 Subject: [PATCH 06/10] Bugfix: anonymous new mapping --- .../segment_storage/mmap_segment_storage.hpp | 153 +++++++++++++----- 1 file changed, 117 insertions(+), 36 deletions(-) diff --git a/include/metall/kernel/segment_storage/mmap_segment_storage.hpp b/include/metall/kernel/segment_storage/mmap_segment_storage.hpp index d53d0b03..ac86fbc4 100644 --- a/include/metall/kernel/segment_storage/mmap_segment_storage.hpp +++ b/include/metall/kernel/segment_storage/mmap_segment_storage.hpp @@ -36,8 +36,6 @@ class mmap_segment_storage { // -------------------------------------------------------------------------------- // mmap_segment_storage() { #ifdef METALL_USE_ANONYMOUS_NEW_MAP - // TODO: implement msync for anonymous mapping - static_assert(true, "METALL_USE_ANONYMOUS_NEW_MAP does not work now"); logger::out(logger::level::info, __FILE__, 
__LINE__, "METALL_USE_ANONYMOUS_NEW_MAP is defined"); #endif @@ -73,7 +71,11 @@ class mmap_segment_storage { m_read_only(other.m_read_only), m_free_file_space(other.m_free_file_space), m_block_fd_list(std::move(other.m_block_fd_list)), - m_block_size(other.m_block_size) { + m_block_size(other.m_block_size) +#ifdef METALL_USE_ANONYMOUS_NEW_MAP + , m_anonymous_map_flag_list(other.m_anonymous_map_flag_list) +#endif + { other.priv_set_broken_status(); } @@ -88,7 +90,9 @@ class mmap_segment_storage { m_free_file_space = other.m_free_file_space; m_block_fd_list = std::move(other.m_block_fd_list); m_block_size = other.m_block_size; - +#ifdef METALL_USE_ANONYMOUS_NEW_MAP + m_anonymous_map_flag_list = std::move(other.m_anonymous_map_flag_list); +#endif other.priv_set_broken_status(); return (*this); } @@ -254,7 +258,8 @@ class mmap_segment_storage { } bool priv_is_open() const { - return (check_sanity() && m_system_page_size > 0 && m_num_blocks > 0 && m_vm_region_size > 0 && m_current_segment_size > 0 + return (check_sanity() && m_system_page_size > 0 && m_num_blocks > 0 && m_vm_region_size > 0 + && m_current_segment_size > 0 && m_segment && !m_base_path.empty() && !m_block_fd_list.empty() && m_block_size > 0); } @@ -329,7 +334,10 @@ class mmap_segment_storage { return true; } - bool priv_open(const std::string &base_path, const size_type vm_region_size, void *const vm_region, const bool read_only) { + bool priv_open(const std::string &base_path, + const size_type vm_region_size, + void *const vm_region, + const bool read_only) { if (!check_sanity()) return false; if (is_open()) return false; // Cannot open multiple segments simultaneously. 
@@ -362,14 +370,16 @@ class mmap_segment_storage { const auto fd = priv_map_file(file_name, m_block_size, m_current_segment_size, read_only); if (fd == -1) { std::stringstream ss; - ss << "Failed to map a file " << m_block_size; + ss << "Failed to map a file " << file_name; logger::out(logger::level::error, __FILE__, __LINE__, ss.str().c_str()); priv_destroy_segment(); priv_set_broken_status(); return false; - } else { - m_block_fd_list.template emplace_back(fd); } + m_block_fd_list.emplace_back(fd); +#ifdef METALL_USE_ANONYMOUS_NEW_MAP + m_anonymous_map_flag_list.push_back(false); +#endif m_current_segment_size += m_block_size; ++m_num_blocks; } @@ -445,27 +455,21 @@ class mmap_segment_storage { } #ifdef METALL_USE_ANONYMOUS_NEW_MAP - if (!priv_map_anonymous(file_name, file_size, segment_offset)) { - return false; + const auto fd = priv_map_anonymous(file_name, file_size, segment_offset); + if (m_anonymous_map_flag_list.size() < block_number + 1) { + m_anonymous_map_flag_list.resize(block_number + 1, false); } - // Although we do not map the file, we still open it so that other functions in this class work. - const auto fd = ::open(file_name.c_str(), O_RDWR); - if (fd == -1) { - logger::perror(logger::level::error, __FILE__, __LINE__, "open"); - std::string s("Failed to open a file " + file_name); - logger::out(logger::level::error, __FILE__, __LINE__, s.c_str()); - // Destroy the map by overwriting PROT_NONE map since the VM region is managed by another class. 
- mdtl::map_with_prot_none(static_cast(m_segment) + segment_offset, file_size); - return false; - } - m_block_fd_list.emplace_back(fd); + m_anonymous_map_flag_list[block_number] = true; #else const auto fd = priv_map_file(file_name, file_size, segment_offset, false); +#endif if (fd == -1) { return false; } - m_block_fd_list.emplace_back(fd); -#endif + if (m_block_fd_list.size() < block_number + 1) { + m_block_fd_list.resize(block_number + 1, -1); + } + m_block_fd_list[block_number] = fd; return true; } @@ -510,9 +514,9 @@ class mmap_segment_storage { return ret.first; } - bool priv_map_anonymous(const std::string &path, - const size_type region_size, - const different_type segment_offset) const { + int priv_map_anonymous(const std::string &path, + const size_type region_size, + const different_type segment_offset) const { assert(!path.empty()); assert(region_size > 0); assert(segment_offset >= 0); @@ -529,17 +533,28 @@ class mmap_segment_storage { if (!addr) { std::string s("Failed to map an anonymous region at " + std::to_string(segment_offset)); logger::out(logger::level::error, __FILE__, __LINE__, s.c_str()); - return false; + return -1; } - return true; + // Although we do not map the file, we still open it so that other functions in this class works. + const auto fd = ::open(path.c_str(), O_RDWR); + if (fd == -1) { + logger::perror(logger::level::error, __FILE__, __LINE__, "open"); + std::string s("Failed to open a file " + path); + logger::out(logger::level::error, __FILE__, __LINE__, s.c_str()); + // Destroy the map by overwriting PROT_NONE map since the VM region is managed by another class. 
+ mdtl::map_with_prot_none(static_cast(m_segment) + segment_offset, region_size); + return -1; + } + + return fd; } bool priv_destroy_segment() { if (!is_open()) return false; int succeeded = true; - for (const auto &fd : m_block_fd_list) { + for (const auto &fd: m_block_fd_list) { succeeded &= mdtl::os_close(fd); } @@ -553,7 +568,7 @@ class mmap_segment_storage { return succeeded; } - bool priv_sync(const bool sync) const { + bool priv_sync(const bool sync) { if (!priv_sync_segment(sync)) { // Failing this operation is not a critical error logger::out(logger::level::error, __FILE__, __LINE__, "Failed to synchronize the segment"); return false; @@ -561,7 +576,7 @@ class mmap_segment_storage { return true; } - bool priv_sync_segment(const bool sync) const { + bool priv_sync_segment(const bool sync) { if (!is_open()) return false; if (m_read_only) return true; @@ -591,7 +606,7 @@ class mmap_segment_storage { return true; } - bool priv_parallel_msync(const bool sync) const { + bool priv_parallel_msync(const bool sync) { std::atomic_uint_fast64_t block_no_count = 0; std::atomic_uint_fast64_t num_successes = 0; @@ -599,6 +614,13 @@ class mmap_segment_storage { while (true) { const auto block_no = block_no_count.fetch_add(1); if (block_no < m_block_fd_list.size()) { +#ifdef METALL_USE_ANONYMOUS_NEW_MAP + assert(m_anonymous_map_flag_list.size() > block_no); + if (m_anonymous_map_flag_list[block_no]) { + num_successes.fetch_add(priv_sync_anonymous_map(block_no) ? 1 : 0); + continue; + } +#endif const auto map = static_cast(m_segment) + block_no * m_block_size; num_successes.fetch_add(mdtl::os_msync(map, m_block_size, sync) ? 
1 : 0); } else { @@ -614,11 +636,11 @@ class mmap_segment_storage { logger::out(logger::level::info, __FILE__, __LINE__, ss.str().c_str()); } std::vector> threads(num_threads); - for (auto &th : threads) { - th = std::make_unique(diff_sync); + for (auto &th: threads) { + th = std::make_unique(diff_sync); } - for (auto &th : threads) { + for (auto &th: threads) { th->join(); } @@ -630,6 +652,14 @@ class mmap_segment_storage { if (offset + nbytes > m_current_segment_size) return false; +#ifdef METALL_USE_ANONYMOUS_NEW_MAP + const auto block_no = offset / m_block_size; + assert(m_anonymous_map_flag_list.size() > block_no); + if (m_anonymous_map_flag_list[block_no]) { + return priv_uncommit_private_anonymous_pages(offset, nbytes); + } +#endif + if (m_free_file_space) return priv_uncommit_pages_and_free_file_space(offset, nbytes); else @@ -644,6 +674,54 @@ class mmap_segment_storage { return mdtl::uncommit_shared_pages(static_cast(m_segment) + offset, nbytes); } + bool priv_uncommit_private_anonymous_pages(const different_type offset, const size_type nbytes) const { + return mdtl::uncommit_private_anonymous_pages(static_cast(m_segment) + offset, nbytes); + } + +#ifdef METALL_USE_ANONYMOUS_NEW_MAP + bool priv_sync_anonymous_map(const size_type block_no) { + assert(m_anonymous_map_flag_list[block_no]); + { + std::string s("Sync anonymous map at block " + std::to_string(block_no)); + logger::out(logger::level::info, __FILE__, __LINE__, s.c_str()); + } + + auto *const addr = static_cast(m_segment) + block_no * m_block_size; + if (::write(m_block_fd_list[block_no], addr, m_block_size) != (ssize_t)m_block_size) { + std::string s("Failed to write back a block"); + logger::perror(logger::level::error, __FILE__, __LINE__, s.c_str()); + priv_destroy_segment(); + priv_set_broken_status(); + return false; + } + m_anonymous_map_flag_list[block_no] = false; + + { + std::string s("Map block " + std::to_string(block_no) + " as a non-anonymous map"); + logger::out(logger::level::info, 
__FILE__, __LINE__, s.c_str()); + } + [[maybe_unused]] static constexpr int map_nosync = +#ifdef MAP_NOSYNC + MAP_NOSYNC; +#else + 0; +#endif + const auto mapped_addr = mdtl::map_file_write_mode(m_block_fd_list[block_no], + addr, + m_block_size, + 0, + MAP_FIXED | map_nosync); + if (!mapped_addr || mapped_addr != addr) { + std::string s("Failed to map a block"); + logger::out(logger::level::error, __FILE__, __LINE__, s.c_str()); + priv_destroy_segment(); + priv_set_broken_status(); + return false; + } + return true; + } +#endif + bool priv_set_system_page_size() { m_system_page_size = mdtl::get_page_size(); if (m_system_page_size == -1) { @@ -721,6 +799,9 @@ class mmap_segment_storage { std::vector m_block_fd_list; size_type m_block_size{0}; bool m_broken{false}; +#ifdef METALL_USE_ANONYMOUS_NEW_MAP + std::vector m_anonymous_map_flag_list; +#endif }; } // namespace kernel From 74ce24b23987199b32237c68e6825b4a58d6c1b1 Mon Sep 17 00:00:00 2001 From: Keita Iwabuchi Date: Mon, 3 Oct 2022 14:43:46 -0700 Subject: [PATCH 07/10] Add mapping bench --- bench/CMakeLists.txt | 3 +- bench/mapping/CMakeLists.txt | 1 + bench/mapping/run_mapping_bench.cpp | 206 ++++++++++++++++++++++++++++ 3 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 bench/mapping/CMakeLists.txt create mode 100644 bench/mapping/run_mapping_bench.cpp diff --git a/bench/CMakeLists.txt b/bench/CMakeLists.txt index d5c6201b..123cc662 100644 --- a/bench/CMakeLists.txt +++ b/bench/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(simple_alloc) add_subdirectory(adjacency_list) add_subdirectory(bfs) -add_subdirectory(rand_engine) \ No newline at end of file +add_subdirectory(rand_engine) +add_subdirectory(mapping) \ No newline at end of file diff --git a/bench/mapping/CMakeLists.txt b/bench/mapping/CMakeLists.txt new file mode 100644 index 00000000..e79eb906 --- /dev/null +++ b/bench/mapping/CMakeLists.txt @@ -0,0 +1 @@ +add_metall_executable(run_mapping_bench run_mapping_bench.cpp) \ No newline at 
end of file diff --git a/bench/mapping/run_mapping_bench.cpp b/bench/mapping/run_mapping_bench.cpp new file mode 100644 index 00000000..04a06d32 --- /dev/null +++ b/bench/mapping/run_mapping_bench.cpp @@ -0,0 +1,206 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using rand_engine = metall::utility::rand_512; +static constexpr std::size_t k_page_size = 4096; + +namespace { +namespace mdtl = metall::mtlldetail; +} + +auto random_write_by_page(const std::size_t size, unsigned char *const map) { + const auto num_pages = size / k_page_size; + rand_engine rand_engine(123); + std::uniform_int_distribution<> dist(0, num_pages - 1); + + const auto s = mdtl::elapsed_time_sec(); + for (std::size_t i = 0; i < num_pages; ++i) { + const auto page_no = dist(rand_engine); + const off_t offset = static_cast(page_no * k_page_size); + map[offset] = '0'; + } + const auto t = mdtl::elapsed_time_sec(s); + + return t; +} + +auto random_read_by_page(const std::size_t size, const unsigned char *const map) { + const auto num_pages = size / k_page_size; + rand_engine rand_engine(1234); + std::uniform_int_distribution<> dist(0, num_pages - 1); + + const auto s = mdtl::elapsed_time_sec(); + for (std::size_t i = 0; i < num_pages; ++i) { + const auto page_no = dist(rand_engine); + const off_t offset = static_cast(page_no * k_page_size); + [[maybe_unused]] volatile char dummy = map[offset]; + } + const auto t = mdtl::elapsed_time_sec(s); + + return t; +} + +int create_normal_file(std::string_view path) { + const int fd = ::open(path.data(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); + if (fd == -1) { + std::cerr << "Failed to create a file" << std::endl; + std::abort(); + } + return fd; +} + +int create_tmpfile(std::string_view path) { + static char file_template[] = "/mmap.XXXXXX"; + + char fullname[path.size() + sizeof(file_template)]; + (void)strcpy(fullname, path.data()); + (void)strcat(fullname, file_template); + + int fd = -1; + if ((fd 
= mkstemp(fullname)) < 0) { + std::perror("Could not create temporary file"); + std::abort(); + } + + (void)unlink(fullname); + + return fd; +} + +void extend_file(const int fd, const std::size_t size, const bool fill_with_zero) { + if (!mdtl::extend_file_size(fd, size, fill_with_zero)) { + std::cerr << "Failed to extend file" << std::endl; + std::abort(); + } +} + +auto map_file(const int fd, const std::size_t size) { + static constexpr int k_map_nosync = +#ifdef MAP_NOSYNC + MAP_NOSYNC; +#else + 0; +#warning "MAP_NOSYNC is not defined" +#endif + + auto *const map = mdtl::os_mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED | k_map_nosync, fd, 0); + if (!map) { + std::cerr << " Failed mapping" << std::endl; + std::abort(); + } + + return map; +} + +void close_file(const int fd) { + if (!mdtl::os_close(fd)) { + std::cerr << __LINE__ << " Failed to close file" << std::endl; + std::abort(); + } +} + +void unmap(void *const addr, const std::size_t size) { + if (!mdtl::munmap(addr, size, false)) { + std::cerr << __LINE__ << " Failed to munmap" << std::endl; + std::abort(); + } +} + +/// \brief Run benchmark to evaluate different mapping methods +void run_bench_one_time(std::string_view dir_path, + const std::size_t length, + const bool init_file_writing_zero, + std::map> &time_table) { + + const auto bench_core = [&time_table, length](std::string_view mode, unsigned char *const map) { + time_table[std::string(mode) + " write"].push_back(random_write_by_page(length, map)); + time_table[std::string(mode) + " read"].push_back(random_read_by_page(length, map)); + }; + + // Use 'new' + { + auto *const map = new unsigned char[length]; + bench_core("malloc", map); + delete[] map; + } + + // Use a normal file and mmap + { + std::string file_path{std::string(dir_path) + "/map-file"}; + const int fd = create_normal_file(file_path); + extend_file(fd, length, init_file_writing_zero); + auto *const map = static_cast(map_file(fd, length)); + close_file(fd); + 
bench_core("Normal-file", map); + unmap(map, length); + } + + // Use tmpfile and mmap + { + const int fd = create_tmpfile(dir_path); + extend_file(fd, length, init_file_writing_zero); + auto *const map = static_cast(map_file(fd, length)); + close_file(fd); + bench_core("tmpfile", map); + unmap(map, length); + } + + // Use Metall + { + metall::manager manager(metall::create_only, dir_path.data()); + auto *map = static_cast(manager.allocate(length)); + bench_core("Metall", map); + manager.deallocate(map); + } + metall::manager::remove(dir_path.data()); + +} + +void run_bench(std::string_view dir_path, + const std::size_t num_repeats, + const std::size_t length, + const bool init_file_writing_zero) { + std::cout << "\n----------" << std::endl; + std::cout << "Directory Path:\t" << dir_path + << "\nRepeats:\t" << num_repeats + << "\nLength:\t" << length + << "\nInit w/ writing:\t" << init_file_writing_zero + << "\n" << std::endl; + + // Run bench + std::map> time_table; + for (std::size_t i = 0; i < num_repeats; ++i) { + run_bench_one_time(dir_path, length, init_file_writing_zero, time_table); + } + + // Show results + for (const auto &entry: time_table) { + const auto &mode = entry.first; + const auto × = entry.second; + std::cout << std::fixed; + std::cout << std::setprecision(2); + std::cout << mode << " took (s)\t" + << std::accumulate(times.begin(), times.end(), 0.0f) / times.size() << std::endl; + } +} + +int main() { + static constexpr std::size_t size = k_page_size * 1024 * 10; + const int num_repeats = 10; + +#if defined(__linux__) + run_bench("/dev/shm", num_repeats, size, false); + run_bench("/dev/shm", num_repeats, size, true); +#endif + run_bench("/tmp", num_repeats, size, false); + run_bench("/tmp", num_repeats, size, true); + + return 0; +} \ No newline at end of file From 6f6ba023db0920d57617e829d11c75bf013ed64c Mon Sep 17 00:00:00 2001 From: Keita Iwabuchi Date: Tue, 4 Oct 2022 16:42:05 -0700 Subject: [PATCH 08/10] Bugfix and speed up in 
all_memory_deallocated --- include/metall/kernel/chunk_directory.hpp | 9 ++++----- include/metall/kernel/segment_allocator.hpp | 15 +++++++++++---- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/include/metall/kernel/chunk_directory.hpp b/include/metall/kernel/chunk_directory.hpp index 2f642504..30b67c1e 100644 --- a/include/metall/kernel/chunk_directory.hpp +++ b/include/metall/kernel/chunk_directory.hpp @@ -412,17 +412,16 @@ class chunk_directory { return buf; } - std::vector get_all_large_chunks() const { - std::vector buf; - + const std::size_t num_used_large_chunks() const { + std::size_t count = 0; for (chunk_no_type chunk_no = 0; chunk_no < size(); ++chunk_no) { if (m_table[chunk_no].type == chunk_type::large_chunk_head || m_table[chunk_no].type == chunk_type::large_chunk_body) { - buf.push_back(chunk_no); + ++count; } } - return buf; + return count; } private: diff --git a/include/metall/kernel/segment_allocator.hpp b/include/metall/kernel/segment_allocator.hpp index cd33faba..a21e6c47 100644 --- a/include/metall/kernel/segment_allocator.hpp +++ b/include/metall/kernel/segment_allocator.hpp @@ -187,11 +187,17 @@ class segment_allocator { /// \brief Checks if all memory is deallocated. bool all_memory_deallocated() const { + if (m_chunk_directory.size() == 0) { + return true; + } + #ifndef METALL_DISABLE_OBJECT_CACHE - if (!priv_check_all_small_allocations_are_deallocated()) - return false; + if (priv_check_all_small_allocations_are_cached() && m_chunk_directory.num_used_large_chunks() == 0) { + return true; + } #endif - return m_chunk_directory.get_all_large_chunks().empty(); + + return false; } /// \brief @@ -536,7 +542,8 @@ class segment_allocator { #endif #ifndef METALL_DISABLE_OBJECT_CACHE - bool priv_check_all_small_allocations_are_deallocated() const { + /// \brief Checks if all marked (used) slots in the chunk directory exist in the object cache. 
+ bool priv_check_all_small_allocations_are_cached() const { const auto marked_slots = m_chunk_directory.get_all_marked_slots(); std::set small_allocs; for (const auto &item : marked_slots) { From b8b1b00ca4aedb85a42e220e6c1fecfac5d6da82 Mon Sep 17 00:00:00 2001 From: Keita Iwabuchi Date: Tue, 4 Oct 2022 16:45:05 -0700 Subject: [PATCH 09/10] Brush up test scripts. --- scripts/CI/build_and_test.sh | 81 +++--------------- scripts/release_test/full_build_and_test.sh | 95 ++++----------------- scripts/test_kernel.sh | 74 ++++++++++++++++ scripts/test_utility.sh | 11 +++ 4 files changed, 117 insertions(+), 144 deletions(-) create mode 100644 scripts/test_kernel.sh diff --git a/scripts/CI/build_and_test.sh b/scripts/CI/build_and_test.sh index 679edeab..af843c62 100755 --- a/scripts/CI/build_and_test.sh +++ b/scripts/CI/build_and_test.sh @@ -2,6 +2,7 @@ ############################################################################## # Bash script that builds and tests Metall with many compile time configurations +# # 1. Set environmental variables for build # Set manually: # export CC=gcc @@ -12,6 +13,9 @@ # spack load g++ # spack load boost # +# Metall's CMake configuration step downloads the Boost C++ libraries automatically +# if the library is not found. +# # 2. 
Set optional environmental variables for test # export METALL_TEST_DIR=/tmp # export METALL_LIMIT_MAKE_PARALLELS=n @@ -21,83 +25,26 @@ # sh ./scripts/CI/build_and_test.sh ############################################################################## -####################################### -# Builds and runs test programs -# Globals: -# METALL_ROOT_DIR -# METALL_TEST_DIR -# METALL_LIMIT_MAKE_PARALLELS (option) -# Arguments: -# CMake options to pass -# Outputs: STDOUT and STDERR -####################################### -run_build_and_test_core() { - local BUILD_DIR=./build - - mkdir -p ${BUILD_DIR} - pushd ${BUILD_DIR} - echo "Build and test in ${PWD}" - - # Build - local CMAKE_OPTIONS="$@" - local CMAKE_FILE_LOCATION=${METALL_ROOT_DIR} - or_die cmake ${CMAKE_FILE_LOCATION} ${CMAKE_OPTIONS} - if [[ -z "${METALL_LIMIT_MAKE_PARALLELS}" ]]; then - or_die make -j - else - or_die make -j${METALL_LIMIT_MAKE_PARALLELS} - fi - - # Test 1 - rm -rf ${METALL_TEST_DIR} - or_die ctest --timeout 1000 - - # Test 2 - rm -rf ${METALL_TEST_DIR} - pushd bench/adjacency_list - or_die bash ${METALL_ROOT_DIR}/bench/adjacency_list/test/test.sh -d${METALL_TEST_DIR} - popd - - # Test 3 - rm -rf ${METALL_TEST_DIR} - pushd bench/adjacency_list - or_die bash ${METALL_ROOT_DIR}/bench/adjacency_list/test/test_large.sh -d${METALL_TEST_DIR} - popd - - # TODO: reflink test and C_API test - - rm -rf ${METALL_TEST_DIR} - - popd - rm -rf ${BUILD_DIR} -} - -####################################### -# Show some system information -# Outputs: STDOUT and STDERR -####################################### -show_system_info() { - exec_cmd df -h - exec_cmd df -ih - exec_cmd free -g - exec_cmd uname -r -} - ####################################### # main function # Globals: -# METALL_BUILD_TYPE (option) +# METALL_BUILD_DIR (option, defined if not given) # METALL_TEST_DIR (option, defined if not given) # METALL_ROOT_DIR (defined in this function, readonly) +# METALL_BUILD_TYPE (option) # Outputs: STDOUT and STDERR 
####################################### main() { readonly METALL_ROOT_DIR=${PWD} + source ${METALL_ROOT_DIR}/scripts/test_kernel.sh source ${METALL_ROOT_DIR}/scripts/test_utility.sh + echo "Build and test on ${HOSTNAME}" show_system_info - echo "Build and test on ${HOSTNAME}" + if [[ -z "${METALL_BUILD_DIR}" ]]; then + readonly METALL_BUILD_DIR="${METALL_ROOT_DIR}/build_${RANDOM}" + fi setup_test_dir export METALL_TEST_DIR @@ -111,7 +58,8 @@ main() { for DISABLE_FREE_FILE_SPACE in OFF; do for DISABLE_SMALL_OBJECT_CACHE in OFF; do for FREE_SMALL_OBJECT_SIZE_HINT in 0; do - run_build_and_test_core -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + run_build_and_test_kernel \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ -DDISABLE_FREE_FILE_SPACE=${DISABLE_FREE_FILE_SPACE} \ -DDISABLE_SMALL_OBJECT_CACHE=${DISABLE_SMALL_OBJECT_CACHE} \ -DFREE_SMALL_OBJECT_SIZE_HINT=${FREE_SMALL_OBJECT_SIZE_HINT} \ @@ -124,8 +72,7 @@ main() { -DBUILD_EXAMPLE=ON \ -DRUN_BUILD_AND_TEST_WITH_CI=ON \ -DBUILD_VERIFICATION=OFF \ - -DVERBOSE_SYSTEM_SUPPORT_WARNING=OFF \ - -DLOGGING=OFF + -DVERBOSE_SYSTEM_SUPPORT_WARNING=OFF done done done diff --git a/scripts/release_test/full_build_and_test.sh b/scripts/release_test/full_build_and_test.sh index 89ef2693..340b2c6e 100644 --- a/scripts/release_test/full_build_and_test.sh +++ b/scripts/release_test/full_build_and_test.sh @@ -4,16 +4,19 @@ # Bash script that builds and tests Metall with all compile time configurations # This test would take a few hours at least # -# 1. Set environmental variables for build +# 1. Set environmental variables for build, if needed # Set manually: # export CC=gcc # export CXX=g++ -# export CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:/path/to/boost +# export CMAKE_PREFIX_PATH=/path/to/boost:${CMAKE_PREFIX_PATH} # # Or, configure environmental variables using spack: # spack load g++ # spack load boost # +# Metall's CMake configuration step downloads the Boost C++ libraries automatically +# if the library is not found. +# # 2. 
Set optional environmental variables for test # export METALL_TEST_DIR=/tmp # export METALL_BUILD_DIR=./build @@ -23,105 +26,43 @@ # sh ./scripts/release_test/full_build_and_test.sh ############################################################################## -####################################### -# Builds documents -# Globals: -# METALL_ROOT_DIR -# METALL_BUILD_DIR -# Outputs: STDOUT and STDERR -####################################### -build_docs() { - mkdir -p ${METALL_BUILD_DIR} - cd ${METALL_BUILD_DIR} - echo "Build and test in ${PWD}" - - # Build - local CMAKE_FILE_LOCATION=${METALL_ROOT_DIR} - or_die cmake ${CMAKE_FILE_LOCATION} -DBUILD_DOC=ON - or_die make build_doc - - cd ../ - rm -rf ${METALL_BUILD_DIR} -} - -####################################### -# Builds and runs test programs -# Globals: -# METALL_ROOT_DIR -# METALL_TEST_DIR -# METALL_BUILD_DIR -# METALL_LIMIT_MAKE_PARALLELS (option) -# Arguments: -# CMake options to pass -# Outputs: STDOUT and STDERR -####################################### -run_build_and_test_core() { - mkdir -p ${METALL_BUILD_DIR} - pushd ${METALL_BUILD_DIR} - echo "Build and test in ${PWD}" - - # Build - local CMAKE_OPTIONS="$@" - local CMAKE_FILE_LOCATION=${METALL_ROOT_DIR} - or_die cmake ${CMAKE_FILE_LOCATION} ${CMAKE_OPTIONS} - if [[ -z "${METALL_LIMIT_MAKE_PARALLELS}" ]]; then - or_die make -j - else - or_die make -j${METALL_LIMIT_MAKE_PARALLELS} - fi - - # Test 1 - rm -rf ${METALL_TEST_DIR} - or_die ctest --timeout 1000 - - # Test 2 - rm -rf ${METALL_TEST_DIR} - pushd bench/adjacency_list - or_die bash ./test/test.sh -d${METALL_TEST_DIR} - popd - - # Test 3 - rm -rf ${METALL_TEST_DIR} - pushd bench/adjacency_list - or_die bash ./test/test_large.sh -d${METALL_TEST_DIR} - popd - - # TODO: reflink test and C_API test - - rm -rf ${METALL_TEST_DIR} - popd - rm -rf ${METALL_BUILD_DIR} -} - ####################################### # main function # Globals: -# METALL_TEST_DIR (option, defined if not given) # METALL_BUILD_DIR 
(option, defined if not given) +# METALL_TEST_DIR (option, defined if not given) # METALL_ROOT_DIR (defined in this function, readonly) # Outputs: STDOUT and STDERR ####################################### main() { readonly METALL_ROOT_DIR=${PWD} + source ${METALL_ROOT_DIR}/scripts/test_kernel.sh source ${METALL_ROOT_DIR}/scripts/test_utility.sh + echo "Build and test on ${HOSTNAME}" + show_system_info + if [[ -z "${METALL_BUILD_DIR}" ]]; then readonly METALL_BUILD_DIR="${METALL_ROOT_DIR}/build_${RANDOM}" fi - # Build documents only + setup_test_dir + export METALL_TEST_DIR + + # Build documents build_docs for BUILD_TYPE in Debug RelWithDebInfo Release; do for DISABLE_FREE_FILE_SPACE in ON OFF; do for DISABLE_SMALL_OBJECT_CACHE in ON OFF; do for FREE_SMALL_OBJECT_SIZE_HINT in 0 8 4096 65536; do - for LOGGING in ON OFF; do - run_build_and_test_core -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + for USE_ANONYMOUS_NEW_MAP in ON OFF; do + run_build_and_test_kernel \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ -DDISABLE_FREE_FILE_SPACE=${DISABLE_FREE_FILE_SPACE} \ -DDISABLE_SMALL_OBJECT_CACHE=${DISABLE_SMALL_OBJECT_CACHE} \ -DFREE_SMALL_OBJECT_SIZE_HINT=${FREE_SMALL_OBJECT_SIZE_HINT} \ - -DLOGGING=${LOGGING} \ + -DUSE_ANONYMOUS_NEW_MAP=${USE_ANONYMOUS_NEW_MAP} \ -DBUILD_BENCH=ON \ -DBUILD_TEST=ON \ -DRUN_LARGE_SCALE_TEST=ON \ diff --git a/scripts/test_kernel.sh b/scripts/test_kernel.sh new file mode 100644 index 00000000..f8c2848a --- /dev/null +++ b/scripts/test_kernel.sh @@ -0,0 +1,74 @@ +####################################### +# Builds and runs test programs +# Globals: +# METALL_ROOT_DIR +# METALL_TEST_DIR +# METALL_BUILD_DIR +# METALL_LIMIT_MAKE_PARALLELS (option) +# Arguments: +# CMake options to pass +# Outputs: STDOUT and STDERR +####################################### +run_build_and_test_kernel() { + source ${METALL_ROOT_DIR}/scripts/test_utility.sh + + mkdir -p ${METALL_BUILD_DIR} + pushd ${METALL_BUILD_DIR} + echo "Build and test in ${PWD}" + + # Build + local 
CMAKE_OPTIONS="$@" + local CMAKE_FILE_LOCATION=${METALL_ROOT_DIR} + or_die cmake ${CMAKE_FILE_LOCATION} ${CMAKE_OPTIONS} + if [[ -z "${METALL_LIMIT_MAKE_PARALLELS}" ]]; then + or_die make -j + else + or_die make -j${METALL_LIMIT_MAKE_PARALLELS} + fi + + # Test 1 + rm -rf ${METALL_TEST_DIR} + or_die ctest --timeout 1000 + + # Test 2 + rm -rf ${METALL_TEST_DIR} + pushd bench/adjacency_list + or_die bash ./test/test.sh -d${METALL_TEST_DIR} + popd + + # Test 3 + rm -rf ${METALL_TEST_DIR} + pushd bench/adjacency_list + or_die bash ./test/test_large.sh -d${METALL_TEST_DIR} + popd + + # TODO: reflink test and C_API test + + rm -rf ${METALL_TEST_DIR} + popd + rm -rf ${METALL_BUILD_DIR} +} + + +####################################### +# Builds documents +# Globals: +# METALL_ROOT_DIR +# METALL_BUILD_DIR +# Outputs: STDOUT and STDERR +####################################### +build_docs() { + source ${METALL_ROOT_DIR}/scripts/test_utility.sh + + mkdir -p ${METALL_BUILD_DIR} + cd ${METALL_BUILD_DIR} + echo "Build and test in ${PWD}" + + # Build + local CMAKE_FILE_LOCATION=${METALL_ROOT_DIR} + or_die cmake ${CMAKE_FILE_LOCATION} -DBUILD_DOC=ON + or_die make build_doc + + cd ../ + rm -rf ${METALL_BUILD_DIR} +} \ No newline at end of file diff --git a/scripts/test_utility.sh b/scripts/test_utility.sh index 01b46179..d313b8f6 100644 --- a/scripts/test_utility.sh +++ b/scripts/test_utility.sh @@ -54,4 +54,15 @@ setup_test_dir() { # mkdir -p ${METALL_TEST_DIR} # Metall creates automatically if the directory does not exist echo "Store test data to ${METALL_TEST_DIR}" +} + +####################################### +# Show some system information +# Outputs: STDOUT and STDERR +####################################### +show_system_info() { + exec_cmd df -h + exec_cmd df -ih + exec_cmd free -g + exec_cmd uname -r } \ No newline at end of file From 5017db2233a4edf0226a027fb1ccd4bb061c268d Mon Sep 17 00:00:00 2001 From: Keita Iwabuchi Date: Tue, 4 Oct 2022 16:43:45 -0700 Subject: [PATCH 
10/10] CMake CMP0135 is NEW. --- CMakeLists.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b10cee8c..6e001bf4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,13 @@ include(FetchContent) # -------------------------------------------------------------------------------- # # CMake policy # -------------------------------------------------------------------------------- # -cmake_policy(SET CMP0077 NEW) +if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.13") + cmake_policy(SET CMP0077 NEW) +endif() + +if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.24") + cmake_policy(SET CMP0135 NEW) +endif() # -------------------------------------------------------------------------------- # # Metall general configuration @@ -85,7 +91,6 @@ option(BUILD_TEST "Build the test" OFF) option(RUN_LARGE_SCALE_TEST "Run large scale tests" OFF) option(RUN_BUILD_AND_TEST_WITH_CI "Perform build and basic test with CI" OFF) option(BUILD_VERIFICATION "Build verification directory" OFF) -option(LOGGING "Logging" OFF) option(USE_SORTED_BIN "Use VM space aware algorithm in the bin directory" OFF) set(DEFAULT_VM_RESERVE_SIZE "0" CACHE STRING @@ -192,11 +197,6 @@ if (INITIAL_SEGMENT_SIZE GREATER 0) message(STATUS "METALL_INITIAL_SEGMENT_SIZE=${INITIAL_SEGMENT_SIZE}") endif () -if (LOGGING) - list(APPEND METALL_DEFS "METALL_ENABLE_LOGGING") - message(STATUS "Enable logging") -endif () - if (USE_SORTED_BIN) list(APPEND METALL_DEFS "METALL_USE_SORTED_BIN") message(STATUS "Use VM space aware algorithm in the bin directory")