Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add parallel-hashmap and opt test #2

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,6 @@
[submodule "contrib/sparsehash-c11"]
path = contrib/sparsehash-c11
url = git@github.com:sparsehash/sparsehash-c11.git
[submodule "contrib/parallel-hashmap"]
path = contrib/parallel-hashmap
url = git@github.com:greg7mdp/parallel-hashmap.git
2 changes: 2 additions & 0 deletions benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
HASH_FUNCTIONS = ["std_hash", "ch_hash", "absl_hash"]

HASH_TABLES = [
"parallel_phmap",
"opt_parallel_phmap",
"ch_hash_map",
"absl_hash_map",
"google_dense_hash_map",
Expand Down
8 changes: 8 additions & 0 deletions contrib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,14 @@ set(FLAT_HASH_MAP_HEADERS

add_library(flat_hash_map INTERFACE ${FLAT_HASH_MAP_HEADERS})

# Add parallel_hashmap library.
# It is exposed as a pure INTERFACE target that only carries an include path,
# so no source arguments are passed to add_library().
# add_subdirectory(parallel-hashmap EXCLUDE_FROM_ALL)

set(PARALLEL_HASHMAP_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/contrib/parallel-hashmap/")

add_library(parallel-hashmap INTERFACE)
target_include_directories(parallel-hashmap SYSTEM BEFORE INTERFACE "${PARALLEL_HASHMAP_INCLUDE_DIR}")

# Add sparsehash library
add_library(sparsehash INTERFACE)
target_include_directories(sparsehash SYSTEM BEFORE INTERFACE "${PROJECT_SOURCE_DIR}/contrib/sparsehash-c11")
1 change: 1 addition & 0 deletions contrib/parallel-hashmap
Submodule parallel-hashmap added at 50fa64
2 changes: 2 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
include_directories("${PROJECT_SOURCE_DIR}/contrib/parallel-hashmap/")

add_headers_and_sources(hash_table_aggregation_benchmark ${CMAKE_CURRENT_SOURCE_DIR})
add_executable(hash_table_aggregation_benchmark ${hash_table_aggregation_benchmark_headers} ${hash_table_aggregation_benchmark_sources})
target_link_libraries(hash_table_aggregation_benchmark PRIVATE
Expand Down
36 changes: 35 additions & 1 deletion src/HashTables.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,38 @@
#include <hopscotch-map/include/tsl/hopscotch_map.h>
#include <sparsehash/dense_hash_map>
#include <unordered_dense/include/ankerl/unordered_dense.h>
#include <parallel_hashmap/phmap.h>

#include "defines.h"


/// Benchmark traits describing phmap::flat_hash_map with UInt64 mapped values.
/// `phmap_opt` is false, so testForHashMapType does not route this type to testopt().
template <typename Key, typename Hash>
struct PhmapHashTableType
{
    /// No `initialize` hook is declared for this table.
    static constexpr bool has_initialization = false;

    /// Do not use the precomputed-hash benchmark loop.
    static constexpr bool phmap_opt = false;

    /// Label passed to the benchmark as the hash table name.
    static constexpr std::string_view description = "Phmap HashMap";

    /// Concrete container type under test.
    using HashTable = phmap::flat_hash_map<Key, UInt64, Hash>;
};


/// Benchmark traits for the same phmap::flat_hash_map container as the plain
/// phmap traits, but with `phmap_opt` set so testForHashMapType routes it to testopt().
template <typename Key, typename Hash>
struct PhmapHashTableTypeOpt
{
    /// No `initialize` hook is declared for this table.
    static constexpr bool has_initialization = false;

    /// Select the precomputed-hash benchmark loop.
    static constexpr bool phmap_opt = true;

    /// Label passed to the benchmark as the hash table name.
    static constexpr std::string_view description = "Opt Phmap HashMap";

    /// Concrete container type under test.
    using HashTable = phmap::flat_hash_map<Key, UInt64, Hash>;
};


/// Benchmark traits describing the ClickHouse `HashMap` with UInt64 mapped values.
template <typename Key, typename Hash>
struct ClickHouseHashTableType
{
    /// No `initialize` hook is declared for this table.
    static constexpr bool has_initialization = false;

    /// Do not use the precomputed-hash benchmark loop.
    static constexpr bool phmap_opt = false;

    /// Label passed to the benchmark as the hash table name.
    static constexpr std::string_view description = "ClickHouse HashMap";

    /// Concrete container type under test.
    using HashTable = HashMap<Key, UInt64, Hash>;
};

template <typename Key, typename Hash>
Expand All @@ -27,6 +50,7 @@ struct AbseilHashTableType
using HashTable = ::absl::flat_hash_map<Key, UInt64, Hash>;
static constexpr std::string_view description = "absl::flat_hash_map";
static constexpr bool has_initialization = false;
static constexpr bool phmap_opt = false;
};

template <typename Key, typename Hash>
Expand All @@ -35,6 +59,7 @@ struct DenseHashTableType
using HashTable = ::google::dense_hash_map<Key, UInt64, Hash>;
static constexpr std::string_view description = "google::dense_hash_map";
static constexpr bool has_initialization = true;
static constexpr bool phmap_opt = false;

static void initialize(HashTable & hash_table) { hash_table.set_empty_key(std::numeric_limits<Key>::max()); }
};
Expand All @@ -45,6 +70,7 @@ struct TslHopscotchHashTableType
using HashTable = tsl::hopscotch_map<Key, UInt64, Hash>;
static constexpr std::string_view description = "tsl::hopscotch_map";
static constexpr bool has_initialization = false;
static constexpr bool phmap_opt = false;
};

template <typename Key, typename Hash>
Expand All @@ -53,6 +79,7 @@ struct AnkerlUnorderedDenseHashTableType
using HashTable = ankerl::unordered_dense::map<Key, UInt64, Hash>;
static constexpr std::string_view description = "ankerl::unordered_dense::map";
static constexpr bool has_initialization = false;
static constexpr bool phmap_opt = false;
};

template <typename Key, typename Hash>
Expand All @@ -61,6 +88,7 @@ struct SkaFlatHashTableType
using HashTable = ska::flat_hash_map<Key, UInt64, Hash>;
static constexpr std::string_view description = "ska::flat_hash_map";
static constexpr bool has_initialization = false;
static constexpr bool phmap_opt = false;
};

template <typename Key, typename Hash>
Expand All @@ -69,6 +97,7 @@ struct SkaBytellHashTableType
using HashTable = ska::bytell_hash_map<Key, UInt64, Hash>;
static constexpr std::string_view description = "ska::bytell_hash_map";
static constexpr bool has_initialization = false;
static constexpr bool phmap_opt = false;
};

template <typename Key, typename Hash>
Expand All @@ -77,12 +106,17 @@ struct StandardHashTableType
using HashTable = std::unordered_map<Key, UInt64, Hash>;
static constexpr std::string_view description = "std::unordered_map";
static constexpr bool has_initialization = false;
static constexpr bool phmap_opt = false;
};

template <typename Key, typename Hash, typename Callback>
void dispatchHashTableType(std::string_view hash_table_type, Callback && callback)
{
if (hash_table_type == "ch_hash_map")
if (hash_table_type == "parallel_phmap")
callback(PhmapHashTableType<Key, Hash>());
else if (hash_table_type == "opt_parallel_phmap")
callback(PhmapHashTableTypeOpt<Key, Hash>());
else if (hash_table_type == "ch_hash_map")
callback(ClickHouseHashTableType<Key, Hash>());
else if (hash_table_type == "absl_hash_map")
callback(AbseilHashTableType<Key, Hash>());
Expand Down
70 changes: 70 additions & 0 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,73 @@ void NOINLINE test(const Key * data, size_t size, std::string_view hash_table, s
std::cout << "Memory usage: " << memory_usage << "\n";
}

/// Benchmarks aggregation of `size` keys from `data` into `Map` using the
/// precomputed-hash API (`hash`, `prefetch_hash`, `lazy_emplace_with_hash`).
///
/// Keys are processed in blocks of up to BLOCK elements: all hashes of a block
/// are computed first, then each key is inserted while the slot for the key
/// PREFETCH positions ahead (within the same block) is prefetched.
///
/// @param data          pointer to the first key
/// @param size          number of keys readable from `data`
/// @param hash_table    label printed as the hash table name
/// @param hash_function label printed as the hash function name
/// @param init_func     callable invoked once on the freshly constructed map
///
/// Prints the table name, hash function name, final map size, elapsed time,
/// throughput and memory usage to std::cout.
template <typename Key, typename Map, typename InitFunc = VoidInitialization<Map>>
void NOINLINE testopt(const Key * data, size_t size, std::string_view hash_table, std::string_view hash_function, InitFunc init_func = {})
{
    static constexpr size_t PREFETCH = 16;
    static constexpr size_t BLOCK = 512;

    auto start = std::chrono::steady_clock::now();
    size_t map_size = 0;
    size_t memory_usage = getCurrentMemoryUsageInBytes();
    std::vector<size_t> hash_values(BLOCK);

    {
        Map map;
        init_func(map);

        // One loop handles both full blocks and the shorter final block.
        for (size_t i = 0; i < size;)
        {
            const Key * block = data + i;
            const size_t count = std::min(BLOCK, size - i);

            // Use map.hash() rather than map.hash_function(): the *_with_hash
            // and prefetch_hash calls must receive the same value the container
            // computes internally, otherwise lookups can miss after a rehash.
            for (size_t j = 0; j < count; ++j)
                hash_values[j] = map.hash(block[j]);

            for (size_t j = 0; j < count; ++j)
            {
                if (j + PREFETCH < count)
                    map.prefetch_hash(hash_values[j + PREFETCH]);

                // The constructor callback only runs for a new key, so start
                // the counter at 0 and increment through the returned iterator.
                // This counts repeated keys too, i.e. the `++map[key]` form.
                auto it = map.lazy_emplace_with_hash(block[j], hash_values[j], [&](const auto & ctor) {
                    ctor(block[j], 0);
                });
                ++it->second;
            }

            i += count;
        }

        // Measured while the map is still alive; reported value is at least one page.
        memory_usage = std::max(getCurrentMemoryUsageInBytes() - memory_usage, getPageSizeInBytes());
        map_size = map.size();
    }

    // The map's destruction above is included in the measured time.
    auto finish = std::chrono::steady_clock::now();
    auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(finish - start);
    double elapsed_seconds = static_cast<double>(duration.count()) / 1000000000ULL;

    std::cout << "Hash table: " << hash_table << '\n';
    std::cout << "Hash function: " << hash_function << '\n';
    std::cout << "Hash table size: " << map_size << '\n';

    std::cout << "Elapsed: " << elapsed_seconds << " (" << static_cast<size_t>(size / elapsed_seconds) << " elem/sec.) " << '\n';
    std::cout << "Memory usage: " << memory_usage << "\n";
}

template <typename Key>
static void NOINLINE
testForHashMapType(std::string_view hash_table_type, std::string_view hash_function_type, const Key * data, size_t size)
Expand All @@ -66,6 +133,9 @@ testForHashMapType(std::string_view hash_table_type, std::string_view hash_funct
if constexpr (HashTableType::has_initialization)
test<Key, HashTable>(
data, size, hash_table_type.description, hash_function_type.description, HashTableType::initialize);
else if constexpr (HashTableType::phmap_opt)
testopt<Key, HashTable>(
data, size, hash_table_type.description, hash_function_type.description);
else
test<Key, HashTable>(data, size, hash_table_type.description, hash_function_type.description);
});
Expand Down