From d371101bfdb4fc75815d2187406678235a343820 Mon Sep 17 00:00:00 2001 From: Pxl Date: Mon, 14 Aug 2023 10:47:06 +0800 Subject: [PATCH] [Improvement](aggregation) make fixed hashmap's bitmap_size flexible (#22573) make fixed hashmap's bitmap_size flexible --- be/src/vec/common/aggregation_common.h | 117 +++-------------------- be/src/vec/common/columns_hashing.h | 8 +- be/src/vec/common/columns_hashing_impl.h | 23 ----- be/src/vec/common/hash_table/hash.h | 54 +++-------- be/src/vec/common/uint128.h | 13 +++ be/src/vec/exec/join/vhash_join_node.cpp | 12 +-- be/src/vec/exec/vaggregation_node.cpp | 59 ++++++------ be/src/vec/exec/vaggregation_node.h | 36 ++++++- be/src/vec/exec/vpartition_sort_node.cpp | 10 +- be/src/vec/exec/vset_operation_node.cpp | 10 +- 10 files changed, 114 insertions(+), 228 deletions(-) diff --git a/be/src/vec/common/aggregation_common.h b/be/src/vec/common/aggregation_common.h index 1c07080eb0b8c6..20cda701ebdbcc 100644 --- a/be/src/vec/common/aggregation_common.h +++ b/be/src/vec/common/aggregation_common.h @@ -34,125 +34,33 @@ namespace doris::vectorized { -using Sizes = std::vector; - -/// When packing the values of nullable columns at a given row, we have to -/// store the fact that these values are nullable or not. This is achieved -/// by encoding this information as a bitmap. Let S be the size in bytes of -/// a packed values binary blob and T the number of bytes we may place into -/// this blob, the size that the bitmap shall occupy in the blob is equal to: -/// ceil(T/8). Thus we must have: S = T + ceil(T/8). Below we indicate for -/// each value of S, the corresponding value of T, and the bitmap size: -/// -/// 32,28,4 -/// 16,14,2 -/// 8,7,1 -/// 4,3,1 -/// 2,1,1 -/// - -namespace { -// clang-format off -template -constexpr auto get_bitmap_size() { - return (sizeof(T) == 32) - ? 4: (sizeof(T) == 16) - ? 2: ((sizeof(T) == 8) - ? 1: ((sizeof(T) == 4) - ? 1: ((sizeof(T) == 2) - ? 
1: 0))); +inline size_t get_bitmap_size(size_t key_number) { + return (key_number + 7) / 8; } -// clang-format on - -} // namespace - -template -using KeysNullMap = std::array()>; - -/// Pack into a binary blob of type T a set of fixed-size keys. Granted that all the keys fit into the -/// binary blob, they are disposed in it consecutively. -template -T pack_fixed(size_t i, size_t keys_size, const ColumnRawPtrs& key_columns, const Sizes& key_sizes) { - union { - T key; - char bytes[sizeof(key)] = {}; - }; - - size_t offset = 0; - - for (size_t j = 0; j < keys_size; ++j) { - size_t index = i; - const IColumn* column = key_columns[j]; - - switch (key_sizes[j]) { - case 1: - memcpy(bytes + offset, - static_cast(column)->get_raw_data_begin<1>() + index, - 1); - offset += 1; - break; - case 2: - memcpy(bytes + offset, - static_cast(column)->get_raw_data_begin<2>() + - index * 2, - 2); - offset += 2; - break; - case 4: - memcpy(bytes + offset, - static_cast(column)->get_raw_data_begin<4>() + - index * 4, - 4); - offset += 4; - break; - case 8: - memcpy(bytes + offset, - static_cast(column)->get_raw_data_begin<8>() + - index * 8, - 8); - offset += 8; - break; - default: - memcpy(bytes + offset, - static_cast(column)->get_raw_data_begin<1>() + - index * key_sizes[j], - key_sizes[j]); - offset += key_sizes[j]; - } - } - return key; -} +using Sizes = std::vector; -/// Similar as above but supports nullable values. 
template T pack_fixed(size_t i, size_t keys_size, const ColumnRawPtrs& key_columns, const Sizes& key_sizes, - const KeysNullMap& bitmap) { + const ColumnRawPtrs& nullmap_columns) { union { T key; char bytes[sizeof(key)] = {}; }; - size_t offset = 0; - - static constexpr auto bitmap_size = std::tuple_size>::value; - static constexpr bool has_bitmap = bitmap_size > 0; - - if (has_bitmap) { - memcpy(bytes + offset, bitmap.data(), bitmap_size * sizeof(UInt8)); - offset += bitmap_size; - } + size_t bitmap_size = get_bitmap_size(nullmap_columns.size()); + size_t offset = bitmap_size; for (size_t j = 0; j < keys_size; ++j) { bool is_null = false; - if (has_bitmap) { - size_t bucket = j / 8; - size_t off = j % 8; - is_null = ((bitmap[bucket] >> off) & 1) == 1; + if (bitmap_size && nullmap_columns[j] != nullptr) { + is_null = nullmap_columns[j]->get_bool(i); } if (is_null) { + size_t bucket = j / 8; + bytes[bucket] |= (1 << (j - bucket * 8)); offset += key_sizes[j]; continue; } @@ -199,10 +107,7 @@ T pack_fixed(size_t i, size_t keys_size, const ColumnRawPtrs& key_columns, const template std::vector pack_fixeds(size_t row_numbers, const ColumnRawPtrs& key_columns, const Sizes& key_sizes, const ColumnRawPtrs& nullmap_columns) { - size_t bitmap_size = 0; - if (!nullmap_columns.empty()) { - bitmap_size = std::tuple_size>::value; - } + size_t bitmap_size = get_bitmap_size(nullmap_columns.size()); std::vector result(row_numbers); size_t offset = 0; diff --git a/be/src/vec/common/columns_hashing.h b/be/src/vec/common/columns_hashing.h index 2c28e34b75daee..5c2bf2e50d588f 100644 --- a/be/src/vec/common/columns_hashing.h +++ b/be/src/vec/common/columns_hashing.h @@ -208,12 +208,8 @@ struct HashMethodKeysFixed : Base(key_columns), key_sizes(key_sizes_), keys_size(key_columns.size()) {} ALWAYS_INLINE Key get_key_holder(size_t row, Arena&) const { - if constexpr (has_nullable_keys_) { - auto bitmap = Base::create_bitmap(row); - return pack_fixed(row, keys_size, 
Base::get_actual_columns(), key_sizes, bitmap); - } else { - return pack_fixed(row, keys_size, Base::get_actual_columns(), key_sizes); - } + return pack_fixed(row, keys_size, Base::get_actual_columns(), key_sizes, + Base::get_nullmap_columns()); } Key pack_key_holder(Key key, Arena& pool) const { return key; } diff --git a/be/src/vec/common/columns_hashing_impl.h b/be/src/vec/common/columns_hashing_impl.h index e8ce4efed62fee..6b4edf22e8dd84 100644 --- a/be/src/vec/common/columns_hashing_impl.h +++ b/be/src/vec/common/columns_hashing_impl.h @@ -435,25 +435,6 @@ class BaseStateKeysFixed { const ColumnRawPtrs& get_nullmap_columns() const { return null_maps; } - /// Create a bitmap that indicates whether, for a particular row, - /// a key column bears a null value or not. - KeysNullMap create_bitmap(size_t row) const { - KeysNullMap bitmap {}; - - for (size_t k = 0; k < null_maps.size(); ++k) { - if (null_maps[k] != nullptr) { - const auto& null_map = assert_cast(*null_maps[k]).get_data(); - if (null_map[row] == 1) { - size_t bucket = k / 8; - size_t offset = k % 8; - bitmap[bucket] |= UInt8(1) << offset; - } - } - } - - return bitmap; - } - private: ColumnRawPtrs actual_columns; ColumnRawPtrs null_maps; @@ -469,10 +450,6 @@ class BaseStateKeysFixed { const ColumnRawPtrs& get_nullmap_columns() const { return null_maps; } - KeysNullMap create_bitmap(size_t) const { - LOG(FATAL) << "Internal error: calling create_bitmap() for non-nullable keys is forbidden"; - } - private: ColumnRawPtrs actual_columns; ColumnRawPtrs null_maps; diff --git a/be/src/vec/common/hash_table/hash.h b/be/src/vec/common/hash_table/hash.h index 9856be0a3fcada..46df8a4c72a5dd 100644 --- a/be/src/vec/common/hash_table/hash.h +++ b/be/src/vec/common/hash_table/hash.h @@ -170,47 +170,17 @@ struct HashCRC32 { } }; -/// It is reasonable to use for UInt8, UInt16 with sufficient hash table size. 
-struct TrivialHash { - template - size_t operator()(T key) const { - return key; +template <> +struct HashCRC32 { + size_t operator()(const doris::vectorized::UInt136& x) const { +#if defined(__SSE4_2__) || defined(__aarch64__) + doris::vectorized::UInt64 crc = -1ULL; + crc = _mm_crc32_u8(crc, x.a); + crc = _mm_crc32_u64(crc, x.b); + crc = _mm_crc32_u64(crc, x.c); + return crc; +#else + return Hash128to64({Hash128to64({x.a, x.b}), x.c}); +#endif } }; - -/** A relatively good non-cryptographic hash function from UInt64 to UInt32. - * But worse (both in quality and speed) than just cutting int_hash64. - * Taken from here: http://www.concentric.net/~ttwang/tech/inthash.htm - * - * Slightly changed compared to the function by link: shifts to the right are accidentally replaced by a cyclic shift to the right. - * This change did not affect the smhasher test results. - * - * It is recommended to use different salt for different tasks. - * That was the case that in the database values were sorted by hash (for low-quality pseudo-random spread), - * and in another place, in the aggregate function, the same hash was used in the hash table, - * as a result, this aggregate function was monstrously slowed due to collisions. - * - * NOTE Salting is far from perfect, because it commutes with first steps of calculation. - * - * NOTE As mentioned, this function is slower than int_hash64. - * But occasionally, it is faster, when written in a loop and loop is vectorized. - */ -template -inline doris::vectorized::UInt32 int_hash32(doris::vectorized::UInt64 key) { - key ^= salt; - - key = (~key) + (key << 18); - key = key ^ ((key >> 31) | (key << 33)); - key = key * 21; - key = key ^ ((key >> 11) | (key << 53)); - key = key + (key << 6); - key = key ^ ((key >> 22) | (key << 42)); - - return key; -} - -/// For containers. 
-template -struct IntHash32 { - size_t operator()(const T& key) const { return int_hash32(key); } -}; diff --git a/be/src/vec/common/uint128.h b/be/src/vec/common/uint128.h index 204729f6a6edf0..60324d7e356d7b 100644 --- a/be/src/vec/common/uint128.h +++ b/be/src/vec/common/uint128.h @@ -192,6 +192,19 @@ struct UInt256 { return *this; } }; + +#pragma pack(1) +struct UInt136 { + UInt8 a; + UInt64 b; + UInt64 c; + + bool operator==(const UInt136 rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; } + + bool operator!=(const UInt136 rhs) const { return !operator==(rhs); } +}; +#pragma pack() + } // namespace doris::vectorized /// Overload hash for type casting diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp index c048c4cc2f6061..807c63ef5c2310 100644 --- a/be/src/vec/exec/join/vhash_join_node.cpp +++ b/be/src/vec/exec/join/vhash_join_node.cpp @@ -1178,7 +1178,8 @@ void HashJoinNode::_hash_table_init(RuntimeState* state) { bool use_fixed_key = true; bool has_null = false; - int key_byte_size = 0; + size_t key_byte_size = 0; + size_t bitmap_size = get_bitmap_size(_build_expr_ctxs.size()); _probe_key_sz.resize(_probe_expr_ctxs.size()); _build_key_sz.resize(_build_expr_ctxs.size()); @@ -1200,20 +1201,17 @@ void HashJoinNode::_hash_table_init(RuntimeState* state) { key_byte_size += _probe_key_sz[i]; } - if (std::tuple_size>::value + key_byte_size > - sizeof(UInt256)) { + if (bitmap_size + key_byte_size > sizeof(UInt256)) { use_fixed_key = false; } if (use_fixed_key) { // TODO: may we should support uint256 in the future if (has_null) { - if (std::tuple_size>::value + key_byte_size <= - sizeof(UInt64)) { + if (bitmap_size + key_byte_size <= sizeof(UInt64)) { _hash_table_variants ->emplace>(); - } else if (std::tuple_size>::value + key_byte_size <= - sizeof(UInt128)) { + } else if (bitmap_size + key_byte_size <= sizeof(UInt128)) { _hash_table_variants ->emplace>(); } else { diff --git 
a/be/src/vec/exec/vaggregation_node.cpp b/be/src/vec/exec/vaggregation_node.cpp index ca346635158e81..b20e45182f4247 100644 --- a/be/src/vec/exec/vaggregation_node.cpp +++ b/be/src/vec/exec/vaggregation_node.cpp @@ -257,7 +257,8 @@ void AggregationNode::_init_hash_method(const VExprContextSPtrs& probe_exprs) { } else { bool use_fixed_key = true; bool has_null = false; - int key_byte_size = 0; + size_t key_byte_size = 0; + size_t bitmap_size = get_bitmap_size(_probe_expr_ctxs.size()); _probe_key_sz.resize(_probe_expr_ctxs.size()); for (int i = 0; i < _probe_expr_ctxs.size(); ++i) { @@ -275,47 +276,41 @@ void AggregationNode::_init_hash_method(const VExprContextSPtrs& probe_exprs) { key_byte_size += _probe_key_sz[i]; } - if (std::tuple_size>::value + key_byte_size > sizeof(UInt256)) { + if (!has_null) { + bitmap_size = 0; + } + + if (bitmap_size + key_byte_size > sizeof(UInt256)) { use_fixed_key = false; } if (use_fixed_key) { - if (has_null) { - if (std::tuple_size>::value + key_byte_size <= sizeof(UInt64)) { - if (_is_first_phase) - _agg_data->init(AggregatedDataVariants::Type::int64_keys, has_null); - else - _agg_data->init(AggregatedDataVariants::Type::int64_keys_phase2, has_null); - } else if (std::tuple_size>::value + key_byte_size <= - sizeof(UInt128)) { - if (_is_first_phase) - _agg_data->init(AggregatedDataVariants::Type::int128_keys, has_null); - else - _agg_data->init(AggregatedDataVariants::Type::int128_keys_phase2, has_null); + if (bitmap_size + key_byte_size <= sizeof(UInt64)) { + if (_is_first_phase) { + _agg_data->init(AggregatedDataVariants::Type::int64_keys, has_null); + } else { + _agg_data->init(AggregatedDataVariants::Type::int64_keys_phase2, has_null); + } + } else if (bitmap_size + key_byte_size <= sizeof(UInt128)) { + if (_is_first_phase) { + _agg_data->init(AggregatedDataVariants::Type::int128_keys, has_null); } else { - if (_is_first_phase) - _agg_data->init(AggregatedDataVariants::Type::int256_keys, has_null); - else - 
_agg_data->init(AggregatedDataVariants::Type::int256_keys_phase2, has_null); + _agg_data->init(AggregatedDataVariants::Type::int128_keys_phase2, has_null); + } + } else if (bitmap_size + key_byte_size <= sizeof(UInt136)) { + if (_is_first_phase) { + _agg_data->init(AggregatedDataVariants::Type::int136_keys, has_null); + } else { + _agg_data->init(AggregatedDataVariants::Type::int136_keys_phase2, has_null); } } else { - if (key_byte_size <= sizeof(UInt64)) { - if (_is_first_phase) - _agg_data->init(AggregatedDataVariants::Type::int64_keys, has_null); - else - _agg_data->init(AggregatedDataVariants::Type::int64_keys_phase2, has_null); - } else if (key_byte_size <= sizeof(UInt128)) { - if (_is_first_phase) - _agg_data->init(AggregatedDataVariants::Type::int128_keys, has_null); - else - _agg_data->init(AggregatedDataVariants::Type::int128_keys_phase2, has_null); + if (_is_first_phase) { + _agg_data->init(AggregatedDataVariants::Type::int256_keys, has_null); } else { - if (_is_merge) - _agg_data->init(AggregatedDataVariants::Type::int256_keys, has_null); - else - _agg_data->init(AggregatedDataVariants::Type::int256_keys_phase2, has_null); + _agg_data->init(AggregatedDataVariants::Type::int256_keys_phase2, has_null); } } + } else { _agg_data->init(AggregatedDataVariants::Type::serialized); } diff --git a/be/src/vec/exec/vaggregation_node.h b/be/src/vec/exec/vaggregation_node.h index 1e01a138a3d099..ef4d9100391896 100644 --- a/be/src/vec/exec/vaggregation_node.h +++ b/be/src/vec/exec/vaggregation_node.h @@ -309,7 +309,7 @@ struct AggregationMethodKeysFixed { static void insert_keys_into_columns(std::vector& keys, MutableColumns& key_columns, const size_t num_rows, const Sizes& key_sizes) { // In any hash key value, column values to be read start just after the bitmap, if it exists. - size_t pos = has_nullable_keys ? std::tuple_size>::value : 0; + size_t pos = has_nullable_keys ? 
get_bitmap_size(key_columns.size()) : 0; for (size_t i = 0; i < key_columns.size(); ++i) { size_t size = key_sizes[i]; @@ -407,6 +407,8 @@ using AggregatedDataWithUInt32Key = PHHashMap>; using AggregatedDataWithUInt128Key = PHHashMap>; using AggregatedDataWithUInt256Key = PHHashMap>; +using AggregatedDataWithUInt136Key = PHHashMap>; + using AggregatedDataWithUInt32KeyPhase2 = PHHashMap>; using AggregatedDataWithUInt64KeyPhase2 = @@ -416,6 +418,9 @@ using AggregatedDataWithUInt128KeyPhase2 = using AggregatedDataWithUInt256KeyPhase2 = PHHashMap>; +using AggregatedDataWithUInt136KeyPhase2 = + PHHashMap>; + using AggregatedDataWithNullableUInt8Key = AggregationDataWithNullKey; using AggregatedDataWithNullableUInt16Key = AggregationDataWithNullKey; using AggregatedDataWithNullableUInt32Key = AggregationDataWithNullKey; @@ -466,12 +471,16 @@ using AggregatedMethodVariants = std::variant< AggregationMethodKeysFixed, AggregationMethodKeysFixed, AggregationMethodKeysFixed, + AggregationMethodKeysFixed, + AggregationMethodKeysFixed, AggregationMethodKeysFixed, AggregationMethodKeysFixed, AggregationMethodKeysFixed, AggregationMethodKeysFixed, AggregationMethodKeysFixed, - AggregationMethodKeysFixed>; + AggregationMethodKeysFixed, + AggregationMethodKeysFixed, + AggregationMethodKeysFixed>; struct AggregatedDataVariants { AggregatedDataVariants() = default; @@ -500,6 +509,8 @@ struct AggregatedDataVariants { int256_keys, int256_keys_phase2, string_key, + int136_keys, + int136_keys_phase2, }; Type _type = Type::EMPTY; @@ -661,6 +672,27 @@ struct AggregatedDataVariants { AggregationMethodStringNoCache>(); } break; + case Type::int136_keys: + + if (is_nullable) { + _aggregated_method_variant + .emplace>(); + } else { + _aggregated_method_variant + .emplace>(); + } + + break; + case Type::int136_keys_phase2: + + if (is_nullable) { + _aggregated_method_variant.emplace< + AggregationMethodKeysFixed>(); + } else { + _aggregated_method_variant.emplace< + 
AggregationMethodKeysFixed>(); + } + break; default: DCHECK(false) << "Do not have a rigth agg data type"; } diff --git a/be/src/vec/exec/vpartition_sort_node.cpp b/be/src/vec/exec/vpartition_sort_node.cpp index 8ee303105dff4b..eb807ad4b4abd0 100644 --- a/be/src/vec/exec/vpartition_sort_node.cpp +++ b/be/src/vec/exec/vpartition_sort_node.cpp @@ -399,7 +399,8 @@ void VPartitionSortNode::_init_hash_method() { } else { bool use_fixed_key = true; bool has_null = false; - int key_byte_size = 0; + size_t key_byte_size = 0; + size_t bitmap_size = get_bitmap_size(_partition_exprs_num); _partition_key_sz.resize(_partition_exprs_num); for (int i = 0; i < _partition_exprs_num; ++i) { @@ -417,16 +418,15 @@ void VPartitionSortNode::_init_hash_method() { key_byte_size += _partition_key_sz[i]; } - if (std::tuple_size>::value + key_byte_size > sizeof(UInt256)) { + if (bitmap_size + key_byte_size > sizeof(UInt256)) { use_fixed_key = false; } if (use_fixed_key) { if (has_null) { - if (std::tuple_size>::value + key_byte_size <= sizeof(UInt64)) { + if (bitmap_size + key_byte_size <= sizeof(UInt64)) { _partitioned_data->init(PartitionedHashMapVariants::Type::int64_keys, has_null); - } else if (std::tuple_size>::value + key_byte_size <= - sizeof(UInt128)) { + } else if (bitmap_size + key_byte_size <= sizeof(UInt128)) { _partitioned_data->init(PartitionedHashMapVariants::Type::int128_keys, has_null); } else { diff --git a/be/src/vec/exec/vset_operation_node.cpp b/be/src/vec/exec/vset_operation_node.cpp index ac01cbd5dad4a7..bcecb1091ad959 100644 --- a/be/src/vec/exec/vset_operation_node.cpp +++ b/be/src/vec/exec/vset_operation_node.cpp @@ -327,7 +327,8 @@ void VSetOperationNode::hash_table_init() { bool use_fixed_key = true; bool has_null = false; - int key_byte_size = 0; + size_t key_byte_size = 0; + size_t bitmap_size = get_bitmap_size(_child_expr_lists[0].size()); _build_key_sz.resize(_child_expr_lists[0].size()); _probe_key_sz.resize(_child_expr_lists[0].size()); @@ -347,16 +348,15 
@@ void VSetOperationNode::hash_table_init() { key_byte_size += _probe_key_sz[i]; } - if (std::tuple_size>::value + key_byte_size > sizeof(UInt256)) { + if (bitmap_size + key_byte_size > sizeof(UInt256)) { use_fixed_key = false; } if (use_fixed_key) { if (has_null) { - if (std::tuple_size>::value + key_byte_size <= sizeof(UInt64)) { + if (bitmap_size + key_byte_size <= sizeof(UInt64)) { _hash_table_variants ->emplace>(); - } else if (std::tuple_size>::value + key_byte_size <= - sizeof(UInt128)) { + } else if (bitmap_size + key_byte_size <= sizeof(UInt128)) { _hash_table_variants ->emplace>(); } else {