From 76f2bb5e40bc576261e91a873ee2d10be52172d2 Mon Sep 17 00:00:00 2001 From: Pxl Date: Tue, 8 Oct 2024 11:17:59 +0800 Subject: [PATCH] [Improvement](predicate) reduce useless check on HybridSet::find (#41498) ## Proposed changes reduce useless check on HybridSet::find --- be/src/exprs/hybrid_set.h | 16 ----------- be/src/olap/in_list_predicate.h | 28 ++++++++++--------- ...aggregate_function_group_array_intersect.h | 6 ++-- 3 files changed, 19 insertions(+), 31 deletions(-) diff --git a/be/src/exprs/hybrid_set.h b/be/src/exprs/hybrid_set.h index f0977a652b1cbe..6536ec2430fe08 100644 --- a/be/src/exprs/hybrid_set.h +++ b/be/src/exprs/hybrid_set.h @@ -333,10 +333,6 @@ class HybridSet : public HybridSetBase { int size() override { return _set.size(); } bool find(const void* data) const override { - if (data == nullptr) { - return false; - } - return _set.find(*reinterpret_cast(data)); } @@ -487,10 +483,6 @@ class StringSet : public HybridSetBase { int size() override { return _set.size(); } bool find(const void* data) const override { - if (data == nullptr) { - return false; - } - const auto* value = reinterpret_cast(data); std::string str_value(const_cast(value->data), value->size); return _set.find(str_value); @@ -654,19 +646,11 @@ class StringValueSet : public HybridSetBase { int size() override { return _set.size(); } bool find(const void* data) const override { - if (data == nullptr) { - return false; - } - const auto* value = reinterpret_cast(data); return _set.find(*value); } bool find(const void* data, size_t size) const override { - if (data == nullptr) { - return false; - } - StringRef sv(reinterpret_cast(data), size); return _set.find(sv); } diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h index bd91fe147fbb43..e79c2249c47801 100644 --- a/be/src/olap/in_list_predicate.h +++ b/be/src/olap/in_list_predicate.h @@ -99,9 +99,9 @@ class InListPredicateBase : public ColumnPredicate { if constexpr (is_string_type(Type)) { HybridSetBase::IteratorBase* iter = hybrid_set->begin(); while (iter->has_next()) { - const StringRef* value = (const StringRef*)(iter->get_value()); + const auto* value = (const StringRef*)(iter->get_value()); if constexpr (Type == TYPE_CHAR) { - _temp_datas.push_back(""); + _temp_datas.emplace_back(""); _temp_datas.back().resize(std::max(char_length, value->size)); memcpy(_temp_datas.back().data(), value->data, value->size); const string& str = _temp_datas.back(); @@ -231,12 +231,12 @@ class InListPredicateBase : public ColumnPredicate { void _evaluate_bit(const vectorized::IColumn& column, const uint16_t* sel, uint16_t size, bool* flags) const { if (column.is_nullable()) { - auto* nullable_col = + const auto* nullable_col = vectorized::check_and_get_column(column); - auto& null_bitmap = reinterpret_cast( - nullable_col->get_null_map_column()) - .get_data(); - auto& nested_col = nullable_col->get_nested_column(); + const auto& null_bitmap = reinterpret_cast( + nullable_col->get_null_map_column()) + .get_data(); + const auto& nested_col = nullable_col->get_nested_column(); if (_opposite) { return _base_evaluate_bit(&nested_col, &null_bitmap, sel, size, @@ -302,11 +302,13 @@ class InListPredicateBase : public ColumnPredicate { bool evaluate_and(const segment_v2::BloomFilter* bf) const override { if constexpr (PT == PredicateType::IN_LIST) { // IN predicate can not use ngram bf, just return true to accept - if (bf->is_ngram_bf()) return true; + if (bf->is_ngram_bf()) { + return true; + } HybridSetBase::IteratorBase* iter = _values->begin(); while (iter->has_next()) { if constexpr (std::is_same_v) { - const StringRef* value = (const StringRef*)iter->get_value(); + const auto* value = (const StringRef*)iter->get_value(); if (bf->test_bytes(value->data, value->size)) { return true; } @@ -393,9 +395,9 @@ class InListPredicateBase : public ColumnPredicate { if (column->is_column_dictionary()) { if constexpr (std::is_same_v) { - auto* nested_col_ptr = vectorized::check_and_get_column< + const auto* nested_col_ptr = vectorized::check_and_get_column< vectorized::ColumnDictionary>(column); - auto& data_array = nested_col_ptr->get_data(); + const auto& data_array = nested_col_ptr->get_data(); auto segid = column->get_rowset_segment_id(); DCHECK((segid.first.hi | segid.first.mi | segid.first.lo) != 0); auto& value_in_dict_flags = _segment_id_to_value_in_dict_flags[segid]; @@ -460,9 +462,9 @@ class InListPredicateBase : public ColumnPredicate { const uint16_t* sel, uint16_t size, bool* flags) const { if (column->is_column_dictionary()) { if constexpr (std::is_same_v) { - auto* nested_col_ptr = vectorized::check_and_get_column< + const auto* nested_col_ptr = vectorized::check_and_get_column< vectorized::ColumnDictionary>(column); - auto& data_array = nested_col_ptr->get_data(); + const auto& data_array = nested_col_ptr->get_data(); auto& value_in_dict_flags = _segment_id_to_value_in_dict_flags[column->get_rowset_segment_id()]; if (value_in_dict_flags.empty()) { diff --git a/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h b/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h index 94b34caff78645..fd6076686acb65 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h +++ b/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h @@ -132,7 +132,8 @@ struct AggregateFunctionGroupArrayIntersectData { const T* src_data = is_null_element ? nullptr : &(nested_column_data->get_element(offset + i)); - if (set->find(src_data) || (set->contain_null() && src_data == nullptr)) { + if ((!is_null_element && set->find(src_data)) || + (set->contain_null() && is_null_element)) { new_set->insert(src_data); } } @@ -424,7 +425,8 @@ class AggregateFunctionGroupArrayIntersectGeneric for (size_t i = 0; i < arr_size; ++i) { StringRef src = process_element(i); - if (set->find(src.data, src.size) || (set->contain_null() && src.data == nullptr)) { + if ((set->find(src.data, src.size) && src.data != nullptr) || + (set->contain_null() && src.data == nullptr)) { new_set->insert((void*)src.data, src.size); } }