Skip to content

Commit

Permalink
[Improvement](predicate) reduce useless check on HybridSet::find (apa…
Browse files Browse the repository at this point in the history
…che#41498)

## Proposed changes
reduce useless check on HybridSet::find
  • Loading branch information
BiteTheDDDDt authored and eldenmoon committed Oct 10, 2024
1 parent 8617bc5 commit d77c3cc
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 31 deletions.
16 changes: 0 additions & 16 deletions be/src/exprs/hybrid_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -333,10 +333,6 @@ class HybridSet : public HybridSetBase {
int size() override { return _set.size(); }

bool find(const void* data) const override {
if (data == nullptr) {
return false;
}

return _set.find(*reinterpret_cast<const ElementType*>(data));
}

Expand Down Expand Up @@ -487,10 +483,6 @@ class StringSet : public HybridSetBase {
int size() override { return _set.size(); }

bool find(const void* data) const override {
if (data == nullptr) {
return false;
}

const auto* value = reinterpret_cast<const StringRef*>(data);
std::string str_value(const_cast<const char*>(value->data), value->size);
return _set.find(str_value);
Expand Down Expand Up @@ -654,19 +646,11 @@ class StringValueSet : public HybridSetBase {
int size() override { return _set.size(); }

bool find(const void* data) const override {
if (data == nullptr) {
return false;
}

const auto* value = reinterpret_cast<const StringRef*>(data);
return _set.find(*value);
}

bool find(const void* data, size_t size) const override {
if (data == nullptr) {
return false;
}

StringRef sv(reinterpret_cast<const char*>(data), size);
return _set.find(sv);
}
Expand Down
28 changes: 15 additions & 13 deletions be/src/olap/in_list_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,9 @@ class InListPredicateBase : public ColumnPredicate {
if constexpr (is_string_type(Type)) {
HybridSetBase::IteratorBase* iter = hybrid_set->begin();
while (iter->has_next()) {
const StringRef* value = (const StringRef*)(iter->get_value());
const auto* value = (const StringRef*)(iter->get_value());
if constexpr (Type == TYPE_CHAR) {
_temp_datas.push_back("");
_temp_datas.emplace_back("");
_temp_datas.back().resize(std::max(char_length, value->size));
memcpy(_temp_datas.back().data(), value->data, value->size);
const string& str = _temp_datas.back();
Expand Down Expand Up @@ -231,12 +231,12 @@ class InListPredicateBase : public ColumnPredicate {
void _evaluate_bit(const vectorized::IColumn& column, const uint16_t* sel, uint16_t size,
bool* flags) const {
if (column.is_nullable()) {
auto* nullable_col =
const auto* nullable_col =
vectorized::check_and_get_column<vectorized::ColumnNullable>(column);
auto& null_bitmap = reinterpret_cast<const vectorized::ColumnUInt8&>(
nullable_col->get_null_map_column())
.get_data();
auto& nested_col = nullable_col->get_nested_column();
const auto& null_bitmap = reinterpret_cast<const vectorized::ColumnUInt8&>(
nullable_col->get_null_map_column())
.get_data();
const auto& nested_col = nullable_col->get_nested_column();

if (_opposite) {
return _base_evaluate_bit<true, true, is_and>(&nested_col, &null_bitmap, sel, size,
Expand Down Expand Up @@ -302,11 +302,13 @@ class InListPredicateBase : public ColumnPredicate {
bool evaluate_and(const segment_v2::BloomFilter* bf) const override {
if constexpr (PT == PredicateType::IN_LIST) {
// IN predicate can not use ngram bf, just return true to accept
if (bf->is_ngram_bf()) return true;
if (bf->is_ngram_bf()) {
return true;
}
HybridSetBase::IteratorBase* iter = _values->begin();
while (iter->has_next()) {
if constexpr (std::is_same_v<T, StringRef>) {
const StringRef* value = (const StringRef*)iter->get_value();
const auto* value = (const StringRef*)iter->get_value();
if (bf->test_bytes(value->data, value->size)) {
return true;
}
Expand Down Expand Up @@ -393,9 +395,9 @@ class InListPredicateBase : public ColumnPredicate {

if (column->is_column_dictionary()) {
if constexpr (std::is_same_v<T, StringRef>) {
auto* nested_col_ptr = vectorized::check_and_get_column<
const auto* nested_col_ptr = vectorized::check_and_get_column<
vectorized::ColumnDictionary<vectorized::Int32>>(column);
auto& data_array = nested_col_ptr->get_data();
const auto& data_array = nested_col_ptr->get_data();
auto segid = column->get_rowset_segment_id();
DCHECK((segid.first.hi | segid.first.mi | segid.first.lo) != 0);
auto& value_in_dict_flags = _segment_id_to_value_in_dict_flags[segid];
Expand Down Expand Up @@ -460,9 +462,9 @@ class InListPredicateBase : public ColumnPredicate {
const uint16_t* sel, uint16_t size, bool* flags) const {
if (column->is_column_dictionary()) {
if constexpr (std::is_same_v<T, StringRef>) {
auto* nested_col_ptr = vectorized::check_and_get_column<
const auto* nested_col_ptr = vectorized::check_and_get_column<
vectorized::ColumnDictionary<vectorized::Int32>>(column);
auto& data_array = nested_col_ptr->get_data();
const auto& data_array = nested_col_ptr->get_data();
auto& value_in_dict_flags =
_segment_id_to_value_in_dict_flags[column->get_rowset_segment_id()];
if (value_in_dict_flags.empty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,8 @@ struct AggregateFunctionGroupArrayIntersectData {
const T* src_data =
is_null_element ? nullptr : &(nested_column_data->get_element(offset + i));

if (set->find(src_data) || (set->contain_null() && src_data == nullptr)) {
if ((!is_null_element && set->find(src_data)) ||
(set->contain_null() && is_null_element)) {
new_set->insert(src_data);
}
}
Expand Down Expand Up @@ -424,7 +425,8 @@ class AggregateFunctionGroupArrayIntersectGeneric

for (size_t i = 0; i < arr_size; ++i) {
StringRef src = process_element(i);
if (set->find(src.data, src.size) || (set->contain_null() && src.data == nullptr)) {
if ((set->find(src.data, src.size) && src.data != nullptr) ||
(set->contain_null() && src.data == nullptr)) {
new_set->insert((void*)src.data, src.size);
}
}
Expand Down

0 comments on commit d77c3cc

Please sign in to comment.