From 27f2f8e5b32170fcf9ab9ee70d82ca6d63426f15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A7=9C=E5=87=AF?= Date: Tue, 6 Aug 2024 19:53:08 +0800 Subject: [PATCH] [Refact](inverted index) refact inverted index compound predicates evaluate logic --- .../rowset/segment_v2/inverted_index_reader.h | 111 +++++++++++------- .../rowset/segment_v2/segment_iterator.cpp | 2 +- be/src/vec/exprs/vcompound_pred.h | 53 +++++---- 3 files changed, 100 insertions(+), 66 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_reader.h index 8fe3f48879354e2..1199de603d733c0 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h @@ -74,72 +74,101 @@ class InvertedIndexQueryCacheHandle; class InvertedIndexFileReader; struct InvertedIndexQueryInfo; class InvertedIndexResultBitmap { +private: + std::shared_ptr _data_bitmap; + std::shared_ptr _null_bitmap; public: - std::shared_ptr data_bitmap; - std::shared_ptr null_bitmap; + // Default constructor + InvertedIndexResultBitmap() + : _data_bitmap(nullptr), + _null_bitmap(nullptr) {} - // Constructor with default arguments - InvertedIndexResultBitmap( - std::shared_ptr data_bitmap = std::make_shared(), - std::shared_ptr null_bitmap = std::make_shared()) - : data_bitmap(std::move(data_bitmap)), null_bitmap(std::move(null_bitmap)) {} + // Constructor with arguments + InvertedIndexResultBitmap(std::shared_ptr data_bitmap, + std::shared_ptr null_bitmap) + : _data_bitmap(std::move(data_bitmap)), _null_bitmap(std::move(null_bitmap)) {} // Copy constructor InvertedIndexResultBitmap(const InvertedIndexResultBitmap& other) - : data_bitmap(std::make_shared(*other.data_bitmap)), - null_bitmap(std::make_shared(*other.null_bitmap)) {} + : _data_bitmap(std::make_shared(*other._data_bitmap)), + _null_bitmap(std::make_shared(*other._null_bitmap)) {} // Move constructor InvertedIndexResultBitmap(InvertedIndexResultBitmap&& other) noexcept - : data_bitmap(std::move(other.data_bitmap)), - null_bitmap(std::move(other.null_bitmap)) {} + : _data_bitmap(std::move(other._data_bitmap)), + _null_bitmap(std::move(other._null_bitmap)) {} + + // Copy assignment operator + InvertedIndexResultBitmap& operator=(const InvertedIndexResultBitmap& other) { + if (this != &other) { // Prevent self-assignment + _data_bitmap = std::make_shared(*other._data_bitmap); + _null_bitmap = std::make_shared(*other._null_bitmap); + } + return *this; + } // Move assignment operator InvertedIndexResultBitmap& operator=(InvertedIndexResultBitmap&& other) noexcept { if (this != &other) { // Prevent self-assignment - data_bitmap = std::move(other.data_bitmap); - null_bitmap = std::move(other.null_bitmap); + _data_bitmap = std::move(other._data_bitmap); + _null_bitmap = std::move(other._null_bitmap); } return *this; } - // Operator & - InvertedIndexResultBitmap operator&(const InvertedIndexResultBitmap& other) const { - InvertedIndexResultBitmap result; - *(result.data_bitmap) = *(this->data_bitmap) & *(other.data_bitmap); - *(result.null_bitmap) = (*(this->data_bitmap) & *(other.null_bitmap)) | - (*(this->null_bitmap) & *(other.data_bitmap)) | - (*(this->null_bitmap) & *(other.null_bitmap)); - return result; + + // Operator &= + InvertedIndexResultBitmap& operator&=(const InvertedIndexResultBitmap& other) { + if (_data_bitmap && _null_bitmap && other._data_bitmap && other._null_bitmap) { + auto new_null_bitmap = (*_data_bitmap & *other._null_bitmap) | + (*_null_bitmap & *other._data_bitmap) | + (*_null_bitmap & *other._null_bitmap); + *_data_bitmap &= *other._data_bitmap; + *_null_bitmap = std::move(new_null_bitmap); + } + return *this; } - // Operator | - InvertedIndexResultBitmap operator|(const InvertedIndexResultBitmap& other) const { - InvertedIndexResultBitmap result; - *(result.data_bitmap) = *(this->data_bitmap) | *(other.data_bitmap); - *(result.null_bitmap) = - ((*(this->null_bitmap) | *(other.null_bitmap)) - *(result.data_bitmap)); - return result; + // Operator |= + InvertedIndexResultBitmap& operator|=(const InvertedIndexResultBitmap& other) { + if (_data_bitmap && _null_bitmap && other._data_bitmap && other._null_bitmap) { + auto new_null_bitmap = (*_null_bitmap | *other._null_bitmap) - *_data_bitmap; + *_data_bitmap |= *other._data_bitmap; + *_null_bitmap = std::move(new_null_bitmap); + } + return *this; } // NOT operation - InvertedIndexResultBitmap op_not(const roaring::Roaring* universe) const { - InvertedIndexResultBitmap result; - *(result.data_bitmap) = *universe - *this->data_bitmap - *this->null_bitmap; - *(result.null_bitmap) = *this->null_bitmap; // Ensure result.null_bitmap is a copy - return result; + InvertedIndexResultBitmap& op_not(const roaring::Roaring* universe) { + if (_data_bitmap && _null_bitmap) { + *_data_bitmap = *universe - *_data_bitmap - *_null_bitmap; + // The _null_bitmap remains unchanged. + } + return *this; + } + + // Operator -= + InvertedIndexResultBitmap& operator-=(const InvertedIndexResultBitmap& other) { + if (_data_bitmap && _null_bitmap && other._data_bitmap && other._null_bitmap) { + *_data_bitmap -= *other._data_bitmap; + *_data_bitmap -= *other._null_bitmap; + *_null_bitmap -= *other._null_bitmap; + } + return *this; } - // Operator - - InvertedIndexResultBitmap operator-(const InvertedIndexResultBitmap& other) const { - InvertedIndexResultBitmap result; - *(result.data_bitmap) = *(this->data_bitmap) - *(other.data_bitmap) - *(this->null_bitmap) - - *(other.null_bitmap); - *(result.null_bitmap) = *(this->null_bitmap) - *(other.null_bitmap); - return result; + std::shared_ptr get_data_bitmap() { + return _data_bitmap; + } + + std::shared_ptr get_null_bitmap() { + return _null_bitmap; } // Check if both bitmaps are empty - bool is_empty() const { return data_bitmap->isEmpty() && null_bitmap->isEmpty(); } + bool is_empty() const { + return (_data_bitmap == nullptr && _null_bitmap == nullptr); + } }; class InvertedIndexReader : public std::enable_shared_from_this { diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 0ebc1be5c79c29d..2d2cabecd865654 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -2749,7 +2749,7 @@ void SegmentIterator::_output_index_result_column_for_expr(uint16_t* sel_rowid_i auto inverted_index_result_column_for_exprs = expr_ctx->get_inverted_index_result_column(); for (auto& inverted_index_result_bitmap_for_expr : inverted_index_result_bitmap_for_exprs) { const auto* expr = inverted_index_result_bitmap_for_expr.first; - auto index_result_bitmap = inverted_index_result_bitmap_for_expr.second.data_bitmap; + auto index_result_bitmap = inverted_index_result_bitmap_for_expr.second.get_data_bitmap(); auto index_result_column = vectorized::ColumnUInt8::create(); vectorized::ColumnUInt8::Container& vec_match_pred = index_result_column->get_data(); vec_match_pred.resize(block->rows()); diff --git a/be/src/vec/exprs/vcompound_pred.h b/be/src/vec/exprs/vcompound_pred.h index 5a9fd9ff8b61ef3..d06b324df95ba44 100644 --- a/be/src/vec/exprs/vcompound_pred.h +++ b/be/src/vec/exprs/vcompound_pred.h @@ -56,62 +56,67 @@ class VCompoundPred : public VectorizedFnCall { Status evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) const override { - bool all_pass = true; segment_v2::InvertedIndexResultBitmap res; - if (_op == TExprOpcode::COMPOUND_OR) { - for (auto child : _children) { - Status st = child->evaluate_inverted_index(context, segment_num_rows); - if (!st.ok()) { + bool all_pass = true; + + switch (_op) { + case TExprOpcode::COMPOUND_OR: { + for (const auto& child : _children) { + if (Status st = child->evaluate_inverted_index(context, segment_num_rows); + !st.ok()) { all_pass = false; continue; } if (context->has_inverted_index_result_for_expr(child.get())) { - auto index_result = context->get_inverted_index_result_for_expr(child.get()); - res = res | index_result; // Union operation + res |= context->get_inverted_index_result_for_expr(child.get()); } else { - //column has no inverted index all_pass = false; } } - } else if (_op == TExprOpcode::COMPOUND_AND) { - bool first = true; - for (auto child : _children) { - Status st = child->evaluate_inverted_index(context, segment_num_rows); - if (!st.ok()) { + break; + } + case TExprOpcode::COMPOUND_AND: { + for (const auto& child : _children) { + if (Status st = child->evaluate_inverted_index(context, segment_num_rows); + !st.ok()) { all_pass = false; continue; } if (context->has_inverted_index_result_for_expr(child.get())) { auto index_result = context->get_inverted_index_result_for_expr(child.get()); - if (first) { + if (res.is_empty()) { res = std::move(index_result); - first = false; } else { - res = res & index_result; + res &= index_result; + } + + if (res.get_data_bitmap()->isEmpty()) { + break; // Early exit if result is empty } } else { all_pass = false; } - if (res.data_bitmap->isEmpty()) { - return Status::OK(); // Early exit if result is empty - } } - } else if (_op == TExprOpcode::COMPOUND_NOT) { - auto child = _children[0]; + break; + } + case TExprOpcode::COMPOUND_NOT: { + const auto& child = _children[0]; Status st = child->evaluate_inverted_index(context, segment_num_rows); if (!st.ok()) { return st; } - //all_pass = false; + if (context->has_inverted_index_result_for_expr(child.get())) { auto index_result = context->get_inverted_index_result_for_expr(child.get()); roaring::Roaring full_result; full_result.addRange(0, segment_num_rows); - res = index_result.op_not(&full_result); + res = std::move(index_result.op_not(&full_result)); } else { all_pass = false; } - } else { + break; + } + default: return Status::NotSupported( "Compound operator must be AND, OR, or NOT to execute with inverted index."); }