Skip to content

Commit

Permalink
[Refactor](inverted index) refactor inverted index compound predicate…
Browse files Browse the repository at this point in the history
…s evaluate logic (#38908)

This PR addresses several key issues related to compound condition
support in the inverted index, and to the optimization that lets index
evaluation skip returning to the table:

1. **Unified Handling of `expr` and `column predicate`**:
- Combined the processing of inverted index-related `column predicate`
and `expr`.
- Ensured that compound conditions involving both `column predicate` and
`expr` are processed uniformly to reduce complexity and improve
robustness.

2. **Optimized the Execution of Compound Conditions**:
- Removed the logic in `scan_operator` that normalized compound
predicates by pushing down logic to `_common_expr_ctxs_push_down` where
`expr` contexts are managed.
- Added `evaluate_inverted_index` support to the `vexpr` and function
layers, such as `function comparison` and `function collection_in`.
- Introduced new data structures in `VExprContext` to store results from
`evaluate_inverted_index`, thus facilitating quick lookup and
application of these results during execution.
  • Loading branch information
airborne12 committed Sep 10, 2024
1 parent 1d029ff commit 7330621
Show file tree
Hide file tree
Showing 108 changed files with 1,128 additions and 1,198 deletions.
6 changes: 6 additions & 0 deletions be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,10 @@ std::unique_ptr<lucene::util::Reader> InvertedIndexReader::create_reader(
return reader;
}

// Returns the index file path reported by the underlying inverted index
// file reader for this reader's index meta.
std::string InvertedIndexReader::get_index_file_path() {
    std::string index_file_path = _inverted_index_file_reader->get_index_file_path(&_index_meta);
    return index_file_path;
}

void InvertedIndexReader::get_analyse_result(std::vector<std::string>& analyse_result,
lucene::util::Reader* reader,
lucene::analysis::Analyzer* analyzer,
Expand Down Expand Up @@ -231,6 +235,8 @@ Status InvertedIndexReader::read_null_bitmap(OlapReaderStatistics* stats,
null_bitmap->runOptimize();
cache->insert(cache_key, null_bitmap, cache_handle);
FINALIZE_INPUT(null_bitmap_in);
} else {
cache->insert(cache_key, null_bitmap, cache_handle);
}
if (owned_dir) {
FINALIZE_INPUT(dir);
Expand Down
100 changes: 99 additions & 1 deletion be/src/olap/rowset/segment_v2/inverted_index_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,104 @@ class InvertedIndexIterator;
class InvertedIndexQueryCacheHandle;
class InvertedIndexFileReader;
struct InvertedIndexQueryInfo;
class InvertedIndexResultBitmap {
private:
std::shared_ptr<roaring::Roaring> _data_bitmap = nullptr;
std::shared_ptr<roaring::Roaring> _null_bitmap = nullptr;

public:
// Default constructor
InvertedIndexResultBitmap() = default;
~InvertedIndexResultBitmap() = default;

// Constructor with arguments
InvertedIndexResultBitmap(std::shared_ptr<roaring::Roaring> data_bitmap,
std::shared_ptr<roaring::Roaring> null_bitmap)
: _data_bitmap(std::move(data_bitmap)), _null_bitmap(std::move(null_bitmap)) {}

// Copy constructor
InvertedIndexResultBitmap(const InvertedIndexResultBitmap& other)
: _data_bitmap(std::make_shared<roaring::Roaring>(*other._data_bitmap)),
_null_bitmap(std::make_shared<roaring::Roaring>(*other._null_bitmap)) {}

// Move constructor
InvertedIndexResultBitmap(InvertedIndexResultBitmap&& other) noexcept
: _data_bitmap(std::move(other._data_bitmap)),
_null_bitmap(std::move(other._null_bitmap)) {}

// Copy assignment operator
InvertedIndexResultBitmap& operator=(const InvertedIndexResultBitmap& other) {
if (this != &other) { // Prevent self-assignment
_data_bitmap = std::make_shared<roaring::Roaring>(*other._data_bitmap);
_null_bitmap = std::make_shared<roaring::Roaring>(*other._null_bitmap);
}
return *this;
}

// Move assignment operator
InvertedIndexResultBitmap& operator=(InvertedIndexResultBitmap&& other) noexcept {
if (this != &other) { // Prevent self-assignment
_data_bitmap = std::move(other._data_bitmap);
_null_bitmap = std::move(other._null_bitmap);
}
return *this;
}

// Operator &=
InvertedIndexResultBitmap& operator&=(const InvertedIndexResultBitmap& other) {
if (_data_bitmap && _null_bitmap && other._data_bitmap && other._null_bitmap) {
auto new_null_bitmap = (*_data_bitmap & *other._null_bitmap) |
(*_null_bitmap & *other._data_bitmap) |
(*_null_bitmap & *other._null_bitmap);
*_data_bitmap &= *other._data_bitmap;
*_null_bitmap = std::move(new_null_bitmap);
}
return *this;
}

// Operator |=
InvertedIndexResultBitmap& operator|=(const InvertedIndexResultBitmap& other) {
if (_data_bitmap && _null_bitmap && other._data_bitmap && other._null_bitmap) {
auto new_null_bitmap = (*_null_bitmap | *other._null_bitmap) - *_data_bitmap;
*_data_bitmap |= *other._data_bitmap;
*_null_bitmap = std::move(new_null_bitmap);
}
return *this;
}

// NOT operation
const InvertedIndexResultBitmap& op_not(const roaring::Roaring* universe) const {
if (_data_bitmap && _null_bitmap) {
*_data_bitmap = *universe - *_data_bitmap - *_null_bitmap;
// The _null_bitmap remains unchanged.
}
return *this;
}

// Operator -=
InvertedIndexResultBitmap& operator-=(const InvertedIndexResultBitmap& other) {
if (_data_bitmap && _null_bitmap && other._data_bitmap && other._null_bitmap) {
*_data_bitmap -= *other._data_bitmap;
*_data_bitmap -= *other._null_bitmap;
*_null_bitmap -= *other._null_bitmap;
}
return *this;
}

void mask_out_null() {
if (_data_bitmap && _null_bitmap) {
*_data_bitmap -= *_null_bitmap;
}
}

const std::shared_ptr<roaring::Roaring>& get_data_bitmap() const { return _data_bitmap; }

const std::shared_ptr<roaring::Roaring>& get_null_bitmap() const { return _null_bitmap; }

// Check if both bitmaps are empty
bool is_empty() const { return (_data_bitmap == nullptr && _null_bitmap == nullptr); }
};

class InvertedIndexReader : public std::enable_shared_from_this<InvertedIndexReader> {
public:
explicit InvertedIndexReader(
Expand Down Expand Up @@ -136,7 +234,7 @@ class InvertedIndexReader : public std::enable_shared_from_this<InvertedIndexRea

virtual Status handle_searcher_cache(InvertedIndexCacheHandle* inverted_index_cache_handle,
OlapReaderStatistics* stats);

std::string get_index_file_path();
static Status create_index_searcher(lucene::store::Directory* dir, IndexSearcherPtr* searcher,
MemTracker* mem_tracker,
InvertedIndexReaderType reader_type);
Expand Down
Loading

0 comments on commit 7330621

Please sign in to comment.