Skip to content

Commit

Permalink
[Fix](inverted index) fix comparison for string after refactor compou…
Browse files Browse the repository at this point in the history
…nd (apache#40338)

When untokenized strings exceed ignore_above, they are written as null,
which causes range query errors.
Introduced by apache#38908
  • Loading branch information
airborne12 committed Sep 10, 2024
1 parent c43ed5f commit 61b2a50
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 31 deletions.
25 changes: 5 additions & 20 deletions be/src/olap/rowset/segment_v2/segment_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -751,18 +751,7 @@ Status SegmentIterator::_extract_common_expr_columns(const vectorized::VExprSPtr
return Status::OK();
}

// Reports whether the column identified by `col_id` may be evaluated through
// an inverted index.
//
// Returns false when a runtime state is present and its query options have
// `enable_inverted_index_query` turned off, or when no inverted index iterator
// is registered for the column; returns true otherwise.
bool SegmentIterator::_check_apply_by_inverted_index(ColumnId col_id) {
    // The session-level switch is checked first so the iterator table is not
    // consulted at all when inverted index queries are disabled.
    const bool index_query_disabled =
            _opts.runtime_state &&
            !_opts.runtime_state->query_options().enable_inverted_index_query;
    if (index_query_disabled) {
        return false;
    }

    // A null iterator means this column has no inverted index.
    return _inverted_index_iterators[col_id] != nullptr;
}

bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred, bool pred_in_compound) {
bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred) {
if (_opts.runtime_state && !_opts.runtime_state->query_options().enable_inverted_index_query) {
return false;
}
Expand Down Expand Up @@ -798,15 +787,11 @@ bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred, bool

bool handle_by_fulltext = _column_has_fulltext_index(pred_column_id);
if (handle_by_fulltext) {
// when predicate in compound condition which except leafNode of andNode,
// only can apply match query for fulltext index,
// when predicate is leafNode of andNode,
// can apply 'match qeury' and 'equal query' and 'list query' for fulltext index.
return (pred_in_compound ? pred->type() == PredicateType::MATCH
: (pred->type() == PredicateType::MATCH ||
pred->type() == PredicateType::IS_NULL ||
pred->type() == PredicateType::IS_NOT_NULL ||
PredicateTypeTraits::is_equal_or_list(pred->type())));
// can apply 'match query' and 'equal query' and 'list query' for fulltext index.
return pred->type() == PredicateType::MATCH || pred->type() == PredicateType::IS_NULL ||
pred->type() == PredicateType::IS_NOT_NULL ||
PredicateTypeTraits::is_equal_or_list(pred->type());
}

return true;
Expand Down
3 changes: 1 addition & 2 deletions be/src/olap/rowset/segment_v2/segment_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -298,8 +298,7 @@ class SegmentIterator : public RowwiseIterator {

void _convert_dict_code_for_predicate_if_necessary_impl(ColumnPredicate* predicate);

bool _check_apply_by_inverted_index(ColumnId col_id);
bool _check_apply_by_inverted_index(ColumnPredicate* pred, bool pred_in_compound = false);
bool _check_apply_by_inverted_index(ColumnPredicate* pred);

void _output_index_result_column_for_expr(uint16_t* sel_rowid_idx, uint16_t select_size,
vectorized::Block* block);
Expand Down
23 changes: 14 additions & 9 deletions be/src/vec/functions/functions_comparison.h
Original file line number Diff line number Diff line change
Expand Up @@ -546,14 +546,6 @@ class FunctionComparison : public IFunction {
//NOT support comparison predicate when parser is FULLTEXT for expr inverted index evaluate.
return Status::OK();
}
std::string column_name = data_type_with_name.first;
Field param_value;
arguments[0].column->get(0, param_value);
auto param_type = arguments[0].type->get_type_as_type_descriptor().type;

std::unique_ptr<segment_v2::InvertedIndexQueryParamFactory> query_param = nullptr;
RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
param_type, &param_value, query_param));
segment_v2::InvertedIndexQueryType query_type;
std::string_view name_view(name);
if (name_view == NameEquals::name || name_view == NameNotEquals::name) {
Expand All @@ -570,6 +562,19 @@ class FunctionComparison : public IFunction {
return Status::InvalidArgument("invalid comparison op type {}", Name::name);
}

if (segment_v2::is_range_query(query_type) &&
iter->get_inverted_index_reader_type() ==
segment_v2::InvertedIndexReaderType::STRING_TYPE) {
// untokenized strings exceed ignore_above, they are written as null, causing range query errors
return Status::OK();
}
std::string column_name = data_type_with_name.first;
Field param_value;
arguments[0].column->get(0, param_value);
auto param_type = arguments[0].type->get_type_as_type_descriptor().type;
std::unique_ptr<segment_v2::InvertedIndexQueryParamFactory> query_param = nullptr;
RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
param_type, &param_value, query_param));
std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
param_type, &param_value, query_param));
Expand All @@ -585,7 +590,7 @@ class FunctionComparison : public IFunction {
bitmap_result = result;
bitmap_result.mask_out_null();

if (name == "ne") {
if (name_view == NameNotEquals::name) {
roaring::Roaring full_result;
full_result.addRange(0, num_rows);
bitmap_result.op_not(&full_result);
Expand Down

0 comments on commit 61b2a50

Please sign in to comment.