Skip to content

Commit

Permalink
[opt](invert index) Empty strings are not written to the index in the case of TOKENIZED
Browse files Browse the repository at this point in the history
  • Loading branch information
zzzxl1993 committed Dec 22, 2023
1 parent 5420575 commit 4f50a70
Showing 1 changed file with 8 additions and 14 deletions.
22 changes: 8 additions & 14 deletions be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -299,13 +299,10 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
get_parser_ignore_above_value_from_properties(_index_meta->properties());
auto ignore_above = std::stoi(ignore_above_value);
for (int i = 0; i < count; ++i) {
// only ignore_above UNTOKENIZED strings
if (_parser_type == InvertedIndexParserType::PARSER_NONE &&
v->get_size() > ignore_above) {
VLOG_DEBUG << "fulltext index value length can be at most "
<< ignore_above_value << ", but got "
<< "value length:" << v->get_size() << ", ignore this value";
new_fulltext_field(empty_value.c_str(), 0);
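// Add a null document instead of indexing the value in two cases: an UNTOKENIZED
// (PARSER_NONE) string longer than ignore_above, or an empty string when a parser is configured.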
if ((_parser_type == InvertedIndexParserType::PARSER_NONE &&
v->get_size() > ignore_above) ||
(_parser_type != InvertedIndexParserType::PARSER_NONE && v->empty())) {
RETURN_IF_ERROR(add_null_document());
} else {
new_fulltext_field(v->get_data(), v->get_size());
Expand Down Expand Up @@ -352,13 +349,10 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
}

auto value = join(strings, " ");
// only ignore_above UNTOKENIZED strings
if (_parser_type == InvertedIndexParserType::PARSER_NONE &&
value.length() > ignore_above) {
VLOG_DEBUG << "fulltext index value length can be at most "
<< ignore_above_value << ", but got "
<< "value length:" << value.length() << ", ignore this value";
new_fulltext_field(empty_value.c_str(), 0);
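// Add a null document instead of indexing the value in two cases: an UNTOKENIZED
// (PARSER_NONE) string longer than ignore_above, or an empty string when a parser is configured.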
if ((_parser_type == InvertedIndexParserType::PARSER_NONE &&
value.length() > ignore_above) ||
(_parser_type != InvertedIndexParserType::PARSER_NONE && v->empty())) {
RETURN_IF_ERROR(add_null_document());
} else {
new_fulltext_field(value.c_str(), value.length());
Expand Down

0 comments on commit 4f50a70

Please sign in to comment.