Skip to content

Commit

Permalink
[opt](invert index) Empty strings are not written to the index in the…
Browse files Browse the repository at this point in the history
… case of TOKENIZED (apache#28822)
  • Loading branch information
zzzxl1993 committed Dec 25, 2023
1 parent 6453326 commit 89cbe2a
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 13 deletions.
19 changes: 6 additions & 13 deletions be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,6 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
}

for (int i = 0; i < count; ++i) {
new_fulltext_field(empty_value.c_str(), 0);
RETURN_IF_ERROR(add_null_document());
}
}
Expand Down Expand Up @@ -299,12 +298,9 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
auto ignore_above = std::stoi(ignore_above_value);
for (int i = 0; i < count; ++i) {
// only ignore_above UNTOKENIZED strings
if (_parser_type == InvertedIndexParserType::PARSER_NONE &&
v->get_size() > ignore_above) {
VLOG_DEBUG << "fulltext index value length can be at most "
<< ignore_above_value << ", but got "
<< "value length:" << v->get_size() << ", ignore this value";
new_fulltext_field(empty_value.c_str(), 0);
if ((_parser_type == InvertedIndexParserType::PARSER_NONE &&
v->get_size() > ignore_above) ||
(_parser_type != InvertedIndexParserType::PARSER_NONE && v->empty())) {
RETURN_IF_ERROR(add_null_document());
} else {
new_fulltext_field(v->get_data(), v->get_size());
Expand Down Expand Up @@ -352,12 +348,9 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {

auto value = join(strings, " ");
// only ignore_above UNTOKENIZED strings
if (_parser_type == InvertedIndexParserType::PARSER_NONE &&
value.length() > ignore_above) {
VLOG_DEBUG << "fulltext index value length can be at most "
<< ignore_above_value << ", but got "
<< "value length:" << value.length() << ", ignore this value";
new_fulltext_field(empty_value.c_str(), 0);
if ((_parser_type == InvertedIndexParserType::PARSER_NONE &&
value.length() > ignore_above) ||
(_parser_type != InvertedIndexParserType::PARSER_NONE && value.empty())) {
RETURN_IF_ERROR(add_null_document());
} else {
new_fulltext_field(value.c_str(), value.length());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
1

-- !sql --
0

Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.


suite("test_index_empty_string", "p0"){
    // Regression case for inverted indexes on columns that contain empty strings.
    // Column `a` is indexed without a "parser" property, column `b` with the
    // "english" parser. Each column gets one empty and one non-empty value, and a
    // `MATCH ''` count is then issued against each column.
    def indexTblName = "test_index_empty_string"

    // Drop any table left over from a previous run before re-creating it.
    sql "DROP TABLE IF EXISTS ${indexTblName}"
    // create 1 replica table
    sql """
CREATE TABLE IF NOT EXISTS ${indexTblName}(
`id` int(11) NOT NULL,
`a` text NULL DEFAULT "",
`b` text NULL DEFAULT "",
INDEX a_idx(`a`) USING INVERTED COMMENT '',
INDEX b_idx(`b`) USING INVERTED PROPERTIES("parser" = "english") COMMENT ''
) ENGINE=OLAP
DUPLICATE KEY(`id`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`id`) BUCKETS 1
PROPERTIES(
"replication_allocation" = "tag.location.default: 1"
);
"""

    // Row 1 has the empty string in `a`; row 2 has the empty string in `b`.
    sql """
INSERT INTO ${indexTblName} VALUES
(1, '', '1'),
(2, '2', '');
"""

    // The two tagged queries are kept in their original order.
    // NOTE(review): results are presumably matched by position against the
    // generated .out file - confirm before reordering.
    qt_sql "SELECT count() FROM ${indexTblName} WHERE a match '';"
    qt_sql "SELECT count() FROM ${indexTblName} WHERE b match '';"
}

0 comments on commit 89cbe2a

Please sign in to comment.