diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp index c9a9cf4795a220..0949d708742e27 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp @@ -251,7 +251,6 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { } for (int i = 0; i < count; ++i) { - new_fulltext_field(empty_value.c_str(), 0); RETURN_IF_ERROR(add_null_document()); } } @@ -299,12 +298,9 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { auto ignore_above = std::stoi(ignore_above_value); for (int i = 0; i < count; ++i) { // only ignore_above UNTOKENIZED strings - if (_parser_type == InvertedIndexParserType::PARSER_NONE && - v->get_size() > ignore_above) { - VLOG_DEBUG << "fulltext index value length can be at most " - << ignore_above_value << ", but got " - << "value length:" << v->get_size() << ", ignore this value"; - new_fulltext_field(empty_value.c_str(), 0); + if ((_parser_type == InvertedIndexParserType::PARSER_NONE && + v->get_size() > ignore_above) || + (_parser_type != InvertedIndexParserType::PARSER_NONE && v->empty())) { RETURN_IF_ERROR(add_null_document()); } else { new_fulltext_field(v->get_data(), v->get_size()); @@ -352,12 +348,9 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { auto value = join(strings, " "); // only ignore_above UNTOKENIZED strings - if (_parser_type == InvertedIndexParserType::PARSER_NONE && - value.length() > ignore_above) { - VLOG_DEBUG << "fulltext index value length can be at most " - << ignore_above_value << ", but got " - << "value length:" << value.length() << ", ignore this value"; - new_fulltext_field(empty_value.c_str(), 0); + if ((_parser_type == InvertedIndexParserType::PARSER_NONE && + value.length() > ignore_above) || + (_parser_type != InvertedIndexParserType::PARSER_NONE && value.empty())) { 
RETURN_IF_ERROR(add_null_document()); } else { new_fulltext_field(value.c_str(), value.length()); diff --git a/regression-test/data/inverted_index_p0/test_index_empty_string.out b/regression-test/data/inverted_index_p0/test_index_empty_string.out new file mode 100644 index 00000000000000..70b6b599194790 --- /dev/null +++ b/regression-test/data/inverted_index_p0/test_index_empty_string.out @@ -0,0 +1,7 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +1 + +-- !sql -- +0 + diff --git a/regression-test/suites/inverted_index_p0/test_index_empty_string.groovy b/regression-test/suites/inverted_index_p0/test_index_empty_string.groovy new file mode 100644 index 00000000000000..2cf1d844d2bca0 --- /dev/null +++ b/regression-test/suites/inverted_index_p0/test_index_empty_string.groovy @@ -0,0 +1,53 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+
+// Checks how inverted indexes treat empty strings: column `a` is indexed with
+// no "parser" property, column `b` is indexed with the "english" parser.
+suite("test_index_empty_string", "p0"){
+    def indexTblName = "test_index_empty_string"
+
+    sql "DROP TABLE IF EXISTS ${indexTblName}"
+    // create 1 replica table
+    sql """
+	CREATE TABLE IF NOT EXISTS ${indexTblName}(
+		`id` int(11) NOT NULL,
+		`a` text NULL DEFAULT "",
+		`b` text NULL DEFAULT "",
+		INDEX a_idx(`a`) USING INVERTED COMMENT '',
+		INDEX b_idx(`b`) USING INVERTED PROPERTIES("parser" = "english") COMMENT ''
+	) ENGINE=OLAP
+	DUPLICATE KEY(`id`)
+	COMMENT 'OLAP'
+	DISTRIBUTED BY HASH(`id`) BUCKETS 1
+	PROPERTIES(
+ 		"replication_allocation" = "tag.location.default: 1"
+	);
+    """
+
+    // Each row leaves exactly one of the two indexed columns empty.
+    sql """
+    INSERT INTO $indexTblName VALUES
+    (1, '', '1'),
+    (2, '2', '');
+    """
+
+    // Expected counts are recorded in test_index_empty_string.out:
+    // 1 for the match on `a`, 0 for the match on `b`.
+    qt_sql "SELECT count() FROM $indexTblName WHERE a match '';"
+    qt_sql "SELECT count() FROM $indexTblName WHERE b match '';"
+}