Skip to content

Commit

Permalink
[fix](inverted index) Cloud mode supports Chinese tokenizer with default conversion to lowercase #32841 (#34973)
Browse files Browse the repository at this point in the history
  • Loading branch information
zzzxl1993 authored May 23, 2024
1 parent ac0176e commit 3c3a20b
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 16 deletions.
5 changes: 2 additions & 3 deletions be/src/olap/inverted_index_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,11 @@ std::string get_parser_ignore_above_value_from_properties(
template <bool ReturnTrue = false>
std::string get_parser_lowercase_from_properties(
const std::map<std::string, std::string>& properties) {
DBUG_EXECUTE_IF("inverted_index_parser.get_parser_lowercase_from_properties", { return ""; })

if (properties.find(INVERTED_INDEX_PARSER_LOWERCASE_KEY) != properties.end()) {
return properties.at(INVERTED_INDEX_PARSER_LOWERCASE_KEY);
} else {
DBUG_EXECUTE_IF("inverted_index_parser.get_parser_lowercase_from_properties",
{ return ""; })

if constexpr (ReturnTrue) {
return INVERTED_INDEX_PARSER_TRUE;
} else {
Expand Down
13 changes: 10 additions & 3 deletions be/src/olap/tablet_schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -614,15 +614,22 @@ void TabletIndex::to_schema_pb(TabletIndexPB* index) const {
}
index->set_index_type(_index_type);
for (const auto& kv : _properties) {
DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", {
if (kv.first == INVERTED_INDEX_PARSER_LOWERCASE_KEY) {
continue;
}
})
(*index->mutable_properties())[kv.first] = kv.second;
}

DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", { return; })

// lowercase by default
if (!_properties.contains(INVERTED_INDEX_PARSER_LOWERCASE_KEY)) {
(*index->mutable_properties())[INVERTED_INDEX_PARSER_LOWERCASE_KEY] =
INVERTED_INDEX_PARSER_TRUE;
if (!_properties.empty()) {
if (!_properties.contains(INVERTED_INDEX_PARSER_LOWERCASE_KEY)) {
(*index->mutable_properties())[INVERTED_INDEX_PARSER_LOWERCASE_KEY] =
INVERTED_INDEX_PARSER_TRUE;
}
}
}

Expand Down
8 changes: 8 additions & 0 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/Index.java
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,14 @@ public Index(long indexId, String indexName, List<String> columns,
this.indexType = indexType;
this.properties = properties;
this.comment = comment;
if (indexType == IndexDef.IndexType.INVERTED) {
if (this.properties != null && !this.properties.isEmpty()) {
String key = InvertedIndexUtil.INVERTED_INDEX_PARSER_LOWERCASE_KEY;
if (!properties.containsKey(key)) {
this.properties.put(key, "true");
}
}
}
}

public Index() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ public void testFetchResult() throws AnalysisException {
Assert.assertEquals(procResult.getRows().get(1).get(5), "col_2");
Assert.assertEquals(procResult.getRows().get(1).get(11), "INVERTED");
Assert.assertEquals(procResult.getRows().get(1).get(12), "inverted index on col_2");
Assert.assertEquals(procResult.getRows().get(1).get(13), "(\"parser\" = \"unicode\")");
Assert.assertEquals(procResult.getRows().get(1).get(13), "(\"parser\" = \"unicode\", \"lower_case\" = \"true\")");

Assert.assertEquals(procResult.getRows().get(2).get(0), "tbl_test_indexes_proc");
Assert.assertEquals(procResult.getRows().get(2).get(1), "3");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,11 @@ suite("test_index_lowercase_fault_injection") {
COMMENT "OLAP"
DISTRIBUTED BY HASH(`@timestamp`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
"replication_allocation" = "tag.location.default: 1",
"compaction_policy" = "time_series",
"time_series_compaction_goal_size_mbytes" = "1024",
"time_series_compaction_file_count_threshold" = "20",
"time_series_compaction_time_threshold_seconds" = "3600"
);
"""
}
Expand All @@ -55,14 +59,14 @@ suite("test_index_lowercase_fault_injection") {
sql """ INSERT INTO ${testTable} VALUES (893964653, '232.0.0.0', 'GET /images/hm_bg.jpg HTTP/1.0', 200, 3781); """

sql 'sync'

qt_sql """ select count() from ${testTable} where (request match 'HTTP'); """
qt_sql """ select count() from ${testTable} where (request match 'http'); """
} finally {
GetDebugPoint().disableDebugPointForAllBEs("inverted_index_parser.get_parser_lowercase_from_properties")
GetDebugPoint().disableDebugPointForAllBEs("tablet_schema.to_schema_pb")
}

qt_sql """ select count() from ${testTable} where (request match 'HTTP'); """
qt_sql """ select count() from ${testTable} where (request match 'http'); """

sql """ INSERT INTO ${testTable} VALUES (893964672, '26.1.0.0', 'GET /images/hm_bg.jpg HTTP/1.0', 304, 0); """
sql """ INSERT INTO ${testTable} VALUES (893964672, '26.1.0.0', 'GET /images/hm_bg.jpg HTTP/1.0', 304, 0); """
sql """ INSERT INTO ${testTable} VALUES (893964653, '232.0.0.0', 'GET /images/hm_bg.jpg HTTP/1.0', 200, 3781); """
Expand Down
10 changes: 5 additions & 5 deletions regression-test/suites/index_p0/test_index_meta.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ suite("index_meta", "p0") {
assertEquals(show_result[1][4], "name")
assertEquals(show_result[1][10], "INVERTED")
assertEquals(show_result[1][11], "index for name")
assertEquals(show_result[1][12], "(\"parser\" = \"none\")")
assertEquals(show_result[1][12], "(\"parser\" = \"none\", \"lower_case\" = \"true\")")

// add index on column description
sql "create index idx_desc on ${tableName}(description) USING INVERTED PROPERTIES(\"parser\"=\"standard\") COMMENT 'index for description';"
Expand All @@ -90,12 +90,12 @@ suite("index_meta", "p0") {
assertEquals(show_result[1][4], "name")
assertEquals(show_result[1][10], "INVERTED")
assertEquals(show_result[1][11], "index for name")
assertEquals(show_result[1][12], "(\"parser\" = \"none\")")
assertEquals(show_result[1][12], "(\"parser\" = \"none\", \"lower_case\" = \"true\")")
assertEquals(show_result[2][2], "idx_desc")
assertEquals(show_result[2][4], "description")
assertEquals(show_result[2][10], "INVERTED")
assertEquals(show_result[2][11], "index for description")
assertEquals(show_result[2][12], "(\"parser\" = \"standard\")")
assertEquals(show_result[2][12], "(\"parser\" = \"standard\", \"lower_case\" = \"true\")")

// drop index
// add index on column description
Expand All @@ -114,7 +114,7 @@ suite("index_meta", "p0") {
assertEquals(show_result[1][4], "description")
assertEquals(show_result[1][10], "INVERTED")
assertEquals(show_result[1][11], "index for description")
assertEquals(show_result[1][12], "(\"parser\" = \"standard\")")
assertEquals(show_result[1][12], "(\"parser\" = \"standard\", \"lower_case\" = \"true\")")

// add index on column description
sql "create index idx_name on ${tableName}(name) USING INVERTED COMMENT 'new index for name';"
Expand All @@ -133,7 +133,7 @@ suite("index_meta", "p0") {
assertEquals(show_result[1][4], "description")
assertEquals(show_result[1][10], "INVERTED")
assertEquals(show_result[1][11], "index for description")
assertEquals(show_result[1][12], "(\"parser\" = \"standard\")")
assertEquals(show_result[1][12], "(\"parser\" = \"standard\", \"lower_case\" = \"true\")")
assertEquals(show_result[2][2], "idx_name")
assertEquals(show_result[2][4], "name")
assertEquals(show_result[2][10], "INVERTED")
Expand Down

0 comments on commit 3c3a20b

Please sign in to comment.