Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[fix](ES Catalog)Fix int parse error when querying by doc_values (#40385) #40522

Merged
merged 1 commit into from
Sep 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 28 additions & 20 deletions be/src/exec/es/es_scroll_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -383,31 +383,39 @@ Status insert_int_value(const rapidjson::Value& col, PrimitiveType type,
return Status::OK();
}

if (pure_doc_value && col.IsArray() && !col.Empty()) {
RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type);
T value = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
auto parse_and_insert_data = [&](const rapidjson::Value& col_value) -> Status {
StringParser::ParseResult result;
std::string val = col_value.GetString();
// ES allows inserting numbers and characters containing decimals in numeric types.
// To parse these numbers in Doris, we remove the decimals here.
size_t pos = val.find('.');
if (pos != std::string::npos) {
val = val.substr(0, pos);
}
size_t len = val.length();
T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
RETURN_ERROR_IF_PARSING_FAILED(result, col_value, type);

col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0);
return Status::OK();
};

if (pure_doc_value && col.IsArray() && !col.Empty()) {
if (col.IsNumber()) {
RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type);
T value = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
return Status::OK();
} else {
RETURN_ERROR_IF_COL_IS_ARRAY(col[0], type, true);
RETURN_ERROR_IF_COL_IS_NOT_STRING(col[0], type);
return parse_and_insert_data(col[0]);
}
}

RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);

StringParser::ParseResult result;
std::string val = col.GetString();
// ES allows inserting numbers and characters containing decimals in numeric types.
// To parse these numbers in Doris, we remove the decimals here.
size_t pos = val.find(".");
if (pos != std::string::npos) {
val = val.substr(0, pos);
}
size_t len = val.length();
T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
RETURN_ERROR_IF_PARSING_FAILED(result, col, type);

col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0);

return Status::OK();
return parse_and_insert_data(col);
}

// Constructs a ScrollParser with _size and _line_index both initialized to 0.
// NOTE(review): doc_value_mode is accepted but not referenced in the initializer
// list or the (empty) body shown here — confirm that ignoring it is intentional.
ScrollParser::ScrollParser(bool doc_value_mode) : _size(0), _line_index(0) {}
Expand Down
152 changes: 152 additions & 0 deletions regression-test/data/external_table_p0/es/test_es_query.out
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,13 @@ text3_4*5
text3_4*5
text_ignore_above_10

-- !sql11 --
2022-08-08T12:10:10
2022-08-09T12:10:10
2022-08-10T12:10:10
2022-08-11T12:10:10
2022-08-11T12:10:10

-- !sql20 --
["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [-1, 0, 1, 2] [0, 1, 2, 3] ["d", "e", "f"] [128, 129, -129, -130] ["192.168.0.1", "127.0.0.1"] string1 [1, 2, 3, 4] 2022-08-08 2022-08-08T12:10:10 text#1 ["2020-01-01", "2020-01-02"] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ["a", "b", "c"] [{"name":"Andy","age":18},{"name":"Tim","age":28}] 2022-08-08T12:10:10 2022-08-08T12:10:10 2022-08-08T20:10:10 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}]

Expand Down Expand Up @@ -81,6 +88,13 @@ text_ignore_above_10
[{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White"
[{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White"

-- !sql25 --
2022-08-08T12:10:10
2022-08-09T12:10:10
2022-08-10T12:10:10
2022-08-11T12:10:10
2022-08-11T12:10:10

-- !sql_5_02 --
[1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] [{"name":"Andy","age":18},{"name":"Tim","age":28}] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] \N string1 text#1 3.14 2022-08-08T00:00 12345 2022-08-08T20:10:10

Expand Down Expand Up @@ -182,6 +196,20 @@ text2
text3_4*5
text_ignore_above_10

-- !sql_5_25 --
2022-08-08T12:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10

-- !sql_5_26 --
2022-08-08T12:10:10
2022-08-08T12:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10

-- !sql_6_02 --
[1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] [{"name":"Andy","age":18},{"name":"Tim","age":28}] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] \N string1 text#1 3.14 2022-08-08T00:00 12345 2022-08-08T20:10:10

Expand Down Expand Up @@ -283,6 +311,20 @@ text2
text3_4*5
text_ignore_above_10

-- !sql_6_25 --
2022-08-08T12:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10

-- !sql_6_26 --
2022-08-08T12:10:10
2022-08-08T12:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10

-- !sql_7_02 --
[1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] [{"name":"Andy","age":18},{"name":"Tim","age":28}] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] debug \N This string can be quite lengthy string1 2022-08-08T20:10:10 text#1 3.14 2022-08-08T00:00 2022-08-08T12:10:10 1659931810000 2022-08-08T12:10:10 2022-08-08T20:10:10 12345

Expand Down Expand Up @@ -423,6 +465,23 @@ text3_4*5
text3_4*5
text_ignore_above_10

-- !sql_7_32 --
1659931810000
1660018210000
1660104610000
1660191010000
1660191010000

-- !sql_7_33 --
1659931810000
1659931810000
1660018210000
1660018210000
1660104610000
1660104610000
1660191010000
1660191010000

-- !sql_7_50 --
value1 value2

Expand Down Expand Up @@ -563,6 +622,23 @@ text3_4*5
text3_4*5
text_ignore_above_10

-- !sql_8_30 --
1659931810000
1660018210000
1660104610000
1660191010000
1660191010000

-- !sql_8_31 --
1659931810000
1659931810000
1660018210000
1660018210000
1660104610000
1660104610000
1660191010000
1660191010000

-- !sql01 --
["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [-1, 0, 1, 2] [0, 1, 2, 3] ["d", "e", "f"] [128, 129, -129, -130] ["192.168.0.1", "127.0.0.1"] string1 [1, 2, 3, 4] 2022-08-08 2022-08-08T12:10:10 text#1 ["2020-01-01", "2020-01-02"] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ["a", "b", "c"] [{"name":"Andy","age":18},{"name":"Tim","age":28}] 2022-08-08T12:10:10 2022-08-08T12:10:10 2022-08-08T20:10:10 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] \N [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}]

Expand Down Expand Up @@ -618,6 +694,13 @@ text3_4*5
text3_4*5
text_ignore_above_10

-- !sql11 --
2022-08-08T12:10:10
2022-08-09T12:10:10
2022-08-10T12:10:10
2022-08-11T12:10:10
2022-08-11T12:10:10

-- !sql20 --
["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [-1, 0, 1, 2] [0, 1, 2, 3] ["d", "e", "f"] [128, 129, -129, -130] ["192.168.0.1", "127.0.0.1"] string1 [1, 2, 3, 4] 2022-08-08 2022-08-08T12:10:10 text#1 ["2020-01-01", "2020-01-02"] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ["a", "b", "c"] [{"name":"Andy","age":18},{"name":"Tim","age":28}] 2022-08-08T12:10:10 2022-08-08T12:10:10 2022-08-08T20:10:10 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}]

Expand Down Expand Up @@ -645,6 +728,13 @@ text_ignore_above_10
[{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White"
[{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White"

-- !sql25 --
2022-08-08T12:10:10
2022-08-09T12:10:10
2022-08-10T12:10:10
2022-08-11T12:10:10
2022-08-11T12:10:10

-- !sql_5_02 --
[1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] [{"name":"Andy","age":18},{"name":"Tim","age":28}] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] \N string1 text#1 3.14 2022-08-08T00:00 12345 2022-08-08T20:10:10

Expand Down Expand Up @@ -746,6 +836,20 @@ text2
text3_4*5
text_ignore_above_10

-- !sql_5_25 --
2022-08-08T12:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10

-- !sql_5_26 --
2022-08-08T12:10:10
2022-08-08T12:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10

-- !sql_6_02 --
[1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] [{"name":"Andy","age":18},{"name":"Tim","age":28}] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] \N string1 text#1 3.14 2022-08-08T00:00 12345 2022-08-08T20:10:10

Expand Down Expand Up @@ -847,6 +951,20 @@ text2
text3_4*5
text_ignore_above_10

-- !sql_6_25 --
2022-08-08T12:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10

-- !sql_6_26 --
2022-08-08T12:10:10
2022-08-08T12:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10
2022-08-08T20:10:10

-- !sql_7_02 --
[1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] [{"name":"Andy","age":18},{"name":"Tim","age":28}] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] debug \N This string can be quite lengthy string1 2022-08-08T20:10:10 text#1 3.14 2022-08-08T00:00 2022-08-08T12:10:10 1659931810000 2022-08-08T12:10:10 2022-08-08T20:10:10 12345

Expand Down Expand Up @@ -987,6 +1105,23 @@ text3_4*5
text3_4*5
text_ignore_above_10

-- !sql_7_32 --
1659931810000
1660018210000
1660104610000
1660191010000
1660191010000

-- !sql_7_33 --
1659931810000
1659931810000
1660018210000
1660018210000
1660104610000
1660104610000
1660191010000
1660191010000

-- !sql_7_50 --
value1 value2

Expand Down Expand Up @@ -1127,3 +1262,20 @@ text3_4*5
text3_4*5
text_ignore_above_10

-- !sql_8_30 --
1659931810000
1660018210000
1660104610000
1660191010000
1660191010000

-- !sql_8_31 --
1659931810000
1659931810000
1660018210000
1660018210000
1660104610000
1660104610000
1660191010000
1660191010000

10 changes: 10 additions & 0 deletions regression-test/suites/external_table_p0/es/test_es_query.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -177,12 +177,14 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") {
order_qt_sql08 """select c_person, c_user, json_extract(c_person, '\$.[0].name'), json_extract(c_user, '\$.[1].last') from test_v1;"""
order_qt_sql09 """select test1 from test_v1;"""
order_qt_sql10 """select test2 from test_v1;"""
order_qt_sql11 """select test6 from test_v1;"""

order_qt_sql20 """select * from test_v2 where test2='text#1'"""
order_qt_sql21 """select * from test_v2 where esquery(test2, '{"match":{"test2":"text#1"}}')"""
order_qt_sql22 """select test4,test5,test6,test7,test8 from test_v2 order by test8"""
order_qt_sql23 """select * from test_v2 where esquery(c_long, '{"term":{"c_long":"-1"}}');"""
order_qt_sql24 """select c_person, c_user, json_extract(c_person, '\$.[0].name'), json_extract(c_user, '\$.[1].last') from test_v2;"""
order_qt_sql25 """select test6 from test_v2;"""

sql """switch test_es_query_es5"""
order_qt_sql_5_02 """select * from test1 where test2='text#1'"""
Expand All @@ -208,6 +210,8 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") {
order_qt_sql_5_22 """select test6, substring(test6, 1, 13) from test2 where substring(test6, 1, 13) = '2022-08-08 12' limit 4;"""
order_qt_sql_5_23 """select test1 from test1;"""
order_qt_sql_5_24 """select test2 from test1;"""
order_qt_sql_5_25 """select test6 from test1;"""
order_qt_sql_5_26 """select test6 from test2;"""
try {
sql """select * from composite_type_array;"""
fail("Should not reach here")
Expand Down Expand Up @@ -241,6 +245,8 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") {
order_qt_sql_6_22 """select test6, substring(test6, 1, 13) from test2 where substring(test6, 1, 13) = '2022-08-08 12' limit 4;"""
order_qt_sql_6_23 """select test1 from test1;"""
order_qt_sql_6_24 """select test2 from test1;"""
order_qt_sql_6_25 """select test6 from test1;"""
order_qt_sql_6_26 """select test6 from test2;"""
try {
sql """select * from composite_type_array;"""
fail("Should not reach here")
Expand Down Expand Up @@ -299,6 +305,8 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") {
order_qt_sql_7_29 """select test7,substring(test7, 1, 10) from test2 where substring(test7, 1, 10)='2022-08-11' limit 2;"""
order_qt_sql_7_30 """select test1 from test1;"""
order_qt_sql_7_31 """select test2 from test1;"""
order_qt_sql_7_32 """select test6 from test1;"""
order_qt_sql_7_33 """select test6 from test2;"""
try {
sql """select * from composite_type_array;"""
fail("Should not reach here")
Expand Down Expand Up @@ -357,6 +365,8 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") {
order_qt_sql_8_27 """select test7,substring(test7, 1, 10) from test2 where substring(test7, 1, 10)='2022-08-11' limit 2;"""
order_qt_sql_8_28 """select test1 from test1;"""
order_qt_sql_8_29 """select test2 from test1;"""
order_qt_sql_8_30 """select test6 from test1;"""
order_qt_sql_8_31 """select test6 from test2;"""
try {
sql """select * from composite_type_array;"""
fail("Should not reach here")
Expand Down
Loading