Skip to content

Commit

Permalink
[fix](ES Catalog) Do not push down limit to ES when predicates can not…
Browse files Browse the repository at this point in the history
… be processed by ES. (#40111)
  • Loading branch information
qidaye committed Sep 2, 2024
1 parent 3ee0e2b commit ee2b51b
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 2 deletions.
4 changes: 2 additions & 2 deletions be/src/vec/exec/scan/new_es_scan_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,8 @@ Status NewEsScanNode::_init_scanners(std::list<VScannerSPtr>* scanners) {
properties[ESScanReader::KEY_BATCH_SIZE] = std::to_string(_state->batch_size());
properties[ESScanReader::KEY_HOST_PORT] = get_host_and_port(es_scan_range->es_hosts);
// push down limit to Elasticsearch
// if predicate in _conjunct_ctxs can not be processed by Elasticsearch, we can not push down limit operator to Elasticsearch
if (limit() != -1 && limit() <= _state->batch_size()) {
// If any predicate in _conjuncts cannot be processed by Elasticsearch, we cannot push the limit operator down to Elasticsearch.
if (limit() != -1 && limit() <= _state->batch_size() && _conjuncts.empty()) {
properties[ESScanReader::KEY_TERMINATE_AFTER] = std::to_string(limit());
}

Expand Down
105 changes: 105 additions & 0 deletions regression-test/data/external_table_p0/es/test_es_query.out
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,16 @@ I'm not null or empty
[{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White"
[{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White"

-- !sql_5_21 --
2022-08-08T20:10:10 2022-08-08 20
2022-08-08T20:10:10 2022-08-08 20
2022-08-08T20:10:10 2022-08-08 20
2022-08-08T20:10:10 2022-08-08 20

-- !sql_5_22 --
2022-08-08T12:10:10 2022-08-08 12
2022-08-08T12:10:10 2022-08-08 12

-- !sql_6_02 --
[1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] [{"name":"Andy","age":18},{"name":"Tim","age":28}] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] \N string1 text#1 3.14 2022-08-08T00:00 12345 2022-08-08T20:10:10

Expand Down Expand Up @@ -265,6 +275,16 @@ I'm not null or empty
[{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White"
[{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White"

-- !sql_6_21 --
2022-08-08T20:10:10 2022-08-08 20
2022-08-08T20:10:10 2022-08-08 20
2022-08-08T20:10:10 2022-08-08 20
2022-08-08T20:10:10 2022-08-08 20

-- !sql_6_22 --
2022-08-08T12:10:10 2022-08-08 12
2022-08-08T12:10:10 2022-08-08 12

-- !sql_7_02 --
[1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] [{"name":"Andy","age":18},{"name":"Tim","age":28}] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] debug \N This string can be quite lengthy string1 2022-08-08T20:10:10 text#1 3.14 2022-08-08T00:00 2022-08-08T12:10:10 1659931810000 2022-08-08T12:10:10 2022-08-08T20:10:10 12345

Expand Down Expand Up @@ -370,6 +390,22 @@ I'm not null or empty
[{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White"
[{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White"

-- !sql_7_26 --
2022-08-08T04:10:10 2022-08-08
2022-08-08T04:10:10 2022-08-08

-- !sql_7_27 --
2022-08-09T12:10:10 2022-08-09
2022-08-09T12:10:10 2022-08-09

-- !sql_7_28 --
2022-08-10T04:10:10 2022-08-10
2022-08-10T04:10:10 2022-08-10

-- !sql_7_29 --
2022-08-11T12:10:10 2022-08-11
2022-08-11T12:10:10 2022-08-11

-- !sql_7_26 --
value1 value2

Expand Down Expand Up @@ -475,6 +511,22 @@ I'm not null or empty
[{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White"
[{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White"

-- !sql_8_24 --
2022-08-08T04:10:10 2022-08-08
2022-08-08T04:10:10 2022-08-08

-- !sql_8_25 --
2022-08-09T12:10:10 2022-08-09
2022-08-09T12:10:10 2022-08-09

-- !sql_8_26 --
2022-08-10T04:10:10 2022-08-10
2022-08-10T04:10:10 2022-08-10

-- !sql_8_27 --
2022-08-11T12:10:10 2022-08-11
2022-08-11T12:10:10 2022-08-11

-- !sql01 --
["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [-1, 0, 1, 2] [0, 1, 2, 3] ["d", "e", "f"] [128, 129, -129, -130] ["192.168.0.1", "127.0.0.1"] string1 [1, 2, 3, 4] 2022-08-08 2022-08-08T12:10:10 text#1 ["2020-01-01", "2020-01-02"] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ["a", "b", "c"] [{"name":"Andy","age":18},{"name":"Tim","age":28}] 2022-08-08T12:10:10 2022-08-08T12:10:10 2022-08-08T20:10:10 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] \N [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}]

Expand Down Expand Up @@ -608,6 +660,16 @@ I'm not null or empty
[{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White"
[{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White"

-- !sql_5_21 --
2022-08-08T20:10:10 2022-08-08 20
2022-08-08T20:10:10 2022-08-08 20
2022-08-08T20:10:10 2022-08-08 20
2022-08-08T20:10:10 2022-08-08 20

-- !sql_5_22 --
2022-08-08T12:10:10 2022-08-08 12
2022-08-08T12:10:10 2022-08-08 12

-- !sql_6_02 --
[1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] [{"name":"Andy","age":18},{"name":"Tim","age":28}] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] \N string1 text#1 3.14 2022-08-08T00:00 12345 2022-08-08T20:10:10

Expand Down Expand Up @@ -680,6 +742,16 @@ I'm not null or empty
[{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White"
[{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White"

-- !sql_6_21 --
2022-08-08T20:10:10 2022-08-08 20
2022-08-08T20:10:10 2022-08-08 20
2022-08-08T20:10:10 2022-08-08 20
2022-08-08T20:10:10 2022-08-08 20

-- !sql_6_22 --
2022-08-08T12:10:10 2022-08-08 12
2022-08-08T12:10:10 2022-08-08 12

-- !sql_7_02 --
[1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] [{"name":"Andy","age":18},{"name":"Tim","age":28}] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] debug \N This string can be quite lengthy string1 2022-08-08T20:10:10 text#1 3.14 2022-08-08T00:00 2022-08-08T12:10:10 1659931810000 2022-08-08T12:10:10 2022-08-08T20:10:10 12345

Expand Down Expand Up @@ -785,6 +857,22 @@ I'm not null or empty
[{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White"
[{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White"

-- !sql_7_26 --
2022-08-08T04:10:10 2022-08-08
2022-08-08T04:10:10 2022-08-08

-- !sql_7_27 --
2022-08-09T12:10:10 2022-08-09
2022-08-09T12:10:10 2022-08-09

-- !sql_7_28 --
2022-08-10T04:10:10 2022-08-10
2022-08-10T04:10:10 2022-08-10

-- !sql_7_29 --
2022-08-11T12:10:10 2022-08-11
2022-08-11T12:10:10 2022-08-11

-- !sql_7_26 --
value1 value2

Expand Down Expand Up @@ -889,3 +977,20 @@ I'm not null or empty
[{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White"
[{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White"
[{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White"

-- !sql_8_24 --
2022-08-08T04:10:10 2022-08-08
2022-08-08T04:10:10 2022-08-08

-- !sql_8_25 --
2022-08-09T12:10:10 2022-08-09
2022-08-09T12:10:10 2022-08-09

-- !sql_8_26 --
2022-08-10T04:10:10 2022-08-10
2022-08-10T04:10:10 2022-08-10

-- !sql_8_27 --
2022-08-11T12:10:10 2022-08-11
2022-08-11T12:10:10 2022-08-11

12 changes: 12 additions & 0 deletions regression-test/suites/external_table_p0/es/test_es_query.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,8 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") {
order_qt_sql_5_18 """select message from test1 where not_null_or_empty(message)"""
order_qt_sql_5_19 """select * from test1 where esquery(c_unsigned_long, '{"match":{"c_unsigned_long":0}}')"""
order_qt_sql_5_20 """select c_person, c_user, json_extract(c_person, '\$.[0].name'), json_extract(c_user, '\$.[1].last') from test1;"""
order_qt_sql_5_21 """select test6, substring(test6, 1, 13) from test2 where substring(test6, 1, 13) = '2022-08-08 20' limit 4;"""
order_qt_sql_5_22 """select test6, substring(test6, 1, 13) from test2 where substring(test6, 1, 13) = '2022-08-08 12' limit 4;"""
try {
sql """select * from composite_type_array;"""
fail("Should not reach here")
Expand Down Expand Up @@ -243,6 +245,8 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") {
order_qt_sql_6_18 """select message from test1 where not_null_or_empty(message)"""
order_qt_sql_6_19 """select * from test1 where esquery(c_person, '{"match":{"c_person.name":"Andy"}}')"""
order_qt_sql_6_20 """select c_person, c_user, json_extract(c_person, '\$.[0].name'), json_extract(c_user, '\$.[1].last') from test1;"""
order_qt_sql_6_21 """select test6, substring(test6, 1, 13) from test2 where substring(test6, 1, 13) = '2022-08-08 20' limit 4;"""
order_qt_sql_6_22 """select test6, substring(test6, 1, 13) from test2 where substring(test6, 1, 13) = '2022-08-08 12' limit 4;"""
try {
sql """select * from composite_type_array;"""
fail("Should not reach here")
Expand Down Expand Up @@ -295,6 +299,10 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") {
order_qt_sql_7_23 """select * from test1 where level = 'debug'"""
order_qt_sql_7_24 """select * from test1 where esquery(c_float, '{"match":{"c_float":1.1}}')"""
order_qt_sql_7_25 """select c_person, c_user, json_extract(c_person, '\$.[0].name'), json_extract(c_user, '\$.[1].last') from test1;"""
order_qt_sql_7_26 """select test7,substring(test7, 1, 10) from test2 where substring(test7, 1, 10)='2022-08-08' limit 2;"""
order_qt_sql_7_27 """select test7,substring(test7, 1, 10) from test2 where substring(test7, 1, 10)='2022-08-09' limit 2;"""
order_qt_sql_7_28 """select test7,substring(test7, 1, 10) from test2 where substring(test7, 1, 10)='2022-08-10' limit 2;"""
order_qt_sql_7_29 """select test7,substring(test7, 1, 10) from test2 where substring(test7, 1, 10)='2022-08-11' limit 2;"""
try {
sql """select * from composite_type_array;"""
fail("Should not reach here")
Expand Down Expand Up @@ -347,6 +355,10 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") {
order_qt_sql_8_21 """select * from test1 where level = 'debug'"""
order_qt_sql_8_22 """select * from test1 where esquery(c_ip, '{"match":{"c_ip":"192.168.0.1"}}')"""
order_qt_sql_8_23 """select c_person, c_user, json_extract(c_person, '\$.[0].name'), json_extract(c_user, '\$.[1].last') from test1;"""
order_qt_sql_8_24 """select test7,substring(test7, 1, 10) from test2 where substring(test7, 1, 10)='2022-08-08' limit 2;"""
order_qt_sql_8_25 """select test7,substring(test7, 1, 10) from test2 where substring(test7, 1, 10)='2022-08-09' limit 2;"""
order_qt_sql_8_26 """select test7,substring(test7, 1, 10) from test2 where substring(test7, 1, 10)='2022-08-10' limit 2;"""
order_qt_sql_8_27 """select test7,substring(test7, 1, 10) from test2 where substring(test7, 1, 10)='2022-08-11' limit 2;"""
try {
sql """select * from composite_type_array;"""
fail("Should not reach here")
Expand Down

0 comments on commit ee2b51b

Please sign in to comment.