From e92dc19f7f6823b9d6d0cb622aa8de6309c0d515 Mon Sep 17 00:00:00 2001
From: wuwenchi <wuwenchihdu@hotmail.com>
Date: Thu, 21 Dec 2023 19:03:13 +0800
Subject: [PATCH 1/6] [fix](hive) add support for quoteChar and seperatorChar
 for hive (branch-2.0) (#28703)#28703

bp #28613
---
 .../scripts/create_preinstalled_table.hql     |  8 +++++
 .../doris/planner/external/HiveScanNode.java  | 17 ++++++++-
 .../hive/test_hive_serde_prop.out             |  4 +++
 .../hive/test_hive_serde_prop.groovy          | 36 +++++++++++++++++++
 4 files changed, 64 insertions(+), 1 deletion(-)
 create mode 100644 regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
 create mode 100644 regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy

diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
index 4e80d7466d2f91..e798ecd7f2bb9c 100644
--- a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
+++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
@@ -1798,3 +1798,11 @@ create table stats_test2 (id INT, value STRING) STORED AS PARQUET;
 
 insert into stats_test1 values (1, 'name1'), (2, 'name2'), (3, 'name3');
 INSERT INTO stats_test2 VALUES (1, ';'), (2, '\*');
+
+create table employee_gz(name string,salary string)
+row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
+with serdeproperties 
+('quoteChar'='\"'
+,'seperatorChar'=',');
+
+insert into employee_gz values ('a', '1.1'), ('b', '2.2');
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java
index 943d30017e7c2a..58b93112477b40 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java
@@ -76,6 +76,9 @@ public class HiveScanNode extends FileQueryScanNode {
     public static final String DEFAULT_FIELD_DELIMITER = "\1"; // "\x01"
     public static final String PROP_LINE_DELIMITER = "line.delim";
     public static final String DEFAULT_LINE_DELIMITER = "\n";
+    public static final String PROP_SEPERATOR_CHAR = "seperatorChar";
+    public static final String PROP_QUOTA_CHAR = "quoteChar";
+
 
     public static final String PROP_COLLECTION_DELIMITER_HIVE2 = "colelction.delim";
     public static final String PROP_COLLECTION_DELIMITER_HIVE3 = "collection.delim";
@@ -362,7 +365,16 @@ protected Map<String, String> getLocationProperties() throws UserException  {
     protected TFileAttributes getFileAttributes() throws UserException {
         TFileTextScanRangeParams textParams = new TFileTextScanRangeParams();
         java.util.Map<String, String> delimiter = hmsTable.getRemoteTable().getSd().getSerdeInfo().getParameters();
-        textParams.setColumnSeparator(delimiter.getOrDefault(PROP_FIELD_DELIMITER, DEFAULT_FIELD_DELIMITER));
+        if (delimiter.containsKey(PROP_FIELD_DELIMITER)) {
+            textParams.setColumnSeparator(delimiter.get(PROP_FIELD_DELIMITER));
+        } else if (delimiter.containsKey(PROP_SEPERATOR_CHAR)) {
+            textParams.setColumnSeparator(delimiter.get(PROP_SEPERATOR_CHAR));
+        } else {
+            textParams.setColumnSeparator(DEFAULT_FIELD_DELIMITER);
+        }
+        if (delimiter.containsKey(PROP_QUOTA_CHAR)) {
+            textParams.setEnclose(delimiter.get(PROP_QUOTA_CHAR).getBytes()[0]);
+        }
         textParams.setLineDelimiter(delimiter.getOrDefault(PROP_LINE_DELIMITER, DEFAULT_LINE_DELIMITER));
         textParams.setMapkvDelimiter(delimiter.getOrDefault(PROP_MAP_KV_DELIMITER, DEFAULT_MAP_KV_DELIMITER));
 
@@ -377,6 +389,9 @@ protected TFileAttributes getFileAttributes() throws UserException {
         TFileAttributes fileAttributes = new TFileAttributes();
         fileAttributes.setTextParams(textParams);
         fileAttributes.setHeaderType("");
+        if (textParams.isSet(TFileTextScanRangeParams._Fields.ENCLOSE)) {
+            fileAttributes.setTrimDoubleQuotes(true);
+        }
         return fileAttributes;
     }
 
diff --git a/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
new file mode 100644
index 00000000000000..1cde2baec27924
--- /dev/null
+++ b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
@@ -0,0 +1,4 @@
+test_hive_serde_prop.out -- This file is automatically generated. You should know what you did if you want to edit this
+-- !1 --
+a	1.1
+b	2.2
diff --git a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
new file mode 100644
index 00000000000000..41d7056d20832b
--- /dev/null
+++ b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
@@ -0,0 +1,36 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_serde_prop", "external_docker,hive,external_docker_hive,p0,external") {
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String catalog_name = "test_hive_serde_prop"
+        String ex_db_name = "`stats_test`"
+        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+        String hms_port = context.config.otherConfigs.get("hms_port")
+
+        sql """drop catalog if exists ${catalog_name} """
+
+        sql """CREATE CATALOG ${catalog_name} PROPERTIES (
+                'type'='hms',
+                'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}',
+                'hadoop.username' = 'hive'
+            );"""
+
+		qt_1 """select * from ${catalog_name}.${ex_db_name}.employee_gz order by name;"""
+    }
+}

From 30934ff0c9eb2965a5891463801a288e87b5b7af Mon Sep 17 00:00:00 2001
From: bobhan1 <bh2444151092@outlook.com>
Date: Thu, 21 Dec 2023 20:15:24 +0800
Subject: [PATCH 2/6] [branch-2.0-fix](partial update) Don't use delete bitmap
 to mark delete for rows with delete sign when sequence column doesn't exist
 (#28800)

---
 .../olap/rowset/segment_v2/segment_writer.cpp |  30 ----
 be/src/olap/tablet.cpp                        |   8 -
 be/src/olap/tablet_meta.h                     |   1 -
 .../test_partial_update_delete_sign.out       |   2 -
 .../test_delete_sign_delete_bitmap.out        |  54 -------
 .../test_delete_sign_delete_bitmap.groovy     | 140 +++++++++---------
 6 files changed, 70 insertions(+), 165 deletions(-)
 delete mode 100644 regression-test/data/unique_with_mow_p0/test_delete_sign_delete_bitmap.out

diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index 02ea943818d812..29ec7e02bc5652 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -444,13 +444,6 @@ Status SegmentWriter::append_block_with_partial_content(const vectorized::Block*
         // mark key with delete sign as deleted.
         bool have_delete_sign =
                 (delete_sign_column_data != nullptr && delete_sign_column_data[block_pos] != 0);
-        if (have_delete_sign && !_tablet_schema->has_sequence_col() && !have_input_seq_column) {
-            // we can directly use delete bitmap to mark the rows with delete sign as deleted
-            // if sequence column doesn't exist to eliminate reading delete sign columns in later reads
-            _mow_context->delete_bitmap->add({_opts.rowset_ctx->rowset_id, _segment_id,
-                                              DeleteBitmap::TEMP_VERSION_FOR_DELETE_SIGN},
-                                             segment_pos);
-        }
 
         RowLocation loc;
         // save rowset shared ptr so this rowset wouldn't delete
@@ -699,29 +692,6 @@ Status SegmentWriter::append_block(const vectorized::Block* block, size_t row_po
         _serialize_block_to_row_column(*const_cast<vectorized::Block*>(block));
     }
 
-    if (_opts.write_type == DataWriteType::TYPE_DIRECT && _opts.enable_unique_key_merge_on_write &&
-        !_tablet_schema->has_sequence_col() && _tablet_schema->delete_sign_idx() != -1) {
-        const vectorized::ColumnWithTypeAndName& delete_sign_column =
-                block->get_by_position(_tablet_schema->delete_sign_idx());
-        auto& delete_sign_col =
-                reinterpret_cast<const vectorized::ColumnInt8&>(*(delete_sign_column.column));
-        if (delete_sign_col.size() >= row_pos + num_rows) {
-            const vectorized::Int8* delete_sign_column_data = delete_sign_col.get_data().data();
-            uint32_t segment_start_pos =
-                    _column_writers[_tablet_schema->delete_sign_idx()]->get_next_rowid();
-            for (size_t block_pos = row_pos, seg_pos = segment_start_pos;
-                 seg_pos < segment_start_pos + num_rows; block_pos++, seg_pos++) {
-                // we can directly use delete bitmap to mark the rows with delete sign as deleted
-                // if sequence column doesn't exist to eliminate reading delete sign columns in later reads
-                if (delete_sign_column_data[block_pos]) {
-                    _mow_context->delete_bitmap->add({_opts.rowset_ctx->rowset_id, _segment_id,
-                                                      DeleteBitmap::TEMP_VERSION_FOR_DELETE_SIGN},
-                                                     seg_pos);
-                }
-            }
-        }
-    }
-
     _olap_data_convertor->set_source_content(block, row_pos, num_rows);
 
     // find all row pos for short key indexes
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 4c1ebd721de826..611e32828bba25 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -3075,14 +3075,6 @@ Status Tablet::calc_segment_delete_bitmap(RowsetSharedPtr rowset,
                 continue;
             }
             if (is_partial_update && rowset_writer != nullptr) {
-                if (delete_bitmap->contains(
-                            {rowset_id, seg->id(), DeleteBitmap::TEMP_VERSION_FOR_DELETE_SIGN},
-                            row_id)) {
-                    LOG(INFO)
-                            << "DEBUG: skip a delete sign column while calc_segment_delete_bitmap "
-                            << "processing confict for partial update";
-                    continue;
-                }
                 // In publish version, record rows to be deleted for concurrent update
                 // For example, if version 5 and 6 update a row, but version 6 only see
                 // version 4 when write, and when publish version, version 5's value will
diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h
index 11dc3532514c8b..0ef058760eae43 100644
--- a/be/src/olap/tablet_meta.h
+++ b/be/src/olap/tablet_meta.h
@@ -343,7 +343,6 @@ class DeleteBitmap {
     // tablet's delete bitmap we can use arbitary version number in BitmapKey. Here we define some version numbers
     // for specific usage during this periods to avoid conflicts
     constexpr static inline uint64_t TEMP_VERSION_COMMON = 0;
-    constexpr static inline uint64_t TEMP_VERSION_FOR_DELETE_SIGN = 1;
 
     /**
      * 
diff --git a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_delete_sign.out b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_delete_sign.out
index f14434b2f9f88c..8d3e69bbe26ac9 100644
--- a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_delete_sign.out
+++ b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_delete_sign.out
@@ -27,7 +27,6 @@
 -- !2 --
 
 -- !3 --
-1	2	\N
 
 -- !1 --
 1	1	1	1
@@ -65,7 +64,6 @@
 -- !2 --
 
 -- !3 --
-1	2	\N
 
 -- !1 --
 1	1	1	1
diff --git a/regression-test/data/unique_with_mow_p0/test_delete_sign_delete_bitmap.out b/regression-test/data/unique_with_mow_p0/test_delete_sign_delete_bitmap.out
deleted file mode 100644
index 687aeab54a1009..00000000000000
--- a/regression-test/data/unique_with_mow_p0/test_delete_sign_delete_bitmap.out
+++ /dev/null
@@ -1,54 +0,0 @@
--- This file is automatically generated. You should know what you did if you want to edit this
--- !sql --
-1	1	1	1	1
-2	2	2	2	2
-3	3	3	3	3
-4	4	4	4	4
-5	5	5	5	5
-
--- !after_delete --
-2	2	2	2	2
-4	4	4	4	4
-
--- !1 --
-1	1	1	1	1	0
-1	1	1	1	1	1
-2	2	2	2	2	0
-3	3	3	3	3	0
-3	3	3	3	3	1
-4	4	4	4	4	0
-5	5	5	5	5	0
-5	5	5	5	5	1
-
--- !2 --
-2	2	2	2	2	0
-4	4	4	4	4	0
-
--- !sql --
-1	1	1	1	1
-2	2	2	2	2
-3	3	3	3	3
-4	4	4	4	4
-5	5	5	5	5
-
--- !after_delete --
-2	2	2	2	2
-4	4	4	4	4
-
--- !1 --
-1	1	1	1	1	0
-1	1	1	1	1	1
-2	2	2	2	2	0
-3	3	3	3	3	0
-3	3	3	3	3	1
-4	4	4	4	4	0
-5	5	5	5	5	0
-5	5	5	5	5	1
-
--- !2 --
-1	1	1	1	1	1
-2	2	2	2	2	0
-3	3	3	3	3	1
-4	4	4	4	4	0
-5	5	5	5	5	1
-
diff --git a/regression-test/suites/unique_with_mow_p0/test_delete_sign_delete_bitmap.groovy b/regression-test/suites/unique_with_mow_p0/test_delete_sign_delete_bitmap.groovy
index f9b89c1eea7a27..be6324d2ec76a6 100644
--- a/regression-test/suites/unique_with_mow_p0/test_delete_sign_delete_bitmap.groovy
+++ b/regression-test/suites/unique_with_mow_p0/test_delete_sign_delete_bitmap.groovy
@@ -17,80 +17,80 @@
 
 suite('test_delete_sign_delete_bitmap') {
 
-    def tableName1 = "test_delete_sign_delete_bitmap1"
-    sql "DROP TABLE IF EXISTS ${tableName1};"
-    sql """ CREATE TABLE IF NOT EXISTS ${tableName1} (
-            `k1` int NOT NULL,
-            `c1` int,
-            `c2` int,
-            `c3` int,
-            `c4` int
-            )UNIQUE KEY(k1)
-        DISTRIBUTED BY HASH(k1) BUCKETS 1
-        PROPERTIES (
-            "enable_unique_key_merge_on_write" = "true",
-            "disable_auto_compaction" = "true",
-            "replication_num" = "1"
-        );"""
+    // def tableName1 = "test_delete_sign_delete_bitmap1"
+    // sql "DROP TABLE IF EXISTS ${tableName1};"
+    // sql """ CREATE TABLE IF NOT EXISTS ${tableName1} (
+    //         `k1` int NOT NULL,
+    //         `c1` int,
+    //         `c2` int,
+    //         `c3` int,
+    //         `c4` int
+    //         )UNIQUE KEY(k1)
+    //     DISTRIBUTED BY HASH(k1) BUCKETS 1
+    //     PROPERTIES (
+    //         "enable_unique_key_merge_on_write" = "true",
+    //         "disable_auto_compaction" = "true",
+    //         "replication_num" = "1"
+    //     );"""
 
-    sql "insert into ${tableName1} values(1,1,1,1,1),(2,2,2,2,2),(3,3,3,3,3),(4,4,4,4,4),(5,5,5,5,5);"
-    qt_sql "select * from ${tableName1} order by k1,c1,c2,c3,c4;"
-    // sql "insert into ${tableName1}(k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__) select k1,c1,c2,c3,c4,1 from ${tableName1} where k1 in (1,3,5);"
-    sql """insert into ${tableName1}(k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__) values(1,1,1,1,1,1),(3,3,3,3,3,1),(5,5,5,5,5,1);"""
-    sql "sync"
-    qt_after_delete "select * from ${tableName1} order by k1,c1,c2,c3,c4;"
-    sql "set skip_delete_sign=true;"
-    sql "set skip_storage_engine_merge=true;"
-    sql "set skip_delete_bitmap=true;"
-    sql "sync"
-    // skip_delete_bitmap=true, skip_delete_sign=true
-    qt_1 "select k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__ from ${tableName1} order by k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__;"
+    // sql "insert into ${tableName1} values(1,1,1,1,1),(2,2,2,2,2),(3,3,3,3,3),(4,4,4,4,4),(5,5,5,5,5);"
+    // qt_sql "select * from ${tableName1} order by k1,c1,c2,c3,c4;"
+    // // sql "insert into ${tableName1}(k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__) select k1,c1,c2,c3,c4,1 from ${tableName1} where k1 in (1,3,5);"
+    // sql """insert into ${tableName1}(k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__) values(1,1,1,1,1,1),(3,3,3,3,3,1),(5,5,5,5,5,1);"""
+    // sql "sync"
+    // qt_after_delete "select * from ${tableName1} order by k1,c1,c2,c3,c4;"
+    // sql "set skip_delete_sign=true;"
+    // sql "set skip_storage_engine_merge=true;"
+    // sql "set skip_delete_bitmap=true;"
+    // sql "sync"
+    // // skip_delete_bitmap=true, skip_delete_sign=true
+    // qt_1 "select k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__ from ${tableName1} order by k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__;"
 
-    sql "set skip_delete_sign=true;"
-    sql "set skip_delete_bitmap=false;"
-    sql "sync"
-    // skip_delete_bitmap=false, skip_delete_sign=true
-    qt_2 "select k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__ from ${tableName1} order by k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__;"
-    sql "drop table if exists ${tableName1};"
+    // sql "set skip_delete_sign=true;"
+    // sql "set skip_delete_bitmap=false;"
+    // sql "sync"
+    // // skip_delete_bitmap=false, skip_delete_sign=true
+    // qt_2 "select k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__ from ${tableName1} order by k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__;"
+    // sql "drop table if exists ${tableName1};"
 
 
-    sql "set skip_delete_sign=false;"
-    sql "set skip_storage_engine_merge=false;"
-    sql "set skip_delete_bitmap=false;"
-    sql "sync"
-    def tableName2 = "test_delete_sign_delete_bitmap2"
-    sql "DROP TABLE IF EXISTS ${tableName2};"
-    sql """ CREATE TABLE IF NOT EXISTS ${tableName2} (
-            `k1` int NOT NULL,
-            `c1` int,
-            `c2` int,
-            `c3` int,
-            `c4` int
-            )UNIQUE KEY(k1)
-        DISTRIBUTED BY HASH(k1) BUCKETS 1
-        PROPERTIES (
-            "enable_unique_key_merge_on_write" = "true",
-            "disable_auto_compaction" = "true",
-            "replication_num" = "1",
-            "function_column.sequence_col" = 'c4'
-        );"""
+    // sql "set skip_delete_sign=false;"
+    // sql "set skip_storage_engine_merge=false;"
+    // sql "set skip_delete_bitmap=false;"
+    // sql "sync"
+    // def tableName2 = "test_delete_sign_delete_bitmap2"
+    // sql "DROP TABLE IF EXISTS ${tableName2};"
+    // sql """ CREATE TABLE IF NOT EXISTS ${tableName2} (
+    //         `k1` int NOT NULL,
+    //         `c1` int,
+    //         `c2` int,
+    //         `c3` int,
+    //         `c4` int
+    //         )UNIQUE KEY(k1)
+    //     DISTRIBUTED BY HASH(k1) BUCKETS 1
+    //     PROPERTIES (
+    //         "enable_unique_key_merge_on_write" = "true",
+    //         "disable_auto_compaction" = "true",
+    //         "replication_num" = "1",
+    //         "function_column.sequence_col" = 'c4'
+    //     );"""
 
-    sql "insert into ${tableName2} values(1,1,1,1,1),(2,2,2,2,2),(3,3,3,3,3),(4,4,4,4,4),(5,5,5,5,5);"
-    qt_sql "select * from ${tableName2} order by k1,c1,c2,c3,c4;"
-    sql """insert into ${tableName2}(k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__) values(1,1,1,1,1,1),(3,3,3,3,3,1),(5,5,5,5,5,1);"""
-    sql "sync"
-    qt_after_delete "select * from ${tableName2} order by k1,c1,c2,c3,c4;"
-    sql "set skip_delete_sign=true;"
-    sql "set skip_storage_engine_merge=true;"
-    sql "set skip_delete_bitmap=true;"
-    sql "sync"
-    // skip_delete_bitmap=true, skip_delete_sign=true
-    qt_1 "select k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__ from ${tableName2} order by k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__;"
+    // sql "insert into ${tableName2} values(1,1,1,1,1),(2,2,2,2,2),(3,3,3,3,3),(4,4,4,4,4),(5,5,5,5,5);"
+    // qt_sql "select * from ${tableName2} order by k1,c1,c2,c3,c4;"
+    // sql """insert into ${tableName2}(k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__) values(1,1,1,1,1,1),(3,3,3,3,3,1),(5,5,5,5,5,1);"""
+    // sql "sync"
+    // qt_after_delete "select * from ${tableName2} order by k1,c1,c2,c3,c4;"
+    // sql "set skip_delete_sign=true;"
+    // sql "set skip_storage_engine_merge=true;"
+    // sql "set skip_delete_bitmap=true;"
+    // sql "sync"
+    // // skip_delete_bitmap=true, skip_delete_sign=true
+    // qt_1 "select k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__ from ${tableName2} order by k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__;"
 
-    sql "set skip_delete_sign=true;"
-    sql "set skip_delete_bitmap=false;"
-    sql "sync"
-    // skip_delete_bitmap=false, skip_delete_sign=true
-    qt_2 "select k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__ from ${tableName2} order by k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__;"
-    sql "drop table if exists ${tableName2};"
+    // sql "set skip_delete_sign=true;"
+    // sql "set skip_delete_bitmap=false;"
+    // sql "sync"
+    // // skip_delete_bitmap=false, skip_delete_sign=true
+    // qt_2 "select k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__ from ${tableName2} order by k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__;"
+    // sql "drop table if exists ${tableName2};"
 }

From 6708cb6c3bb848b95c4ca2946d234347630474da Mon Sep 17 00:00:00 2001
From: zzzxl <33418555+zzzxl1993@users.noreply.github.com>
Date: Thu, 21 Dec 2023 20:32:34 +0800
Subject: [PATCH 3/6] [feature](invert index) add match_phrase_prefix and
 match_regexp #27404 #28257 (#28715)

---
 be/src/clucene                                |   2 +-
 be/src/exec/olap_common.h                     |   4 +
 be/src/exec/olap_utils.h                      |  17 ++-
 be/src/olap/match_predicate.cpp               |   8 +-
 .../query/conjunction_query.cpp               |   6 +-
 .../query/disjunction_query.cpp               |  17 ++-
 .../inverted_index/query/disjunction_query.h  |   1 -
 .../query/phrase_prefix_query.cpp             |  63 +++++++++
 .../query/phrase_prefix_query.h               |  54 ++++++++
 .../inverted_index/query/prefix_query.cpp     |  80 +++++++++++
 .../inverted_index/query/prefix_query.h       |  40 ++++++
 .../inverted_index/query/regexp_query.cpp     |  98 +++++++++++++
 .../inverted_index/query/regexp_query.h       |  46 ++++++
 .../segment_v2/inverted_index_query_type.h    |   8 ++
 .../segment_v2/inverted_index_reader.cpp      | 131 +++++++++++++++---
 .../rowset/segment_v2/inverted_index_reader.h |  21 ++-
 be/src/vec/functions/function_tokenize.cpp    |   8 +-
 be/src/vec/functions/match.cpp                |  39 +++---
 be/src/vec/functions/match.h                  |  34 +++++
 .../org/apache/doris/nereids/DorisLexer.g4    |   2 +
 .../org/apache/doris/nereids/DorisParser.g4   |   2 +-
 fe/fe-core/src/main/cup/sql_parser.cup        |   8 +-
 .../apache/doris/analysis/MatchPredicate.java |  22 +++
 .../nereids/parser/LogicalPlanBuilder.java    |  14 ++
 .../nereids/trees/expressions/Match.java      |   4 +
 .../trees/expressions/MatchPhrasePrefix.java  |  49 +++++++
 .../trees/expressions/MatchRegexp.java        |  49 +++++++
 .../visitor/ExpressionVisitor.java            |  10 ++
 .../org/apache/doris/qe/SessionVariable.java  |   8 ++
 fe/fe-core/src/main/jflex/sql_scanner.flex    |   2 +
 gensrc/thrift/Opcodes.thrift                  |   2 +
 gensrc/thrift/PaloInternalService.thrift      |   2 +
 .../test_index_match_phrase_prefix.out        |  31 +++++
 .../test_index_match_regexp.out               |  16 +++
 .../test_index_match_phrase_prefix.groovy     |  98 +++++++++++++
 .../test_index_match_regexp.groovy            |  89 ++++++++++++
 36 files changed, 1018 insertions(+), 67 deletions(-)
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.cpp
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.h
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/query/prefix_query.cpp
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/query/prefix_query.h
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.cpp
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.h
 create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/MatchPhrasePrefix.java
 create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/MatchRegexp.java
 create mode 100644 regression-test/data/inverted_index_p0/test_index_match_phrase_prefix.out
 create mode 100644 regression-test/data/inverted_index_p0/test_index_match_regexp.out
 create mode 100644 regression-test/suites/inverted_index_p0/test_index_match_phrase_prefix.groovy
 create mode 100644 regression-test/suites/inverted_index_p0/test_index_match_regexp.groovy

diff --git a/be/src/clucene b/be/src/clucene
index 70c1a692bbb127..6f8a21ffe15bd7 160000
--- a/be/src/clucene
+++ b/be/src/clucene
@@ -1 +1 @@
-Subproject commit 70c1a692bbb1277f107ff2ddedda41b3a223c632
+Subproject commit 6f8a21ffe15bd78a1cd3e685067ee5c9ed071827
diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h
index 5bd06d8d54030e..7133f40e7adc8f 100644
--- a/be/src/exec/olap_common.h
+++ b/be/src/exec/olap_common.h
@@ -304,6 +304,10 @@ class ColumnValueRange {
                 condition.__set_condition_op("match_all");
             } else if (value.first == MatchType::MATCH_PHRASE) {
                 condition.__set_condition_op("match_phrase");
+            } else if (value.first == MatchType::MATCH_PHRASE_PREFIX) {
+                condition.__set_condition_op("match_phrase_prefix");
+            } else if (value.first == MatchType::MATCH_REGEXP) {
+                condition.__set_condition_op("match_regexp");
             } else if (value.first == MatchType::MATCH_ELEMENT_EQ) {
                 condition.__set_condition_op("match_element_eq");
             } else if (value.first == MatchType::MATCH_ELEMENT_LT) {
diff --git a/be/src/exec/olap_utils.h b/be/src/exec/olap_utils.h
index 1d6bdf959302b0..1b8525dc1b9948 100644
--- a/be/src/exec/olap_utils.h
+++ b/be/src/exec/olap_utils.h
@@ -170,6 +170,8 @@ enum class MatchType {
     MATCH_ELEMENT_GT = 5,
     MATCH_ELEMENT_LE = 6,
     MATCH_ELEMENT_GE = 7,
+    MATCH_PHRASE_PREFIX = 8,
+    MATCH_REGEXP = 9,
 };
 
 inline MatchType to_match_type(TExprOpcode::type type) {
@@ -183,6 +185,12 @@ inline MatchType to_match_type(TExprOpcode::type type) {
     case TExprOpcode::type::MATCH_PHRASE:
         return MatchType::MATCH_PHRASE;
         break;
+    case TExprOpcode::type::MATCH_PHRASE_PREFIX:
+        return MatchType::MATCH_PHRASE_PREFIX;
+        break;
+    case TExprOpcode::type::MATCH_REGEXP:
+        return MatchType::MATCH_REGEXP;
+        break;
     case TExprOpcode::type::MATCH_ELEMENT_EQ:
         return MatchType::MATCH_ELEMENT_EQ;
         break;
@@ -212,6 +220,10 @@ inline MatchType to_match_type(const std::string& condition_op) {
         return MatchType::MATCH_ALL;
     } else if (condition_op.compare("match_phrase") == 0) {
         return MatchType::MATCH_PHRASE;
+    } else if (condition_op.compare("match_phrase_prefix") == 0) {
+        return MatchType::MATCH_PHRASE_PREFIX;
+    } else if (condition_op.compare("match_regexp") == 0) {
+        return MatchType::MATCH_REGEXP;
     } else if (condition_op.compare("match_element_eq") == 0) {
         return MatchType::MATCH_ELEMENT_EQ;
     } else if (condition_op.compare("match_element_lt") == 0) {
@@ -229,6 +241,8 @@ inline MatchType to_match_type(const std::string& condition_op) {
 inline bool is_match_condition(const std::string& op) {
     if (0 == strcasecmp(op.c_str(), "match_any") || 0 == strcasecmp(op.c_str(), "match_all") ||
         0 == strcasecmp(op.c_str(), "match_phrase") ||
+        0 == strcasecmp(op.c_str(), "match_phrase_prefix") ||
+        0 == strcasecmp(op.c_str(), "match_regexp") ||
         0 == strcasecmp(op.c_str(), "match_element_eq") ||
         0 == strcasecmp(op.c_str(), "match_element_lt") ||
         0 == strcasecmp(op.c_str(), "match_element_gt") ||
@@ -241,7 +255,8 @@ inline bool is_match_condition(const std::string& op) {
 
 inline bool is_match_operator(const TExprOpcode::type& op_type) {
     return TExprOpcode::MATCH_ANY == op_type || TExprOpcode::MATCH_ALL == op_type ||
-           TExprOpcode::MATCH_PHRASE == op_type || TExprOpcode::MATCH_ELEMENT_EQ == op_type ||
+           TExprOpcode::MATCH_PHRASE == op_type || TExprOpcode::MATCH_PHRASE_PREFIX == op_type ||
+           TExprOpcode::MATCH_REGEXP == op_type || TExprOpcode::MATCH_ELEMENT_EQ == op_type ||
            TExprOpcode::MATCH_ELEMENT_LT == op_type || TExprOpcode::MATCH_ELEMENT_GT == op_type ||
            TExprOpcode::MATCH_ELEMENT_LE == op_type || TExprOpcode::MATCH_ELEMENT_GE == op_type;
 }
diff --git a/be/src/olap/match_predicate.cpp b/be/src/olap/match_predicate.cpp
index 61d257231553bb..8ffd6d9993609a 100644
--- a/be/src/olap/match_predicate.cpp
+++ b/be/src/olap/match_predicate.cpp
@@ -95,6 +95,12 @@ InvertedIndexQueryType MatchPredicate::_to_inverted_index_query_type(MatchType m
     case MatchType::MATCH_PHRASE:
         ret = InvertedIndexQueryType::MATCH_PHRASE_QUERY;
         break;
+    case MatchType::MATCH_PHRASE_PREFIX:
+        ret = InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY;
+        break;
+    case MatchType::MATCH_REGEXP:
+        ret = InvertedIndexQueryType::MATCH_REGEXP_QUERY;
+        break;
     case MatchType::MATCH_ELEMENT_EQ:
         ret = InvertedIndexQueryType::EQUAL_QUERY;
         break;
@@ -117,7 +123,7 @@ InvertedIndexQueryType MatchPredicate::_to_inverted_index_query_type(MatchType m
 }
 
 bool MatchPredicate::_skip_evaluate(InvertedIndexIterator* iterator) const {
-    if (_match_type == MatchType::MATCH_PHRASE &&
+    if ((_match_type == MatchType::MATCH_PHRASE || _match_type == MatchType::MATCH_PHRASE_PREFIX) &&
         iterator->get_inverted_index_reader_type() == InvertedIndexReaderType::FULLTEXT &&
         get_parser_phrase_support_string_from_properties(iterator->get_index_properties()) ==
                 INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO) {
diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/conjunction_query.cpp b/be/src/olap/rowset/segment_v2/inverted_index/query/conjunction_query.cpp
index b77edc79ade905..b2448a8fa8e233 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query/conjunction_query.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query/conjunction_query.cpp
@@ -38,12 +38,12 @@ ConjunctionQuery::~ConjunctionQuery() {
 }
 
 void ConjunctionQuery::add(const std::wstring& field_name, const std::vector<std::string>& terms) {
-    if (terms.size() < 1) {
-        _CLTHROWA(CL_ERR_IllegalArgument, "ConjunctionQuery::add: terms.size() < 1");
+    if (terms.empty()) {
+        _CLTHROWA(CL_ERR_IllegalArgument, "ConjunctionQuery::add: terms empty");
     }
 
     std::vector<TermIterator> iterators;
-    for (auto& term : terms) {
+    for (const auto& term : terms) {
         std::wstring ws_term = StringUtil::string_to_wstring(term);
         Term* t = _CLNEW Term(field_name.c_str(), ws_term.c_str());
         _terms.push_back(t);
diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/disjunction_query.cpp b/be/src/olap/rowset/segment_v2/inverted_index/query/disjunction_query.cpp
index 07a159b3222408..7b797d7b54a91e 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query/disjunction_query.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query/disjunction_query.cpp
@@ -22,26 +22,25 @@ namespace doris {
 DisjunctionQuery::DisjunctionQuery(IndexReader* reader) : _reader(reader) {}
 
 DisjunctionQuery::~DisjunctionQuery() {
-    for (auto& term : _terms) {
-        if (term) {
-            _CLDELETE(term);
-        }
-    }
     for (auto& term_doc : _term_docs) {
         if (term_doc) {
             _CLDELETE(term_doc);
         }
     }
+    for (auto& term : _terms) {
+        if (term) {
+            _CLDELETE(term);
+        }
+    }
 }
 
 void DisjunctionQuery::add(const std::wstring& field_name, const std::vector<std::string>& terms) {
-    if (terms.size() < 1) {
-        _CLTHROWA(CL_ERR_IllegalArgument, "ConjunctionQuery::add: terms.size() < 1");
+    if (terms.empty()) {
+        _CLTHROWA(CL_ERR_IllegalArgument, "DisjunctionQuery::add: terms empty");
     }
 
-    for (auto& term : terms) {
+    for (const auto& term : terms) {
         std::wstring ws_term = StringUtil::string_to_wstring(term);
-        _wsterms.emplace_back(&ws_term);
         Term* t = _CLNEW Term(field_name.c_str(), ws_term.c_str());
         _terms.push_back(t);
         TermDocs* term_doc = _reader->termDocs(t);
diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/disjunction_query.h b/be/src/olap/rowset/segment_v2/inverted_index/query/disjunction_query.h
index f42fd69dabc2ef..bb0a837f42a313 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query/disjunction_query.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query/disjunction_query.h
@@ -39,7 +39,6 @@ class DisjunctionQuery {
 
 private:
     IndexReader* _reader = nullptr;
-    std::vector<std::wstring*> _wsterms;
     std::vector<Term*> _terms;
     std::vector<TermDocs*> _term_docs;
     std::vector<TermIterator> _term_iterators;
diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.cpp b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.cpp
new file mode 100644
index 00000000000000..4b0340cda4a011
--- /dev/null
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.cpp
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "phrase_prefix_query.h"
+
+#include "olap/rowset//segment_v2/inverted_index/query/prefix_query.h"
+
+namespace doris {
+
+namespace segment_v2 {
+
+PhrasePrefixQuery::PhrasePrefixQuery(const std::shared_ptr<lucene::search::IndexSearcher>& searcher)
+        : _searcher(searcher) {}
+
+void PhrasePrefixQuery::add(const std::wstring& field_name, const std::vector<std::string>& terms) {
+    if (terms.empty()) {
+        return;
+    }
+
+    for (size_t i = 0; i < terms.size(); i++) {
+        if (i < terms.size() - 1) {
+            std::wstring ws = StringUtil::string_to_wstring(terms[i]);
+            Term* t = _CLNEW Term(field_name.c_str(), ws.c_str());
+            _query.add(t);
+            _CLDECDELETE(t);
+        } else {
+            std::vector<CL_NS(index)::Term*> prefix_terms;
+            PrefixQuery::get_prefix_terms(_searcher->getReader(), field_name, terms[i],
+                                          prefix_terms, _max_expansions);
+            if (prefix_terms.empty()) {
+                continue;
+            }
+            _query.add(prefix_terms);
+            for (auto& t : prefix_terms) {
+                _CLDECDELETE(t);
+            }
+        }
+    }
+}
+
+void PhrasePrefixQuery::search(roaring::Roaring& roaring) {
+    _searcher->_search(&_query, [&roaring](const int32_t docid, const float_t /*score*/) {
+        roaring.add(docid);
+    });
+}
+
+} // namespace segment_v2
+
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.h b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.h
new file mode 100644
index 00000000000000..28007620ce581e
--- /dev/null
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.h
@@ -0,0 +1,54 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <CLucene.h>
+#include <CLucene/index/IndexReader.h>
+
+#include <memory>
+
+#include "CLucene/search/MultiPhraseQuery.h"
+#include "roaring/roaring.hh"
+
+CL_NS_USE(index)
+CL_NS_USE(search)
+
+namespace doris {
+
+namespace segment_v2 {
+
+class PhrasePrefixQuery {
+public:
+    PhrasePrefixQuery(const std::shared_ptr<lucene::search::IndexSearcher>& searcher);
+    ~PhrasePrefixQuery() = default;
+
+    void set_max_expansions(int32_t max_expansions) { _max_expansions = max_expansions; }
+
+    void add(const std::wstring& field_name, const std::vector<std::string>& terms);
+    void search(roaring::Roaring& roaring);
+
+private:
+    std::shared_ptr<lucene::search::IndexSearcher> _searcher;
+    MultiPhraseQuery _query;
+
+    int32_t _max_expansions = 50;
+};
+
+} // namespace segment_v2
+
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/prefix_query.cpp b/be/src/olap/rowset/segment_v2/inverted_index/query/prefix_query.cpp
new file mode 100644
index 00000000000000..7d23d6eb60f348
--- /dev/null
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query/prefix_query.cpp
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "prefix_query.h"
+
+namespace doris {
+
+void PrefixQuery::get_prefix_terms(IndexReader* reader, const std::wstring& field_name,
+                                   const std::string& prefix,
+                                   std::vector<CL_NS(index)::Term*>& prefix_terms,
+                                   int32_t max_expansions) {
+    std::wstring ws_prefix = StringUtil::string_to_wstring(prefix);
+
+    Term* prefix_term = _CLNEW Term(field_name.c_str(), ws_prefix.c_str());
+    TermEnum* enumerator = reader->terms(prefix_term);
+
+    int32_t count = 0;
+    Term* lastTerm = nullptr;
+    try {
+        const TCHAR* prefixText = prefix_term->text();
+        const TCHAR* prefixField = prefix_term->field();
+        const TCHAR* tmp = nullptr;
+        size_t i = 0;
+        size_t prefixLen = prefix_term->textLength();
+        do {
+            lastTerm = enumerator->term();
+            if (lastTerm != nullptr && lastTerm->field() == prefixField) {
+                size_t termLen = lastTerm->textLength();
+                if (prefixLen > termLen) {
+                    break;
+                }
+
+                tmp = lastTerm->text();
+
+                for (i = prefixLen - 1; i != -1; --i) {
+                    if (tmp[i] != prefixText[i]) {
+                        tmp = nullptr;
+                        break;
+                    }
+                }
+                if (tmp == nullptr) {
+                    break;
+                }
+
+                if (max_expansions > 0 && count >= max_expansions) {
+                    break;
+                }
+
+                Term* t = _CLNEW Term(field_name.c_str(), tmp);
+                prefix_terms.push_back(t);
+                count++;
+            } else {
+                break;
+            }
+            _CLDECDELETE(lastTerm);
+        } while (enumerator->next());
+    }
+    _CLFINALLY({
+        enumerator->close();
+        _CLDELETE(enumerator);
+        _CLDECDELETE(lastTerm);
+        _CLDECDELETE(prefix_term);
+    });
+}
+
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/prefix_query.h b/be/src/olap/rowset/segment_v2/inverted_index/query/prefix_query.h
new file mode 100644
index 00000000000000..5deb0c1a3628ad
--- /dev/null
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query/prefix_query.h
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <CLucene.h>
+#include <CLucene/index/IndexReader.h>
+
+#include <cstdint>
+
+CL_NS_USE(index)
+
+namespace doris {
+
+class PrefixQuery {
+public:
+    PrefixQuery() = default;
+    ~PrefixQuery() = default;
+
+    static void get_prefix_terms(IndexReader* reader, const std::wstring& field_name,
+                                 const std::string& prefix,
+                                 std::vector<CL_NS(index)::Term*>& prefix_terms,
+                                 int32_t max_expansions = 50);
+};
+
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.cpp b/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.cpp
new file mode 100644
index 00000000000000..83c5401bac0e5b
--- /dev/null
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.cpp
@@ -0,0 +1,98 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "regexp_query.h"
+
+#include <CLucene/config/repl_wchar.h>
+#include <hs/hs.h>
+
+#include "common/logging.h"
+
+namespace doris::segment_v2 {
+
+RegexpQuery::RegexpQuery(const std::shared_ptr<lucene::search::IndexSearcher>& searcher)
+        : _searcher(searcher), query(searcher->getReader()) {}
+
+void RegexpQuery::add(const std::wstring& field_name, const std::string& pattern) {
+    hs_database_t* database = nullptr;
+    hs_compile_error_t* compile_err = nullptr;
+    hs_scratch_t* scratch = nullptr;
+
+    if (hs_compile(pattern.data(), HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8,
+                   HS_MODE_BLOCK, nullptr, &database, &compile_err) != HS_SUCCESS) {
+        LOG(ERROR) << "hyperscan compilation failed: " << compile_err->message;
+        hs_free_compile_error(compile_err);
+        return;
+    }
+
+    if (hs_alloc_scratch(database, &scratch) != HS_SUCCESS) {
+        LOG(ERROR) << "hyperscan could not allocate scratch space.";
+        hs_free_database(database);
+        return;
+    }
+
+    auto on_match = [](unsigned int id, unsigned long long from, unsigned long long to,
+                       unsigned int flags, void* context) -> int {
+        *((bool*)context) = true;
+        return 0;
+    };
+
+    Term* term = nullptr;
+    TermEnum* enumerator = nullptr;
+    std::vector<std::string> terms;
+    int32_t count = 0;
+
+    try {
+        enumerator = _searcher->getReader()->terms();
+        while (enumerator->next()) {
+            term = enumerator->term();
+            std::string input = lucene_wcstoutf8string(term->text(), term->textLength());
+
+            bool is_match = false;
+            if (hs_scan(database, input.data(), input.size(), 0, scratch, on_match,
+                        (void*)&is_match) != HS_SUCCESS) {
+                LOG(ERROR) << "hyperscan match failed: " << input;
+                break;
+            }
+
+            if (is_match) {
+                terms.emplace_back(std::move(input));
+                if (++count >= _max_expansions) {
+                    break;
+                }
+            }
+
+            _CLDECDELETE(term);
+        }
+    }
+    _CLFINALLY({
+        _CLDECDELETE(term);
+        enumerator->close();
+        _CLDELETE(enumerator);
+
+        hs_free_scratch(scratch);
+        hs_free_database(database);
+    })
+
+    query.add(field_name, terms);
+}
+
+void RegexpQuery::search(roaring::Roaring& roaring) {
+    query.search(roaring);
+}
+
+} // namespace doris::segment_v2
diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.h b/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.h
new file mode 100644
index 00000000000000..3791ad50d8f78f
--- /dev/null
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.h
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+
+#include "olap/rowset/segment_v2/inverted_index/query/disjunction_query.h"
+
+CL_NS_USE(index)
+CL_NS_USE(search)
+
+namespace doris::segment_v2 {
+
+class RegexpQuery {
+public:
+    RegexpQuery(const std::shared_ptr<lucene::search::IndexSearcher>& searcher);
+    ~RegexpQuery() = default;
+
+    void set_max_expansions(int32_t max_expansions) { _max_expansions = max_expansions; }
+
+    void add(const std::wstring& field_name, const std::string& pattern);
+    void search(roaring::Roaring& roaring);
+
+private:
+    std::shared_ptr<lucene::search::IndexSearcher> _searcher;
+
+    int32_t _max_expansions = 50;
+    DisjunctionQuery query;
+};
+
+} // namespace doris::segment_v2
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_query_type.h b/be/src/olap/rowset/segment_v2/inverted_index_query_type.h
index 1ebfe6359181e9..3037f979f6edf7 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_query_type.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_query_type.h
@@ -32,6 +32,8 @@ enum class InvertedIndexQueryType {
     MATCH_ANY_QUERY = 5,
     MATCH_ALL_QUERY = 6,
     MATCH_PHRASE_QUERY = 7,
+    MATCH_PHRASE_PREFIX_QUERY = 8,
+    MATCH_REGEXP_QUERY = 9,
 };
 
 inline std::string InvertedIndexQueryType_toString(InvertedIndexQueryType query_type) {
@@ -63,6 +65,12 @@ inline std::string InvertedIndexQueryType_toString(InvertedIndexQueryType query_
     case InvertedIndexQueryType::MATCH_PHRASE_QUERY: {
         return "MPHRASE";
     }
+    case InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY: {
+        return "MPHRASEPREFIX";
+    }
+    case InvertedIndexQueryType::MATCH_REGEXP_QUERY: {
+        return "MREGEXP";
+    }
     default:
         return "";
     }
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
index 7d710d72c382ab..292884e631b939 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
@@ -50,12 +50,15 @@
 #include "CLucene/analysis/standard95/StandardAnalyzer.h"
 #include "common/config.h"
 #include "common/logging.h"
+#include "inverted_index_query_type.h"
 #include "io/fs/file_system.h"
 #include "olap/inverted_index_parser.h"
 #include "olap/key_coder.h"
 #include "olap/olap_common.h"
 #include "olap/rowset/segment_v2/inverted_index/char_filter/char_filter_factory.h"
 #include "olap/rowset/segment_v2/inverted_index/query/conjunction_query.h"
+#include "olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.h"
+#include "olap/rowset/segment_v2/inverted_index/query/regexp_query.h"
 #include "olap/rowset/segment_v2/inverted_index_cache.h"
 #include "olap/rowset/segment_v2/inverted_index_compound_directory.h"
 #include "olap/rowset/segment_v2/inverted_index_desc.h"
@@ -90,7 +93,9 @@ bool InvertedIndexReader::_is_range_query(InvertedIndexQueryType query_type) {
 bool InvertedIndexReader::_is_match_query(InvertedIndexQueryType query_type) {
     return (query_type == InvertedIndexQueryType::MATCH_ANY_QUERY ||
             query_type == InvertedIndexQueryType::MATCH_ALL_QUERY ||
-            query_type == InvertedIndexQueryType::MATCH_PHRASE_QUERY);
+            query_type == InvertedIndexQueryType::MATCH_PHRASE_QUERY ||
+            query_type == InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY ||
+            query_type == InvertedIndexQueryType::MATCH_REGEXP_QUERY);
 }
 
 bool InvertedIndexReader::indexExists(io::Path& index_file_path) {
@@ -141,10 +146,13 @@ std::unique_ptr<lucene::util::Reader> InvertedIndexReader::create_reader(
     return reader;
 }
 
-std::vector<std::string> InvertedIndexReader::get_analyse_result(
-        lucene::util::Reader* reader, lucene::analysis::Analyzer* analyzer,
-        const std::string& field_name, InvertedIndexQueryType query_type, bool drop_duplicates) {
-    std::vector<std::string> analyse_result;
+void InvertedIndexReader::get_analyse_result(std::vector<std::string>& analyse_result,
+                                             lucene::util::Reader* reader,
+                                             lucene::analysis::Analyzer* analyzer,
+                                             const std::string& field_name,
+                                             InvertedIndexQueryType query_type,
+                                             bool drop_duplicates) {
+    analyse_result.clear();
 
     std::wstring field_ws = std::wstring(field_name.begin(), field_name.end());
     std::unique_ptr<lucene::analysis::TokenStream> token_stream(
@@ -168,8 +176,6 @@ std::vector<std::string> InvertedIndexReader::get_analyse_result(
         std::set<std::string> unrepeated_result(analyse_result.begin(), analyse_result.end());
         analyse_result.assign(unrepeated_result.begin(), unrepeated_result.end());
     }
-
-    return analyse_result;
 }
 
 Status InvertedIndexReader::read_null_bitmap(InvertedIndexQueryCacheHandle* cache_handle,
@@ -246,19 +252,25 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* run
     auto index_file_name =
             InvertedIndexDescriptor::get_index_file_name(path.filename(), _index_meta.index_id());
     auto index_file_path = index_dir / index_file_name;
-    InvertedIndexCtxSPtr inverted_index_ctx = std::make_shared<InvertedIndexCtx>();
-    inverted_index_ctx->parser_type = get_inverted_index_parser_type_from_string(
-            get_parser_string_from_properties(_index_meta.properties()));
-    inverted_index_ctx->parser_mode =
-            get_parser_mode_string_from_properties(_index_meta.properties());
-    inverted_index_ctx->char_filter_map =
-            get_parser_char_filter_map_from_properties(_index_meta.properties());
+
     try {
-        auto analyzer = create_analyzer(inverted_index_ctx.get());
-        auto reader = create_reader(inverted_index_ctx.get(), search_str);
-        inverted_index_ctx->analyzer = analyzer.get();
-        std::vector<std::string> analyse_result =
-                get_analyse_result(reader.get(), analyzer.get(), column_name, query_type);
+        std::vector<std::string> analyse_result;
+        if (query_type == InvertedIndexQueryType::MATCH_REGEXP_QUERY) {
+            analyse_result.emplace_back(search_str);
+        } else {
+            InvertedIndexCtxSPtr inverted_index_ctx = std::make_shared<InvertedIndexCtx>();
+            inverted_index_ctx->parser_type = get_inverted_index_parser_type_from_string(
+                    get_parser_string_from_properties(_index_meta.properties()));
+            inverted_index_ctx->parser_mode =
+                    get_parser_mode_string_from_properties(_index_meta.properties());
+            inverted_index_ctx->char_filter_map =
+                    get_parser_char_filter_map_from_properties(_index_meta.properties());
+            auto analyzer = create_analyzer(inverted_index_ctx.get());
+            auto reader = create_reader(inverted_index_ctx.get(), search_str);
+            inverted_index_ctx->analyzer = analyzer.get();
+            get_analyse_result(analyse_result, reader.get(), analyzer.get(), column_name,
+                               query_type);
+        }
 
         if (analyse_result.empty()) {
             auto msg = fmt::format(
@@ -267,7 +279,9 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* run
                     search_str, get_parser_string_from_properties(_index_meta.properties()));
             if (query_type == InvertedIndexQueryType::MATCH_ALL_QUERY ||
                 query_type == InvertedIndexQueryType::MATCH_ANY_QUERY ||
-                query_type == InvertedIndexQueryType::MATCH_PHRASE_QUERY) {
+                query_type == InvertedIndexQueryType::MATCH_PHRASE_QUERY ||
+                query_type == InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY ||
+                query_type == InvertedIndexQueryType::MATCH_REGEXP_QUERY) {
                 LOG(WARNING) << msg;
                 return Status::OK();
             } else {
@@ -294,6 +308,7 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* run
         roaring::Roaring query_match_bitmap;
         bool null_bitmap_already_read = false;
         if (query_type == InvertedIndexQueryType::MATCH_PHRASE_QUERY ||
+            query_type == InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY ||
             query_type == InvertedIndexQueryType::MATCH_ALL_QUERY ||
             query_type == InvertedIndexQueryType::EQUAL_QUERY) {
             std::string str_tokens;
@@ -302,7 +317,7 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* run
                 str_tokens += " ";
             }
 
-            auto cache = InvertedIndexQueryCache::instance();
+            auto* cache = InvertedIndexQueryCache::instance();
             InvertedIndexQueryCache::CacheKey cache_key;
             cache_key.index_path = index_file_path;
             cache_key.column_name = column_name;
@@ -333,6 +348,10 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* run
                     query.reset(phrase_query);
                     res = normal_index_search(stats, query_type, index_searcher,
                                               null_bitmap_already_read, query, term_match_bitmap);
+                } else if (query_type == InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY) {
+                    res = match_phrase_prefix_index_search(stats, runtime_state, field_ws,
+                                                           analyse_result, index_searcher,
+                                                           term_match_bitmap);
                 } else {
                     res = match_all_index_search(stats, runtime_state, field_ws, analyse_result,
                                                  index_searcher, term_match_bitmap);
@@ -346,13 +365,45 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* run
                 cache->insert(cache_key, term_match_bitmap, &cache_handle);
             }
             query_match_bitmap = *term_match_bitmap;
+        } else if (query_type == InvertedIndexQueryType::MATCH_REGEXP_QUERY) {
+            const std::string& pattern = analyse_result[0];
+
+            std::shared_ptr<roaring::Roaring> term_match_bitmap = nullptr;
+            auto* cache = InvertedIndexQueryCache::instance();
+
+            InvertedIndexQueryCache::CacheKey cache_key;
+            cache_key.index_path = index_file_path;
+            cache_key.column_name = column_name;
+            cache_key.query_type = query_type;
+            cache_key.value = pattern;
+            InvertedIndexQueryCacheHandle cache_handle;
+            if (cache->lookup(cache_key, &cache_handle)) {
+                stats->inverted_index_query_cache_hit++;
+                term_match_bitmap = cache_handle.get_bitmap();
+            } else {
+                stats->inverted_index_query_cache_miss++;
+
+                auto index_searcher = get_index_search();
+
+                term_match_bitmap = std::make_shared<roaring::Roaring>();
+
+                Status res = match_regexp_index_search(stats, runtime_state, field_ws, pattern,
+                                                       index_searcher, term_match_bitmap);
+                if (!res.ok()) {
+                    return res;
+                }
+
+                term_match_bitmap->runOptimize();
+                cache->insert(cache_key, term_match_bitmap, &cache_handle);
+            }
+            query_match_bitmap = *term_match_bitmap;
         } else {
             bool first = true;
             for (auto token : analyse_result) {
                 std::shared_ptr<roaring::Roaring> term_match_bitmap = nullptr;
 
                 // try to get term bitmap match result from cache to avoid query index on cache hit
-                auto cache = InvertedIndexQueryCache::instance();
+                auto* cache = InvertedIndexQueryCache::instance();
                 // use EQUAL_QUERY type here since cache is for each term/token
                 //auto token = lucene_wcstoutf8string(token_ws.c_str(), token_ws.length());
                 std::wstring token_ws = StringUtil::string_to_wstring(token);
@@ -471,6 +522,42 @@ Status FullTextIndexReader::match_all_index_search(
     return Status::OK();
 }
 
+Status FullTextIndexReader::match_phrase_prefix_index_search(
+        OlapReaderStatistics* stats, RuntimeState* runtime_state, const std::wstring& field_ws,
+        const std::vector<std::string>& analyse_result, const IndexSearcherPtr& index_searcher,
+        const std::shared_ptr<roaring::Roaring>& term_match_bitmap) {
+    TQueryOptions queryOptions = runtime_state->query_options();
+    try {
+        SCOPED_RAW_TIMER(&stats->inverted_index_searcher_search_timer);
+        PhrasePrefixQuery query(index_searcher);
+        query.set_max_expansions(queryOptions.inverted_index_max_expansions);
+        query.add(field_ws, analyse_result);
+        query.search(*term_match_bitmap);
+    } catch (const CLuceneError& e) {
+        return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>("CLuceneError occured: {}",
+                                                                      e.what());
+    }
+    return Status::OK();
+}
+
+Status FullTextIndexReader::match_regexp_index_search(
+        OlapReaderStatistics* stats, RuntimeState* runtime_state, const std::wstring& field_ws,
+        const std::string& pattern, const IndexSearcherPtr& index_searcher,
+        const std::shared_ptr<roaring::Roaring>& term_match_bitmap) {
+    TQueryOptions queryOptions = runtime_state->query_options();
+    try {
+        SCOPED_RAW_TIMER(&stats->inverted_index_searcher_search_timer);
+        RegexpQuery query(index_searcher);
+        query.set_max_expansions(queryOptions.inverted_index_max_expansions);
+        query.add(field_ws, pattern);
+        query.search(*term_match_bitmap);
+    } catch (const CLuceneError& e) {
+        return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>("CLuceneError occured: {}",
+                                                                      e.what());
+    }
+    return Status::OK();
+}
+
 void FullTextIndexReader::check_null_bitmap(const IndexSearcherPtr& index_searcher,
                                             bool& null_bitmap_already_read) {
     // try to reuse index_searcher's directory to read null_bitmap to cache
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
index 20c5c731f9eca8..8b5c786f36f909 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
@@ -97,11 +97,12 @@ class InvertedIndexReader : public std::enable_shared_from_this<InvertedIndexRea
         return _index_meta.properties();
     }
 
-    static std::vector<std::string> get_analyse_result(lucene::util::Reader* reader,
-                                                       lucene::analysis::Analyzer* analyzer,
-                                                       const std::string& field_name,
-                                                       InvertedIndexQueryType query_type,
-                                                       bool drop_duplicates = true);
+    static void get_analyse_result(std::vector<std::string>& analyse_result,
+                                   lucene::util::Reader* reader,
+                                   lucene::analysis::Analyzer* analyzer,
+                                   const std::string& field_name, InvertedIndexQueryType query_type,
+                                   bool drop_duplicates = true);
+
     static std::unique_ptr<lucene::util::Reader> create_reader(InvertedIndexCtx* inverted_index_ctx,
                                                                const std::string& value);
     static std::unique_ptr<lucene::analysis::Analyzer> create_analyzer(
@@ -153,6 +154,16 @@ class FullTextIndexReader : public InvertedIndexReader {
                                   const std::shared_ptr<roaring::Roaring>& term_match_bitmap);
 
     void check_null_bitmap(const IndexSearcherPtr& index_searcher, bool& null_bitmap_already_read);
+
+    Status match_phrase_prefix_index_search(
+            OlapReaderStatistics* stats, RuntimeState* runtime_state, const std::wstring& field_ws,
+            const std::vector<std::string>& analyse_result, const IndexSearcherPtr& index_searcher,
+            const std::shared_ptr<roaring::Roaring>& term_match_bitmap);
+
+    Status match_regexp_index_search(OlapReaderStatistics* stats, RuntimeState* runtime_state,
+                                     const std::wstring& field_ws, const std::string& pattern,
+                                     const IndexSearcherPtr& index_searcher,
+                                     const std::shared_ptr<roaring::Roaring>& term_match_bitmap);
 };
 
 class StringTypeInvertedIndexReader : public InvertedIndexReader {
diff --git a/be/src/vec/functions/function_tokenize.cpp b/be/src/vec/functions/function_tokenize.cpp
index 11760a30f5025f..54d9bee4ae9edc 100644
--- a/be/src/vec/functions/function_tokenize.cpp
+++ b/be/src/vec/functions/function_tokenize.cpp
@@ -79,10 +79,10 @@ void FunctionTokenize::_do_tokenize(const ColumnString& src_column_string,
         auto reader = doris::segment_v2::InvertedIndexReader::create_reader(
                 &inverted_index_ctx, tokenize_str.to_string());
 
-        std::vector<std::string> query_tokens =
-                doris::segment_v2::InvertedIndexReader::get_analyse_result(
-                        reader.get(), inverted_index_ctx.analyzer, "tokenize",
-                        doris::segment_v2::InvertedIndexQueryType::MATCH_PHRASE_QUERY);
+        std::vector<std::string> query_tokens;
+        doris::segment_v2::InvertedIndexReader::get_analyse_result(
+                query_tokens, reader.get(), inverted_index_ctx.analyzer, "tokenize",
+                doris::segment_v2::InvertedIndexQueryType::MATCH_PHRASE_QUERY);
         for (auto token : query_tokens) {
             const size_t old_size = column_string_chars.size();
             const size_t split_part_size = token.length();
diff --git a/be/src/vec/functions/match.cpp b/be/src/vec/functions/match.cpp
index 3497b2ef7a96a5..c81c1617ca61d4 100644
--- a/be/src/vec/functions/match.cpp
+++ b/be/src/vec/functions/match.cpp
@@ -129,10 +129,10 @@ inline std::vector<std::string> FunctionMatchBase::analyse_data_token(
             auto reader = doris::segment_v2::InvertedIndexReader::create_reader(
                     inverted_index_ctx, str_ref.to_string());
 
-            std::vector<std::string> element_tokens =
-                    doris::segment_v2::InvertedIndexReader::get_analyse_result(
-                            reader.get(), inverted_index_ctx->analyzer, column_name, query_type,
-                            false);
+            std::vector<std::string> element_tokens;
+            doris::segment_v2::InvertedIndexReader::get_analyse_result(
+                    element_tokens, reader.get(), inverted_index_ctx->analyzer, column_name,
+                    query_type, false);
             data_tokens.insert(data_tokens.end(), element_tokens.begin(), element_tokens.end());
         }
     } else {
@@ -140,8 +140,9 @@ inline std::vector<std::string> FunctionMatchBase::analyse_data_token(
         auto reader = doris::segment_v2::InvertedIndexReader::create_reader(inverted_index_ctx,
                                                                             str_ref.to_string());
 
-        data_tokens = doris::segment_v2::InvertedIndexReader::get_analyse_result(
-                reader.get(), inverted_index_ctx->analyzer, column_name, query_type, false);
+        doris::segment_v2::InvertedIndexReader::get_analyse_result(data_tokens, reader.get(),
+                                                                   inverted_index_ctx->analyzer,
+                                                                   column_name, query_type, false);
     }
     return data_tokens;
 }
@@ -160,10 +161,10 @@ Status FunctionMatchAny::execute_match(const std::string& column_name,
                << inverted_index_parser_type_to_string(parser_type);
     auto reader = doris::segment_v2::InvertedIndexReader::create_reader(inverted_index_ctx,
                                                                         match_query_str);
-    std::vector<std::string> query_tokens =
-            doris::segment_v2::InvertedIndexReader::get_analyse_result(
-                    reader.get(), inverted_index_ctx->analyzer, column_name,
-                    doris::segment_v2::InvertedIndexQueryType::MATCH_ANY_QUERY);
+    std::vector<std::string> query_tokens;
+    doris::segment_v2::InvertedIndexReader::get_analyse_result(
+            query_tokens, reader.get(), inverted_index_ctx->analyzer, column_name,
+            doris::segment_v2::InvertedIndexQueryType::MATCH_ANY_QUERY);
     if (query_tokens.empty()) {
         LOG(WARNING) << fmt::format(
                 "token parser result is empty for query, "
@@ -205,10 +206,10 @@ Status FunctionMatchAll::execute_match(const std::string& column_name,
                << inverted_index_parser_type_to_string(parser_type);
     auto reader = doris::segment_v2::InvertedIndexReader::create_reader(inverted_index_ctx,
                                                                         match_query_str);
-    std::vector<std::string> query_tokens =
-            doris::segment_v2::InvertedIndexReader::get_analyse_result(
-                    reader.get(), inverted_index_ctx->analyzer, column_name,
-                    doris::segment_v2::InvertedIndexQueryType::MATCH_ALL_QUERY);
+    std::vector<std::string> query_tokens;
+    doris::segment_v2::InvertedIndexReader::get_analyse_result(
+            query_tokens, reader.get(), inverted_index_ctx->analyzer, column_name,
+            doris::segment_v2::InvertedIndexQueryType::MATCH_ALL_QUERY);
     if (query_tokens.empty()) {
         LOG(WARNING) << fmt::format(
                 "token parser result is empty for query, "
@@ -256,10 +257,10 @@ Status FunctionMatchPhrase::execute_match(const std::string& column_name,
                << inverted_index_parser_type_to_string(parser_type);
     auto reader = doris::segment_v2::InvertedIndexReader::create_reader(inverted_index_ctx,
                                                                         match_query_str);
-    std::vector<std::string> query_tokens =
-            doris::segment_v2::InvertedIndexReader::get_analyse_result(
-                    reader.get(), inverted_index_ctx->analyzer, column_name,
-                    doris::segment_v2::InvertedIndexQueryType::MATCH_PHRASE_QUERY);
+    std::vector<std::string> query_tokens;
+    doris::segment_v2::InvertedIndexReader::get_analyse_result(
+            query_tokens, reader.get(), inverted_index_ctx->analyzer, column_name,
+            doris::segment_v2::InvertedIndexQueryType::MATCH_PHRASE_QUERY);
     if (query_tokens.empty()) {
         LOG(WARNING) << fmt::format(
                 "token parser result is empty for query, "
@@ -313,6 +314,8 @@ void register_function_match(SimpleFunctionFactory& factory) {
     factory.register_function<FunctionMatchAny>();
     factory.register_function<FunctionMatchAll>();
     factory.register_function<FunctionMatchPhrase>();
+    factory.register_function<FunctionMatchPhrasePrefix>();
+    factory.register_function<FunctionMatchRegexp>();
     factory.register_function<FunctionMatchElementEQ>();
     factory.register_function<FunctionMatchElementLT>();
     factory.register_function<FunctionMatchElementGT>();
diff --git a/be/src/vec/functions/match.h b/be/src/vec/functions/match.h
index b8e7f91cb019b8..13701bd2d6000a 100644
--- a/be/src/vec/functions/match.h
+++ b/be/src/vec/functions/match.h
@@ -128,6 +128,40 @@ class FunctionMatchPhrase : public FunctionMatchBase {
                          ColumnUInt8::Container& result) override;
 };
 
+class FunctionMatchPhrasePrefix : public FunctionMatchBase {
+public:
+    static constexpr auto name = "match_phrase_prefix";
+    static FunctionPtr create() { return std::make_shared<FunctionMatchPhrasePrefix>(); }
+
+    String get_name() const override { return name; }
+
+    Status execute_match(const std::string& column_name, const std::string& match_query_str,
+                         size_t input_rows_count, const ColumnString* string_col,
+                         InvertedIndexCtx* inverted_index_ctx,
+                         const ColumnArray::Offsets64* array_offsets,
+                         ColumnUInt8::Container& result) override {
+        return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
+                "FunctionMatchPhrasePrefix not support execute_match");
+    }
+};
+
+class FunctionMatchRegexp : public FunctionMatchBase {
+public:
+    static constexpr auto name = "match_regexp";
+    static FunctionPtr create() { return std::make_shared<FunctionMatchRegexp>(); }
+
+    String get_name() const override { return name; }
+
+    Status execute_match(const std::string& column_name, const std::string& match_query_str,
+                         size_t input_rows_count, const ColumnString* string_col,
+                         InvertedIndexCtx* inverted_index_ctx,
+                         const ColumnArray::Offsets64* array_offsets,
+                         ColumnUInt8::Container& result) override {
+        return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
+                "FunctionMatchRegexp not support execute_match");
+    }
+};
+
 class FunctionMatchElementEQ : public FunctionMatchBase {
 public:
     static constexpr auto name = "match_element_eq";
diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
index abf6e4bb27f00e..378a34f3da0bfd 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
@@ -338,6 +338,8 @@ MATCH_ELEMENT_GT: 'ELEMENT_GT';
 MATCH_ELEMENT_LE: 'ELEMENT_LE';
 MATCH_ELEMENT_LT: 'ELEMENT_LT';
 MATCH_PHRASE: 'MATCH_PHRASE';
+MATCH_PHRASE_PREFIX: 'MATCH_PHRASE_PREFIX';
+MATCH_REGEXP: 'MATCH_REGEXP';
 MATERIALIZED: 'MATERIALIZED';
 MAX: 'MAX';
 MAXVALUE: 'MAXVALUE';
diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
index ec4b2672c6d861..ba44219e7e047f 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
@@ -312,7 +312,7 @@ booleanExpression
 predicate
     : NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression
     | NOT? kind=(LIKE | REGEXP | RLIKE) pattern=valueExpression
-    | NOT? kind=(MATCH | MATCH_ANY | MATCH_ALL | MATCH_PHRASE) pattern=valueExpression
+    | NOT? kind=(MATCH | MATCH_ANY | MATCH_ALL | MATCH_PHRASE | MATCH_PHRASE_PREFIX | MATCH_REGEXP) pattern=valueExpression
     | NOT? kind=IN LEFT_PAREN query RIGHT_PAREN
     | NOT? kind=IN LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN
     | IS NOT? kind=NULL
diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup
index 07345f6b6b9c5e..08ffc389a92814 100644
--- a/fe/fe-core/src/main/cup/sql_parser.cup
+++ b/fe/fe-core/src/main/cup/sql_parser.cup
@@ -468,6 +468,8 @@ terminal String
     KW_MATCH_ANY,
     KW_MATCH_ALL,
     KW_MATCH_PHRASE,
+    KW_MATCH_PHRASE_PREFIX,
+    KW_MATCH_REGEXP,
     KW_MATCH_ELEMENT_EQ,
     KW_MATCH_ELEMENT_LT,
     KW_MATCH_ELEMENT_GT,
@@ -959,7 +961,7 @@ precedence left KW_AND;
 precedence left KW_NOT, NOT;
 precedence left KW_BETWEEN, KW_IN, KW_IS, KW_EXISTS;
 precedence left KW_LIKE, KW_REGEXP;
-precedence left KW_MATCH_ANY, KW_MATCH_ALL, KW_MATCH_PHRASE, KW_MATCH, KW_MATCH_ELEMENT_EQ, KW_MATCH_ELEMENT_LT, KW_MATCH_ELEMENT_GT, KW_MATCH_ELEMENT_LE, KW_MATCH_ELEMENT_GE;
+precedence left KW_MATCH_ANY, KW_MATCH_ALL, KW_MATCH_PHRASE, KW_MATCH_PHRASE_PREFIX, KW_MATCH_REGEXP, KW_MATCH, KW_MATCH_ELEMENT_EQ, KW_MATCH_ELEMENT_LT, KW_MATCH_ELEMENT_GT, KW_MATCH_ELEMENT_LE, KW_MATCH_ELEMENT_GE;
 precedence left EQUAL, LESSTHAN, GREATERTHAN;
 precedence left ADD, SUBTRACT;
 precedence left AT, STAR, DIVIDE, MOD, KW_DIV;
@@ -6985,6 +6987,10 @@ match_predicate ::=
   {: RESULT = new MatchPredicate(MatchPredicate.Operator.MATCH_ALL, e1, e2); :}
   | expr:e1 KW_MATCH_PHRASE expr:e2
   {: RESULT = new MatchPredicate(MatchPredicate.Operator.MATCH_PHRASE, e1, e2); :}
+  | expr:e1 KW_MATCH_PHRASE_PREFIX expr:e2
+  {: RESULT = new MatchPredicate(MatchPredicate.Operator.MATCH_PHRASE_PREFIX, e1, e2); :}
+  | expr:e1 KW_MATCH_REGEXP expr:e2
+  {: RESULT = new MatchPredicate(MatchPredicate.Operator.MATCH_REGEXP, e1, e2); :}
   | expr:e1 KW_MATCH_ELEMENT_EQ expr:e2
   {: RESULT = new MatchPredicate(MatchPredicate.Operator.MATCH_ELEMENT_EQ, e1, e2); :}
   | expr:e1 KW_MATCH_ELEMENT_LT expr:e2
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java
index 10579614524e1d..f106aec956c72c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java
@@ -50,6 +50,8 @@ public enum Operator {
         MATCH_ANY("MATCH_ANY", "match_any", TExprOpcode.MATCH_ANY),
         MATCH_ALL("MATCH_ALL", "match_all", TExprOpcode.MATCH_ALL),
         MATCH_PHRASE("MATCH_PHRASE", "match_phrase", TExprOpcode.MATCH_PHRASE),
+        MATCH_PHRASE_PREFIX("MATCH_PHRASE_PREFIX", "match_phrase_prefix", TExprOpcode.MATCH_PHRASE_PREFIX),
+        MATCH_REGEXP("MATCH_REGEXP", "match_regexp", TExprOpcode.MATCH_REGEXP),
         MATCH_ELEMENT_EQ("MATCH_ELEMENT_EQ", "match_element_eq", TExprOpcode.MATCH_ELEMENT_EQ),
         MATCH_ELEMENT_LT("MATCH_ELEMENT_LT", "match_element_lt", TExprOpcode.MATCH_ELEMENT_LT),
         MATCH_ELEMENT_GT("MATCH_ELEMENT_GT", "match_element_gt", TExprOpcode.MATCH_ELEMENT_GT),
@@ -147,6 +149,26 @@ public static void initBuiltins(FunctionSet functionSet) {
                     symbolNotUsed,
                     Lists.<Type>newArrayList(new ArrayType(t), t),
                     Type.BOOLEAN));
+            functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
+                    Operator.MATCH_PHRASE_PREFIX.getName(),
+                    symbolNotUsed,
+                    Lists.<Type>newArrayList(t, t),
+                    Type.BOOLEAN));
+            functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
+                    Operator.MATCH_PHRASE_PREFIX.getName(),
+                    symbolNotUsed,
+                    Lists.<Type>newArrayList(new ArrayType(t), t),
+                    Type.BOOLEAN));
+            functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
+                    Operator.MATCH_REGEXP.getName(),
+                    symbolNotUsed,
+                    Lists.<Type>newArrayList(t, t),
+                    Type.BOOLEAN));
+            functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
+                    Operator.MATCH_REGEXP.getName(),
+                    symbolNotUsed,
+                    Lists.<Type>newArrayList(new ArrayType(t), t),
+                    Type.BOOLEAN));
         }
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
index 23cb6c572e2a26..447f0c28442354 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
@@ -158,6 +158,8 @@
 import org.apache.doris.nereids.trees.expressions.MatchAll;
 import org.apache.doris.nereids.trees.expressions.MatchAny;
 import org.apache.doris.nereids.trees.expressions.MatchPhrase;
+import org.apache.doris.nereids.trees.expressions.MatchPhrasePrefix;
+import org.apache.doris.nereids.trees.expressions.MatchRegexp;
 import org.apache.doris.nereids.trees.expressions.Mod;
 import org.apache.doris.nereids.trees.expressions.Multiply;
 import org.apache.doris.nereids.trees.expressions.NamedExpression;
@@ -1927,6 +1929,18 @@ private Expression withPredicate(Expression valueExpression, PredicateContext ct
                         getExpression(ctx.pattern)
                     );
                     break;
+                case DorisParser.MATCH_PHRASE_PREFIX:
+                    outExpression = new MatchPhrasePrefix(
+                        valueExpression,
+                        getExpression(ctx.pattern)
+                    );
+                    break;
+                case DorisParser.MATCH_REGEXP:
+                    outExpression = new MatchRegexp(
+                        valueExpression,
+                        getExpression(ctx.pattern)
+                    );
+                    break;
                 default:
                     throw new ParseException("Unsupported predicate type: " + ctx.kind.getText(), ctx);
             }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Match.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Match.java
index bc9837eafec5bc..5b3027365a8d91 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Match.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Match.java
@@ -50,6 +50,10 @@ public Operator op() throws AnalysisException {
                 return Operator.MATCH_ALL;
             case "MATCH_PHRASE":
                 return Operator.MATCH_PHRASE;
+            case "MATCH_PHRASE_PREFIX":
+                return Operator.MATCH_PHRASE_PREFIX;
+            case "MATCH_REGEXP":
+                return Operator.MATCH_REGEXP;
             default:
                 throw new AnalysisException("UnSupported type: " + symbol);
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/MatchPhrasePrefix.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/MatchPhrasePrefix.java
new file mode 100644
index 00000000000000..748da21ce30c68
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/MatchPhrasePrefix.java
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions;
+
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * like expression: a MATCH_PHRASE_PREFIX 'hello w'.
+ */
+public class MatchPhrasePrefix extends Match {
+    public MatchPhrasePrefix(Expression left, Expression right) {
+        super(ImmutableList.of(left, right), "MATCH_PHRASE_PREFIX");
+    }
+
+    private MatchPhrasePrefix(List<Expression> children) {
+        super(children, "MATCH_PHRASE_PREFIX");
+    }
+
+    @Override
+    public MatchPhrasePrefix withChildren(List<Expression> children) {
+        Preconditions.checkArgument(children.size() == 2);
+        return new MatchPhrasePrefix(children);
+    }
+
+    @Override
+    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+        return visitor.visitMatchPhrasePrefix(this, context);
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/MatchRegexp.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/MatchRegexp.java
new file mode 100644
index 00000000000000..6bb55aeb8978a5
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/MatchRegexp.java
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions;
+
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * like expression: a MATCH_REGEXP '^h\\w*'.
+ */
+public class MatchRegexp extends Match {
+    public MatchRegexp(Expression left, Expression right) {
+        super(ImmutableList.of(left, right), "MATCH_REGEXP");
+    }
+
+    private MatchRegexp(List<Expression> children) {
+        super(children, "MATCH_REGEXP");
+    }
+
+    @Override
+    public MatchRegexp withChildren(List<Expression> children) {
+        Preconditions.checkArgument(children.size() == 2);
+        return new MatchRegexp(children);
+    }
+
+    @Override
+    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+        return visitor.visitMatchRegexp(this, context);
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ExpressionVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ExpressionVisitor.java
index 4d89227ee9fcf8..179570b824e21c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ExpressionVisitor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ExpressionVisitor.java
@@ -56,6 +56,8 @@
 import org.apache.doris.nereids.trees.expressions.MatchAll;
 import org.apache.doris.nereids.trees.expressions.MatchAny;
 import org.apache.doris.nereids.trees.expressions.MatchPhrase;
+import org.apache.doris.nereids.trees.expressions.MatchPhrasePrefix;
+import org.apache.doris.nereids.trees.expressions.MatchRegexp;
 import org.apache.doris.nereids.trees.expressions.Mod;
 import org.apache.doris.nereids.trees.expressions.Multiply;
 import org.apache.doris.nereids.trees.expressions.NamedExpression;
@@ -454,6 +456,14 @@ public R visitMatchPhrase(MatchPhrase matchPhrase, C context) {
         return visitMatch(matchPhrase, context);
     }
 
+    public R visitMatchPhrasePrefix(MatchPhrasePrefix matchPhrasePrefix, C context) {
+        return visitMatch(matchPhrasePrefix, context);
+    }
+
+    public R visitMatchRegexp(MatchRegexp matchRegexp, C context) {
+        return visitMatch(matchRegexp, context);
+    }
+
     /* ********************************************************************************************
      * Unbound expressions
      * ********************************************************************************************/
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 2fd5dd7adea7b7..da4212f04185e4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -402,6 +402,7 @@ public class SessionVariable implements Serializable, Writable {
     public static final String ENABLE_UNIQUE_KEY_PARTIAL_UPDATE = "enable_unique_key_partial_update";
 
     public static final String INVERTED_INDEX_CONJUNCTION_OPT_THRESHOLD = "inverted_index_conjunction_opt_threshold";
+    public static final String INVERTED_INDEX_MAX_EXPANSIONS = "inverted_index_max_expansions";
 
     public static final String AUTO_ANALYZE_START_TIME = "auto_analyze_start_time";
 
@@ -1192,6 +1193,12 @@ public void setMaxJoinNumberOfReorder(int maxJoinNumberOfReorder) {
             flag = VariableMgr.GLOBAL)
     public String autoAnalyzeEndTime = "23:59:59";
 
+    @VariableMgr.VarAttr(name = INVERTED_INDEX_MAX_EXPANSIONS,
+            description = {"这个参数用来限制查询时扩展的词项（terms）的数量，以此来控制查询的性能",
+                    "This parameter is used to limit the number of term expansions during a query,"
+                    + " thereby controlling query performance"})
+    public int invertedIndexMaxExpansions = 50;
+
     @VariableMgr.VarAttr(name = ENABLE_UNIQUE_KEY_PARTIAL_UPDATE, needForward = true)
     public boolean enableUniqueKeyPartialUpdate = false;
 
@@ -2435,6 +2442,7 @@ public TQueryOptions toThrift() {
         tResult.setTruncateCharOrVarcharColumns(truncateCharOrVarcharColumns);
 
         tResult.setInvertedIndexConjunctionOptThreshold(invertedIndexConjunctionOptThreshold);
+        tResult.setInvertedIndexMaxExpansions(invertedIndexMaxExpansions);
 
         tResult.setFasterFloatConvert(fasterFloatConvert);
 
diff --git a/fe/fe-core/src/main/jflex/sql_scanner.flex b/fe/fe-core/src/main/jflex/sql_scanner.flex
index 4a19494ce80c99..86a3dc7482de60 100644
--- a/fe/fe-core/src/main/jflex/sql_scanner.flex
+++ b/fe/fe-core/src/main/jflex/sql_scanner.flex
@@ -309,6 +309,8 @@ import org.apache.doris.qe.SqlModeHelper;
         keywordMap.put("match_any", new Integer(SqlParserSymbols.KW_MATCH_ANY));
         keywordMap.put("match_all", new Integer(SqlParserSymbols.KW_MATCH_ALL));
         keywordMap.put("match_phrase", new Integer(SqlParserSymbols.KW_MATCH_PHRASE));
+        keywordMap.put("match_phrase_prefix", new Integer(SqlParserSymbols.KW_MATCH_PHRASE_PREFIX));
+        keywordMap.put("match_regexp", new Integer(SqlParserSymbols.KW_MATCH_REGEXP));
         keywordMap.put("element_eq", new Integer(SqlParserSymbols.KW_MATCH_ELEMENT_EQ));
         keywordMap.put("element_lt", new Integer(SqlParserSymbols.KW_MATCH_ELEMENT_LT));
         keywordMap.put("element_gt", new Integer(SqlParserSymbols.KW_MATCH_ELEMENT_GT));
diff --git a/gensrc/thrift/Opcodes.thrift b/gensrc/thrift/Opcodes.thrift
index f6444ebe218fd3..72a1d80e036222 100644
--- a/gensrc/thrift/Opcodes.thrift
+++ b/gensrc/thrift/Opcodes.thrift
@@ -93,4 +93,6 @@ enum TExprOpcode {
     MATCH_ELEMENT_GT,
     MATCH_ELEMENT_LE,
     MATCH_ELEMENT_GE,
+    MATCH_PHRASE_PREFIX,
+    MATCH_REGEXP,
 }
diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift
index 62eb5a0827b090..9ff6a589d69dd2 100644
--- a/gensrc/thrift/PaloInternalService.thrift
+++ b/gensrc/thrift/PaloInternalService.thrift
@@ -249,6 +249,8 @@ struct TQueryOptions {
   86: optional i32 analyze_timeout = 43200;
 
   87: optional bool faster_float_convert = false;
+
+  88: optional i32 inverted_index_max_expansions = 50;
 }
 
 
diff --git a/regression-test/data/inverted_index_p0/test_index_match_phrase_prefix.out b/regression-test/data/inverted_index_p0/test_index_match_phrase_prefix.out
new file mode 100644
index 00000000000000..140fd5ee937992
--- /dev/null
+++ b/regression-test/data/inverted_index_p0/test_index_match_phrase_prefix.out
@@ -0,0 +1,31 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+863
+
+-- !sql --
+863
+
+-- !sql --
+235
+
+-- !sql --
+235
+
+-- !sql --
+166
+
+-- !sql --
+166
+
+-- !sql --
+56
+
+-- !sql --
+56
+
+-- !sql --
+7
+
+-- !sql --
+7
+
diff --git a/regression-test/data/inverted_index_p0/test_index_match_regexp.out b/regression-test/data/inverted_index_p0/test_index_match_regexp.out
new file mode 100644
index 00000000000000..eab27de65ee45f
--- /dev/null
+++ b/regression-test/data/inverted_index_p0/test_index_match_regexp.out
@@ -0,0 +1,16 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+1000
+
+-- !sql --
+54
+
+-- !sql --
+910
+
+-- !sql --
+60
+
+-- !sql --
+38
+
diff --git a/regression-test/suites/inverted_index_p0/test_index_match_phrase_prefix.groovy b/regression-test/suites/inverted_index_p0/test_index_match_phrase_prefix.groovy
new file mode 100644
index 00000000000000..b23bc1b5a8b82a
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_index_match_phrase_prefix.groovy
@@ -0,0 +1,98 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("test_index_match_phrase_prefix", "p0"){
+    def indexTbName1 = "test_index_match_phrase_prefix"
+
+    sql "DROP TABLE IF EXISTS ${indexTbName1}"
+
+    sql """
+      CREATE TABLE ${indexTbName1} (
+      `@timestamp` int(11) NULL COMMENT "",
+      `clientip` varchar(20) NULL COMMENT "",
+      `request` text NULL COMMENT "",
+      `status` int(11) NULL COMMENT "",
+      `size` int(11) NULL COMMENT "",
+      INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT ''
+      ) ENGINE=OLAP
+      DUPLICATE KEY(`@timestamp`)
+      COMMENT "OLAP"
+      DISTRIBUTED BY RANDOM BUCKETS 1
+      PROPERTIES (
+      "replication_allocation" = "tag.location.default: 1"
+      );
+    """
+
+    def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false,
+                        expected_succ_rows = -1, load_to_single_tablet = 'true' ->
+        
+        // load the json data
+        streamLoad {
+            table "${table_name}"
+            
+            // set http request header params
+            set 'label', label + "_" + UUID.randomUUID().toString()
+            set 'read_json_by_line', read_flag
+            set 'format', format_flag
+            file file_name // import json file
+            time 10000 // limit inflight 10s
+            if (expected_succ_rows >= 0) {
+                set 'max_filter_ratio', '1'
+            }
+
+            // if declared a check callback, the default check condition will ignore.
+            // So you must check all condition
+            check { result, exception, startTime, endTime ->
+		        if (ignore_failure && expected_succ_rows < 0) { return }
+                    if (exception != null) {
+                        throw exception
+                    }
+                    log.info("Stream load result: ${result}".toString())
+                    def json = parseJson(result)
+                    assertEquals("success", json.Status.toLowerCase())
+                    if (expected_succ_rows >= 0) {
+                        assertEquals(json.NumberLoadedRows, expected_succ_rows)
+                    } else {
+                        assertEquals(json.NumberTotalRows, json.NumberLoadedRows + json.NumberUnselectedRows)
+                        assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
+                }
+            }
+        }
+    }
+
+    try {
+        load_httplogs_data.call(indexTbName1, 'test_index_match_phrase_prefix', 'true', 'json', 'documents-1000.json')
+
+        qt_sql """ select count() from test_index_match_phrase_prefix where request match_phrase_prefix 'ima'; """
+        qt_sql """ select count() from test_index_match_phrase_prefix where request like '%ima%'; """
+
+        qt_sql """ select count() from test_index_match_phrase_prefix where request match_phrase_prefix 'images/h'; """
+        qt_sql """ select count() from test_index_match_phrase_prefix where request like '%images/h%'; """
+
+        qt_sql """ select count() from test_index_match_phrase_prefix where request match_phrase_prefix 'images/hm'; """
+        qt_sql """ select count() from test_index_match_phrase_prefix where request like '%images/hm%'; """
+
+        qt_sql """ select count() from test_index_match_phrase_prefix where request match_phrase_prefix '/french/images/n'; """
+        qt_sql """ select count() from test_index_match_phrase_prefix where request like '%/french/images/n%'; """
+
+        qt_sql """ select count() from test_index_match_phrase_prefix where request match_phrase_prefix '/french/tickets/images/ti'; """
+        qt_sql """ select count() from test_index_match_phrase_prefix where request like '%/french/tickets/images/ti%'; """
+    } finally {
+        //try_sql("DROP TABLE IF EXISTS ${testTable}")
+    }
+}
\ No newline at end of file
diff --git a/regression-test/suites/inverted_index_p0/test_index_match_regexp.groovy b/regression-test/suites/inverted_index_p0/test_index_match_regexp.groovy
new file mode 100644
index 00000000000000..4c1ee1a5b0b484
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_index_match_regexp.groovy
@@ -0,0 +1,89 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("test_index_match_regexp", "p0"){
+    def indexTbName1 = "test_index_match_regexp"
+
+    sql "DROP TABLE IF EXISTS ${indexTbName1}"
+
+    sql """
+      CREATE TABLE ${indexTbName1} (
+      `@timestamp` int(11) NULL COMMENT "",
+      `clientip` varchar(20) NULL COMMENT "",
+      `request` text NULL COMMENT "",
+      `status` int(11) NULL COMMENT "",
+      `size` int(11) NULL COMMENT "",
+      INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english") COMMENT ''
+      ) ENGINE=OLAP
+      DUPLICATE KEY(`@timestamp`)
+      COMMENT "OLAP"
+      DISTRIBUTED BY RANDOM BUCKETS 1
+      PROPERTIES (
+      "replication_allocation" = "tag.location.default: 1"
+      );
+    """
+
+    def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false,
+                        expected_succ_rows = -1, load_to_single_tablet = 'true' ->
+        
+        // load the json data
+        streamLoad {
+            table "${table_name}"
+            
+            // set http request header params
+            set 'label', label + "_" + UUID.randomUUID().toString()
+            set 'read_json_by_line', read_flag
+            set 'format', format_flag
+            file file_name // import json file
+            time 10000 // limit inflight 10s
+            if (expected_succ_rows >= 0) {
+                set 'max_filter_ratio', '1'
+            }
+
+            // if declared a check callback, the default check condition will ignore.
+            // So you must check all condition
+            check { result, exception, startTime, endTime ->
+		        if (ignore_failure && expected_succ_rows < 0) { return }
+                    if (exception != null) {
+                        throw exception
+                    }
+                    log.info("Stream load result: ${result}".toString())
+                    def json = parseJson(result)
+                    assertEquals("success", json.Status.toLowerCase())
+                    if (expected_succ_rows >= 0) {
+                        assertEquals(json.NumberLoadedRows, expected_succ_rows)
+                    } else {
+                        assertEquals(json.NumberTotalRows, json.NumberLoadedRows + json.NumberUnselectedRows)
+                        assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
+                }
+            }
+        }
+    }
+
+    try {
+        load_httplogs_data.call(indexTbName1, 'test_index_match_regexp', 'true', 'json', 'documents-1000.json')
+
+        qt_sql """ select count() from test_index_match_regexp where request match_regexp '^h'; """
+        qt_sql """ select count() from test_index_match_regexp where request match_regexp '^team'; """
+        qt_sql """ select count() from test_index_match_regexp where request match_regexp 's\$'; """
+        qt_sql """ select count() from test_index_match_regexp where request match_regexp 'er\$'; """
+        qt_sql """ select count() from test_index_match_regexp where request match_regexp '.*tickets.*'; """
+    } finally {
+        //try_sql("DROP TABLE IF EXISTS ${testTable}")
+    }
+}
\ No newline at end of file

From 57980712b9457314a432ffaf97dbe42a31e48025 Mon Sep 17 00:00:00 2001
From: morrySnow <101034200+morrySnow@users.noreply.github.com>
Date: Fri, 22 Dec 2023 11:06:07 +0800
Subject: [PATCH 4/6] [fix](Nereids) explain should fallback too if Nereids is
 not enable (#28475) (#28574)

pick from master
PR #28475
commit b50bc0d2c902856f6727c5301d256ca6a561f8a3
---
 .../trees/plans/commands/DeleteCommand.java   |  8 +++++
 .../commands/InsertIntoTableCommand.java      |  8 +++++
 .../trees/plans/commands/UpdateCommand.java   |  8 +++++
 .../doris/nereids/util/ReadLockTest.java      | 32 +++++++++++--------
 4 files changed, 43 insertions(+), 13 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DeleteCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DeleteCommand.java
index fa6fd629008389..ccaadd7cd9ccd3 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DeleteCommand.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DeleteCommand.java
@@ -130,6 +130,14 @@ public LogicalPlan getLogicalQuery() {
 
     @Override
     public Plan getExplainPlan(ConnectContext ctx) {
+        if (!ctx.getSessionVariable().isEnableNereidsDML()) {
+            try {
+                ctx.getSessionVariable().enableFallbackToOriginalPlannerOnce();
+            } catch (Exception e) {
+                throw new AnalysisException("failed to set fallback to original planner to true", e);
+            }
+            throw new AnalysisException("Nereids DML is disabled, will try to fall back to the original planner");
+        }
         return completeQueryPlan(ctx, logicalQuery);
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/InsertIntoTableCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/InsertIntoTableCommand.java
index 33bbfe8d546aa4..0ff6099163c69a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/InsertIntoTableCommand.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/InsertIntoTableCommand.java
@@ -160,6 +160,14 @@ public void run(ConnectContext ctx, StmtExecutor executor) throws Exception {
 
     @Override
     public Plan getExplainPlan(ConnectContext ctx) {
+        if (!ctx.getSessionVariable().isEnableNereidsDML()) {
+            try {
+                ctx.getSessionVariable().enableFallbackToOriginalPlannerOnce();
+            } catch (Exception e) {
+                throw new AnalysisException("failed to set fallback to original planner to true", e);
+            }
+            throw new AnalysisException("Nereids DML is disabled, will try to fall back to the original planner");
+        }
         return this.logicalQuery;
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/UpdateCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/UpdateCommand.java
index 6236ba019b5f9b..92f3fb21ee5414 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/UpdateCommand.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/UpdateCommand.java
@@ -148,6 +148,14 @@ private void checkTable(ConnectContext ctx) throws AnalysisException {
 
     @Override
     public Plan getExplainPlan(ConnectContext ctx) throws AnalysisException {
+        if (!ctx.getSessionVariable().isEnableNereidsDML()) {
+            try {
+                ctx.getSessionVariable().enableFallbackToOriginalPlannerOnce();
+            } catch (Exception e) {
+                throw new AnalysisException("failed to set fallback to original planner to true", e);
+            }
+            throw new AnalysisException("Nereids DML is disabled, will try to fall back to the original planner");
+        }
         return completeQueryPlan(ctx, logicalQuery);
     }
 
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/util/ReadLockTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/ReadLockTest.java
index 58212c2d3ba5d2..c24a846a1bb245 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/nereids/util/ReadLockTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/ReadLockTest.java
@@ -108,20 +108,26 @@ public void testScalarSubQuery() {
     }
 
     @Test
-    public void testInserInto() {
+    public void testInsertInto() {
         String sql = "INSERT INTO supplier(s_suppkey) SELECT lo_orderkey FROM lineorder";
         StatementContext statementContext = MemoTestUtils.createStatementContext(connectContext, sql);
-        InsertIntoTableCommand insertIntoTableCommand = (InsertIntoTableCommand) parser.parseSingle(sql);
-        NereidsPlanner planner = new NereidsPlanner(statementContext);
-        planner.plan(
-                (LogicalPlan) insertIntoTableCommand.getExplainPlan(connectContext),
-                PhysicalProperties.ANY
-        );
-        CascadesContext cascadesContext = planner.getCascadesContext();
-        List<TableIf> f = cascadesContext.getTables();
-        Assertions.assertEquals(2, f.size());
-        Set<String> tableNames = f.stream().map(TableIf::getName).collect(Collectors.toSet());
-        Assertions.assertTrue(tableNames.contains("supplier"));
-        Assertions.assertTrue(tableNames.contains("lineorder"));
+        boolean originalDML = connectContext.getSessionVariable().enableNereidsDML;
+        connectContext.getSessionVariable().enableNereidsDML = true;
+        try {
+            InsertIntoTableCommand insertIntoTableCommand = (InsertIntoTableCommand) parser.parseSingle(sql);
+            NereidsPlanner planner = new NereidsPlanner(statementContext);
+            planner.plan(
+                    (LogicalPlan) insertIntoTableCommand.getExplainPlan(connectContext),
+                    PhysicalProperties.ANY
+            );
+            CascadesContext cascadesContext = planner.getCascadesContext();
+            List<TableIf> f = cascadesContext.getTables();
+            Assertions.assertEquals(2, f.size());
+            Set<String> tableNames = f.stream().map(TableIf::getName).collect(Collectors.toSet());
+            Assertions.assertTrue(tableNames.contains("supplier"));
+            Assertions.assertTrue(tableNames.contains("lineorder"));
+        } finally {
+            connectContext.getSessionVariable().enableNereidsDML = originalDML;
+        }
     }
 }

From 0c71825172cb2d2b661efb1d1fefe5f50785662e Mon Sep 17 00:00:00 2001
From: Tiewei Fang <43782773+BePPPower@users.noreply.github.com>
Date: Fri, 22 Dec 2023 11:10:47 +0800
Subject: [PATCH 5/6] [Fix](Export) Fix BE core when the `columns` attribute of
 `export` parquet is specified as an asterisk (#28627)

---
 .../org/apache/doris/analysis/ExportStmt.java | 31 +++++++++++++++++--
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ExportStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ExportStmt.java
index 38f02c49cc41e1..6eaa45cdfddba7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ExportStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ExportStmt.java
@@ -39,6 +39,7 @@
 
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
+import com.google.common.base.Splitter;
 import com.google.common.base.Strings;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Maps;
@@ -49,6 +50,7 @@
 import java.util.Map;
 import java.util.Optional;
 import java.util.UUID;
+import java.util.stream.Collectors;
 
 // EXPORT statement, export data to dirs by broker.
 //
@@ -64,7 +66,6 @@ public class ExportStmt extends StatementBase {
 
     private static final String DEFAULT_COLUMN_SEPARATOR = "\t";
     private static final String DEFAULT_LINE_DELIMITER = "\n";
-    private static final String DEFAULT_COLUMNS = "";
     private static final String DEFAULT_PARALLELISM = "1";
     private static final Integer DEFAULT_TIMEOUT = 7200;
 
@@ -121,7 +122,6 @@ public ExportStmt(TableRef tableRef, Expr whereExpr, String path,
         this.columnSeparator = DEFAULT_COLUMN_SEPARATOR;
         this.lineDelimiter = DEFAULT_LINE_DELIMITER;
         this.timeout = DEFAULT_TIMEOUT;
-        this.columns = DEFAULT_COLUMNS;
 
         // The ExportStmt may be created in replay thread, there is no ConnectionContext
         // in replay thread, so we need to clone session variable from default session variable.
@@ -352,7 +352,14 @@ private void checkProperties(Map<String, String> properties) throws UserExceptio
                 properties, ExportStmt.DEFAULT_COLUMN_SEPARATOR));
         this.lineDelimiter = Separator.convertSeparator(PropertyAnalyzer.analyzeLineDelimiter(
                 properties, ExportStmt.DEFAULT_LINE_DELIMITER));
-        this.columns = properties.getOrDefault(LoadStmt.KEY_IN_PARAM_COLUMNS, DEFAULT_COLUMNS);
+
+        // null means not specified
+        // "" means user specified zero columns
+        this.columns = properties.getOrDefault(LoadStmt.KEY_IN_PARAM_COLUMNS, null);
+        // check columns are exits
+        if (this.columns != null) {
+            checkColumns();
+        }
 
         // format
         this.format = properties.getOrDefault(LoadStmt.KEY_IN_PARAM_FORMAT_TYPE, "csv").toLowerCase();
@@ -384,6 +391,24 @@ private void checkProperties(Map<String, String> properties) throws UserExceptio
         label = properties.get(LABEL);
     }
 
+    private void checkColumns() throws DdlException {
+        if (this.columns.isEmpty()) {
+            throw new DdlException("columns can not be empty");
+        }
+        Database db = Env.getCurrentInternalCatalog().getDbOrDdlException(this.tblName.getDb());
+        Table table = db.getTableOrDdlException(this.tblName.getTbl());
+        List<String> tableColumns = table.getBaseSchema().stream().map(column -> column.getName())
+                .collect(Collectors.toList());
+        Splitter split = Splitter.on(',').trimResults().omitEmptyStrings();
+
+        List<String> columnsSpecified = split.splitToList(this.columns.toLowerCase());
+        for (String columnName : columnsSpecified) {
+            if (!tableColumns.contains(columnName)) {
+                throw new DdlException("unknown column [" + columnName + "] in table [" + this.tblName.getTbl() + "]");
+            }
+        }
+    }
+
     @Override
     public String toSql() {
         StringBuilder sb = new StringBuilder();

From 69357f3592101795ba6214555e08bd77bdd750a0 Mon Sep 17 00:00:00 2001
From: zhiqiang <seuhezhiqiang@163.com>
Date: Fri, 22 Dec 2023 11:20:15 +0800
Subject: [PATCH 6/6] [test](regression-test) order by decs should only make
 effect on its nearest column #28728 (#28730)

---
 .../data/correctness_p0/test_order_by.out     | 22 +++++++
 .../correctness_p0/test_order_by.groovy       | 63 +++++++++++++++++++
 2 files changed, 85 insertions(+)
 create mode 100644 regression-test/data/correctness_p0/test_order_by.out
 create mode 100644 regression-test/suites/correctness_p0/test_order_by.groovy

diff --git a/regression-test/data/correctness_p0/test_order_by.out b/regression-test/data/correctness_p0/test_order_by.out
new file mode 100644
index 00000000000000..23d447cd520e27
--- /dev/null
+++ b/regression-test/data/correctness_p0/test_order_by.out
@@ -0,0 +1,22 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select --
+2023-12-19T00:01:12	2023-12-19 00:06:08.618	2023-12-18T23:56:12
+2023-12-19T00:01:12	2023-12-19 00:05:58.513	2023-12-18T23:56:12
+2023-12-18T23:56:12	2023-12-19 00:01:08.799	2023-12-18T23:56:12
+2023-12-18T23:56:12	2023-12-19 00:01:08.797	2023-12-18T23:56:12
+2023-12-18T23:56:12	2023-12-19 00:01:08.796	2023-12-18T23:56:12
+
+-- !select --
+2023-12-18T23:56:12	2023-12-19 00:01:08.799	2023-12-18T23:56:12
+2023-12-18T23:56:12	2023-12-19 00:01:08.797	2023-12-18T23:56:12
+2023-12-18T23:56:12	2023-12-19 00:01:08.796	2023-12-18T23:56:12
+2023-12-19T00:01:12	2023-12-19 00:06:08.618	2023-12-18T23:56:12
+2023-12-19T00:01:12	2023-12-19 00:05:58.513	2023-12-18T23:56:12
+
+-- !select --
+2023-12-19T00:01:12	2023-12-19 00:06:08.618	2023-12-18T23:56:12
+2023-12-19T00:01:12	2023-12-19 00:05:58.513	2023-12-18T23:56:12
+2023-12-18T23:56:12	2023-12-19 00:01:08.799	2023-12-18T23:56:12
+2023-12-18T23:56:12	2023-12-19 00:01:08.797	2023-12-18T23:56:12
+2023-12-18T23:56:12	2023-12-19 00:01:08.796	2023-12-18T23:56:12
+
diff --git a/regression-test/suites/correctness_p0/test_order_by.groovy b/regression-test/suites/correctness_p0/test_order_by.groovy
new file mode 100644
index 00000000000000..3bcb253f6d89b8
--- /dev/null
+++ b/regression-test/suites/correctness_p0/test_order_by.groovy
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_order_by") {
+    sql """
+        drop table if exists test_order_by;
+    """
+
+    sql """
+    create table if not exists test_order_by(
+        create_time datetime null default current_timestamp,
+        run_time varchar(200) null comment '时间戳',
+        create_time2 datetime null
+    )
+    duplicate key(create_time,run_time)
+    distributed by hash(create_time) buckets 1
+    PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1"
+    );
+    """
+
+    sql """
+        insert into test_order_by values ('2023-12-18 23:56:12','2023-12-19 00:01:08.799','2023-12-18 23:56:12');
+    """
+    sql """
+        insert into test_order_by values ('2023-12-18 23:56:12','2023-12-19 00:01:08.797','2023-12-18 23:56:12');
+    """
+    sql """
+        insert into test_order_by values ('2023-12-18 23:56:12','2023-12-19 00:01:08.796','2023-12-18 23:56:12');
+    """
+    sql """
+        insert into test_order_by values ('2023-12-19 00:01:12','2023-12-19 00:06:08.618','2023-12-18 23:56:12');
+    """
+    sql """
+        insert into test_order_by values ('2023-12-19 00:01:12','2023-12-19 00:05:58.513','2023-12-18 23:56:12');
+    """
+
+    qt_select """
+        select * from test_order_by order by create_time desc;
+    """
+
+    qt_select """
+        select * from test_order_by order by create_time, run_time desc;
+    """
+
+    qt_select """
+        select * from test_order_by order by create_time desc, run_time desc;
+    """   
+}
\ No newline at end of file