diff --git a/be/src/vec/functions/like.cpp b/be/src/vec/functions/like.cpp index 784dc51990defb..38604c3d1fb2ff 100644 --- a/be/src/vec/functions/like.cpp +++ b/be/src/vec/functions/like.cpp @@ -60,6 +60,130 @@ static const re2::RE2 LIKE_STARTS_WITH_RE(R"((((\\%)|(\\_)|([^%_\\]))+)(?:%+))") static const re2::RE2 LIKE_EQUALS_RE("(((\\\\_)|([^%_]))+)"); static const re2::RE2 LIKE_ALLPASS_RE("%+"); +struct VectorAllpassSearchState : public VectorPatternSearchState { + VectorAllpassSearchState() : VectorPatternSearchState(FunctionLikeBase::vector_allpass_fn) {} + + ~VectorAllpassSearchState() override = default; + + void like_pattern_match(const std::string& pattern_str) override { + if (!pattern_str.empty() && RE2::FullMatch(pattern_str, LIKE_ALLPASS_RE)) { + _search_strings->insert_default(); + } else { + _pattern_matched = false; + } + } + + void regexp_pattern_match(const std::string& pattern_str) override { + if (RE2::FullMatch(pattern_str, ALLPASS_RE)) { + _search_strings->insert_default(); + } else { + _pattern_matched = false; + } + } +}; + +struct VectorEqualSearchState : public VectorPatternSearchState { + VectorEqualSearchState() : VectorPatternSearchState(FunctionLikeBase::vector_equals_fn) {} + + ~VectorEqualSearchState() override = default; + + void like_pattern_match(const std::string& pattern_str) override { + _search_string.clear(); + if (pattern_str.empty() || RE2::FullMatch(pattern_str, LIKE_EQUALS_RE, &_search_string)) { + FunctionLike::remove_escape_character(&_search_string); + _search_strings->insert_data(_search_string.c_str(), _search_string.size()); + } else { + _pattern_matched = false; + } + } + + void regexp_pattern_match(const std::string& pattern_str) override { + _search_string.clear(); + if (RE2::FullMatch(pattern_str, EQUALS_RE, &_search_string)) { + _search_strings->insert_data(_search_string.c_str(), _search_string.size()); + } else { + _pattern_matched = false; + } + } +}; + +struct VectorSubStringSearchState : public VectorPatternSearchState { + VectorSubStringSearchState() + : VectorPatternSearchState(FunctionLikeBase::vector_substring_fn) {} + + ~VectorSubStringSearchState() override = default; + + void like_pattern_match(const std::string& pattern_str) override { + _search_string.clear(); + if (RE2::FullMatch(pattern_str, LIKE_SUBSTRING_RE, &_search_string)) { + FunctionLike::remove_escape_character(&_search_string); + _search_strings->insert_data(_search_string.c_str(), _search_string.size()); + } else { + _pattern_matched = false; + } + } + + void regexp_pattern_match(const std::string& pattern_str) override { + _search_string.clear(); + if (RE2::FullMatch(pattern_str, SUBSTRING_RE, &_search_string)) { + _search_strings->insert_data(_search_string.c_str(), _search_string.size()); + } else { + _pattern_matched = false; + } + } +}; + +struct VectorStartsWithSearchState : public VectorPatternSearchState { + VectorStartsWithSearchState() + : VectorPatternSearchState(FunctionLikeBase::vector_starts_with_fn) {} + + ~VectorStartsWithSearchState() override = default; + + void like_pattern_match(const std::string& pattern_str) override { + _search_string.clear(); + if (RE2::FullMatch(pattern_str, LIKE_STARTS_WITH_RE, &_search_string)) { + FunctionLike::remove_escape_character(&_search_string); + _search_strings->insert_data(_search_string.c_str(), _search_string.size()); + } else { + _pattern_matched = false; + } + } + + void regexp_pattern_match(const std::string& pattern_str) override { + _search_string.clear(); + if (RE2::FullMatch(pattern_str, STARTS_WITH_RE, &_search_string)) { + _search_strings->insert_data(_search_string.c_str(), _search_string.size()); + } else { + _pattern_matched = false; + } + } +}; + +struct VectorEndsWithSearchState : public VectorPatternSearchState { + VectorEndsWithSearchState() : VectorPatternSearchState(FunctionLikeBase::vector_ends_with_fn) {} + + ~VectorEndsWithSearchState() override = default; + + void like_pattern_match(const std::string& pattern_str) override { + _search_string.clear(); + if (RE2::FullMatch(pattern_str, LIKE_ENDS_WITH_RE, &_search_string)) { + FunctionLike::remove_escape_character(&_search_string); + _search_strings->insert_data(_search_string.c_str(), _search_string.size()); + } else { + _pattern_matched = false; + } + } + + void regexp_pattern_match(const std::string& pattern_str) override { + _search_string.clear(); + if (RE2::FullMatch(pattern_str, ENDS_WITH_RE, &_search_string)) { + _search_strings->insert_data(_search_string.c_str(), _search_string.size()); + } else { + _pattern_matched = false; + } + } +}; + Status LikeSearchState::clone(LikeSearchState& cloned) { cloned.escape_char = escape_char; cloned.set_search_string(search_string); @@ -89,13 +213,26 @@ Status LikeSearchState::clone(LikeSearchState& cloned) { return Status::OK(); } -Status FunctionLikeBase::constant_allpass_fn(LikeSearchState* state, const ColumnString& val, +Status FunctionLikeBase::constant_allpass_fn(LikeSearchState* state, const ColumnString& vals, const StringRef& pattern, ColumnUInt8::Container& result) { - auto sz = val.size(); - for (size_t i = 0; i < sz; i++) { - result[i] = 1; - } + memset(result.data(), 1, vals.size()); + return Status::OK(); +} + +Status FunctionLikeBase::constant_allpass_fn_scalar(LikeSearchState* state, const StringRef& val, + const StringRef& pattern, + unsigned char* result) { + *result = 1; + return Status::OK(); +} + +Status FunctionLikeBase::vector_allpass_fn(const ColumnString& vals, + const ColumnString& search_strings, + ColumnUInt8::Container& result) { + DCHECK(vals.size() == search_strings.size()); + DCHECK(vals.size() == result.size()); + memset(result.data(), 1, vals.size()); return Status::OK(); } @@ -111,6 +248,29 @@ Status FunctionLikeBase::constant_starts_with_fn(LikeSearchState* state, const C return Status::OK(); } +Status FunctionLikeBase::constant_starts_with_fn_scalar(LikeSearchState* state, + const StringRef& val, + const StringRef& pattern, + unsigned char* result) { + *result = (val.size >= state->search_string_sv.size) && + (state->search_string_sv == val.substring(0, state->search_string_sv.size)); + return Status::OK(); +} + +Status FunctionLikeBase::vector_starts_with_fn(const ColumnString& vals, + const ColumnString& search_strings, + ColumnUInt8::Container& result) { + DCHECK(vals.size() == search_strings.size()); + DCHECK(vals.size() == result.size()); + auto sz = vals.size(); + for (size_t i = 0; i < sz; ++i) { + const auto& str_sv = vals.get_data_at(i); + const auto& search_string_sv = search_strings.get_data_at(i); + result[i] = (str_sv.size >= search_string_sv.size) && str_sv.start_with(search_string_sv); + } + return Status::OK(); +} + Status FunctionLikeBase::constant_ends_with_fn(LikeSearchState* state, const ColumnString& val, const StringRef& pattern, ColumnUInt8::Container& result) { @@ -123,6 +283,29 @@ Status FunctionLikeBase::constant_ends_with_fn(LikeSearchState* state, const Col return Status::OK(); } +Status FunctionLikeBase::constant_ends_with_fn_scalar(LikeSearchState* state, const StringRef& val, + const StringRef& pattern, + unsigned char* result) { + *result = (val.size >= state->search_string_sv.size) && + (state->search_string_sv == val.substring(val.size - state->search_string_sv.size, + state->search_string_sv.size)); + return Status::OK(); +} + +Status FunctionLikeBase::vector_ends_with_fn(const ColumnString& vals, + const ColumnString& search_strings, + ColumnUInt8::Container& result) { + DCHECK(vals.size() == search_strings.size()); + DCHECK(vals.size() == result.size()); + auto sz = vals.size(); + for (size_t i = 0; i < sz; ++i) { + const auto& str_sv = vals.get_data_at(i); + const auto& search_string_sv = search_strings.get_data_at(i); + result[i] = (str_sv.size >= search_string_sv.size) && str_sv.end_with(search_string_sv); + } + return Status::OK(); +} + Status FunctionLikeBase::constant_equals_fn(LikeSearchState* state, const ColumnString& val, const StringRef& pattern, ColumnUInt8::Container& result) { @@ -133,6 +316,27 @@ Status FunctionLikeBase::constant_equals_fn(LikeSearchState* state, const Column return Status::OK(); } +Status FunctionLikeBase::constant_equals_fn_scalar(LikeSearchState* state, const StringRef& val, + const StringRef& pattern, + unsigned char* result) { + *result = (val == state->search_string_sv); + return Status::OK(); +} + +Status FunctionLikeBase::vector_equals_fn(const ColumnString& vals, + const ColumnString& search_strings, + ColumnUInt8::Container& result) { + DCHECK(vals.size() == search_strings.size()); + DCHECK(vals.size() == result.size()); + auto sz = vals.size(); + for (size_t i = 0; i < sz; ++i) { + const auto& str_sv = vals.get_data_at(i); + const auto& search_string_sv = search_strings.get_data_at(i); + result[i] = str_sv == search_string_sv; + } + return Status::OK(); +} + Status FunctionLikeBase::constant_substring_fn(LikeSearchState* state, const ColumnString& val, const StringRef& pattern, ColumnUInt8::Container& result) { @@ -140,44 +344,13 @@ Status FunctionLikeBase::constant_substring_fn(LikeSearchState* state, const Col for (size_t i = 0; i < sz; i++) { if (state->search_string_sv.size == 0) { result[i] = true; + continue; } result[i] = state->substring_pattern.search(val.get_data_at(i)) != -1; } return Status::OK(); } -Status FunctionLikeBase::constant_allpass_fn_scalar(LikeSearchState* state, const StringRef& val, - const StringRef& pattern, - unsigned char* result) { - *result = 1; - return Status::OK(); -} - -Status FunctionLikeBase::constant_starts_with_fn_scalar(LikeSearchState* state, - const StringRef& val, - const StringRef& pattern, - unsigned char* result) { - *result = (val.size >= state->search_string_sv.size) && - (state->search_string_sv == val.substring(0, state->search_string_sv.size)); - return Status::OK(); -} - -Status FunctionLikeBase::constant_ends_with_fn_scalar(LikeSearchState* state, const StringRef& val, - const StringRef& pattern, - unsigned char* result) { - *result = (val.size >= state->search_string_sv.size) && - (state->search_string_sv == val.substring(val.size - state->search_string_sv.size, - state->search_string_sv.size)); - return Status::OK(); -} - -Status FunctionLikeBase::constant_equals_fn_scalar(LikeSearchState* state, const StringRef& val, - const StringRef& pattern, - unsigned char* result) { - *result = (val == state->search_string_sv); - return Status::OK(); -} - Status FunctionLikeBase::constant_substring_fn_scalar(LikeSearchState* state, const StringRef& val, const StringRef& pattern, unsigned char* result) { @@ -189,6 +362,25 @@ Status FunctionLikeBase::constant_substring_fn_scalar(LikeSearchState* state, co return Status::OK(); } +Status FunctionLikeBase::vector_substring_fn(const ColumnString& vals, + const ColumnString& search_strings, + ColumnUInt8::Container& result) { + DCHECK(vals.size() == search_strings.size()); + DCHECK(vals.size() == result.size()); + auto sz = vals.size(); + for (size_t i = 0; i < sz; ++i) { + const auto& str_sv = vals.get_data_at(i); + const auto& search_string_sv = search_strings.get_data_at(i); + if (search_string_sv.size == 0) { + result[i] = true; + continue; + } + doris::StringSearch substring_search(&search_string_sv); + result[i] = substring_search.search(str_sv) != -1; + } + return Status::OK(); +} + Status FunctionLikeBase::constant_regex_fn_scalar(LikeSearchState* state, const StringRef& val, const StringRef& pattern, unsigned char* result) { if (state->hs_database) { // use hyperscan @@ -338,15 +530,9 @@ Status FunctionLikeBase::execute_impl(FunctionContext* context, Block& block, &state->search_state)); } else { const auto pattern_col = block.get_by_position(arguments[1]).column; - if (const auto* str_patterns = check_and_get_column(pattern_col.get())) { - for (int i = 0; i < input_rows_count; i++) { - const auto pattern_val = str_patterns->get_data_at(i); - const auto value_val = values->get_data_at(i); - (state->scalar_function)( - const_cast(&state->search_state), value_val, - pattern_val, &vec_res[i]); - } + RETURN_IF_ERROR( + vector_non_const(*values, *str_patterns, vec_res, state, input_rows_count)); } else if (const auto* const_patterns = check_and_get_column(pattern_col.get())) { const auto& pattern_val = const_patterns->get_data_at(0); @@ -413,11 +599,102 @@ Status FunctionLikeBase::vector_const(const ColumnString& values, const StringRe return Status::OK(); } +template +VPatternSearchStateSPtr FunctionLikeBase::pattern_type_recognition(const ColumnString& patterns) { + VPatternSearchStateSPtr allpass_state = std::make_shared(); + VPatternSearchStateSPtr equal_state = std::make_shared(); + VPatternSearchStateSPtr substring_state = std::make_shared(); + VPatternSearchStateSPtr starts_with_state = std::make_shared(); + VPatternSearchStateSPtr ends_with_state = std::make_shared(); + size_t size = patterns.size(); + + for (size_t i = 0; i < size; ++i) { + if (!allpass_state->_pattern_matched && !equal_state->_pattern_matched && + !substring_state->_pattern_matched && !starts_with_state->_pattern_matched && + !ends_with_state->_pattern_matched) { + return nullptr; + } + std::string pattern_str = patterns.get_data_at(i).to_string(); + if (allpass_state->_pattern_matched) { + if constexpr (LIKE_PATTERN) { + allpass_state->like_pattern_match(pattern_str); + } else { + allpass_state->regexp_pattern_match(pattern_str); + } + } + if (equal_state->_pattern_matched) { + if constexpr (LIKE_PATTERN) { + equal_state->like_pattern_match(pattern_str); + } else { + equal_state->regexp_pattern_match(pattern_str); + } + } + if (substring_state->_pattern_matched) { + if constexpr (LIKE_PATTERN) { + substring_state->like_pattern_match(pattern_str); + } else { + substring_state->regexp_pattern_match(pattern_str); + } + } + if (starts_with_state->_pattern_matched) { + if constexpr (LIKE_PATTERN) { + starts_with_state->like_pattern_match(pattern_str); + } else { + starts_with_state->regexp_pattern_match(pattern_str); + } + } + if (ends_with_state->_pattern_matched) { + if constexpr (LIKE_PATTERN) { + ends_with_state->like_pattern_match(pattern_str); + } else { + ends_with_state->regexp_pattern_match(pattern_str); + } + } + } + + if (allpass_state->_pattern_matched) { + return allpass_state; + } else if (equal_state->_pattern_matched) { + return equal_state; + } else if (substring_state->_pattern_matched) { + return substring_state; + } else if (starts_with_state->_pattern_matched) { + return starts_with_state; + } else if (ends_with_state->_pattern_matched) { + return ends_with_state; + } else { + return nullptr; + } +} + +Status FunctionLikeBase::vector_non_const(const ColumnString& values, const ColumnString& patterns, + ColumnUInt8::Container& result, LikeState* state, + size_t input_rows_count) const { + VPatternSearchStateSPtr vector_search_state; + if (state->is_like_pattern) { + vector_search_state = pattern_type_recognition(patterns); + } else { + vector_search_state = pattern_type_recognition(patterns); + } + if (vector_search_state == nullptr) { + // pattern type recognition failed, use default case + for (int i = 0; i < input_rows_count; ++i) { + const auto pattern_val = patterns.get_data_at(i); + const auto value_val = values.get_data_at(i); + RETURN_IF_ERROR((state->scalar_function)(&state->search_state, value_val, pattern_val, + &result[i])); + } + return Status::OK(); + } + const auto* search_strings = + static_cast(vector_search_state->_search_strings.get()); + return (vector_search_state->_vector_function)(values, *search_strings, result); +} + Status FunctionLike::like_fn(LikeSearchState* state, const ColumnString& val, const StringRef& pattern, ColumnUInt8::Container& result) { std::string re_pattern; convert_like_pattern(state, std::string(pattern.data, pattern.size), &re_pattern); - return regexp_fn(state, val, {re_pattern.c_str(), re_pattern.size()}, result); } @@ -542,6 +819,7 @@ Status FunctionLike::open(FunctionContext* context, FunctionContext::FunctionSta } std::shared_ptr state = std::make_shared(); context->set_function_state(scope, state); + state->is_like_pattern = true; state->function = like_fn; state->scalar_function = like_fn_scalar; if (context->is_col_constant(1)) { @@ -657,6 +935,7 @@ Status FunctionRegexp::open(FunctionContext* context, FunctionContext::FunctionS } std::shared_ptr state = std::make_shared(); context->set_function_state(scope, state); + state->is_like_pattern = false; state->function = regexp_fn; state->scalar_function = regexp_fn_scalar; if (context->is_col_constant(1)) { diff --git a/be/src/vec/functions/like.h b/be/src/vec/functions/like.h index 3726518a9b9360..0cb9f196a1859a 100644 --- a/be/src/vec/functions/like.h +++ b/be/src/vec/functions/like.h @@ -118,12 +118,36 @@ using LikeFn = std::function; +using VectorLikeFn = std::function; + struct LikeState { + bool is_like_pattern; LikeSearchState search_state; LikeFn function; ScalarLikeFn scalar_function; }; +struct VectorPatternSearchState { + MutableColumnPtr _search_strings; + std::string _search_string; + VectorLikeFn _vector_function; + bool _pattern_matched; + + VectorPatternSearchState(VectorLikeFn vector_function) + : _search_strings(ColumnString::create()), + _vector_function(vector_function), + _pattern_matched(true) {} + + virtual ~VectorPatternSearchState() = default; + + virtual void like_pattern_match(const std::string& pattern_str) = 0; + + virtual void regexp_pattern_match(const std::string& pattern_str) = 0; +}; + +using VPatternSearchStateSPtr = std::shared_ptr; + class FunctionLikeBase : public IFunction { public: size_t get_number_of_arguments() const override { return 2; } @@ -137,54 +161,83 @@ class FunctionLikeBase : public IFunction { Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) override; + friend struct VectorAllpassSearchState; + friend struct VectorEqualSearchState; + friend struct VectorSubStringSearchState; + friend struct VectorStartsWithSearchState; + friend struct VectorEndsWithSearchState; + protected: Status vector_const(const ColumnString& values, const StringRef* pattern_val, ColumnUInt8::Container& result, const LikeFn& function, LikeSearchState* search_state); + Status vector_non_const(const ColumnString& values, const ColumnString& patterns, + ColumnUInt8::Container& result, LikeState* state, + size_t input_rows_count) const; + Status execute_substring(const ColumnString::Chars& values, const ColumnString::Offsets& value_offsets, ColumnUInt8::Container& result, LikeSearchState* search_state); + template + static VPatternSearchStateSPtr pattern_type_recognition(const ColumnString& patterns); + static Status constant_allpass_fn(LikeSearchState* state, const ColumnString& val, const StringRef& pattern, ColumnUInt8::Container& result); + static Status constant_allpass_fn_scalar(LikeSearchState* state, const StringRef& val, + const StringRef& pattern, unsigned char* result); + + static Status vector_allpass_fn(const ColumnString& vals, const ColumnString& search_strings, + ColumnUInt8::Container& result); + static Status constant_starts_with_fn(LikeSearchState* state, const ColumnString& val, const StringRef& pattern, ColumnUInt8::Container& result); - static Status constant_ends_with_fn(LikeSearchState* state, const ColumnString& val, - const StringRef& pattern, ColumnUInt8::Container& result); + static Status constant_starts_with_fn_scalar(LikeSearchState* state, const StringRef& val, + const StringRef& pattern, unsigned char* result); - static Status constant_equals_fn(LikeSearchState* state, const ColumnString& val, - const StringRef& pattern, ColumnUInt8::Container& result); + static Status vector_starts_with_fn(const ColumnString& vals, + const ColumnString& search_strings, + ColumnUInt8::Container& result); - static Status constant_substring_fn(LikeSearchState* state, const ColumnString& val, + static Status constant_ends_with_fn(LikeSearchState* state, const ColumnString& val, const StringRef& pattern, ColumnUInt8::Container& result); - static Status constant_regex_fn(LikeSearchState* state, const ColumnString& val, - const StringRef& pattern, ColumnUInt8::Container& result); - - static Status regexp_fn(LikeSearchState* state, const ColumnString& val, - const StringRef& pattern, ColumnUInt8::Container& result); - - static Status constant_allpass_fn_scalar(LikeSearchState* state, const StringRef& val, - const StringRef& pattern, unsigned char* result); - - static Status constant_starts_with_fn_scalar(LikeSearchState* state, const StringRef& val, - const StringRef& pattern, unsigned char* result); - static Status constant_ends_with_fn_scalar(LikeSearchState* state, const StringRef& val, const StringRef& pattern, unsigned char* result); + static Status vector_ends_with_fn(const ColumnString& vals, const ColumnString& search_strings, + ColumnUInt8::Container& result); + + static Status constant_equals_fn(LikeSearchState* state, const ColumnString& val, + const StringRef& pattern, ColumnUInt8::Container& result); + static Status constant_equals_fn_scalar(LikeSearchState* state, const StringRef& val, const StringRef& pattern, unsigned char* result); + static Status vector_equals_fn(const ColumnString& vals, const ColumnString& search_strings, + ColumnUInt8::Container& result); + + static Status constant_substring_fn(LikeSearchState* state, const ColumnString& val, + const StringRef& pattern, ColumnUInt8::Container& result); + static Status constant_substring_fn_scalar(LikeSearchState* state, const StringRef& val, const StringRef& pattern, unsigned char* result); + static Status vector_substring_fn(const ColumnString& vals, const ColumnString& search_strings, + ColumnUInt8::Container& result); + + static Status constant_regex_fn(LikeSearchState* state, const ColumnString& val, + const StringRef& pattern, ColumnUInt8::Container& result); + static Status constant_regex_fn_scalar(LikeSearchState* state, const StringRef& val, const StringRef& pattern, unsigned char* result); + static Status regexp_fn(LikeSearchState* state, const ColumnString& val, + const StringRef& pattern, ColumnUInt8::Container& result); + static Status regexp_fn_scalar(LikeSearchState* state, const StringRef& val, const StringRef& pattern, unsigned char* result); @@ -204,6 +257,11 @@ class FunctionLike : public FunctionLikeBase { Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override; friend struct LikeSearchState; + friend struct VectorAllpassSearchState; + friend struct VectorEqualSearchState; + friend struct VectorSubStringSearchState; + friend struct VectorStartsWithSearchState; + friend struct VectorEndsWithSearchState; private: static Status like_fn(LikeSearchState* state, const ColumnString& val, const StringRef& pattern, diff --git a/regression-test/data/correctness_p0/test_like_predicate_with_concat.out b/regression-test/data/correctness_p0/test_like_predicate_with_concat.out new file mode 100644 index 00000000000000..37f9dd42cd287f --- /dev/null +++ b/regression-test/data/correctness_p0/test_like_predicate_with_concat.out @@ -0,0 +1,101 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql1 -- +0 prefix0_infix0_suffix0 prefix0 +2 prefix2_$infix2$_suffix2 infix2 +3 prefix3_^infix3_suffix3 infix3 +5 prefix5%%$__infix5$_^^^%%$suffix5 suffix5 +6 prefix6__^^_%%%__infix6_%^suffix6% prefix6__^^_%%%__infix6_%^suffix6% +8 prefix8_^^%%%infix8%%$^^___suffix8 +9 prefix9$%%%^^__infix9__&&%%$suffix9 \N + +-- !sql2 -- +0 prefix0_infix0_suffix0 prefix0 +1 %prefix1_infix1_suffix1 prefix1 +2 prefix2_$infix2$_suffix2 infix2 +3 prefix3_^infix3_suffix3 infix3 +4 $prefix4_$infix4%%%_^^suffix4 suffix4 +5 prefix5%%$__infix5$_^^^%%$suffix5 suffix5 +8 prefix8_^^%%%infix8%%$^^___suffix8 +9 prefix9$%%%^^__infix9__&&%%$suffix9 \N + +-- !sql3 -- +0 prefix0_infix0_suffix0 prefix0 +1 %prefix1_infix1_suffix1 prefix1 +2 prefix2_$infix2$_suffix2 infix2 +3 prefix3_^infix3_suffix3 infix3 +4 $prefix4_$infix4%%%_^^suffix4 suffix4 +5 prefix5%%$__infix5$_^^^%%$suffix5 suffix5 +6 prefix6__^^_%%%__infix6_%^suffix6% prefix6__^^_%%%__infix6_%^suffix6% +7 %%%^^^$prefix7_infix7_suffix7%%%^^^$ prefix7_infix7_suffix7 +8 prefix8_^^%%%infix8%%$^^___suffix8 +9 prefix9$%%%^^__infix9__&&%%$suffix9 \N + +-- !sql4 -- +6 prefix6__^^_%%%__infix6_%^suffix6% prefix6__^^_%%%__infix6_%^suffix6% + +-- !sql5 -- +0 prefix0_infix0_suffix0 prefix0 +1 %prefix1_infix1_suffix1 prefix1 +2 prefix2_$infix2$_suffix2 infix2 +3 prefix3_^infix3_suffix3 infix3 +4 $prefix4_$infix4%%%_^^suffix4 suffix4 +5 prefix5%%$__infix5$_^^^%%$suffix5 suffix5 +6 prefix6__^^_%%%__infix6_%^suffix6% prefix6__^^_%%%__infix6_%^suffix6% +7 %%%^^^$prefix7_infix7_suffix7%%%^^^$ prefix7_infix7_suffix7 +8 prefix8_^^%%%infix8%%$^^___suffix8 + +-- !sql6 -- +0 prefix0_infix0_suffix0 prefix0 +6 prefix6__^^_%%%__infix6_%^suffix6% prefix6__^^_%%%__infix6_%^suffix6% +8 prefix8_^^%%%infix8%%$^^___suffix8 + +-- !sql7 -- +4 $prefix4_$infix4%%%_^^suffix4 suffix4 +5 prefix5%%$__infix5$_^^^%%$suffix5 suffix5 +6 prefix6__^^_%%%__infix6_%^suffix6% prefix6__^^_%%%__infix6_%^suffix6% +8 prefix8_^^%%%infix8%%$^^___suffix8 + +-- !sql8 -- +0 prefix0_infix0_suffix0 prefix0 + +-- !sql9 -- +0 prefix0_infix0_suffix0 prefix0 +2 prefix2_$infix2$_suffix2 infix2 +3 prefix3_^infix3_suffix3 infix3 +5 prefix5%%$__infix5$_^^^%%$suffix5 suffix5 +6 prefix6__^^_%%%__infix6_%^suffix6% prefix6__^^_%%%__infix6_%^suffix6% +8 prefix8_^^%%%infix8%%$^^___suffix8 +9 prefix9$%%%^^__infix9__&&%%$suffix9 \N + +-- !sql10 -- +0 prefix0_infix0_suffix0 prefix0 +1 %prefix1_infix1_suffix1 prefix1 +2 prefix2_$infix2$_suffix2 infix2 +3 prefix3_^infix3_suffix3 infix3 +4 $prefix4_$infix4%%%_^^suffix4 suffix4 +5 prefix5%%$__infix5$_^^^%%$suffix5 suffix5 +8 prefix8_^^%%%infix8%%$^^___suffix8 +9 prefix9$%%%^^__infix9__&&%%$suffix9 \N + +-- !sql13 -- +0 prefix0_infix0_suffix0 prefix0 +1 %prefix1_infix1_suffix1 prefix1 +2 prefix2_$infix2$_suffix2 infix2 +3 prefix3_^infix3_suffix3 infix3 +4 $prefix4_$infix4%%%_^^suffix4 suffix4 +5 prefix5%%$__infix5$_^^^%%$suffix5 suffix5 +7 %%%^^^$prefix7_infix7_suffix7%%%^^^$ prefix7_infix7_suffix7 +8 prefix8_^^%%%infix8%%$^^___suffix8 + +-- !sql14 -- +0 prefix0_infix0_suffix0 prefix0 +8 prefix8_^^%%%infix8%%$^^___suffix8 + +-- !sql15 -- +4 $prefix4_$infix4%%%_^^suffix4 suffix4 +5 prefix5%%$__infix5$_^^^%%$suffix5 suffix5 +8 prefix8_^^%%%infix8%%$^^___suffix8 + +-- !sql16 -- +0 prefix0_infix0_suffix0 prefix0 + diff --git a/regression-test/suites/correctness_p0/test_like_predicate_with_concat.groovy b/regression-test/suites/correctness_p0/test_like_predicate_with_concat.groovy new file mode 100644 index 00000000000000..a918bdff3a33c5 --- /dev/null +++ b/regression-test/suites/correctness_p0/test_like_predicate_with_concat.groovy @@ -0,0 +1,112 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_like_predicate_with_concat") { + sql """ DROP TABLE IF EXISTS `test_like_predicate_with_concat` """ + + sql """ + CREATE TABLE IF NOT EXISTS `test_like_predicate_with_concat` ( + `id` int, + `value_col` string, + `pattern_col` string + ) ENGINE=OLAP + DISTRIBUTED BY HASH(`id`) BUCKETS 4 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + sql """ + INSERT INTO `test_like_predicate_with_concat` VALUES + (0, 'prefix0_infix0_suffix0', 'prefix0'), + (1, '%prefix1_infix1_suffix1', 'prefix1'), + (2, 'prefix2_\$infix2\$_suffix2', 'infix2'), + (3, 'prefix3_^infix3_suffix3', 'infix3'), + (4, '\$prefix4_\$infix4%%%_^^suffix4', 'suffix4'), + (5, 'prefix5%%\$__infix5\$_^^^%%\$suffix5', 'suffix5'), + (6, 'prefix6__^^_%%%__infix6_%^suffix6%', 'prefix6__^^_%%%__infix6_%^suffix6%'), + (7, '%%%^^^\$prefix7_infix7_suffix7%%%^^^\$', 'prefix7_infix7_suffix7'), + (8, 'prefix8_^^%%%infix8%%\$^^___suffix8', ''), + (9, 'prefix9\$%%%^^__infix9__&&%%\$suffix9', NULL); + """ + + qt_sql1 """ + SELECT * FROM `test_like_predicate_with_concat` WHERE `value_col` LIKE 'prefix_%' ORDER BY `id`; + """ + + qt_sql2 """ + SELECT * FROM `test_like_predicate_with_concat` WHERE `value_col` LIKE '%suffix_' ORDER BY `id`; + """ + + qt_sql3 """ + SELECT * FROM `test_like_predicate_with_concat` WHERE `value_col` LIKE CONCAT('%', '%') ORDER BY `id`; + """ + + qt_sql4 """ + SELECT * FROM `test_like_predicate_with_concat` WHERE `value_col` LIKE CONCAT(pattern_col) ORDER BY `id`; + """ + + qt_sql5 """ + SELECT * FROM `test_like_predicate_with_concat` WHERE `value_col` LIKE CONCAT('%', pattern_col, '%') ORDER BY `id`; + """ + + qt_sql6 """ + SELECT * FROM `test_like_predicate_with_concat` WHERE `value_col` LIKE CONCAT(pattern_col, '%') ORDER BY `id`; + """ + + qt_sql7 """ + SELECT * FROM `test_like_predicate_with_concat` WHERE `value_col` LIKE CONCAT('%', pattern_col) ORDER BY `id`; + """ + + qt_sql8 """ + SELECT * FROM `test_like_predicate_with_concat` WHERE `value_col` LIKE CONCAT('prefix0_', 'infix0', '_suffix0') ORDER BY `id`; + """ + + qt_sql9 """ + SELECT * FROM `test_like_predicate_with_concat` WHERE `value_col` REGEXP '^prefix' ORDER BY `id`; + """ + + qt_sql10 """ + SELECT * FROM `test_like_predicate_with_concat` WHERE `value_col` REGEXP '.*suffix.\$' ORDER BY `id`; + """ + + // TODO: fix bug in master branch + // qt_sql11 """ + // SELECT * FROM `test_like_predicate_with_concat` WHERE `value_col` REGEXP '.*' ORDER BY `id`; + // """ + + // TODO: fix bug in master branch + // qt_sql12 """ + // SELECT * FROM `test_like_predicate_with_concat` WHERE `value_col` REGEXP CONCAT('.', '*') ORDER BY `id`; + // """ + + qt_sql13 """ + SELECT * FROM `test_like_predicate_with_concat` WHERE `value_col` REGEXP CONCAT('.*', pattern_col, '.*') ORDER BY `id`; + """ + + qt_sql14 """ + SELECT * FROM `test_like_predicate_with_concat` WHERE `value_col` REGEXP CONCAT('^', pattern_col, '.*') ORDER BY `id`; + """ + + qt_sql15 """ + SELECT * FROM `test_like_predicate_with_concat` WHERE `value_col` REGEXP CONCAT(pattern_col, '\$') ORDER BY `id`; + """ + + qt_sql16 """ + SELECT * FROM `test_like_predicate_with_concat` WHERE `value_col` REGEXP CONCAT('prefix0_', 'infix0', '_suffix0') ORDER BY `id`; + """ +} diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy index 49e6d57539f32d..292db35bb0da84 100644 --- a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy +++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy @@ -156,5 +156,4 @@ suite("test_string_function_regexp") { qt_sql_field4 "SELECT FIELD('21','2130', '2131', '21');" qt_sql_field5 "SELECT FIELD(21, 2130, 21, 2131);" -} - +} \ No newline at end of file