From df3c91906aef26ea7040817ebf9374cab38a39ed Mon Sep 17 00:00:00 2001 From: Socrates Date: Sat, 14 Sep 2024 02:21:58 +0800 Subject: [PATCH] fix url_encode and add regress test for url_decode --- be/src/util/url_coding.cpp | 40 +++--------- be/src/util/url_coding.h | 4 +- be/src/vec/functions/function_string.h | 61 ++++++------------- .../string_functions/test_url_decode.out | 43 +++++++++++++ .../string_functions/test_url_decode.groovy | 47 ++++++++++++++ 5 files changed, 118 insertions(+), 77 deletions(-) create mode 100644 regression-test/data/query_p0/sql_functions/string_functions/test_url_decode.out create mode 100644 regression-test/suites/query_p0/sql_functions/string_functions/test_url_decode.groovy diff --git a/be/src/util/url_coding.cpp b/be/src/util/url_coding.cpp index 5973eb80d9acd64..baa10e21634e81c 100644 --- a/be/src/util/url_coding.cpp +++ b/be/src/util/url_coding.cpp @@ -24,8 +24,8 @@ namespace doris { -bool url_encode(const std::string& in, std::string* out) { - auto* encoded_url = curl_easy_escape(nullptr, in.c_str(), static_cast(in.length())); +bool url_encode(const std::string_view& in, std::string* out) { + auto* encoded_url = curl_easy_escape(nullptr, in.data(), static_cast(in.length())); if (encoded_url == nullptr) { return false; } @@ -34,36 +34,14 @@ bool url_encode(const std::string& in, std::string* out) { return true; } -// Adapted from -// http://www.boost.org/doc/libs/1_40_0/doc/html/boost_asio/ -// example/http/server3/request_handler.cpp -// See http://www.boost.org/LICENSE_1_0.txt for license for this method. -bool url_decode(const std::string& in, std::string* out) { - out->clear(); - out->reserve(in.size()); - - for (size_t i = 0; i < in.size(); ++i) { - if (in[i] == '%') { - if (i + 3 <= in.size()) { - int value = 0; - std::istringstream is(in.substr(i + 1, 2)); - - if (is >> std::hex >> value) { - (*out) += static_cast(value); - i += 2; - } else { - return false; - } - } else { - return false; - } - } else if (in[i] == '+') { - (*out) += ' '; - } else { - (*out) += in[i]; - } +bool url_decode(const std::string_view& in, std::string* out) { + int len = 0; + auto* decoded_url = curl_easy_unescape(nullptr, in.data(), static_cast(in.length()), &len); + if (decoded_url == nullptr) { + return false; } - + *out = std::string(decoded_url, len); + curl_free(static_cast(decoded_url)); return true; } diff --git a/be/src/util/url_coding.h b/be/src/util/url_coding.h index 351c29b7359b68f..a7ec270c486c923 100644 --- a/be/src/util/url_coding.h +++ b/be/src/util/url_coding.h @@ -26,11 +26,11 @@ namespace doris { // Utility method to URL-encode a string (that is, replace special // characters with %). -bool url_encode(const std::string& in, std::string* out); +bool url_encode(const std::string_view& in, std::string* out); // Utility method to decode a string that was URL-encoded. Returns // true unless the string could not be correctly decoded. -bool url_decode(const std::string& in, std::string* out); +bool url_decode(const std::string_view& in, std::string* out); void base64_encode(const std::string& in, std::string* out); size_t base64_encode(const unsigned char* data, size_t length, unsigned char* encoded_data); diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index 427f9c7b800e073..2a3e17b83516120 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -2591,43 +2591,29 @@ class FunctionUrlDecode : public IFunction { static FunctionPtr create() { return std::make_shared(); } String get_name() const override { return name; } size_t get_number_of_arguments() const override { return 1; } - bool is_variadic() const override { return false; } - DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { return std::make_shared(); } - Status execute_impl(FunctionContext* context, Block& block, - - const ColumnNumbers& arguments, size_t result, - size_t input_rows_count) const override { + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) const override { auto res = ColumnString::create(); - auto& res_offsets = res->get_offsets(); - auto& res_chars = res->get_chars(); - res_offsets.resize(input_rows_count); + res->get_offsets().reserve(input_rows_count); - ColumnPtr argument_column = - block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); - const auto* url_col = check_and_get_column(argument_column.get()); - - if (!url_col) { - return Status::InternalError("Not supported input argument type"); - } + const auto* url_col = + assert_cast(block.get_by_position(arguments[0]).column.get()); std::string decoded_url; - for (size_t i = 0; i < input_rows_count; ++i) { - auto source = url_col->get_data_at(i); - StringRef url_val(const_cast(source.data), source.size); - - url_decode(url_val.to_string(), &decoded_url); - - StringOP::push_value_string(decoded_url, i, res_chars, res_offsets); + auto url = url_col->get_data_at(i); + if (!url_decode(url.to_string_view(), &decoded_url)) { + return Status::InternalError("Decode url failed"); + } + res->insert_data(decoded_url.data(), decoded_url.size()); decoded_url.clear(); } block.get_by_position(result).column = std::move(res); - return Status::OK(); } }; @@ -2638,8 +2624,6 @@ class FunctionUrlEncode : public IFunction { static FunctionPtr create() { return std::make_shared(); } String get_name() const override { return name; } size_t get_number_of_arguments() const override { return 1; } - bool is_variadic() const override { return false; } - DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { return std::make_shared(); } @@ -2647,34 +2631,22 @@ class FunctionUrlEncode : public IFunction { Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) const override { auto res = ColumnString::create(); - auto& res_offsets = res->get_offsets(); - auto& res_chars = res->get_chars(); - res_offsets.resize(input_rows_count); - - ColumnPtr argument_column = - block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); - const auto* url_col = check_and_get_column(argument_column.get()); + res->get_offsets().reserve(input_rows_count); - if (!url_col) { - return Status::InternalError("Not supported input argument type"); - } + const auto* url_col = + assert_cast(block.get_by_position(arguments[0]).column.get()); std::string encoded_url; - for (size_t i = 0; i < input_rows_count; ++i) { - auto source = url_col->get_data_at(i); - StringRef url_val(const_cast(source.data), source.size); - - if (!url_encode(url_val.to_string(), &encoded_url)) { + auto url = url_col->get_data_at(i); + if (!url_encode(url.to_string_view(), &encoded_url)) { return Status::InternalError("Encode url failed"); } - - StringOP::push_value_string(encoded_url, i, res_chars, res_offsets); + res->insert_data(encoded_url.data(), encoded_url.size()); encoded_url.clear(); } block.get_by_position(result).column = std::move(res); - return Status::OK(); } }; @@ -4182,4 +4154,5 @@ class FunctionTranslate : public IFunction { return result; } }; + } // namespace doris::vectorized diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_url_decode.out b/regression-test/data/query_p0/sql_functions/string_functions/test_url_decode.out new file mode 100644 index 000000000000000..7199df82b4b4228 --- /dev/null +++ b/regression-test/data/query_p0/sql_functions/string_functions/test_url_decode.out @@ -0,0 +1,43 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !empty_nullable -- + +-- !empty_not_nullable -- + +-- !nullable -- +\N + +/home/doris/directory/ +1234567890 +ABCDEFGHIJKLMNOPQRSTUWXYZ +~!@#%^&*()<>?,./:{}|[]\\_+-= + +-- !not_nullable -- + + +/home/doris/directory/ +1234567890 +ABCDEFGHIJKLMNOPQRSTUWXYZ +~!@#%^&*()<>?,./:{}|[]\\_+-= + +-- !nullable_no_null -- + + +/home/doris/directory/ +1234567890 +ABCDEFGHIJKLMNOPQRSTUWXYZ +~!@#%^&*()<>?,./:{}|[]\\_+-= + +-- !const_nullable -- + + + + + + + +-- !const_not_nullable -- +/home/doris/directory/ + +-- !const_nullable_no_null -- +/home/doris/directory/ + diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_url_decode.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_url_decode.groovy new file mode 100644 index 000000000000000..2fae06056fdf2de --- /dev/null +++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_url_decode.groovy @@ -0,0 +1,47 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_url_decode") { + sql " drop table if exists test_url_decode" + sql """ + create table test_url_decode ( + k0 int, + a string not null, + b string null + ) + DISTRIBUTED BY HASH(k0) + PROPERTIES + ( + "replication_num" = "1" + ); + """ + + order_qt_empty_nullable "select url_decode(b) from test_url_decode" + order_qt_empty_not_nullable "select url_decode(a) from test_url_decode" + + sql """ insert into test_url_decode values (1, 'ABCDEFGHIJKLMNOPQRSTUWXYZ', 'ABCDEFGHIJKLMNOPQRSTUWXYZ'), (2, '1234567890', '1234567890'), + (3, '~%21%40%23%25%5E%26%2A%28%29%3C%3E%3F%2C.%2F%3A%7B%7D%7C%5B%5D%5C_%2B-%3D', '~%21%40%23%25%5E%26%2A%28%29%3C%3E%3F%2C.%2F%3A%7B%7D%7C%5B%5D%5C_%2B-%3D'), + (4, '', ''), (5, '%2Fhome%2Fdoris%2Fdirectory%2F', '%2Fhome%2Fdoris%2Fdirectory%2F'), (6, '', null); + """ + + order_qt_nullable "select url_decode(b) from test_url_decode" + order_qt_not_nullable "select url_decode(a) from test_url_decode" + order_qt_nullable_no_null "select url_decode(nullable(a)) from test_url_decode" + order_qt_const_nullable "select url_decode('') from test_url_decode" // choose one case to test const multi-rows + order_qt_const_not_nullable "select url_decode('%2Fhome%2Fdoris%2Fdirectory%2F')" + order_qt_const_nullable_no_null "select url_decode('%2Fhome%2Fdoris%2Fdirectory%2F')" +} \ No newline at end of file