Skip to content

Commit

Permalink
fix url_encode and add regress test for url_decode
Browse files Browse the repository at this point in the history
  • Loading branch information
suxiaogang223 committed Sep 13, 2024
1 parent 30adbcd commit df3c919
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 77 deletions.
40 changes: 9 additions & 31 deletions be/src/util/url_coding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@

namespace doris {

bool url_encode(const std::string& in, std::string* out) {
auto* encoded_url = curl_easy_escape(nullptr, in.c_str(), static_cast<int>(in.length()));
bool url_encode(const std::string_view& in, std::string* out) {
auto* encoded_url = curl_easy_escape(nullptr, in.data(), static_cast<int>(in.length()));
if (encoded_url == nullptr) {
return false;
}
Expand All @@ -34,36 +34,14 @@ bool url_encode(const std::string& in, std::string* out) {
return true;
}

// Adapted from
// http://www.boost.org/doc/libs/1_40_0/doc/html/boost_asio/
// example/http/server3/request_handler.cpp
// See http://www.boost.org/LICENSE_1_0.txt for license for this method.
bool url_decode(const std::string& in, std::string* out) {
out->clear();
out->reserve(in.size());

for (size_t i = 0; i < in.size(); ++i) {
if (in[i] == '%') {
if (i + 3 <= in.size()) {
int value = 0;
std::istringstream is(in.substr(i + 1, 2));

if (is >> std::hex >> value) {
(*out) += static_cast<char>(value);
i += 2;
} else {
return false;
}
} else {
return false;
}
} else if (in[i] == '+') {
(*out) += ' ';
} else {
(*out) += in[i];
}
// Decodes the percent-encoded content of `in` into `*out` via libcurl's
// curl_easy_unescape.
//
// Returns true on success. Returns false when libcurl does not hand back a
// decoded buffer; `*out` is left unmodified in that case.
bool url_decode(const std::string_view& in, std::string* out) {
    // libcurl interprets a length of 0 as "call strlen() on the input". A
    // string_view is not guaranteed to be NUL-terminated (and data() may be
    // null for a default-constructed view), so an empty input must never be
    // forwarded to curl_easy_unescape.
    if (in.empty()) {
        out->clear();
        return true;
    }

    int len = 0;
    auto* decoded_url = curl_easy_unescape(nullptr, in.data(), static_cast<int>(in.length()), &len);
    if (decoded_url == nullptr) {
        return false;
    }

    // Build the result from the reported length rather than relying on a
    // terminating NUL, so a decoded zero byte (%00) does not truncate it.
    *out = std::string(decoded_url, len);
    curl_free(static_cast<void*>(decoded_url));
    return true;
}

Expand Down
4 changes: 2 additions & 2 deletions be/src/util/url_coding.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ namespace doris {

// Utility method to URL-encode a string (that is, replace special
// characters with %<hex value in ascii>).
bool url_encode(const std::string& in, std::string* out);
bool url_encode(const std::string_view& in, std::string* out);

// Utility method to decode a string that was URL-encoded. Returns
// true unless the string could not be correctly decoded.
bool url_decode(const std::string& in, std::string* out);
bool url_decode(const std::string_view& in, std::string* out);

void base64_encode(const std::string& in, std::string* out);
size_t base64_encode(const unsigned char* data, size_t length, unsigned char* encoded_data);
Expand Down
61 changes: 17 additions & 44 deletions be/src/vec/functions/function_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -2591,43 +2591,29 @@ class FunctionUrlDecode : public IFunction {
static FunctionPtr create() { return std::make_shared<FunctionUrlDecode>(); }
String get_name() const override { return name; }
size_t get_number_of_arguments() const override { return 1; }
bool is_variadic() const override { return false; }

DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
return std::make_shared<DataTypeString>();
}

Status execute_impl(FunctionContext* context, Block& block,

const ColumnNumbers& arguments, size_t result,
size_t input_rows_count) const override {
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) const override {
auto res = ColumnString::create();
auto& res_offsets = res->get_offsets();
auto& res_chars = res->get_chars();
res_offsets.resize(input_rows_count);
res->get_offsets().reserve(input_rows_count);

ColumnPtr argument_column =
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
const auto* url_col = check_and_get_column<ColumnString>(argument_column.get());

if (!url_col) {
return Status::InternalError("Not supported input argument type");
}
const auto* url_col =
assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());

std::string decoded_url;

for (size_t i = 0; i < input_rows_count; ++i) {
auto source = url_col->get_data_at(i);
StringRef url_val(const_cast<char*>(source.data), source.size);

url_decode(url_val.to_string(), &decoded_url);

StringOP::push_value_string(decoded_url, i, res_chars, res_offsets);
auto url = url_col->get_data_at(i);
if (!url_decode(url.to_string_view(), &decoded_url)) {
return Status::InternalError("Decode url failed");
}
res->insert_data(decoded_url.data(), decoded_url.size());
decoded_url.clear();
}

block.get_by_position(result).column = std::move(res);

return Status::OK();
}
};
Expand All @@ -2638,43 +2624,29 @@ class FunctionUrlEncode : public IFunction {
static FunctionPtr create() { return std::make_shared<FunctionUrlEncode>(); }
String get_name() const override { return name; }
size_t get_number_of_arguments() const override { return 1; }
bool is_variadic() const override { return false; }

DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
return std::make_shared<DataTypeString>();
}

Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) const override {
auto res = ColumnString::create();
auto& res_offsets = res->get_offsets();
auto& res_chars = res->get_chars();
res_offsets.resize(input_rows_count);

ColumnPtr argument_column =
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
const auto* url_col = check_and_get_column<ColumnString>(argument_column.get());
res->get_offsets().reserve(input_rows_count);

if (!url_col) {
return Status::InternalError("Not supported input argument type");
}
const auto* url_col =
assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());

std::string encoded_url;

for (size_t i = 0; i < input_rows_count; ++i) {
auto source = url_col->get_data_at(i);
StringRef url_val(const_cast<char*>(source.data), source.size);

if (!url_encode(url_val.to_string(), &encoded_url)) {
auto url = url_col->get_data_at(i);
if (!url_encode(url.to_string_view(), &encoded_url)) {
return Status::InternalError("Encode url failed");
}

StringOP::push_value_string(encoded_url, i, res_chars, res_offsets);
res->insert_data(encoded_url.data(), encoded_url.size());
encoded_url.clear();
}

block.get_by_position(result).column = std::move(res);

return Status::OK();
}
};
Expand Down Expand Up @@ -4182,4 +4154,5 @@ class FunctionTranslate : public IFunction {
return result;
}
};

} // namespace doris::vectorized
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !empty_nullable --

-- !empty_not_nullable --

-- !nullable --
\N

/home/doris/directory/
1234567890
ABCDEFGHIJKLMNOPQRSTUWXYZ
~!@#%^&*()<>?,./:{}|[]\\_+-=

-- !not_nullable --


/home/doris/directory/
1234567890
ABCDEFGHIJKLMNOPQRSTUWXYZ
~!@#%^&*()<>?,./:{}|[]\\_+-=

-- !nullable_no_null --


/home/doris/directory/
1234567890
ABCDEFGHIJKLMNOPQRSTUWXYZ
~!@#%^&*()<>?,./:{}|[]\\_+-=

-- !const_nullable --







-- !const_not_nullable --
/home/doris/directory/

-- !const_nullable_no_null --
/home/doris/directory/

Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Regression suite for url_decode(). It runs the function against an empty
// table, a nullable column, a NOT NULL column, a NOT NULL column wrapped in
// nullable(), and constant arguments.
suite("test_url_decode") {
sql " drop table if exists test_url_decode"
sql """
create table test_url_decode (
k0 int,
a string not null,
b string null
)
DISTRIBUTED BY HASH(k0)
PROPERTIES
(
"replication_num" = "1"
);
"""

// No rows inserted yet: both queries run against the empty table.
order_qt_empty_nullable "select url_decode(b) from test_url_decode"
order_qt_empty_not_nullable "select url_decode(a) from test_url_decode"

// Rows: plain letters, digits, percent-encoded punctuation, empty strings,
// a percent-encoded path, and one row whose nullable column is NULL.
sql """ insert into test_url_decode values (1, 'ABCDEFGHIJKLMNOPQRSTUWXYZ', 'ABCDEFGHIJKLMNOPQRSTUWXYZ'), (2, '1234567890', '1234567890'),
(3, '~%21%40%23%25%5E%26%2A%28%29%3C%3E%3F%2C.%2F%3A%7B%7D%7C%5B%5D%5C_%2B-%3D', '~%21%40%23%25%5E%26%2A%28%29%3C%3E%3F%2C.%2F%3A%7B%7D%7C%5B%5D%5C_%2B-%3D'),
(4, '', ''), (5, '%2Fhome%2Fdoris%2Fdirectory%2F', '%2Fhome%2Fdoris%2Fdirectory%2F'), (6, '', null);
"""

order_qt_nullable "select url_decode(b) from test_url_decode"
order_qt_not_nullable "select url_decode(a) from test_url_decode"
order_qt_nullable_no_null "select url_decode(nullable(a)) from test_url_decode"
// NOTE(review): despite the label, '' is a non-null constant; switching to a
// NULL literal would change the recorded expected output for this case.
order_qt_const_nullable "select url_decode('') from test_url_decode" // choose one case to test const multi-rows
order_qt_const_not_nullable "select url_decode('%2Fhome%2Fdoris%2Fdirectory%2F')"
// Wrap the literal in nullable() so this case differs from
// const_not_nullable above; the decoded value is the same.
order_qt_const_nullable_no_null "select url_decode(nullable('%2Fhome%2Fdoris%2Fdirectory%2F'))"
}

0 comments on commit df3c919

Please sign in to comment.