Skip to content

Commit

Permalink
opt
Browse files Browse the repository at this point in the history
  • Loading branch information
Mryange committed Sep 11, 2024
1 parent a01c85e commit 3a0902f
Showing 1 changed file with 105 additions and 15 deletions.
120 changes: 105 additions & 15 deletions be/src/vec/functions/function_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -1007,6 +1007,11 @@ class FunctionNotNullOrEmpty : public IFunction {

class FunctionStringConcat : public IFunction {
public:
/// Per-function state prepared once in open() and consulted by execute_impl().
struct ConcatState {
    /// True when the pre-folded `tail` may be used instead of the generic
    /// per-argument concatenation.
    bool use_state {false};
    /// Concatenation of the constant arguments that follow the first one,
    /// e.g. concat(col, "123", "abc", "456") -> "123abc456".
    std::string tail {};
};

static constexpr auto name = "concat";
static FunctionPtr create() { return std::make_shared<FunctionStringConcat>(); }
String get_name() const override { return name; }
Expand All @@ -1017,6 +1022,34 @@ class FunctionStringConcat : public IFunction {
return std::make_shared<DataTypeString>();
}

/// Builds the ConcatState for this function instance.
///
/// Nothing is prepared for THREAD_LOCAL scope. For any other scope a fresh
/// ConcatState is registered on `context`, and every argument after the first
/// is inspected: when all of them are constants they are folded into
/// ConcatState::tail so that, for example,
///     concat(col, "123", "abc", "456")
/// only has to append the single string "123abc456" to each row.
///
/// The fast path is disabled (use_state == false, tail cleared) as soon as one
/// trailing argument is not a constant, or is a constant whose value has a
/// null data pointer.
///
/// @param context function context that owns the state and exposes the constant arguments
/// @param scope   state scope being opened
/// @return Status::OK() for THREAD_LOCAL, otherwise the result of IFunction::open()
Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
    if (scope == FunctionContext::THREAD_LOCAL) {
        return Status::OK();
    }

    auto state = std::make_shared<ConcatState>();
    context->set_function_state(scope, state);

    bool tail_is_constant = true;
    const auto num_args = context->get_num_args();
    for (size_t i = 1; i < num_args; ++i) {
        const auto* const_col = context->get_constant_col(i);
        if (const_col == nullptr) {
            // Argument i varies per row: the tail cannot be precomputed.
            tail_is_constant = false;
            break;
        }
        const auto value = const_col->column_ptr->get_data_at(0);
        if (value.data == nullptr) {
            // NOTE(review): a null data pointer is presumably how a constant NULL
            // is reported here - confirm. Such a value must not be folded in as
            // an empty string, so the generic path is used instead; that path is
            // always a valid fallback.
            tail_is_constant = false;
            break;
        }
        state->tail.append(value.data, value.size);
    }

    if (tail_is_constant) {
        state->use_state = true;
        // Keep at least 16 spare bytes after the tail so that
        // memcpy_small_allow_read_write_overflow15 can be used on it in execute_const().
        state->tail.reserve(state->tail.size() + 16);
    } else {
        // Do not leave a half-built tail behind when the fast path is off.
        state->use_state = false;
        state->tail.clear();
    }

    return IFunction::open(context, scope);
}

Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) const override {
DCHECK_GE(arguments.size(), 1);
Expand All @@ -1025,7 +1058,29 @@ class FunctionStringConcat : public IFunction {
block.get_by_position(result).column = block.get_by_position(arguments[0]).column;
return Status::OK();
}
auto* concat_state = reinterpret_cast<ConcatState*>(
context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
if (!concat_state) {
return Status::RuntimeError("funciton context for function '{}' must have ConcatState;",
get_name());
}
if (concat_state->use_state) {
const auto& [col, is_const] =
unpack_if_const(block.get_by_position(arguments[0]).column);
const auto* col_str = assert_cast<const ColumnString*>(col.get());
if (is_const) {
return execute_const<true>(concat_state, block, col_str, result, input_rows_count);
} else {
return execute_const<false>(concat_state, block, col_str, result, input_rows_count);
}

} else {
return execute_vecotr(block, arguments, result, input_rows_count);
}
}

Status execute_vecotr(Block& block, const ColumnNumbers& arguments, size_t result,
size_t input_rows_count) const {
int argument_size = arguments.size();
std::vector<ColumnPtr> argument_columns(argument_size);

Expand All @@ -1048,43 +1103,78 @@ class FunctionStringConcat : public IFunction {
auto& res_offset = res->get_offsets();

res_offset.resize(input_rows_count);

size_t res_reserve_size = 0;
// we could ignore null string column
// but it's not necessary to ignore it
for (size_t i = 0; i < argument_size; ++i) {
if (is_const_args[i]) {
res_reserve_size +=
((*offsets_list[i])[0] - (*offsets_list[i])[-1]) * input_rows_count;
res_reserve_size += (*offsets_list[i])[0] * input_rows_count;
} else {
for (size_t j = 0; j < input_rows_count; ++j) {
res_reserve_size += (*offsets_list[i])[j] - (*offsets_list[i])[j - 1];
}
res_reserve_size += (*offsets_list[i])[input_rows_count - 1];
}
}

ColumnString::check_chars_length(res_reserve_size, 0);

res_data.resize(res_reserve_size);

auto* data = res_data.data();
size_t dst_offset = 0;

for (size_t i = 0; i < input_rows_count; ++i) {
int current_length = 0;
for (size_t j = 0; j < argument_size; ++j) {
const auto& current_offsets = *offsets_list[j];
const auto& current_chars = *chars_list[j];

auto idx = index_check_const(i, is_const_args[j]);
auto size = current_offsets[idx] - current_offsets[idx - 1];
const auto size = current_offsets[idx] - current_offsets[idx - 1];
if (size > 0) {
memcpy_small_allow_read_write_overflow15(
&res_data[res_offset[i - 1]] + current_length,
&current_chars[current_offsets[idx - 1]], size);
current_length += size;
data + dst_offset, current_chars.data() + current_offsets[idx - 1],
size);
dst_offset += size;
}
}
res_offset[i] = res_offset[i - 1] + current_length;
res_offset[i] = dst_offset;
}

block.get_by_position(result).column = std::move(res);
return Status::OK();
}

/// Fast path for concat(first, <constant arguments...>): every output row is
/// the row value of `col_str` followed by the pre-folded ConcatState::tail.
///
/// @tparam is_const        whether `col_str` is the payload of a constant column
///                         (then row 0 is used for every output row)
/// @param concat_state     state holding the pre-folded constant tail
/// @param block            block that receives the result column
/// @param col_str          string column of the first argument
/// @param result           position of the result column inside `block`
/// @param input_rows_count number of rows to produce
/// @return Status::OK() once the result column has been stored in `block`
template <bool is_const>
Status execute_const(ConcatState* concat_state, Block& block, const ColumnString* col_str,
                     size_t result, size_t input_rows_count) const {
    const auto& suffix = concat_state->tail;

    auto out_column = ColumnString::create();
    auto& out_chars = out_column->get_chars();
    auto& out_offsets = out_column->get_offsets();
    out_offsets.resize(input_rows_count);

    // Exact output size: one suffix per row plus all bytes of the first argument.
    size_t total_bytes = 0;
    if constexpr (is_const) {
        // The single stored value is repeated for every row.
        total_bytes = col_str->get_offsets()[0] * input_rows_count;
    } else {
        // The last offset is the cumulative byte count of all rows.
        total_bytes = col_str->get_offsets()[input_rows_count - 1];
    }
    total_bytes += suffix.size() * input_rows_count;

    ColumnString::check_chars_length(total_bytes, 0);
    out_chars.resize(total_bytes);

    auto* const out_begin = out_chars.data();
    size_t written = 0;
    for (size_t row = 0; row < input_rows_count; ++row) {
        const StringRef head = col_str->get_data_at(index_check_const<is_const>(row));

        // Row value first ...
        memcpy_small_allow_read_write_overflow15(out_begin + written, head.data, head.size);
        written += head.size;

        // ... then the shared constant tail.
        memcpy_small_allow_read_write_overflow15(out_begin + written, suffix.data(),
                                                 suffix.size());
        written += suffix.size();

        out_offsets[row] = written;
    }

    block.get_by_position(result).column = std::move(out_column);
    return Status::OK();
}
Expand Down

0 comments on commit 3a0902f

Please sign in to comment.