From ac2e4357c874c42af272ac8c79698c7272ae17c2 Mon Sep 17 00:00:00 2001 From: zhangstar333 <2561612514@qq.com> Date: Tue, 24 Oct 2023 15:35:51 +0800 Subject: [PATCH] [improvement](function) improve date_trunc function performance when timeunit is const --- be/src/vec/functions/function_timestamp.cpp | 109 +++++++++++--------- 1 file changed, 58 insertions(+), 51 deletions(-) diff --git a/be/src/vec/functions/function_timestamp.cpp b/be/src/vec/functions/function_timestamp.cpp index fb2340ffaaa8f03..a9002875315d9e1 100644 --- a/be/src/vec/functions/function_timestamp.cpp +++ b/be/src/vec/functions/function_timestamp.cpp @@ -412,36 +412,19 @@ struct DateTrunc { auto datetime_column = static_cast(argument_columns[0].get()); auto str_column = static_cast(argument_columns[1].get()); - auto& rdata = str_column->get_chars(); - auto& roffsets = str_column->get_offsets(); ColumnPtr res = ColumnType::create(); - if (col_const[1]) { - execute_impl_right_const( - datetime_column->get_data(), str_column->get_data_at(0), - static_cast(res->assume_mutable().get())->get_data(), - null_map->get_data(), input_rows_count); - } else { - execute_impl(datetime_column->get_data(), rdata, roffsets, - static_cast(res->assume_mutable().get())->get_data(), - null_map->get_data(), input_rows_count); - } + DCHECK(col_const[1]) + << "the argument[1] must be const string literal, have check function in FE."; + execute_impl_right_const(datetime_column->get_data(), str_column->get_data_at(0), + static_cast(res->assume_mutable().get())->get_data(), + null_map->get_data(), input_rows_count); block.get_by_position(result).column = ColumnNullable::create(res, std::move(null_map)); return Status::OK(); } private: - static void execute_impl(const PaddedPODArray& ldata, const ColumnString::Chars& rdata, - const ColumnString::Offsets& roffsets, PaddedPODArray& res, - NullMap& null_map, size_t input_rows_count) { - res.resize(input_rows_count); - for (size_t i = 0; i < input_rows_count; ++i) { - auto dt = 
binary_cast(ldata[i]); - const char* str_data = reinterpret_cast(&rdata[roffsets[i - 1]]); - _execute_inner_loop(dt, str_data, res, null_map, i); - } - } static void execute_impl_right_const(const PaddedPODArray& ldata, const StringRef& rdata, PaddedPODArray& res, NullMap& null_map, size_t input_rows_count) { @@ -449,34 +432,58 @@ struct DateTrunc { std::string lower_str(rdata.data, rdata.size); std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) { return std::tolower(c); }); - for (size_t i = 0; i < input_rows_count; ++i) { - auto dt = binary_cast(ldata[i]); - _execute_inner_loop(dt, lower_str.data(), res, null_map, i); - } - } - template - static void _execute_inner_loop(T& dt, const char* str_data, PaddedPODArray& res, - NullMap& null_map, size_t index) { - if (std::strncmp("year", str_data, 4) == 0) { - null_map[index] = !dt.template datetime_trunc(); - } else if (std::strncmp("quarter", str_data, 7) == 0) { - null_map[index] = !dt.template datetime_trunc(); - } else if (std::strncmp("month", str_data, 5) == 0) { - null_map[index] = !dt.template datetime_trunc(); - } else if (std::strncmp("week", str_data, 4) == 0) { - null_map[index] = !dt.template datetime_trunc(); - } else if (std::strncmp("day", str_data, 3) == 0) { - null_map[index] = !dt.template datetime_trunc(); - } else if (std::strncmp("hour", str_data, 4) == 0) { - null_map[index] = !dt.template datetime_trunc(); - } else if (std::strncmp("minute", str_data, 6) == 0) { - null_map[index] = !dt.template datetime_trunc(); - } else if (std::strncmp("second", str_data, 6) == 0) { - null_map[index] = !dt.template datetime_trunc(); - } else { - null_map[index] = 1; + + auto _execute_inner_loop = [&]() { + for (size_t i = 0; i < input_rows_count; ++i) { + auto dt = binary_cast(ldata[i]); + null_map[i] = !dt.template datetime_trunc(); + res[i] = binary_cast(dt); + } + }; + + auto execute_impl = [&](const TimeUnit& UNIT) { + if (TimeUnit::YEAR == UNIT) { + 
_execute_inner_loop.template operator()(); + } else if (TimeUnit::QUARTER == UNIT) { + _execute_inner_loop.template operator()(); + } else if (TimeUnit::MONTH == UNIT) { + _execute_inner_loop.template operator()(); + } else if (TimeUnit::WEEK == UNIT) { + _execute_inner_loop.template operator()(); + } else if (TimeUnit::DAY == UNIT) { + _execute_inner_loop.template operator()(); + } else if (TimeUnit::HOUR == UNIT) { + _execute_inner_loop.template operator()(); + } else if (TimeUnit::MINUTE == UNIT) { + _execute_inner_loop.template operator()(); + } else if (TimeUnit::SECOND == UNIT) { + _execute_inner_loop.template operator()(); + } + }; + + if (std::strncmp("year", lower_str.data(), 4) == 0) { + execute_impl(TimeUnit::YEAR); + } else if (std::strncmp("quarter", lower_str.data(), 7) == 0) { + execute_impl(TimeUnit::QUARTER); + } else if (std::strncmp("month", lower_str.data(), 5) == 0) { + execute_impl(TimeUnit::MONTH); + } else if (std::strncmp("week", lower_str.data(), 4) == 0) { + execute_impl(TimeUnit::WEEK); + } else if (std::strncmp("day", lower_str.data(), 3) == 0) { + execute_impl(TimeUnit::DAY); + } else if (std::strncmp("hour", lower_str.data(), 4) == 0) { + execute_impl(TimeUnit::HOUR); + } else if (std::strncmp("minute", lower_str.data(), 6) == 0) { + execute_impl(TimeUnit::MINUTE); + } else if (std::strncmp("second", lower_str.data(), 6) == 0) { + execute_impl(TimeUnit::SECOND); + } else { // normally unreachable (FE validates the unit); fall back to NULL for every row + for (size_t i = 0; i < input_rows_count; ++i) { + null_map[i] = 1; + auto dt = binary_cast(ldata[i]); + res[i] = binary_cast(dt); + } } - res[index] = binary_cast(dt); } }; @@ -1289,8 +1296,8 @@ void register_function_timestamp(SimpleFunctionFactory& factory) { factory.register_function(); factory.register_function(); factory.register_function(); - factory.register_function(); - factory.register_function(); + // factory.register_function(); + // factory.register_function(); factory.register_function(); factory.register_function();