Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[fix](expr) fix performance problem caused by too many virtual function call (#28508) #28689

Merged
merged 2 commits into from
Dec 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions be/src/vec/columns/column.h
Original file line number Diff line number Diff line change
Expand Up @@ -699,6 +699,8 @@ class IColumn : public COW<IColumn> {
// only used in ColumnNullable replace_column_data
virtual void replace_column_data_default(size_t self_row = 0) = 0;

// Resets the stored value to the type's default for every row whose entry in
// `null_map` is non-zero. The base implementation does nothing; ColumnVector<T>
// and ColumnDecimal<T> override it and read null_map[0 .. size()).
virtual void replace_column_null_data(const uint8_t* __restrict null_map) {}

virtual bool is_date_type() const { return is_date; }
virtual bool is_datetime_type() const { return is_date_time; }
virtual bool is_decimalv2_type() const { return is_decimalv2; }
Expand Down
12 changes: 12 additions & 0 deletions be/src/vec/columns/column_decimal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,18 @@ ColumnPtr ColumnDecimal<T>::index(const IColumn& indexes, size_t limit) const {
return select_index_impl(*this, indexes, limit);
}

/// Overwrites every row flagged in `null_map` (non-zero entry) with a
/// default-constructed T, leaving all other rows untouched.
/// `null_map` is indexed for rows [0, size()).
template <typename T>
void ColumnDecimal<T>::replace_column_null_data(const uint8_t* __restrict null_map) {
    const auto row_count = size();
    const auto* signed_map = reinterpret_cast<const int8_t*>(null_map);

    // Cheap pre-scan: if no entry is flagged there is nothing to rewrite.
    const size_t flagged_rows = row_count - simd::count_zero_num(signed_map, row_count);
    if (flagged_rows == 0) {
        return;
    }

    // Unconditional store with a select keeps the loop body branch-free.
    for (size_t row = 0; row < row_count; ++row) {
        data[row] = null_map[row] ? T() : data[row];
    }
}

template class ColumnDecimal<Decimal32>;
template class ColumnDecimal<Decimal64>;
template class ColumnDecimal<Decimal128>;
Expand Down
2 changes: 2 additions & 0 deletions be/src/vec/columns/column_decimal.h
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,8 @@ class ColumnDecimal final : public COWHelper<ColumnVectorHelper, ColumnDecimal<T
data[self_row] = T();
}

void replace_column_null_data(const uint8_t* __restrict null_map) override;

void sort_column(const ColumnSorter* sorter, EqualFlags& flags, IColumn::Permutation& perms,
EqualRange& range, bool last_column) const override;

Expand Down
9 changes: 0 additions & 9 deletions be/src/vec/columns/column_nullable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,6 @@ ColumnNullable::ColumnNullable(MutableColumnPtr&& nested_column_, MutableColumnP
_need_update_has_null = true;
}

void ColumnNullable::update_null_data() {
const auto& null_map_data = _get_null_map_data();
auto s = size();
for (size_t i = 0; i < s; ++i) {
if (null_map_data[i]) {
nested_column->replace_column_data_default(i);
}
}
}
MutableColumnPtr ColumnNullable::get_shrinked_column() {
return ColumnNullable::create(get_nested_column_ptr()->get_shrinked_column(),
get_null_map_column_ptr());
Expand Down
2 changes: 0 additions & 2 deletions be/src/vec/columns/column_nullable.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,6 @@ class ColumnNullable final : public COWHelper<IColumn, ColumnNullable> {
return Base::create(std::forward<Args>(args)...);
}

void update_null_data();

MutableColumnPtr get_shrinked_column() override;

const char* get_family_name() const override { return "Nullable"; }
Expand Down
12 changes: 12 additions & 0 deletions be/src/vec/columns/column_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,18 @@ ColumnPtr ColumnVector<T>::index(const IColumn& indexes, size_t limit) const {
return select_index_impl(*this, indexes, limit);
}

/// Overwrites every row flagged in `null_map` (non-zero entry) with a
/// default-constructed T, leaving all other rows untouched.
/// `null_map` is indexed for rows [0, size()).
template <typename T>
void ColumnVector<T>::replace_column_null_data(const uint8_t* __restrict null_map) {
    const auto row_count = size();
    const auto* signed_map = reinterpret_cast<const int8_t*>(null_map);

    // Cheap pre-scan: if no entry is flagged there is nothing to rewrite.
    const size_t flagged_rows = row_count - simd::count_zero_num(signed_map, row_count);
    if (flagged_rows == 0) {
        return;
    }

    // Unconditional store with a select keeps the loop body branch-free.
    for (size_t row = 0; row < row_count; ++row) {
        data[row] = null_map[row] ? T() : data[row];
    }
}

/// Explicit template instantiations - to avoid code bloat in headers.
template class ColumnVector<UInt8>;
template class ColumnVector<UInt16>;
Expand Down
2 changes: 2 additions & 0 deletions be/src/vec/columns/column_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,8 @@ class ColumnVector final : public COWHelper<ColumnVectorHelper, ColumnVector<T>>
data[self_row] = T();
}

void replace_column_null_data(const uint8_t* __restrict null_map) override;

void sort_column(const ColumnSorter* sorter, EqualFlags& flags, IColumn::Permutation& perms,
EqualRange& range, bool last_column) const override;

Expand Down
27 changes: 10 additions & 17 deletions be/src/vec/functions/function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,21 +99,8 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const Colum
return ColumnNullable::create(src, ColumnUInt8::create(input_rows_count, 0));
}

bool update_null_data = false;
auto full_column = src_not_nullable->convert_to_full_column_if_const();
if (const auto* nullable = check_and_get_column<const ColumnNullable>(full_column.get())) {
const auto& nested_column = nullable->get_nested_column();
update_null_data = nested_column.is_numeric() || nested_column.is_column_decimal();
} else {
update_null_data = full_column->is_numeric() || full_column->is_column_decimal();
}
auto result_column = ColumnNullable::create(full_column, result_null_map_column);
if (update_null_data) {
auto* res_nullable_column =
assert_cast<ColumnNullable*>(std::move(*result_column).mutate().get());
res_nullable_column->update_null_data();
}
return result_column;
return ColumnNullable::create(src_not_nullable->convert_to_full_column_if_const(),
result_null_map_column);
}

NullPresence get_null_presence(const Block& block, const ColumnNumbers& args) {
Expand Down Expand Up @@ -247,8 +234,14 @@ Status PreparedFunctionImpl::default_implementation_for_nulls(
}

if (null_presence.has_nullable) {
auto [temporary_block, new_args, new_result] =
create_block_with_nested_columns(block, args, result);
bool check_overflow_for_decimal = false;
if (context) {
check_overflow_for_decimal = context->check_overflow_for_decimal();
}
auto [temporary_block, new_args, new_result] = create_block_with_nested_columns(
block, args, result,
check_overflow_for_decimal && need_replace_null_data_to_default());

RETURN_IF_ERROR(execute_without_low_cardinality_columns(
context, temporary_block, new_args, new_result, temporary_block.rows(), dry_run));
block.get_by_position(result).column =
Expand Down
18 changes: 18 additions & 0 deletions be/src/vec/functions/function.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,12 @@ class PreparedFunctionImpl : public IPreparedFunction {
*/
virtual bool use_default_implementation_for_constants() const { return true; }

/** Only consulted by the default implementation for nulls
* (use_default_implementation_for_nulls() is true).
* Returns whether the nested values behind null rows of nullable arguments
* should be replaced with the default value; the default implementation for
* nulls applies this only when the context's check_overflow_for_decimal() is on.
* E.g. binary arithmetic exprs return true to avoid a false overflow caused by
* whatever value happens to be stored under a null row.
* Defaults to false.
*/
virtual bool need_replace_null_data_to_default() const { return false; }

protected:
virtual Status execute_impl_dry_run(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, size_t result,
Expand Down Expand Up @@ -400,6 +406,8 @@ class FunctionBuilderImpl : public IFunctionBuilder {
*/
virtual bool use_default_implementation_for_nulls() const { return true; }

/// Whether the nested values behind null rows should be reset to the default
/// value. Defaults to false; DefaultFunctionBuilder forwards this to the
/// wrapped function.
virtual bool need_replace_null_data_to_default() const { return false; }

/** If use_default_implementation_for_nulls() is true, than change arguments for get_return_type() and build_impl().
* If function arguments has low cardinality types, convert them to ordinary types.
* get_return_type returns ColumnLowCardinality if at least one argument type is ColumnLowCardinality.
Expand Down Expand Up @@ -441,6 +449,9 @@ class IFunction : public std::enable_shared_from_this<IFunction>,

/// Override this functions to change default implementation behavior. See details in IMyFunction.
bool use_default_implementation_for_nulls() const override { return true; }

/// Functions that need the nested values behind null rows reset to the default
/// value override this to return true (e.g. FunctionBinaryArithmetic).
/// Defaults to false.
bool need_replace_null_data_to_default() const override { return false; }

bool use_default_implementation_for_low_cardinality_columns() const override { return true; }

/// all constancy check should use this function to do automatically
Expand Down Expand Up @@ -513,6 +524,9 @@ class DefaultExecutable final : public PreparedFunctionImpl {
bool use_default_implementation_for_nulls() const final {
return function->use_default_implementation_for_nulls();
}
// Forwards the decision to the wrapped function.
bool need_replace_null_data_to_default() const final {
return function->need_replace_null_data_to_default();
}
bool use_default_implementation_for_constants() const final {
return function->use_default_implementation_for_constants();
}
Expand Down Expand Up @@ -640,6 +654,10 @@ class DefaultFunctionBuilder : public FunctionBuilderImpl {
bool use_default_implementation_for_nulls() const override {
return function->use_default_implementation_for_nulls();
}

// Forwards the decision to the wrapped function.
bool need_replace_null_data_to_default() const override {
return function->need_replace_null_data_to_default();
}
bool use_default_implementation_for_low_cardinality_columns() const override {
return function->use_default_implementation_for_low_cardinality_columns();
}
Expand Down
10 changes: 10 additions & 0 deletions be/src/vec/functions/function_binary_arithmetic.h
Original file line number Diff line number Diff line change
Expand Up @@ -806,6 +806,8 @@ template <template <typename, typename> class Operation, typename Name, bool is_
class FunctionBinaryArithmetic : public IFunction {
using OpTraits = OperationTraits<Operation>;

// Cached answer for need_replace_null_data_to_default(). It is assigned while
// the result type is resolved: true when the result type is a decimal, or when
// the function is "pow" with a floating-point result.
// NOTE(review): `mutable` suggests this is written from a const member
// function; if one function instance can be used from several threads this
// write would be unsynchronized — confirm.
mutable bool need_replace_null_data_to_default_ = false;

template <typename F>
static bool cast_type(const IDataType* type, F&& f) {
return cast_type_to_either<DataTypeUInt8, DataTypeInt8, DataTypeInt16, DataTypeInt32,
Expand Down Expand Up @@ -841,6 +843,10 @@ class FunctionBinaryArithmetic : public IFunction {

String get_name() const override { return name; }

// Returns the flag stored in need_replace_null_data_to_default_ (false until
// it has been assigned during result-type resolution).
bool need_replace_null_data_to_default() const override {
return need_replace_null_data_to_default_;
}

size_t get_number_of_arguments() const override { return 2; }

DataTypes get_variadic_argument_types_impl() const override {
Expand All @@ -860,6 +866,10 @@ class FunctionBinaryArithmetic : public IFunction {
typename BinaryOperationTraits<Operation, LeftDataType,
RightDataType>::ResultDataType;
if constexpr (!std::is_same_v<ResultDataType, InvalidType>) {
need_replace_null_data_to_default_ =
IsDataTypeDecimal<ResultDataType> ||
(get_name() == "pow" &&
std::is_floating_point_v<typename ResultDataType::FieldType>);
if constexpr (IsDataTypeDecimal<LeftDataType> &&
IsDataTypeDecimal<RightDataType>) {
type_res = decimal_result_type(left, right, OpTraits::is_multiply,
Expand Down
35 changes: 24 additions & 11 deletions be/src/vec/functions/function_helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@

namespace doris::vectorized {

std::tuple<Block, ColumnNumbers> create_block_with_nested_columns(const Block& block,
const ColumnNumbers& args,
const bool need_check_same) {
std::tuple<Block, ColumnNumbers> create_block_with_nested_columns(
const Block& block, const ColumnNumbers& args, const bool need_check_same,
bool need_replace_null_data_to_default) {
Block res;
ColumnNumbers res_args(args.size());

Expand Down Expand Up @@ -69,10 +69,22 @@ std::tuple<Block, ColumnNumbers> create_block_with_nested_columns(const Block& b

if (!col.column) {
res.insert({nullptr, nested_type, col.name});
} else if (auto* nullable = check_and_get_column<ColumnNullable>(*col.column)) {
const auto& nested_col = nullable->get_nested_column_ptr();
res.insert({nested_col, nested_type, col.name});
} else if (auto* const_column = check_and_get_column<ColumnConst>(*col.column)) {
} else if (const auto* nullable =
check_and_get_column<ColumnNullable>(*col.column)) {
if (need_replace_null_data_to_default) {
const auto& null_map = nullable->get_null_map_data();
const auto nested_col = nullable->get_nested_column_ptr();
// Only the nested column needs to be mutated; this avoids copying the null map.
auto mutable_nested_col = (*std::move(nested_col)).mutate();
mutable_nested_col->replace_column_null_data(null_map.data());

res.insert({std::move(mutable_nested_col), nested_type, col.name});
} else {
const auto& nested_col = nullable->get_nested_column_ptr();
res.insert({nested_col, nested_type, col.name});
}
} else if (const auto* const_column =
check_and_get_column<ColumnConst>(*col.column)) {
const auto& nested_col =
check_and_get_column<ColumnNullable>(const_column->get_data_column())
->get_nested_column_ptr();
Expand Down Expand Up @@ -103,10 +115,11 @@ std::tuple<Block, ColumnNumbers> create_block_with_nested_columns(const Block& b
return {res, res_args};
}

std::tuple<Block, ColumnNumbers, size_t> create_block_with_nested_columns(const Block& block,
const ColumnNumbers& args,
size_t result) {
auto [res, res_args] = create_block_with_nested_columns(block, args, true);
std::tuple<Block, ColumnNumbers, size_t> create_block_with_nested_columns(
const Block& block, const ColumnNumbers& args, size_t result,
bool need_replace_null_data_to_default) {
auto [res, res_args] =
create_block_with_nested_columns(block, args, true, need_replace_null_data_to_default);
// insert result column in temp block
res.insert(block.get_by_position(result));
return {res, res_args, res.columns() - 1};
Expand Down
12 changes: 6 additions & 6 deletions be/src/vec/functions/function_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,14 +97,14 @@ Columns convert_const_tuple_to_constant_elements(const ColumnConst& column);
/// Returns a temporary block together with the argument positions inside it
/// (in the same order as `args`). In that block, each column specified in
/// `args` is replaced with its respective nested column if it is nullable.
std::tuple<Block, ColumnNumbers> create_block_with_nested_columns(const Block& block,
const ColumnNumbers& args,
const bool need_check_same);
std::tuple<Block, ColumnNumbers> create_block_with_nested_columns(
const Block& block, const ColumnNumbers& args, const bool need_check_same,
bool need_replace_null_data_to_default = false);

// Same as above and return the new_res loc in tuple
std::tuple<Block, ColumnNumbers, size_t> create_block_with_nested_columns(const Block& block,
const ColumnNumbers& args,
size_t result);
std::tuple<Block, ColumnNumbers, size_t> create_block_with_nested_columns(
const Block& block, const ColumnNumbers& args, size_t result,
bool need_replace_null_data_to_default = false);

/// Checks argument type at specified index with predicate.
/// throws if there is no argument at specified index or if predicate returns false.
Expand Down
12 changes: 12 additions & 0 deletions regression-test/data/datatype_p0/decimalv3/fix-overflow.out
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,15 @@
a \N
b 0.00

-- !select_fix_overflow_float_null1 --
\N

-- !select_fix_overflow_int_null1 --
\N

-- !select_fix_overflow_int_null2 --
\N

-- !select_fix_overflow_bool_null1 --
\N

43 changes: 43 additions & 0 deletions regression-test/suites/datatype_p0/decimalv3/fix-overflow.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -104,4 +104,47 @@ suite("fix-overflow") {
qt_select_insert """
select * from fix_overflow_null2 order by 1,2;
"""

sql """
drop table if exists fix_overflow_null3;
"""
sql """
create table fix_overflow_null3(k1 decimalv3(38, 6), k2 double, k3 double) distributed by hash(k1) properties("replication_num"="1");
"""
sql """
insert into fix_overflow_null3 values (9.9, -1, null);
"""
qt_select_fix_overflow_float_null1 """
select cast(pow(k2+k3, 0.2) as decimalv3(38,6)) from fix_overflow_null3;
"""

sql """
drop table if exists fix_overflow_null4
"""
sql """
create table fix_overflow_null4(k1 int, k2 int, k3 decimalv3(38,6)) distributed by hash(k1) properties("replication_num"="1");
"""
sql """
insert into fix_overflow_null4 values (1, null, 99999999999999999999999999999999.999999);
"""
qt_select_fix_overflow_int_null1 """
select k1 + k2 + k3 from fix_overflow_null4;
"""
qt_select_fix_overflow_int_null2 """
select cast( (k1 + k2) as decimalv3(3, 0) ) from fix_overflow_null4;
"""

sql """
drop table if exists fix_overflow_null5
"""
sql """
create table fix_overflow_null5(k1 int, k2 int, k3 decimalv3(38,6))
distributed by hash(k1) properties("replication_num"="1");
"""
sql """
insert into fix_overflow_null5 values (-1, null, 99999999999999999999999999999999.999999);
"""
qt_select_fix_overflow_bool_null1 """
select (k1 < k2) + k3 from fix_overflow_null5;
"""
}
Loading