From 92f0d4e66ce0996a6cdbf003732c0254e0f2cfc4 Mon Sep 17 00:00:00 2001 From: zclllhhjj Date: Sat, 14 Sep 2024 10:22:04 +0800 Subject: [PATCH] [Fix](Column) refactor ColumnNullable to provide flags safety (#40769) Issue Number: close #xxx In this pr we move null_map and relative flags to an individual base class to constrain its visibility. please reed the comment carefully test in master: ``` xxx/column_nullable_test.cpp:187: Failure Value of: null_dst->has_null() Actual: false Expected: true Error occurred in case0 [ FAILED ] ColumnNullableTest.PredicateTest (0 ms) [ RUN ] ColumnNullableTest.HashTest [ OK ] ColumnNullableTest.HashTest (0 ms) [----------] 3 tests from ColumnNullableTest (0 ms total) [----------] Global test environment tear-down [==========] 3 tests from 1 test suite ran. (1 ms total) [ PASSED ] 2 tests. [ FAILED ] 1 test, listed below: [ FAILED ] ColumnNullableTest.PredicateTest ``` and success after this pr --------- Co-authored-by: Jerry Hu --- be/src/vec/columns/column_nullable.cpp | 85 +++++----- be/src/vec/columns/column_nullable.h | 152 ++++++++++++------ be/test/vec/columns/column_nullable_test.cpp | 112 +++++++++++++ be/test/vec/columns/column_nullable_test.h | 105 ++++++++++++ be/test/vec/columns/column_resize_test.cpp | 1 - be/test/vec/function/function_test_util.h | 8 + .../test_column_nullable_cache.out | 6 + .../test_column_nullable_cache.groovy | 57 +++++++ 8 files changed, 426 insertions(+), 100 deletions(-) create mode 100644 be/test/vec/columns/column_nullable_test.cpp create mode 100644 be/test/vec/columns/column_nullable_test.h create mode 100644 regression-test/data/correctness/test_column_nullable_cache.out create mode 100644 regression-test/suites/correctness/test_column_nullable_cache.groovy diff --git a/be/src/vec/columns/column_nullable.cpp b/be/src/vec/columns/column_nullable.cpp index 79cee3b8a5f3ca..b89568736b5899 100644 --- a/be/src/vec/columns/column_nullable.cpp +++ b/be/src/vec/columns/column_nullable.cpp @@ -31,18 +31,19 @@ namespace doris::vectorized { ColumnNullable::ColumnNullable(MutableColumnPtr&& nested_column_, MutableColumnPtr&& null_map_) - : nested_column(std::move(nested_column_)), null_map(std::move(null_map_)) { + : NullMapProvider(std::move(null_map_)), nested_column(std::move(nested_column_)) { /// ColumnNullable cannot have constant nested column. But constant argument could be passed. Materialize it. nested_column = get_nested_column().convert_to_full_column_if_const(); // after convert const column to full column, it may be a nullable column if (nested_column->is_nullable()) { - assert_cast(*nested_column).apply_null_map((const ColumnUInt8&)*null_map); - null_map = assert_cast(*nested_column).get_null_map_column_ptr(); + assert_cast(*nested_column) + .apply_null_map(static_cast(get_null_map_column())); + reset_null_map(assert_cast(*nested_column).get_null_map_column_ptr()); nested_column = assert_cast(*nested_column).get_nested_column_ptr(); } - if (is_column_const(*null_map)) { + if (is_column_const(get_null_map_column())) [[unlikely]] { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "ColumnNullable cannot have constant null map"); __builtin_unreachable(); @@ -69,7 +70,7 @@ void ColumnNullable::update_xxHash_with_value(size_t start, size_t end, uint64_t nested_column->update_xxHash_with_value(start, end, hash, nullptr); } else { const auto* __restrict real_null_data = - assert_cast(*null_map).get_data().data(); + assert_cast(get_null_map_column()).get_data().data(); for (int i = start; i < end; ++i) { if (real_null_data[i] != 0) { hash = HashUtil::xxHash64NullWithSeed(hash); @@ -85,7 +86,7 @@ void ColumnNullable::update_crc_with_value(size_t start, size_t end, uint32_t& h nested_column->update_crc_with_value(start, end, hash, nullptr); } else { const auto* __restrict real_null_data = - assert_cast(*null_map).get_data().data(); + assert_cast(get_null_map_column()).get_data().data(); for (int i = start; i < end; ++i) { if (real_null_data[i] != 0) { hash = HashUtil::zlib_crc_hash_null(hash); @@ -110,7 +111,7 @@ void ColumnNullable::update_crcs_with_value(uint32_t* __restrict hashes, doris:: auto s = rows; DCHECK(s == size()); const auto* __restrict real_null_data = - assert_cast(*null_map).get_data().data(); + assert_cast(get_null_map_column()).get_data().data(); if (!has_null()) { nested_column->update_crcs_with_value(hashes, type, rows, offset, nullptr); } else { @@ -128,7 +129,7 @@ void ColumnNullable::update_hashes_with_value(uint64_t* __restrict hashes, DCHECK(null_data == nullptr); auto s = size(); const auto* __restrict real_null_data = - assert_cast(*null_map).get_data().data(); + assert_cast(get_null_map_column()).get_data().data(); if (!has_null()) { nested_column->update_hashes_with_value(hashes, nullptr); } else { @@ -183,24 +184,24 @@ StringRef ColumnNullable::get_data_at(size_t n) const { void ColumnNullable::insert_data(const char* pos, size_t length) { if (pos == nullptr) { get_nested_column().insert_default(); - _get_null_map_data().push_back(1); + get_null_map_data().push_back(1); _has_null = true; + _need_update_has_null = false; } else { get_nested_column().insert_data(pos, length); - _get_null_map_data().push_back(0); + _push_false_to_nullmap(1); } } void ColumnNullable::insert_many_strings(const StringRef* strings, size_t num) { - auto& null_map_data = _get_null_map_data(); for (size_t i = 0; i != num; ++i) { if (strings[i].data == nullptr) { nested_column->insert_default(); - null_map_data.push_back(1); + get_null_map_data().push_back(1); _has_null = true; } else { nested_column->insert_data(strings[i].data, strings[i].size); - null_map_data.push_back(0); + _push_false_to_nullmap(1); } } } @@ -227,13 +228,14 @@ const char* ColumnNullable::deserialize_and_insert_from_arena(const char* pos) { UInt8 val = *reinterpret_cast(pos); pos += sizeof(val); - _get_null_map_data().push_back(val); + get_null_map_data().push_back(val); if (val == 0) { pos = get_nested_column().deserialize_and_insert_from_arena(pos); } else { get_nested_column().insert_default(); _has_null = true; + _need_update_has_null = false; } return pos; @@ -265,7 +267,7 @@ void ColumnNullable::serialize_vec(std::vector& keys, size_t num_rows } void ColumnNullable::deserialize_vec(std::vector& keys, const size_t num_rows) { - auto& arr = _get_null_map_data(); + auto& arr = get_null_map_data(); const size_t old_size = arr.size(); arr.resize(old_size + num_rows); @@ -288,21 +290,15 @@ void ColumnNullable::deserialize_vec(std::vector& keys, const size_t void ColumnNullable::insert_range_from_ignore_overflow(const doris::vectorized::IColumn& src, size_t start, size_t length) { const auto& nullable_col = assert_cast(src); - _get_null_map_column().insert_range_from(*nullable_col.null_map, start, length); + get_null_map_column().insert_range_from(nullable_col.get_null_map_column(), start, length); get_nested_column().insert_range_from_ignore_overflow(*nullable_col.nested_column, start, length); - const auto& src_null_map_data = nullable_col.get_null_map_data(); - _has_null = has_null(); - _has_null |= simd::contain_byte(src_null_map_data.data() + start, length, 1); } void ColumnNullable::insert_range_from(const IColumn& src, size_t start, size_t length) { const auto& nullable_col = assert_cast(src); - _get_null_map_column().insert_range_from(*nullable_col.null_map, start, length); + get_null_map_column().insert_range_from(nullable_col.get_null_map_column(), start, length); get_nested_column().insert_range_from(*nullable_col.nested_column, start, length); - const auto& src_null_map_data = nullable_col.get_null_map_data(); - _has_null = has_null(); - _has_null |= simd::contain_byte(src_null_map_data.data() + start, length, 1); } void ColumnNullable::insert_indices_from(const IColumn& src, const uint32_t* indices_begin, @@ -310,9 +306,8 @@ void ColumnNullable::insert_indices_from(const IColumn& src, const uint32_t* ind const auto& src_concrete = assert_cast(src); get_nested_column().insert_indices_from(src_concrete.get_nested_column(), indices_begin, indices_end); - _get_null_map_column().insert_indices_from(src_concrete.get_null_map_column(), indices_begin, - indices_end); - _need_update_has_null = true; + get_null_map_column().insert_indices_from(src_concrete.get_null_map_column(), indices_begin, + indices_end); } void ColumnNullable::insert_indices_from_not_has_null(const IColumn& src, @@ -321,17 +316,18 @@ void ColumnNullable::insert_indices_from_not_has_null(const IColumn& src, const auto& src_concrete = assert_cast(src); get_nested_column().insert_indices_from(src_concrete.get_nested_column(), indices_begin, indices_end); - _get_null_map_column().insert_many_defaults(indices_end - indices_begin); + _push_false_to_nullmap(indices_end - indices_begin); } void ColumnNullable::insert(const Field& x) { if (x.is_null()) { get_nested_column().insert_default(); - _get_null_map_data().push_back(1); + get_null_map_data().push_back(1); _has_null = true; + _need_update_has_null = false; } else { get_nested_column().insert(x); - _get_null_map_data().push_back(0); + _push_false_to_nullmap(1); } } @@ -339,19 +335,18 @@ void ColumnNullable::insert_from(const IColumn& src, size_t n) { const auto& src_concrete = assert_cast(src); get_nested_column().insert_from(src_concrete.get_nested_column(), n); auto is_null = src_concrete.get_null_map_data()[n]; - _has_null |= is_null; - _get_null_map_data().push_back(is_null); + get_null_map_data().push_back(is_null); } void ColumnNullable::insert_from_not_nullable(const IColumn& src, size_t n) { get_nested_column().insert_from(src, n); - _get_null_map_data().push_back(0); + _push_false_to_nullmap(1); } void ColumnNullable::insert_range_from_not_nullable(const IColumn& src, size_t start, size_t length) { get_nested_column().insert_range_from(src, start, length); - _get_null_map_data().resize_fill(_get_null_map_data().size() + length, 0); + _push_false_to_nullmap(length); } void ColumnNullable::insert_many_from_not_nullable(const IColumn& src, size_t position, @@ -380,15 +375,14 @@ size_t ColumnNullable::filter(const Filter& filter) { } Status ColumnNullable::filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) { - const auto* nullable_col_ptr = reinterpret_cast(col_ptr); + auto* nullable_col_ptr = assert_cast(col_ptr); ColumnPtr nest_col_ptr = nullable_col_ptr->nested_column; - ColumnPtr null_map_ptr = nullable_col_ptr->null_map; + + /// `get_null_map_data` will set `_need_update_has_null` to true + auto& res_nullmap = nullable_col_ptr->get_null_map_data(); + RETURN_IF_ERROR(get_nested_column().filter_by_selector( sel, sel_size, const_cast(nest_col_ptr.get()))); - //insert cur nullmap into result nullmap which is empty - auto& res_nullmap = reinterpret_cast*>( - const_cast(null_map_ptr.get())) - ->get_data(); DCHECK(res_nullmap.empty()); res_nullmap.resize(sel_size); auto& cur_nullmap = get_null_map_column().get_data(); @@ -535,15 +529,10 @@ void ColumnNullable::get_permutation(bool reverse, size_t limit, int null_direct } } } -// -//void ColumnNullable::gather(ColumnGathererStream & gatherer) -//{ -// gatherer.gather(*this); -//} void ColumnNullable::reserve(size_t n) { get_nested_column().reserve(n); - _get_null_map_data().reserve(n); + get_null_map_data(false).reserve(n); } void ColumnNullable::resize(size_t n) { @@ -595,7 +584,7 @@ void ColumnNullable::apply_null_map(const ColumnNullable& other) { } void ColumnNullable::check_consistency() const { - if (null_map->size() != get_nested_column().size()) { + if (get_null_map_column().size() != get_nested_column().size()) { throw Exception(ErrorCode::INTERNAL_ERROR, "Sizes of nested column and null map of Nullable column are not equal"); } @@ -609,8 +598,8 @@ void ColumnNullable::sort_column(const ColumnSorter* sorter, EqualFlags& flags, } void ColumnNullable::_update_has_null() { - const UInt8* null_pos = _get_null_map_data().data(); - _has_null = simd::contain_byte(null_pos, _get_null_map_data().size(), 1); + const UInt8* null_pos = get_null_map_data().data(); + _has_null = simd::contain_byte(null_pos, get_null_map_data().size(), 1); _need_update_has_null = false; } diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h index d55bcd0bcf61b7..52f49719826064 100644 --- a/be/src/vec/columns/column_nullable.h +++ b/be/src/vec/columns/column_nullable.h @@ -21,7 +21,6 @@ #pragma once #include -#include #include #include #include @@ -50,6 +49,69 @@ class ColumnSorter; using NullMap = ColumnUInt8::Container; using ConstNullMapPtr = const NullMap*; +/// use this to avoid directly access null_map forgetting modify _need_update_has_null. see more in inner comments +class NullMapProvider { +public: + NullMapProvider() = default; + NullMapProvider(MutableColumnPtr&& null_map) : _null_map(std::move(null_map)) {} + void reset_null_map(MutableColumnPtr&& null_map) { _null_map = std::move(null_map); } + + // return the column that represents the byte map. if want use null_map, just call this. + const ColumnPtr& get_null_map_column_ptr() const { return _null_map; } + // for functions getting nullmap, we assume it will modify it. so set `_need_update_has_null` to true. if you know it wouldn't, + // call with arg false. but for the ops which will set _has_null themselves, call `update_has_null()` + MutableColumnPtr get_null_map_column_ptr(bool may_change = true) { + if (may_change) { + _need_update_has_null = true; + } + return _null_map->assume_mutable(); + } + ColumnUInt8::WrappedPtr& get_null_map(bool may_change = true) { + if (may_change) { + _need_update_has_null = true; + } + return _null_map; + } + + ColumnUInt8& get_null_map_column(bool may_change = true) { + if (may_change) { + _need_update_has_null = true; + } + return assert_cast(*_null_map); + } + const ColumnUInt8& get_null_map_column() const { + return assert_cast(*_null_map); + } + + NullMap& get_null_map_data(bool may_change = true) { + return get_null_map_column(may_change).get_data(); + } + const NullMap& get_null_map_data() const { return get_null_map_column().get_data(); } + + void clear_null_map() { assert_cast(_null_map.get())->clear(); } + + void update_has_null(bool new_value) { + _has_null = new_value; + _need_update_has_null = false; + } + +protected: + /** + * Here we have three variables which serve for `has_null()` judgement. If we have known the nullity of object, no need + * to check through the `null_map` to get the answer until the next time we modify it. Here `_has_null` is just the answer + * we cached. `_need_update_has_null` indicates there's modification or not since we got `_has_null()` last time. So in + * `_has_null()` we can check the two vars to know if there's need to update `has_null` or not. + * If you just want QUERY BUT NOT MODIFY, make sure the caller is const. There will be no perf overhead for const overload. + * Otherwise, this class, as the base class, will make it no possible to directly visit `null_map` forgetting to change the + * protected flags. Just call the interface is ok. + */ + bool _need_update_has_null = true; + bool _has_null = true; + +private: + IColumn::WrappedPtr _null_map; +}; + /// Class that specifies nullable columns. A nullable column represents /// a column, which may have any type, provided with the possibility of /// storing NULL values. For this purpose, a ColumnNullable object stores @@ -59,7 +121,7 @@ using ConstNullMapPtr = const NullMap*; /// over a bitmap because columns are usually stored on disk as compressed /// files. In this regard, using a bitmap instead of a byte map would /// greatly complicate the implementation with little to no benefits. -class ColumnNullable final : public COWHelper { +class ColumnNullable final : public COWHelper, public NullMapProvider { private: friend class COWHelper; @@ -88,9 +150,11 @@ class ColumnNullable final : public COWHelper { const char* get_family_name() const override { return "Nullable"; } std::string get_name() const override { return "Nullable(" + nested_column->get_name() + ")"; } MutableColumnPtr clone_resized(size_t size) const override; - size_t size() const override { return assert_cast(*null_map).size(); } + size_t size() const override { + return assert_cast(get_null_map_column()).size(); + } PURE bool is_null_at(size_t n) const override { - return assert_cast(*null_map).get_data()[n] != 0; + return assert_cast(get_null_map_column()).get_data()[n] != 0; } bool is_default_at(size_t n) const override { return is_null_at(n); } Field operator[](size_t n) const override; @@ -139,8 +203,13 @@ class ColumnNullable final : public COWHelper { assert_cast(nested_column.get()) ->insert_from(src_concrete.get_nested_column(), n); auto is_null = src_concrete.get_null_map_data()[n]; - _has_null |= is_null; - _get_null_map_data().push_back(is_null); + if (is_null) { + get_null_map_data().push_back(1); + _has_null = true; + _need_update_has_null = false; + } else { + _push_false_to_nullmap(1); + } } void insert_from_not_nullable(const IColumn& src, size_t n); @@ -148,18 +217,18 @@ class ColumnNullable final : public COWHelper { void insert_many_from_not_nullable(const IColumn& src, size_t position, size_t length); void insert_many_fix_len_data(const char* pos, size_t num) override { - _get_null_map_column().insert_many_vals(0, num); + _push_false_to_nullmap(num); get_nested_column().insert_many_fix_len_data(pos, num); } void insert_many_raw_data(const char* pos, size_t num) override { - _get_null_map_column().insert_many_vals(0, num); + _push_false_to_nullmap(num); get_nested_column().insert_many_raw_data(pos, num); } void insert_many_dict_data(const int32_t* data_array, size_t start_index, const StringRef* dict, size_t data_num, uint32_t dict_num) override { - _get_null_map_column().insert_many_vals(0, data_num); + _push_false_to_nullmap(data_num); get_nested_column().insert_many_dict_data(data_array, start_index, dict, data_num, dict_num); } @@ -169,38 +238,40 @@ class ColumnNullable final : public COWHelper { if (UNLIKELY(num == 0)) { return; } - _get_null_map_column().insert_many_vals(0, num); + _push_false_to_nullmap(num); get_nested_column().insert_many_continuous_binary_data(data, offsets, num); } void insert_many_binary_data(char* data_array, uint32_t* len_array, uint32_t* start_offset_array, size_t num) override { - _get_null_map_column().insert_many_vals(0, num); + _push_false_to_nullmap(num); get_nested_column().insert_many_binary_data(data_array, len_array, start_offset_array, num); } void insert_default() override { get_nested_column().insert_default(); - _get_null_map_data().push_back(1); + get_null_map_data().push_back(1); _has_null = true; + _need_update_has_null = false; } void insert_many_defaults(size_t length) override { get_nested_column().insert_many_defaults(length); - _get_null_map_data().resize_fill(_get_null_map_data().size() + length, 1); + get_null_map_data().resize_fill(get_null_map_data().size() + length, 1); _has_null = true; + _need_update_has_null = false; } void insert_not_null_elements(size_t num) { get_nested_column().insert_many_defaults(num); - _get_null_map_column().insert_many_vals(0, num); - _has_null = false; + _push_false_to_nullmap(num); } void insert_null_elements(int num) { get_nested_column().insert_many_defaults(num); - _get_null_map_column().insert_many_vals(1, num); + get_null_map_column().insert_many_vals(1, num); _has_null = true; + _need_update_has_null = false; } void pop_back(size_t n) override; @@ -252,7 +323,7 @@ class ColumnNullable final : public COWHelper { void for_each_subcolumn(ColumnCallback callback) override { callback(nested_column); - callback(null_map); + callback(get_null_map()); } bool structure_equals(const IColumn& rhs) const override { @@ -279,11 +350,13 @@ class ColumnNullable final : public COWHelper { bool values_have_fixed_size() const override { return nested_column->values_have_fixed_size(); } bool is_exclusive() const override { - return IColumn::is_exclusive() && nested_column->is_exclusive() && null_map->is_exclusive(); + return IColumn::is_exclusive() && nested_column->is_exclusive() && + get_null_map_column().is_exclusive(); } size_t size_of_value_if_fixed() const override { - return null_map->size_of_value_if_fixed() + nested_column->size_of_value_if_fixed(); + return get_null_map_column().size_of_value_if_fixed() + + nested_column->size_of_value_if_fixed(); } bool only_null() const override { return size() == 1 && is_null_at(0); } @@ -299,32 +372,12 @@ class ColumnNullable final : public COWHelper { MutableColumnPtr get_nested_column_ptr() { return nested_column->assume_mutable(); } - /// Return the column that represents the byte map. - const ColumnPtr& get_null_map_column_ptr() const { return null_map; } - - MutableColumnPtr get_null_map_column_ptr() { - _need_update_has_null = true; - return null_map->assume_mutable(); - } - - ColumnUInt8& get_null_map_column() { - _need_update_has_null = true; - return assert_cast(*null_map); - } - const ColumnUInt8& get_null_map_column() const { - return assert_cast(*null_map); - } - void clear() override { - null_map->clear(); + clear_null_map(); nested_column->clear(); _has_null = false; } - NullMap& get_null_map_data() { return get_null_map_column().get_data(); } - - const NullMap& get_null_map_data() const { return get_null_map_column().get_data(); } - /// Apply the null byte map of a specified nullable column onto the /// null byte map of the current column by performing an element-wise OR /// between both byte maps. This method is used to determine the null byte @@ -349,7 +402,8 @@ class ColumnNullable final : public COWHelper { void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override { DCHECK(size() > self_row); const auto& nullable_rhs = assert_cast(rhs); - null_map->replace_column_data(*nullable_rhs.null_map, row, self_row); + get_null_map_column().replace_column_data(nullable_rhs.get_null_map_column(), row, + self_row); if (!nullable_rhs.is_null_at(row)) { nested_column->replace_column_data(*nullable_rhs.nested_column, row, self_row); @@ -398,19 +452,15 @@ class ColumnNullable final : public COWHelper { ColumnPtr index(const IColumn& indexes, size_t limit) const override; private: - // the two functions will not update `_need_update_has_null` - ColumnUInt8& _get_null_map_column() { return assert_cast(*null_map); } - NullMap& _get_null_map_data() { return _get_null_map_column().get_data(); } - - WrappedPtr nested_column; - WrappedPtr null_map; - - bool _need_update_has_null = true; - bool _has_null; - void _update_has_null(); + template void apply_null_map_impl(const ColumnUInt8& map); + + // push not null value wouldn't change the nullity. no need to update _has_null + void _push_false_to_nullmap(size_t num) { get_null_map_column(false).insert_many_vals(0, num); } + + WrappedPtr nested_column; }; ColumnPtr make_nullable(const ColumnPtr& column, bool is_nullable = false); diff --git a/be/test/vec/columns/column_nullable_test.cpp b/be/test/vec/columns/column_nullable_test.cpp new file mode 100644 index 00000000000000..9e66d5bb302f38 --- /dev/null +++ b/be/test/vec/columns/column_nullable_test.cpp @@ -0,0 +1,112 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/columns/column_nullable.h" + +#include +#include +#include + +#include "column_nullable_test.h" +#include "common/status.h" +#include "runtime/define_primitive_type.h" +#include "vec/columns/columns_number.h" +#include "vec/columns/predicate_column.h" +#include "vec/core/field.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type.h" + +namespace doris::vectorized { + +TEST(ColumnNullableTest, NullTest) { + ColumnNullable::MutablePtr null_col = create_column_nullable(500, true); + EXPECT_TRUE(null_col->has_null()); + + ColumnNullable::MutablePtr dst_col = + ColumnNullable::create(create_nested_column(10), ColumnUInt8::create(10)); + EXPECT_FALSE(dst_col->has_null()); + + ColumnInt64::MutablePtr source_col = ColumnInt64::create(); + source_col->insert_range_of_integer(0, 100); + + dst_col->insert(Field()); + EXPECT_TRUE(dst_col->has_null()); + dst_col->clear(); + EXPECT_FALSE(dst_col->has_null()); + dst_col->insert_many_from_not_nullable(*source_col, 0, 10); + EXPECT_FALSE(dst_col->has_null()); + dst_col->insert_from_not_nullable(*source_col, 5); + EXPECT_FALSE(dst_col->has_null()); + dst_col->insert_many_from_not_nullable(*source_col, 5, 5); + EXPECT_FALSE(dst_col->has_null()); + dst_col->insert_range_from_not_nullable(*source_col, 5, 5); + EXPECT_FALSE(dst_col->has_null()); + dst_col->insert_range_from( + *ColumnNullable::create(std::move(source_col), ColumnUInt8::create(10)), 5, 5); + EXPECT_FALSE(dst_col->has_null()); + + dst_col->clear(); + EXPECT_FALSE(dst_col->has_null()); + dst_col->insert_null_elements(10); + EXPECT_TRUE(dst_col->has_null()); + + dst_col->clear(); + EXPECT_FALSE(dst_col->has_null()); + dst_col->insert_from(*null_col, 100); + EXPECT_TRUE(dst_col->has_null()); + + auto tmp_col = + ColumnNullable::create(create_nested_column(10), ColumnUInt8::create(10, 1)); + + dst_col->clear(); + EXPECT_FALSE(dst_col->has_null()); + dst_col->insert_from(*tmp_col, 9); + EXPECT_TRUE(dst_col->has_null()); + + dst_col->clear(); + EXPECT_FALSE(dst_col->has_null()); + dst_col->insert_range_from(*tmp_col, 0, 3); + EXPECT_TRUE(dst_col->has_null()); + + dst_col->clear(); + EXPECT_FALSE(dst_col->has_null()); + dst_col->insert_from(*tmp_col, 9); + EXPECT_TRUE(dst_col->has_null()); +} + +TEST(ColumnNullableTest, PredicateTest) { + auto nullable_pred = + ColumnNullable::create(PredicateColumnType::create(), ColumnUInt8::create()); + nullable_pred->insert_many_defaults(3); + EXPECT_TRUE(nullable_pred->has_null()); + nullable_pred->insert_null_elements(10); + EXPECT_TRUE(nullable_pred->has_null()); + + nullable_pred->clear(); + EXPECT_FALSE(nullable_pred->has_null()); + nullable_pred->insert_null_elements(10); + EXPECT_TRUE(nullable_pred->has_null()); // now it have 10 nulls + + auto null_dst = ColumnNullable::create(ColumnDate::create(), ColumnUInt8::create()); + EXPECT_FALSE(null_dst->has_null()); + + uint16_t selector[] = {5, 8}; // both null + EXPECT_EQ(nullable_pred->filter_by_selector(selector, 2, null_dst.get()), Status::OK()); + // filter_by_selector must announce to update has_null to make below right. + EXPECT_TRUE(null_dst->has_null()); +} +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/test/vec/columns/column_nullable_test.h b/be/test/vec/columns/column_nullable_test.h new file mode 100644 index 00000000000000..0f90a25c9b56b3 --- /dev/null +++ b/be/test/vec/columns/column_nullable_test.h @@ -0,0 +1,105 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include + +#include +#include +#include + +#include "vec/columns/column.h" +#include "vec/columns/column_nullable.h" +#include "vec/columns/column_string.h" +#include "vec/columns/columns_number.h" +#include "vec/core/field.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_date_time.h" +#include "vec/data_types/data_type_string.h" + +namespace doris::vectorized { + +inline std::string generate_random_string(size_t max_length) { + std::srand(std::time(nullptr)); // use current time as seed for random generator + + if (max_length == 0) { + return ""; + } + + auto randbyte = []() -> char { + // generate a random byte, in range [0x00, 0xFF] + return static_cast(rand() % 256); + }; + + std::string str(max_length, 0); + std::generate_n(str.begin(), max_length, randbyte); + + return str; +} + +inline MutableColumnPtr create_null_map(size_t input_rows_count, bool all_null = false, + bool all_not_null = false) { + std::srand(std::time(nullptr)); // use current time as seed for random generator + auto null_map = ColumnUInt8::create(); + for (size_t i = 0; i < input_rows_count; ++i) { + if (all_null) { + null_map->insert(1); + } else if (all_not_null) { + null_map->insert(0); + } else { + null_map->insert(rand() % 2); + } + } + return null_map; +} + +template +inline MutableColumnPtr create_nested_column(size_t input_rows_count) { + MutableColumnPtr column; + if constexpr (std::is_integral_v) { + column = ColumnVector::create(); + } else if constexpr (std::is_same_v) { + column = ColumnString::create(); + } else if constexpr (std::is_same_v) { + column = ColumnDecimal64::create(0, 6); + } + + for (size_t i = 0; i < input_rows_count; ++i) { + if constexpr (std::is_integral_v) { + column->insert(rand() % std::numeric_limits::max()); + } else if constexpr (std::is_same_v) { + column->insert(generate_random_string(rand() % 512)); + } else if constexpr (std::is_same_v) { + column->insert(Int64(rand() % std::numeric_limits::max())); + } else { + throw std::runtime_error("Unsupported type"); + } + } + + return column; +} + +template +inline ColumnNullable::MutablePtr create_column_nullable(size_t input_rows_count, + bool all_null = false, + bool all_not_null = false) { + auto null_map = create_null_map(input_rows_count, all_null, all_not_null); + auto nested_column = create_nested_column(input_rows_count); + return ColumnNullable::create(std::move(nested_column), std::move(null_map)); +} +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/test/vec/columns/column_resize_test.cpp b/be/test/vec/columns/column_resize_test.cpp index dd6f78f3e8d2f0..0b5f6ba183ea3c 100644 --- a/be/test/vec/columns/column_resize_test.cpp +++ b/be/test/vec/columns/column_resize_test.cpp @@ -1,4 +1,3 @@ - // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information diff --git a/be/test/vec/function/function_test_util.h b/be/test/vec/function/function_test_util.h index dec3fbf00ae93e..34cd86eccdd38c 100644 --- a/be/test/vec/function/function_test_util.h +++ b/be/test/vec/function/function_test_util.h @@ -266,6 +266,14 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty fn_ctx_return.type = doris::PrimitiveType::TYPE_DATEV2; } else if (std::is_same_v) { fn_ctx_return.type = doris::PrimitiveType::TYPE_DATETIMEV2; + } else if (std::is_same_v) { + fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMALV2; + } else if (std::is_same_v) { + fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMAL32; + } else if (std::is_same_v) { + fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMAL64; + } else if (std::is_same_v) { + fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMAL128I; } else { fn_ctx_return.type = doris::PrimitiveType::INVALID_TYPE; } diff --git a/regression-test/data/correctness/test_column_nullable_cache.out b/regression-test/data/correctness/test_column_nullable_cache.out new file mode 100644 index 00000000000000..eac491df2120bb --- /dev/null +++ b/regression-test/data/correctness/test_column_nullable_cache.out @@ -0,0 +1,6 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !test1 -- + +-- !test2 -- +0 + diff --git a/regression-test/suites/correctness/test_column_nullable_cache.groovy b/regression-test/suites/correctness/test_column_nullable_cache.groovy new file mode 100644 index 00000000000000..cf871c617aad42 --- /dev/null +++ b/regression-test/suites/correctness/test_column_nullable_cache.groovy @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_column_nullable_cache") { + sql """ + drop table if exists test_column_nullable_cache; + """ + sql """ + CREATE TABLE `test_column_nullable_cache` ( + `col_int_undef_signed2` int NULL, + `col_int_undef_signed` int NULL, + `col_int_undef_signed3` int NULL, + `col_int_undef_signed4` int NULL, + `pk` int NULL + ) ENGINE=OLAP + DUPLICATE KEY(`col_int_undef_signed2`) + DISTRIBUTED by RANDOM BUCKETS 10 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + sql """ + insert into test_column_nullable_cache + (pk,col_int_undef_signed,col_int_undef_signed2,col_int_undef_signed3,col_int_undef_signed4) + values (0,3,7164641,5,8),(1,null,3916062,5,6),(2,1,5533498,0,9),(3,7,2,null,7057679),(4,1,0,7,7), + (5,null,4,2448564,1),(6,7531976,7324373,9,7),(7,3,1,1,3),(8,6,8131576,9,-1793807),(9,9,2,4214547,9), + (10,-7299852,5,1,3),(11,7,3,-1036551,5),(12,-6108579,84823,4,1229534),(13,-1065629,5,4,null),(14,null,8072633,3328285,2), + (15,2,7,6,6),(16,8,5,-4582103,1),(17,5,-4677722,-2379367,4),(18,-7807532,-6686732,0,5329341), + (19,8,7,-4013246,-7013374),(20,0,2,9,2),(21,7,2383333,5,4),(22,5844611,2,2,0),(23,0,4756185,0,-5612039), + (24,6,4878754,608172,0),(25,null,7858692,7,-6704206),(26,7,-1697597,6,9),(27,9,-7021349,3,-3094786), + (28,2,2830915,null,8),(29,4133633,489212,5,9),(30,6,-3346211,3668768,2),(31,1,4862070,-5066405,0),(32,9,6,7,8), + (33,2,null,4,2),(34,1,2893430,-3282825,5),(35,2,3,4,2),(36,4,-3418732,6,1263819),(37,5,4,-6342170,6),(99,9,2,8,null); + """ + + qt_test1 """ + select * from test_column_nullable_cache where col_int_undef_signed3 IS NULL and col_int_undef_signed3 = col_int_undef_signed3; + """ + + qt_test2 """ + select count(*) from test_column_nullable_cache where col_int_undef_signed3 IS NULL and col_int_undef_signed3 = col_int_undef_signed3; + """ +}