Skip to content

Commit

Permalink
[Fix](Column) refactor ColumnNullable to provide flags safety (#40769)
Browse files Browse the repository at this point in the history
## Proposed changes

Issue Number: close #xxx

In this pr we move null_map and relative flags to an individual base
class to constrain its visibility.
please reed the comment carefully

test in master:
```
xxx/column_nullable_test.cpp:187: Failure
Value of: null_dst->has_null()
  Actual: false
Expected: true

Error occurred in case0
[  FAILED  ] ColumnNullableTest.PredicateTest (0 ms)
[ RUN      ] ColumnNullableTest.HashTest
[       OK ] ColumnNullableTest.HashTest (0 ms)
[----------] 3 tests from ColumnNullableTest (0 ms total)

[----------] Global test environment tear-down
[==========] 3 tests from 1 test suite ran. (1 ms total)
[  PASSED  ] 2 tests.
[  FAILED  ] 1 test, listed below:
[  FAILED  ] ColumnNullableTest.PredicateTest
```

and success after this pr

---------

Co-authored-by: Jerry Hu <[email protected]>
  • Loading branch information
2 people authored and dataroaring committed Sep 26, 2024
1 parent 9f22401 commit 6d0b67a
Show file tree
Hide file tree
Showing 9 changed files with 426 additions and 176 deletions.
85 changes: 37 additions & 48 deletions be/src/vec/columns/column_nullable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,19 @@
namespace doris::vectorized {

ColumnNullable::ColumnNullable(MutableColumnPtr&& nested_column_, MutableColumnPtr&& null_map_)
: nested_column(std::move(nested_column_)), null_map(std::move(null_map_)) {
: NullMapProvider(std::move(null_map_)), nested_column(std::move(nested_column_)) {
/// ColumnNullable cannot have constant nested column. But constant argument could be passed. Materialize it.
nested_column = get_nested_column().convert_to_full_column_if_const();

// after convert const column to full column, it may be a nullable column
if (nested_column->is_nullable()) {
assert_cast<ColumnNullable&>(*nested_column).apply_null_map((const ColumnUInt8&)*null_map);
null_map = assert_cast<ColumnNullable&>(*nested_column).get_null_map_column_ptr();
assert_cast<ColumnNullable&>(*nested_column)
.apply_null_map(static_cast<const ColumnUInt8&>(get_null_map_column()));
reset_null_map(assert_cast<ColumnNullable&>(*nested_column).get_null_map_column_ptr());
nested_column = assert_cast<ColumnNullable&>(*nested_column).get_nested_column_ptr();
}

if (is_column_const(*null_map)) {
if (is_column_const(get_null_map_column())) [[unlikely]] {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"ColumnNullable cannot have constant null map");
__builtin_unreachable();
Expand All @@ -69,7 +70,7 @@ void ColumnNullable::update_xxHash_with_value(size_t start, size_t end, uint64_t
nested_column->update_xxHash_with_value(start, end, hash, nullptr);
} else {
const auto* __restrict real_null_data =
assert_cast<const ColumnUInt8&>(*null_map).get_data().data();
assert_cast<const ColumnUInt8&>(get_null_map_column()).get_data().data();
for (int i = start; i < end; ++i) {
if (real_null_data[i] != 0) {
hash = HashUtil::xxHash64NullWithSeed(hash);
Expand All @@ -85,7 +86,7 @@ void ColumnNullable::update_crc_with_value(size_t start, size_t end, uint32_t& h
nested_column->update_crc_with_value(start, end, hash, nullptr);
} else {
const auto* __restrict real_null_data =
assert_cast<const ColumnUInt8&>(*null_map).get_data().data();
assert_cast<const ColumnUInt8&>(get_null_map_column()).get_data().data();
for (int i = start; i < end; ++i) {
if (real_null_data[i] != 0) {
hash = HashUtil::zlib_crc_hash_null(hash);
Expand All @@ -110,7 +111,7 @@ void ColumnNullable::update_crcs_with_value(uint32_t* __restrict hashes, doris::
auto s = rows;
DCHECK(s == size());
const auto* __restrict real_null_data =
assert_cast<const ColumnUInt8&>(*null_map).get_data().data();
assert_cast<const ColumnUInt8&>(get_null_map_column()).get_data().data();
if (!has_null()) {
nested_column->update_crcs_with_value(hashes, type, rows, offset, nullptr);
} else {
Expand All @@ -128,7 +129,7 @@ void ColumnNullable::update_hashes_with_value(uint64_t* __restrict hashes,
DCHECK(null_data == nullptr);
auto s = size();
const auto* __restrict real_null_data =
assert_cast<const ColumnUInt8&>(*null_map).get_data().data();
assert_cast<const ColumnUInt8&>(get_null_map_column()).get_data().data();
if (!has_null()) {
nested_column->update_hashes_with_value(hashes, nullptr);
} else {
Expand Down Expand Up @@ -183,24 +184,24 @@ StringRef ColumnNullable::get_data_at(size_t n) const {
void ColumnNullable::insert_data(const char* pos, size_t length) {
if (pos == nullptr) {
get_nested_column().insert_default();
_get_null_map_data().push_back(1);
get_null_map_data().push_back(1);
_has_null = true;
_need_update_has_null = false;
} else {
get_nested_column().insert_data(pos, length);
_get_null_map_data().push_back(0);
_push_false_to_nullmap(1);
}
}

void ColumnNullable::insert_many_strings(const StringRef* strings, size_t num) {
auto& null_map_data = _get_null_map_data();
for (size_t i = 0; i != num; ++i) {
if (strings[i].data == nullptr) {
nested_column->insert_default();
null_map_data.push_back(1);
get_null_map_data().push_back(1);
_has_null = true;
} else {
nested_column->insert_data(strings[i].data, strings[i].size);
null_map_data.push_back(0);
_push_false_to_nullmap(1);
}
}
}
Expand All @@ -227,13 +228,14 @@ const char* ColumnNullable::deserialize_and_insert_from_arena(const char* pos) {
UInt8 val = *reinterpret_cast<const UInt8*>(pos);
pos += sizeof(val);

_get_null_map_data().push_back(val);
get_null_map_data().push_back(val);

if (val == 0) {
pos = get_nested_column().deserialize_and_insert_from_arena(pos);
} else {
get_nested_column().insert_default();
_has_null = true;
_need_update_has_null = false;
}

return pos;
Expand All @@ -251,7 +253,7 @@ void ColumnNullable::serialize_vec(std::vector<StringRef>& keys, size_t num_rows
}

void ColumnNullable::deserialize_vec(std::vector<StringRef>& keys, const size_t num_rows) {
auto& arr = _get_null_map_data();
auto& arr = get_null_map_data();
const size_t old_size = arr.size();
arr.resize(old_size + num_rows);

Expand All @@ -274,31 +276,24 @@ void ColumnNullable::deserialize_vec(std::vector<StringRef>& keys, const size_t
void ColumnNullable::insert_range_from_ignore_overflow(const doris::vectorized::IColumn& src,
size_t start, size_t length) {
const auto& nullable_col = assert_cast<const ColumnNullable&>(src);
_get_null_map_column().insert_range_from(*nullable_col.null_map, start, length);
get_null_map_column().insert_range_from(nullable_col.get_null_map_column(), start, length);
get_nested_column().insert_range_from_ignore_overflow(*nullable_col.nested_column, start,
length);
const auto& src_null_map_data = nullable_col.get_null_map_data();
_has_null = has_null();
_has_null |= simd::contain_byte(src_null_map_data.data() + start, length, 1);
}

void ColumnNullable::insert_range_from(const IColumn& src, size_t start, size_t length) {
const auto& nullable_col = assert_cast<const ColumnNullable&>(src);
_get_null_map_column().insert_range_from(*nullable_col.null_map, start, length);
get_null_map_column().insert_range_from(nullable_col.get_null_map_column(), start, length);
get_nested_column().insert_range_from(*nullable_col.nested_column, start, length);
const auto& src_null_map_data = nullable_col.get_null_map_data();
_has_null = has_null();
_has_null |= simd::contain_byte(src_null_map_data.data() + start, length, 1);
}

void ColumnNullable::insert_indices_from(const IColumn& src, const uint32_t* indices_begin,
const uint32_t* indices_end) {
const auto& src_concrete = assert_cast<const ColumnNullable&>(src);
get_nested_column().insert_indices_from(src_concrete.get_nested_column(), indices_begin,
indices_end);
_get_null_map_column().insert_indices_from(src_concrete.get_null_map_column(), indices_begin,
indices_end);
_need_update_has_null = true;
get_null_map_column().insert_indices_from(src_concrete.get_null_map_column(), indices_begin,
indices_end);
}

void ColumnNullable::insert_indices_from_not_has_null(const IColumn& src,
Expand All @@ -307,37 +302,37 @@ void ColumnNullable::insert_indices_from_not_has_null(const IColumn& src,
const auto& src_concrete = assert_cast<const ColumnNullable&>(src);
get_nested_column().insert_indices_from(src_concrete.get_nested_column(), indices_begin,
indices_end);
_get_null_map_column().insert_many_defaults(indices_end - indices_begin);
_push_false_to_nullmap(indices_end - indices_begin);
}

void ColumnNullable::insert(const Field& x) {
if (x.is_null()) {
get_nested_column().insert_default();
_get_null_map_data().push_back(1);
get_null_map_data().push_back(1);
_has_null = true;
_need_update_has_null = false;
} else {
get_nested_column().insert(x);
_get_null_map_data().push_back(0);
_push_false_to_nullmap(1);
}
}

void ColumnNullable::insert_from(const IColumn& src, size_t n) {
const auto& src_concrete = assert_cast<const ColumnNullable&>(src);
get_nested_column().insert_from(src_concrete.get_nested_column(), n);
auto is_null = src_concrete.get_null_map_data()[n];
_has_null |= is_null;
_get_null_map_data().push_back(is_null);
get_null_map_data().push_back(is_null);
}

void ColumnNullable::insert_from_not_nullable(const IColumn& src, size_t n) {
get_nested_column().insert_from(src, n);
_get_null_map_data().push_back(0);
_push_false_to_nullmap(1);
}

void ColumnNullable::insert_range_from_not_nullable(const IColumn& src, size_t start,
size_t length) {
get_nested_column().insert_range_from(src, start, length);
_get_null_map_data().resize_fill(_get_null_map_data().size() + length, 0);
_push_false_to_nullmap(length);
}

void ColumnNullable::insert_many_from_not_nullable(const IColumn& src, size_t position,
Expand Down Expand Up @@ -366,15 +361,14 @@ size_t ColumnNullable::filter(const Filter& filter) {
}

Status ColumnNullable::filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) {
const auto* nullable_col_ptr = reinterpret_cast<const ColumnNullable*>(col_ptr);
auto* nullable_col_ptr = assert_cast<ColumnNullable*>(col_ptr);
ColumnPtr nest_col_ptr = nullable_col_ptr->nested_column;
ColumnPtr null_map_ptr = nullable_col_ptr->null_map;

/// `get_null_map_data` will set `_need_update_has_null` to true
auto& res_nullmap = nullable_col_ptr->get_null_map_data();

RETURN_IF_ERROR(get_nested_column().filter_by_selector(
sel, sel_size, const_cast<doris::vectorized::IColumn*>(nest_col_ptr.get())));
//insert cur nullmap into result nullmap which is empty
auto& res_nullmap = reinterpret_cast<vectorized::ColumnVector<UInt8>*>(
const_cast<doris::vectorized::IColumn*>(null_map_ptr.get()))
->get_data();
DCHECK(res_nullmap.empty());
res_nullmap.resize(sel_size);
auto& cur_nullmap = get_null_map_column().get_data();
Expand Down Expand Up @@ -522,15 +516,10 @@ void ColumnNullable::get_permutation(bool reverse, size_t limit, int null_direct
}
}
}
//
//void ColumnNullable::gather(ColumnGathererStream & gatherer)
//{
// gatherer.gather(*this);
//}

void ColumnNullable::reserve(size_t n) {
get_nested_column().reserve(n);
_get_null_map_data().reserve(n);
get_null_map_data(false).reserve(n);
}

void ColumnNullable::resize(size_t n) {
Expand Down Expand Up @@ -582,7 +571,7 @@ void ColumnNullable::apply_null_map(const ColumnNullable& other) {
}

void ColumnNullable::check_consistency() const {
if (null_map->size() != get_nested_column().size()) {
if (get_null_map_column().size() != get_nested_column().size()) {
throw Exception(ErrorCode::INTERNAL_ERROR,
"Sizes of nested column and null map of Nullable column are not equal");
}
Expand All @@ -596,8 +585,8 @@ void ColumnNullable::sort_column(const ColumnSorter* sorter, EqualFlags& flags,
}

void ColumnNullable::_update_has_null() {
const UInt8* null_pos = _get_null_map_data().data();
_has_null = simd::contain_byte(null_pos, _get_null_map_data().size(), 1);
const UInt8* null_pos = get_null_map_data().data();
_has_null = simd::contain_byte(null_pos, get_null_map_data().size(), 1);
_need_update_has_null = false;
}

Expand Down
Loading

0 comments on commit 6d0b67a

Please sign in to comment.