From 87e3332001fe5c90e63c31d1751a7352f52ad504 Mon Sep 17 00:00:00 2001 From: Jerry Hu Date: Wed, 24 May 2023 16:13:01 +0800 Subject: [PATCH] [improvement](bitmap) Use shared_ptr in BitmapValue to avoid deep copying (#19101) Currently bitmapvalue type is copied between columns, it cost a lot of memory. Use a shared ptr in bitmap value to avoid copy data. --- be/src/util/bitmap_value.h | 227 ++++++++++++++++++++--------- be/test/util/bitmap_value_test.cpp | 22 +++ 2 files changed, 178 insertions(+), 71 deletions(-) diff --git a/be/src/util/bitmap_value.h b/be/src/util/bitmap_value.h index 21fa5f217ed447..f2c869f8620bcc 100644 --- a/be/src/util/bitmap_value.h +++ b/be/src/util/bitmap_value.h @@ -1144,10 +1144,10 @@ class BitmapValueIterator; class BitmapValue { public: // Construct an empty bitmap. - BitmapValue() : _type(EMPTY) {} + BitmapValue() : _type(EMPTY), _is_shared(false) {} // Construct a bitmap with one element. - explicit BitmapValue(uint64_t value) : _sv(value), _type(SINGLE) {} + explicit BitmapValue(uint64_t value) : _sv(value), _type(SINGLE), _is_shared(false) {} // Construct a bitmap from serialized data. explicit BitmapValue(const char* src) { @@ -1155,8 +1155,48 @@ class BitmapValue { DCHECK(res); } + BitmapValue(const BitmapValue& other) { + _type = other._type; + _sv = other._sv; + _bitmap = other._bitmap; + _is_shared = true; + // should also set other's state to shared, so that other bitmap value will + // create a new bitmap when it wants to modify it. + const_cast(other)._is_shared = true; + } + + BitmapValue(BitmapValue&& other) { + _type = other._type; + _sv = other._sv; + _is_shared = other._is_shared; + _bitmap = std::move(other._bitmap); + } + + BitmapValue& operator=(const BitmapValue& other) { + _type = other._type; + _sv = other._sv; + _bitmap = other._bitmap; + _is_shared = true; + // should also set other's state to shared, so that other bitmap value will + // create a new bitmap when it wants to modify it. + const_cast(other)._is_shared = true; + return *this; + } + + BitmapValue& operator=(BitmapValue&& other) { + if (this == &other) { + return *this; + } + + _type = other._type; + _sv = other._sv; + _bitmap = std::move(other._bitmap); + _is_shared = other._is_shared; + return *this; + } + // Construct a bitmap from given elements. - explicit BitmapValue(const std::vector& bits) { + explicit BitmapValue(const std::vector& bits) : _is_shared(false) { switch (bits.size()) { case 0: _type = EMPTY; @@ -1167,7 +1207,8 @@ class BitmapValue { break; default: _type = BITMAP; - _bitmap.addMany(bits.size(), &bits[0]); + _prepare_bitmap_for_write(); + _bitmap->addMany(bits.size(), &bits[0]); } } @@ -1182,12 +1223,15 @@ class BitmapValue { if (_sv == value) { break; } - _bitmap.add(_sv); - _bitmap.add(value); + _prepare_bitmap_for_write(); + _bitmap->add(_sv); + _bitmap->add(value); _type = BITMAP; break; case BITMAP: - _bitmap.add(value); + _prepare_bitmap_for_write(); + _bitmap->add(value); + break; } } @@ -1202,7 +1246,8 @@ class BitmapValue { } break; case BITMAP: - _bitmap.remove(value); + _prepare_bitmap_for_write(); + _bitmap->remove(value); _convert_to_smaller_type(); } } @@ -1220,12 +1265,13 @@ class BitmapValue { case EMPTY: break; case SINGLE: - if (rhs._bitmap.contains(_sv)) { + if (rhs._bitmap->contains(_sv)) { _type = EMPTY; } break; case BITMAP: - _bitmap -= rhs._bitmap; + _prepare_bitmap_for_write(); + *_bitmap -= *rhs._bitmap; _convert_to_smaller_type(); break; } @@ -1250,15 +1296,21 @@ class BitmapValue { switch (_type) { case EMPTY: _bitmap = rhs._bitmap; + const_cast(rhs)._is_shared = true; + _is_shared = true; _type = BITMAP; break; case SINGLE: _bitmap = rhs._bitmap; - _bitmap.add(_sv); + const_cast(rhs)._is_shared = true; + _is_shared = true; + _prepare_bitmap_for_write(); + _bitmap->add(_sv); _type = BITMAP; break; case BITMAP: - _bitmap |= rhs._bitmap; + _prepare_bitmap_for_write(); + *_bitmap |= *rhs._bitmap; } break; } @@ -1277,24 +1329,25 @@ class BitmapValue { single_values.push_back(value->_sv); break; case BITMAP: - bitmaps.push_back(&value->_bitmap); + bitmaps.push_back(value->_bitmap.get()); break; } } if (!bitmaps.empty()) { + _prepare_bitmap_for_write(); switch (_type) { case EMPTY: - _bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data()); + *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data()); _type = BITMAP; break; case SINGLE: - _bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data()); - _bitmap.add(_sv); + *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data()); + _bitmap->add(_sv); _type = BITMAP; break; case BITMAP: - _bitmap |= detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data()); + *_bitmap |= detail::Roaring64Map::fastunion(bitmaps.size(), bitmaps.data()); break; } } @@ -1303,9 +1356,10 @@ class BitmapValue { _sv = single_values[0]; _type = SINGLE; } else if (!single_values.empty()) { - _bitmap.addMany(single_values.size(), single_values.data()); + _prepare_bitmap_for_write(); + _bitmap->addMany(single_values.size(), single_values.data()); if (_type == SINGLE) { - _bitmap.add(_sv); + _bitmap->add(_sv); } _type = BITMAP; } @@ -1322,7 +1376,7 @@ class BitmapValue { switch (rhs._type) { case EMPTY: _type = EMPTY; - _bitmap.clear(); + _bitmap.reset(); break; case SINGLE: switch (_type) { @@ -1334,13 +1388,13 @@ class BitmapValue { } break; case BITMAP: - if (!_bitmap.contains(rhs._sv)) { + if (!_bitmap->contains(rhs._sv)) { _type = EMPTY; } else { _type = SINGLE; _sv = rhs._sv; } - _bitmap.clear(); + _bitmap.reset(); break; } break; @@ -1349,12 +1403,13 @@ class BitmapValue { case EMPTY: break; case SINGLE: - if (!rhs._bitmap.contains(_sv)) { + if (!rhs._bitmap->contains(_sv)) { _type = EMPTY; } break; case BITMAP: - _bitmap &= rhs._bitmap; + _prepare_bitmap_for_write(); + *_bitmap &= *rhs._bitmap; _convert_to_smaller_type(); break; } @@ -1380,16 +1435,17 @@ class BitmapValue { case SINGLE: if (_sv == rhs._sv) { _type = EMPTY; - _bitmap.clear(); + _bitmap.reset(); } else { add(rhs._sv); } break; case BITMAP: - if (!_bitmap.contains(rhs._sv)) { + if (!_bitmap->contains(rhs._sv)) { add(rhs._sv); } else { - _bitmap.remove(rhs._sv); + _prepare_bitmap_for_write(); + _bitmap->remove(rhs._sv); } break; } @@ -1398,19 +1454,25 @@ class BitmapValue { switch (_type) { case EMPTY: _bitmap = rhs._bitmap; + const_cast(rhs)._is_shared = true; + _is_shared = true; _type = BITMAP; break; case SINGLE: _bitmap = rhs._bitmap; + const_cast(rhs)._is_shared = true; + _is_shared = true; _type = BITMAP; - if (!rhs._bitmap.contains(_sv)) { - _bitmap.add(_sv); + _prepare_bitmap_for_write(); + if (!rhs._bitmap->contains(_sv)) { + _bitmap->add(_sv); } else { - _bitmap.remove(_sv); + _bitmap->remove(_sv); } break; case BITMAP: - _bitmap ^= rhs._bitmap; + _prepare_bitmap_for_write(); + *_bitmap ^= *rhs._bitmap; _convert_to_smaller_type(); break; } @@ -1427,7 +1489,7 @@ class BitmapValue { case SINGLE: return _sv == x; case BITMAP: - return _bitmap.contains(x); + return _bitmap->contains(x); } return false; } @@ -1442,7 +1504,7 @@ class BitmapValue { case SINGLE: return 1; case BITMAP: - return _bitmap.cardinality(); + return _bitmap->cardinality(); } return 0; } @@ -1458,16 +1520,16 @@ class BitmapValue { case SINGLE: return _sv == rhs._sv; case BITMAP: - return _bitmap.contains(rhs._sv); + return _bitmap->contains(rhs._sv); } case BITMAP: switch (_type) { case EMPTY: return 0; case SINGLE: - return rhs._bitmap.contains(_sv); + return rhs._bitmap->contains(_sv); case BITMAP: - return _bitmap.andCardinality(rhs._bitmap); + return _bitmap->andCardinality(*rhs._bitmap); } } return 0; @@ -1484,16 +1546,16 @@ class BitmapValue { case SINGLE: return 1 + (_sv != rhs._sv); case BITMAP: - return cardinality() + !_bitmap.contains(rhs._sv); + return cardinality() + !_bitmap->contains(rhs._sv); } case BITMAP: switch (_type) { case EMPTY: return rhs.cardinality(); case SINGLE: - return rhs.cardinality() + !rhs._bitmap.contains(_sv); + return rhs.cardinality() + !rhs._bitmap->contains(_sv); case BITMAP: - return _bitmap.orCardinality(rhs._bitmap); + return _bitmap->orCardinality(*rhs._bitmap); } } return 0; @@ -1510,16 +1572,16 @@ class BitmapValue { case SINGLE: return 2 - 2 * (_sv == rhs._sv); case BITMAP: - return cardinality() + 1 - 2 * (_bitmap.contains(rhs._sv)); + return cardinality() + 1 - 2 * (_bitmap->contains(rhs._sv)); } case BITMAP: switch (_type) { case EMPTY: return rhs.cardinality(); case SINGLE: - return rhs.cardinality() + 1 - 2 * (rhs._bitmap.contains(_sv)); + return rhs.cardinality() + 1 - 2 * (rhs._bitmap->contains(_sv)); case BITMAP: - return _bitmap.xorCardinality(rhs._bitmap); + return _bitmap->xorCardinality(*rhs._bitmap); } } return 0; @@ -1536,16 +1598,16 @@ class BitmapValue { case SINGLE: return 1 - _sv == rhs._sv; case BITMAP: - return cardinality() - _bitmap.contains(rhs._sv); + return cardinality() - _bitmap->contains(rhs._sv); } case BITMAP: switch (_type) { case EMPTY: return 0; case SINGLE: - return !rhs._bitmap.contains(_sv); + return !rhs._bitmap->contains(_sv); case BITMAP: - return _bitmap.andnotCardinality(rhs._bitmap); + return _bitmap->andnotCardinality(*rhs._bitmap); } } return 0; @@ -1567,9 +1629,9 @@ class BitmapValue { } break; case BITMAP: - _bitmap.runOptimize(); - _bitmap.shrinkToFit(); - res = _bitmap.getSizeInBytes(); + _bitmap->runOptimize(); + _bitmap->shrinkToFit(); + res = _bitmap->getSizeInBytes(); break; } return res; @@ -1592,7 +1654,7 @@ class BitmapValue { } break; case BITMAP: - _bitmap.write(dst); + _bitmap->write(dst); break; } } @@ -1615,7 +1677,8 @@ class BitmapValue { case BitmapTypeCode::BITMAP32: case BitmapTypeCode::BITMAP64: _type = BITMAP; - _bitmap = detail::Roaring64Map::read(src); + _prepare_bitmap_for_write(); + *_bitmap = detail::Roaring64Map::read(src); break; default: LOG(ERROR) << "BitmapTypeCode invalid, should between: " << BitmapTypeCode::EMPTY @@ -1631,7 +1694,7 @@ class BitmapValue { case SINGLE: return doris_udf::BigIntVal(_sv); case BITMAP: - return doris_udf::BigIntVal(_bitmap.minimum()); + return doris_udf::BigIntVal(_bitmap->minimum()); default: return doris_udf::BigIntVal::null(); } @@ -1653,7 +1716,7 @@ class BitmapValue { } iter_ctx; iter_ctx.ss = &ss; - _bitmap.iterate( + _bitmap->iterate( [](uint64_t value, void* c) -> bool { auto ctx = reinterpret_cast(c); if (ctx->first) { @@ -1676,18 +1739,18 @@ class BitmapValue { case SINGLE: return doris_udf::BigIntVal(_sv); case BITMAP: - return doris_udf::BigIntVal(_bitmap.maximum()); + return doris_udf::BigIntVal(_bitmap->maximum()); default: return doris_udf::BigIntVal::null(); } } uint64_t max(bool* empty) const { - return min_or_max(empty, [&]() { return _bitmap.maximum(); }); + return min_or_max(empty, [&]() { return _bitmap->maximum(); }); } uint64_t min(bool* empty) const { - return min_or_max(empty, [&]() { return _bitmap.minimum(); }); + return min_or_max(empty, [&]() { return _bitmap->minimum(); }); } bool empty() const { return _type == EMPTY; } @@ -1711,7 +1774,7 @@ class BitmapValue { } case BITMAP: { int64_t count = 0; - for (auto it = _bitmap.begin(); it != _bitmap.end(); ++it) { + for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) { if (*it < range_start) { continue; } @@ -1750,7 +1813,7 @@ class BitmapValue { } case BITMAP: { int64_t count = 0; - for (auto it = _bitmap.begin(); it != _bitmap.end(); ++it) { + for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) { if (*it < range_start) { continue; } @@ -1786,23 +1849,23 @@ class BitmapValue { } } case BITMAP: { - if (std::abs(offset) >= _bitmap.cardinality()) { + if (std::abs(offset) >= _bitmap->cardinality()) { return 0; } } } int64_t abs_offset = offset; if (offset < 0) { - abs_offset = _bitmap.cardinality() + offset; + abs_offset = _bitmap->cardinality() + offset; } int64_t count = 0; int64_t offset_count = 0; - auto it = _bitmap.begin(); - for (; it != _bitmap.end() && offset_count < abs_offset; ++it) { + auto it = _bitmap->begin(); + for (; it != _bitmap->end() && offset_count < abs_offset; ++it) { ++offset_count; } - for (; it != _bitmap.end() && count < limit; ++it, ++count) { + for (; it != _bitmap->end() && count < limit; ++it, ++count) { ret_bitmap->add(*it); } return count; @@ -1818,7 +1881,7 @@ class BitmapValue { break; } case BITMAP: { - for (auto it = _bitmap.begin(); it != _bitmap.end(); ++it) { + for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) { data.emplace_back(*it); } break; @@ -1828,7 +1891,7 @@ class BitmapValue { void clear() { _type = EMPTY; - _bitmap.clear(); + _bitmap.reset(); _sv = 0; } @@ -1843,15 +1906,15 @@ class BitmapValue { private: void _convert_to_smaller_type() { if (_type == BITMAP) { - uint64_t c = _bitmap.cardinality(); + uint64_t c = _bitmap->cardinality(); if (c > 1) return; if (c == 0) { _type = EMPTY; } else { _type = SINGLE; - _sv = _bitmap.minimum(); + _sv = _bitmap->minimum(); } - _bitmap.clear(); + _bitmap.reset(); } } @@ -1874,14 +1937,36 @@ class BitmapValue { return result; } + void _prepare_bitmap_for_write() { + if (!_bitmap) { + _bitmap = std::make_shared(); + _is_shared = false; + return; + } + + if (!_is_shared) { + // the state is not shared, not need to check use count any more + return; + } + + if (_bitmap.use_count() > 1) { + auto new_one = std::make_shared(); + *new_one = *_bitmap; + _bitmap = new_one; + } + _is_shared = false; + } + enum BitmapDataType { EMPTY = 0, SINGLE = 1, // single element BITMAP = 2 // more than one elements }; - uint64_t _sv = 0; // store the single value when _type == SINGLE - detail::Roaring64Map _bitmap; // used when _type == BITMAP - BitmapDataType _type; + uint64_t _sv = 0; // store the single value when _type == SINGLE + std::shared_ptr _bitmap; // used when _type == BITMAP + BitmapDataType _type {EMPTY}; + // Indicate whether the state is shared among multi BitmapValue object + bool _is_shared = true; }; // A simple implement of bitmap value iterator(Read only) @@ -1903,7 +1988,7 @@ class BitmapValueIterator { _sv = _bitmap._sv; break; case BitmapValue::BitmapDataType::BITMAP: - _iter = new detail::Roaring64MapSetBitForwardIterator(_bitmap._bitmap, _end); + _iter = new detail::Roaring64MapSetBitForwardIterator(*_bitmap._bitmap, _end); break; default: CHECK(false) << _bitmap._type; diff --git a/be/test/util/bitmap_value_test.cpp b/be/test/util/bitmap_value_test.cpp index ee950744dbc961..5964927d4dfd81 100644 --- a/be/test/util/bitmap_value_test.cpp +++ b/be/test/util/bitmap_value_test.cpp @@ -27,6 +27,28 @@ namespace doris { using roaring::Roaring; +TEST(BitmapValueTest, copy) { + BitmapValue empty; + BitmapValue single(1024); + BitmapValue bitmap({1024, 1025, 1026}); + + BitmapValue copied = bitmap; + EXPECT_TRUE(copied.contains(1024)); + EXPECT_TRUE(copied.contains(1025)); + EXPECT_TRUE(copied.contains(1026)); + copied &= single; + + // value of copied changed. + EXPECT_TRUE(copied.contains(1024)); + EXPECT_FALSE(copied.contains(1025)); + EXPECT_FALSE(copied.contains(1026)); + + // value of bitmap not changed. + EXPECT_TRUE(bitmap.contains(1024)); + EXPECT_TRUE(bitmap.contains(1025)); + EXPECT_TRUE(bitmap.contains(1026)); +} + TEST(BitmapValueTest, bitmap_union) { BitmapValue empty; BitmapValue single(1024);