Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-41687: [C++] bit_util: Trying to remove pre-compute table #41690

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 23 additions & 6 deletions cpp/src/arrow/util/bit_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -281,10 +281,22 @@ static inline int Log2(uint64_t x) {
//

// Bitmask selecting the k-th bit in a byte
static constexpr uint8_t kBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128};
// static constexpr uint8_t kBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128};
template <typename T>
static constexpr uint8_t GetBitMask(T index) {
// DCHECK(index >= 0 && index <= 7);
ARROW_COMPILER_ASSUME(index >= 0);
return static_cast<uint8_t>(1) << index;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The compiler will generate code that performs the << on a 32-bit integer. A more honest implementation (in the sense that it gives more freedom to the compiler [1]):

return static_cast<uint8_t>(0x1 << index)
// and
return static_cast<uint8_t>(~(0x1 << i))

And since indices in arrow are rarely uint8_t, I would keep the index type unconstrained like this:

template <typename T>
static constexpr uint8_t GetBitmask2(T index) {
    return static_cast<uint8_t>(0x1 << index);
}
template <typename T>
static constexpr uint8_t GetFlippedBitmask2(T index) {
  return static_cast<uint8_t>(~(0x1 << index));
}

[1] might matter more for rustc than clang because C/C++ compilers already have a lot of freedom even when your code contains many type constraints

}

// the bitwise complement version of kBitmask
static constexpr uint8_t kFlippedBitmask[] = {254, 253, 251, 247, 239, 223, 191, 127};
// static constexpr uint8_t kFlippedBitmask[] = {254, 253, 251, 247, 239, 223, 191, 127};
template <typename T>
static constexpr uint8_t GetFlippedBitMask(T index) {
// DCHECK(index >= 0 && index <= 7);
ARROW_COMPILER_ASSUME(index >= 0);
return ~(static_cast<uint8_t>(1) << index);
}

// Bitmask selecting the (k - 1) preceding bits in a byte
static constexpr uint8_t kPrecedingBitmask[] = {0, 1, 3, 7, 15, 31, 63, 127};
Expand All @@ -299,22 +311,27 @@ static constexpr bool GetBit(const uint8_t* bits, uint64_t i) {

// Gets the i-th bit from a byte. Should only be used with i <= 7.
static constexpr bool GetBitFromByte(uint8_t byte, uint8_t i) {
return byte & kBitmask[i];
return byte & GetBitMask(i);
}

static inline void ClearBit(uint8_t* bits, int64_t i) {
bits[i / 8] &= kFlippedBitmask[i % 8];
ARROW_COMPILER_ASSUME(i >= 0);
bits[i / 8] &= GetFlippedBitMask(i % 8);
}

static inline void SetBit(uint8_t* bits, int64_t i) { bits[i / 8] |= kBitmask[i % 8]; }
static inline void SetBit(uint8_t* bits, int64_t i) {
ARROW_COMPILER_ASSUME(i >= 0);
bits[i / 8] |= GetBitMask(i % 8);
}

static inline void SetBitTo(uint8_t* bits, int64_t i, bool bit_is_set) {
ARROW_COMPILER_ASSUME(i >= 0);
// https://graphics.stanford.edu/~seander/bithacks.html
// "Conditionally set or clear bits without branching"
// NOTE: this seems to confuse Valgrind as it reads from potentially
// uninitialized memory
bits[i / 8] ^= static_cast<uint8_t>(-static_cast<uint8_t>(bit_is_set) ^ bits[i / 8]) &
kBitmask[i % 8];
GetBitMask(i % 8);
}

/// \brief set or clear a range of bits quickly
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/util/bitmap_generate.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ void GenerateBits(uint8_t* bitmap, int64_t start_offset, int64_t length, Generat
return;
}
uint8_t* cur = bitmap + start_offset / 8;
uint8_t bit_mask = bit_util::kBitmask[start_offset % 8];
uint8_t bit_mask = bit_util::GetBitMask(start_offset % 8);
uint8_t current_byte = *cur & bit_util::kPrecedingBitmask[start_offset % 8];

for (int64_t index = 0; index < length; ++index) {
Expand Down Expand Up @@ -71,7 +71,7 @@ void GenerateBitsUnrolled(uint8_t* bitmap, int64_t start_offset, int64_t length,
uint8_t current_byte;
uint8_t* cur = bitmap + start_offset / 8;
const uint64_t start_bit_offset = start_offset % 8;
uint8_t bit_mask = bit_util::kBitmask[start_bit_offset];
uint8_t bit_mask = bit_util::GetBitMask(start_bit_offset);
int64_t remaining = length;

if (bit_mask != 0x01) {
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/arrow/util/bitmap_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class BitmapWriter {
BitmapWriter(uint8_t* bitmap, int64_t start_offset, int64_t length)
: bitmap_(bitmap), position_(0), length_(length) {
byte_offset_ = start_offset / 8;
bit_mask_ = bit_util::kBitmask[start_offset % 8];
bit_mask_ = bit_util::GetBitMask(start_offset % 8);
if (length > 0) {
current_byte_ = bitmap[byte_offset_];
} else {
Expand Down Expand Up @@ -88,7 +88,7 @@ class FirstTimeBitmapWriter {
: bitmap_(bitmap), position_(0), length_(length) {
current_byte_ = 0;
byte_offset_ = start_offset / 8;
bit_mask_ = bit_util::kBitmask[start_offset % 8];
bit_mask_ = bit_util::GetBitMask(start_offset % 8);
if (length > 0) {
current_byte_ =
bitmap[byte_offset_] & bit_util::kPrecedingBitmask[start_offset % 8];
Expand All @@ -113,7 +113,7 @@ class FirstTimeBitmapWriter {
// Update state variables except for current_byte_ here.
position_ += number_of_bits;
int64_t bit_offset = bit_util::CountTrailingZeros(static_cast<uint32_t>(bit_mask_));
bit_mask_ = bit_util::kBitmask[(bit_offset + number_of_bits) % 8];
bit_mask_ = bit_util::GetBitMask((bit_offset + number_of_bits) % 8);
byte_offset_ += (bit_offset + number_of_bits) / 8;

if (bit_offset != 0) {
Expand Down
Loading