Skip to content

Commit

Permalink
Merge branch 'main' into Fix-#19150-Warning-message
Browse files Browse the repository at this point in the history
  • Loading branch information
ComputingVictor authored Oct 10, 2024
2 parents f18041f + 194b31e commit 5e9b032
Show file tree
Hide file tree
Showing 30 changed files with 1,830 additions and 899 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions crates/polars-arrow/src/bitmap/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use std::sync::LazyLock;
use either::Either;
use polars_error::{polars_bail, PolarsResult};

use super::utils::{count_zeros, fmt, get_bit, get_bit_unchecked, BitChunk, BitChunks, BitmapIter};
use super::utils::{count_zeros, fmt, get_bit_unchecked, BitChunk, BitChunks, BitmapIter};
use super::{chunk_iter_to_vec, intersects_with, num_intersections_with, IntoIter, MutableBitmap};
use crate::array::Splitable;
use crate::bitmap::aligned::AlignedBitmapSlice;
Expand Down Expand Up @@ -334,7 +334,8 @@ impl Bitmap {
/// Panics iff `i >= self.len()`.
#[inline]
pub fn get_bit(&self, i: usize) -> bool {
get_bit(&self.storage, self.offset + i)
assert!(i < self.len());
unsafe { self.get_bit_unchecked(i) }
}

/// Unsafely returns whether the bit at position `i` is set.
Expand Down
69 changes: 51 additions & 18 deletions crates/polars-arrow/src/bitmap/mutable.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
use std::hint::unreachable_unchecked;

use polars_error::{polars_bail, PolarsResult};
use polars_utils::vec::PushUnchecked;

use super::utils::{
count_zeros, fmt, get_bit, set, set_bit, BitChunk, BitChunks, BitChunksExactMut, BitmapIter,
};
use super::utils::{count_zeros, fmt, BitChunk, BitChunks, BitChunksExactMut, BitmapIter};
use super::{intersects_with_mut, Bitmap};
use crate::bitmap::utils::{get_bit_unchecked, merge_reversed, set_bit_unchecked};
use crate::bitmap::utils::{get_bit_unchecked, merge_reversed, set_bit_in_byte};
use crate::storage::SharedStorage;
use crate::trusted_len::TrustedLen;

Expand Down Expand Up @@ -118,8 +117,8 @@ impl MutableBitmap {
if self.length % 8 == 0 {
self.buffer.push(0);
}
let byte = unsafe { self.buffer.as_mut_slice().last_mut().unwrap_unchecked() };
*byte = set(*byte, self.length % 8, value);
let byte = unsafe { self.buffer.last_mut().unwrap_unchecked() };
*byte = set_bit_in_byte(*byte, self.length % 8, value);
self.length += 1;
}

Expand All @@ -144,7 +143,8 @@ impl MutableBitmap {
/// Panics iff `index >= self.len()`.
#[inline]
pub fn get(&self, index: usize) -> bool {
get_bit(&self.buffer, index)
assert!(index < self.len());
unsafe { self.get_unchecked(index) }
}

/// Returns whether the position `index` is set.
Expand All @@ -161,7 +161,28 @@ impl MutableBitmap {
/// Panics iff `index >= self.len()`.
#[inline]
pub fn set(&mut self, index: usize, value: bool) {
set_bit(self.buffer.as_mut_slice(), index, value)
assert!(index < self.len());
unsafe {
self.set_unchecked(index, value);
}
}

/// ORs `value` into the bit at position `index`.
///
/// A `true` value sets the bit; a `false` value leaves it as it was. No
/// other bit of the containing byte is modified.
///
/// # Safety
/// It's undefined behavior if index >= self.len().
#[inline]
pub unsafe fn or_pos_unchecked(&mut self, index: usize, value: bool) {
    // One-bit mask: the bit at `index % 8` is set only when `value` is true.
    let mask = u8::from(value) << (index % 8);
    let byte = self.buffer.get_unchecked_mut(index / 8);
    *byte |= mask;
}

/// Sets the position `index` to the AND of its original value and `value`.
///
/// A `false` value clears the bit; a `true` value leaves it as it was. No
/// other bit of the containing byte is modified.
///
/// # Safety
/// It's undefined behavior if index >= self.len().
#[inline]
pub unsafe fn and_pos_unchecked(&mut self, index: usize, value: bool) {
    // The mask must keep every *other* bit of the byte intact, so it has all
    // bits set except (when `value` is false) the target bit. ANDing with a
    // plain `(value as u8) << shift` would zero the seven neighbouring bits.
    let mask = !(u8::from(!value) << (index % 8));
    let byte = self.buffer.get_unchecked_mut(index / 8);
    *byte &= mask;
}

/// constructs a new iterator over the bits of [`MutableBitmap`].
Expand Down Expand Up @@ -192,6 +213,17 @@ impl MutableBitmap {
}
}

/// Changes the length of the [`MutableBitmap`] to `length` bits.
///
/// If `length` is at least the current length, the bitmap is extended and
/// every newly added bit is set to `value`. Otherwise the bitmap is cut down
/// to `length` bits and `value` is ignored.
pub fn resize(&mut self, length: usize, value: bool) {
    match length.checked_sub(self.len()) {
        Some(additional) => {
            self.extend_constant(additional, value);
        },
        None => {
            // Shrinking: keep just enough bytes to hold `length` bits.
            let num_bytes = length.saturating_add(7) / 8;
            self.buffer.truncate(num_bytes);
            self.length = length;
        },
    }
}

/// Initializes a zeroed [`MutableBitmap`].
#[inline]
pub fn from_len_zeroed(length: usize) -> Self {
Expand Down Expand Up @@ -230,10 +262,10 @@ impl MutableBitmap {
#[inline]
pub unsafe fn push_unchecked(&mut self, value: bool) {
if self.length % 8 == 0 {
self.buffer.push(0);
self.buffer.push_unchecked(0);
}
let byte = self.buffer.as_mut_slice().last_mut().unwrap();
*byte = set(*byte, self.length % 8, value);
let byte = self.buffer.last_mut().unwrap_unchecked();
*byte = set_bit_in_byte(*byte, self.length % 8, value);
self.length += 1;
}

Expand Down Expand Up @@ -330,7 +362,8 @@ impl MutableBitmap {
/// Caller must ensure that `index < self.len()`
#[inline]
pub unsafe fn set_unchecked(&mut self, index: usize, value: bool) {
set_bit_unchecked(self.buffer.as_mut_slice(), index, value)
let byte = self.buffer.get_unchecked_mut(index / 8);
*byte = set_bit_in_byte(*byte, index % 8, value);
}

/// Shrinks the capacity of the [`MutableBitmap`] to fit its current length.
Expand Down Expand Up @@ -566,10 +599,10 @@ impl MutableBitmap {
self.buffer.push(0);
}
// the iterator will not fill the last byte
let byte = self.buffer.as_mut_slice().last_mut().unwrap();
let byte = self.buffer.last_mut().unwrap();
let mut i = bit_offset;
for value in iterator {
*byte = set(*byte, i, value);
*byte = set_bit_in_byte(*byte, i, value);
i += 1;
}
self.length += length;
Expand All @@ -581,9 +614,9 @@ impl MutableBitmap {

if bit_offset != 0 {
// we are in the middle of a byte; lets finish it
let byte = self.buffer.as_mut_slice().last_mut().unwrap();
let byte = self.buffer.last_mut().unwrap();
(bit_offset..8).for_each(|i| {
*byte = set(*byte, i, iterator.next().unwrap());
*byte = set_bit_in_byte(*byte, i, iterator.next().unwrap());
});
self.length += 8 - bit_offset;
length -= 8 - bit_offset;
Expand Down Expand Up @@ -650,15 +683,15 @@ impl MutableBitmap {
let data = buffer.as_mut_slice();
data[..chunks].iter_mut().try_for_each(|byte| {
(0..8).try_for_each(|i| {
*byte = set(*byte, i, iterator.next().unwrap()?);
*byte = set_bit_in_byte(*byte, i, iterator.next().unwrap()?);
Ok(())
})
})?;

if reminder != 0 {
let last = &mut data[chunks];
iterator.enumerate().try_for_each(|(i, value)| {
*last = set(*last, i, value?);
*last = set_bit_in_byte(*last, i, value?);
Ok(())
})?;
}
Expand Down
43 changes: 12 additions & 31 deletions crates/polars-arrow/src/bitmap/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,53 +25,34 @@ pub fn is_set(byte: u8, i: usize) -> bool {
}

/// Sets bit at position `i` in `byte`.
#[inline]
pub fn set(byte: u8, i: usize, value: bool) -> u8 {
#[inline(always)]
/// Returns `byte` with the bit at position `i` replaced by `value`.
///
/// `i` must be in `0..8`; this is only checked in debug builds.
pub fn set_bit_in_byte(byte: u8, i: usize, value: bool) -> u8 {
    debug_assert!(i < 8);

    let bit = 1u8 << i;
    if value {
        byte | bit
    } else {
        byte & !bit
    }
}

/// Sets bit at position `i` in `bytes`.
/// # Panics
/// This function panics iff `i >= bytes.len() * 8`.
#[inline]
pub fn set_bit(bytes: &mut [u8], i: usize, value: bool) {
bytes[i / 8] = set(bytes[i / 8], i % 8, value);
}

/// Sets bit at position `i` in `bytes` without doing bound checks
/// # Safety
/// `i >= bytes.len() * 8` results in undefined behavior.
#[inline]
pub unsafe fn set_bit_unchecked(bytes: &mut [u8], i: usize, value: bool) {
let byte = bytes.get_unchecked_mut(i / 8);
*byte = set(*byte, i % 8, value);
}

/// Returns whether bit at position `i` in `bytes` is set.
/// # Panic
/// This function panics iff `i >= bytes.len() * 8`.
#[inline]
pub fn get_bit(bytes: &[u8], i: usize) -> bool {
let byte = bytes[i / 8];
let bit = (byte >> (i % 8)) & 1;
bit != 0
}

/// Returns whether bit at position `i` in `bytes` is set or not.
///
/// # Safety
/// `i >= bytes.len() * 8` results in undefined behavior.
#[inline]
#[inline(always)]
pub unsafe fn get_bit_unchecked(bytes: &[u8], i: usize) -> bool {
    // Select the bit inside its byte with a one-bit mask rather than shifting
    // the byte down; the result is true exactly when that bit is set.
    let mask = 1u8 << (i % 8);
    (*bytes.get_unchecked_release(i / 8) & mask) != 0
}

/// Writes `value` into the bit at position `i` of `bytes`, skipping the
/// bounds check on the byte lookup.
///
/// # Safety
/// `i >= bytes.len() * 8` results in undefined behavior.
#[inline(always)]
pub unsafe fn set_bit_unchecked(bytes: &mut [u8], i: usize, value: bool) {
    let (byte_idx, bit_idx) = (i / 8, i % 8);
    let slot = bytes.get_unchecked_mut(byte_idx);
    *slot = set_bit_in_byte(*slot, bit_idx, value);
}

/// Returns the number of bytes required to hold `bits` bits.
#[inline]
pub fn bytes_for(bits: usize) -> usize {
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-core/src/chunked_array/ops/aggregate/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,7 @@ impl ChunkAggSeries for CategoricalChunked {
}

impl BinaryChunked {
pub(crate) fn max_binary(&self) -> Option<&[u8]> {
pub fn max_binary(&self) -> Option<&[u8]> {
if self.is_empty() {
return None;
}
Expand All @@ -587,7 +587,7 @@ impl BinaryChunked {
}
}

pub(crate) fn min_binary(&self) -> Option<&[u8]> {
pub fn min_binary(&self) -> Option<&[u8]> {
if self.is_empty() {
return None;
}
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-core/src/datatypes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ use bytemuck::Zeroable;
pub use dtype::*;
pub use field::*;
pub use into_scalar::*;
use num_traits::{Bounded, FromPrimitive, Num, NumCast, One, Zero};
use num_traits::{AsPrimitive, Bounded, FromPrimitive, Num, NumCast, One, Zero};
use polars_compute::arithmetic::HasPrimitiveArithmeticKernel;
use polars_compute::float_sum::FloatSum;
use polars_utils::abs_diff::AbsDiff;
Expand Down Expand Up @@ -356,6 +356,7 @@ pub trait NumericNative:
+ IsFloat
+ HasPrimitiveArithmeticKernel<TrueDivT=<Self::TrueDivPolarsType as PolarsNumericType>::Native>
+ FloatSum<f64>
+ AsPrimitive<f64>
+ MinMax
+ IsNull
{
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-expr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ description = "Physical expression implementation of the Polars project."
ahash = { workspace = true }
arrow = { workspace = true }
bitflags = { workspace = true }
num-traits = { workspace = true }
once_cell = { workspace = true }
polars-compute = { workspace = true }
polars-core = { workspace = true, features = ["lazy", "zip_with", "random"] }
Expand Down Expand Up @@ -72,5 +73,5 @@ bitwise = ["polars-core/bitwise", "polars-plan/bitwise"]
round_series = ["polars-plan/round_series", "polars-ops/round_series"]
is_between = ["polars-plan/is_between"]
dynamic_group_by = ["polars-plan/dynamic_group_by", "polars-time", "temporal"]
propagate_nans = ["polars-plan/propagate_nans"]
propagate_nans = ["polars-plan/propagate_nans", "polars-ops/propagate_nans"]
panic_on_schema = ["polars-plan/panic_on_schema"]
63 changes: 12 additions & 51 deletions crates/polars-expr/src/reduce/convert.rs
Original file line number Diff line number Diff line change
@@ -1,81 +1,42 @@
use polars_core::error::feature_gated;
// use polars_core::error::feature_gated;
use polars_plan::prelude::*;
use polars_utils::arena::{Arena, Node};

use super::len::LenReduce;
use super::mean::MeanReduce;
use super::min_max::{MaxReduce, MinReduce};
#[cfg(feature = "propagate_nans")]
use super::nan_min_max::{NanMaxReduce, NanMinReduce};
use super::sum::SumReduce;
use super::*;
use crate::reduce::len::LenReduce;
use crate::reduce::mean::new_mean_reduction;
use crate::reduce::min_max::{new_max_reduction, new_min_reduction};
use crate::reduce::sum::new_sum_reduction;

/// Converts a node into a reduction + its associated selector expression.
pub fn into_reduction(
node: Node,
expr_arena: &mut Arena<AExpr>,
schema: &Schema,
) -> PolarsResult<(Box<dyn Reduction>, Node)> {
) -> PolarsResult<(Box<dyn GroupedReduction>, Node)> {
let get_dt = |node| {
expr_arena
.get(node)
.to_dtype(schema, Context::Default, expr_arena)
};
let out = match expr_arena.get(node) {
AExpr::Agg(agg) => match agg {
IRAggExpr::Sum(input) => (
Box::new(SumReduce::new(get_dt(*input)?)) as Box<dyn Reduction>,
*input,
),
IRAggExpr::Sum(input) => (new_sum_reduction(get_dt(*input)?), *input),
IRAggExpr::Mean(input) => (new_mean_reduction(get_dt(*input)?), *input),
IRAggExpr::Min {
propagate_nans,
input,
} => {
let dt = get_dt(*input)?;
if *propagate_nans && dt.is_float() {
feature_gated!("propagate_nans", {
let out: Box<dyn Reduction> = match dt {
DataType::Float32 => Box::new(NanMinReduce::<Float32Type>::new()),
DataType::Float64 => Box::new(NanMinReduce::<Float64Type>::new()),
_ => unreachable!(),
};
(out, *input)
})
} else {
(
Box::new(MinReduce::new(dt.clone())) as Box<dyn Reduction>,
*input,
)
}
},
} => (new_min_reduction(get_dt(*input)?, *propagate_nans), *input),
IRAggExpr::Max {
propagate_nans,
input,
} => {
let dt = get_dt(*input)?;
if *propagate_nans && dt.is_float() {
feature_gated!("propagate_nans", {
let out: Box<dyn Reduction> = match dt {
DataType::Float32 => Box::new(NanMaxReduce::<Float32Type>::new()),
DataType::Float64 => Box::new(NanMaxReduce::<Float64Type>::new()),
_ => unreachable!(),
};
(out, *input)
})
} else {
(Box::new(MaxReduce::new(dt.clone())) as _, *input)
}
},
IRAggExpr::Mean(input) => {
let out: Box<dyn Reduction> = Box::new(MeanReduce::new(get_dt(*input)?));
(out, *input)
},
_ => unreachable!(),
} => (new_max_reduction(get_dt(*input)?, *propagate_nans), *input),
_ => todo!(),
},
AExpr::Len => {
// Compute length on the first column, or if none exist we'll use
// a zero-length dummy series.
let out: Box<dyn Reduction> = Box::new(LenReduce::new());
let out: Box<dyn GroupedReduction> = Box::new(LenReduce::default());
let expr = if let Some(first_column) = schema.iter_names().next() {
expr_arena.add(AExpr::Column(first_column.as_str().into()))
} else {
Expand Down
Loading

0 comments on commit 5e9b032

Please sign in to comment.