Skip to content

Commit

Permalink
perf: add RangedUniqueKernel for primitive array
Browse files Browse the repository at this point in the history
This PR adds a unique value kernel that is selected based on the metadata for
`PrimitiveArray`. When the difference between the metadata min and max value is
small enough a different kernel is used that does not require sorting the data
first.

This is mostly to show how the new metadata can be used to select a different
kernel.

For a microbenchmark on a debug build, we see the following results:

```python
import polars as pl
import numpy as np
from timeit import timeit

xs = list(np.random.randint(5, 100, size = 500000))
df = pl.DataFrame({ "x": xs, }, schema = { "x": pl.Int32 })

def rand_unique():
    df.select(pl.col.x.unique())

t = timeit(rand_unique, number = 100)
print(f'Before Time = {t}')
df.select(xmin = pl.col.x.min(), xmax = pl.col.x.max())
t = timeit(rand_unique, number = 100)
print(f'After Time = {t}')
```

```
Before Time = 3.450812840001163
After Time  = 1.3185648359976767
```

This is a ~2.6x improvement. I feel like this can also be further improved if needed.

For now this is a draft PR, as I still need to:

- Restructure the `RangedUniqueKernel` trait
- Disable this for float types
  • Loading branch information
coastalwhite committed Jun 24, 2024
1 parent 4731834 commit cc0a3d1
Show file tree
Hide file tree
Showing 4 changed files with 283 additions and 73 deletions.
148 changes: 80 additions & 68 deletions crates/polars-compute/src/unique/boolean.rs
Original file line number Diff line number Diff line change
@@ -1,86 +1,98 @@
use arrow::array::{Array, BooleanArray};
use arrow::bitmap::MutableBitmap;

use super::UniqueKernel;

fn bool_unique_fold<'a>(
fst: &'a BooleanArray,
arrs: impl Iterator<Item = &'a BooleanArray>,
) -> BooleanArray {
// can be None, Some(true), Some(false)
//
// We assign values to each value
// None = 1
// Some(false) = 2
// Some(true) = 3
//
// And keep track of 2 things
// - `found_set`: which values have already appeared
// - `order`: in which order did the values appear
use super::RangedUniqueKernel;

#[inline(always)]
fn append_arr(arr: &BooleanArray, found_set: &mut u32, order: &mut u32) {
for v in arr {
let value = v.map_or(1, |v| 2 + u32::from(v));
let nulled_value = if *found_set & (1 << value) != 0 {
0
} else {
value
};

*order |= nulled_value << (found_set.count_ones() * 2);
*found_set |= 1 << value;

if *found_set == 0b1110 {
break;
}
}
/// Encode an optional boolean as a small non-zero code.
///
/// `None` becomes `1`, `Some(false)` becomes `2` and `Some(true)` becomes `3`.
const fn to_value(scalar: Option<bool>) -> u8 {
    if let Some(flag) = scalar {
        // `false as u8 == 0` and `true as u8 == 1`, so valid values land on 2 and 3.
        2 + flag as u8
    } else {
        1
    }
}

let mut found_set = 0u32;
let mut order = 0u32;
impl RangedUniqueKernel for BooleanArray {
type Scalar<'a> = bool;
type Range<'a> = ();

append_arr(fst, &mut found_set, &mut order);
for arr in arrs {
append_arr(arr, &mut found_set, &mut order);
#[inline(always)]
fn to_value<'a>(scalar: Option<Self::Scalar<'a>>, _: Self::Range<'a>) -> u8 {
to_value(scalar)
}

let mut values = MutableBitmap::with_capacity(3);
let validity = if found_set & 0b10 != 0 {
let mut validity = MutableBitmap::with_capacity(3);
while order != 0 {
values.push(order & 0b11 > 2);
validity.push(order & 0b11 > 1);
order >>= 2;
}
Some(validity.freeze())
} else {
while order != 0 {
values.push(order & 0b11 > 2);
order >>= 2;
fn unique_fold<'a>(fst: &'a Self, others: impl Iterator<Item = &'a Self>, _: Self::Range<'a>) -> Self {
const ALL_FOUND: u32 = 0b1110;

// We keep track of 2 things
// - `found_set`: which values have already appeared
// - `order`: in which order did the values appear

/// Fold the distinct values of `arr` into `found_set` and `order`.
///
/// - `found_set` has bit `to_value(v)` set for every value `v` seen so far.
/// - `order` stores the values seen so far as 2-bit codes; the first value seen sits in the
///   lowest two bits, the next one in the two bits above it, and so on.
#[inline(always)]
fn append_arr(arr: &BooleanArray, found_set: &mut u32, order: &mut u32) {
    /// Mark `value` as seen; it is appended to `order` only the first time it appears.
    #[inline(always)]
    fn record(value: u32, found_set: &mut u32, order: &mut u32) {
        if *found_set & (1 << value) == 0 {
            *order |= value << (found_set.count_ones() * 2);
            *found_set |= 1 << value;
        }
    }

    if arr.len() == 0 {
        return;
    }

    // Fast path: an all-null array can only contribute a null. It goes through the same
    // `record` step as the general path, so a null that was already seen is not emitted a
    // second time and the order of appearance is kept.
    if arr.len() == arr.null_count() {
        record(u32::from(to_value(None)), found_set, order);
        return;
    }

    for v in arr {
        record(u32::from(to_value(v)), found_set, order);

        // Null, `false` and `true` have all been seen: nothing new can show up.
        if *found_set == ALL_FOUND {
            break;
        }
    }
}
None
};

let values = values.freeze();
let mut found_set = 0u32;
let mut order = 0u32;

BooleanArray::new(fst.data_type().clone(), values, validity)
}
append_arr(fst, &mut found_set, &mut order);
for arr in others {
append_arr(arr, &mut found_set, &mut order);
}

impl UniqueKernel for BooleanArray {
fn unique_fold<'a>(fst: &'a Self, others: impl Iterator<Item = &'a Self>) -> Self {
bool_unique_fold(fst, others)
}
let mut values = MutableBitmap::with_capacity(3);
let validity = if found_set & 0b10 != 0 {
let mut validity = MutableBitmap::with_capacity(3);
while order != 0 {
values.push(order & 0b11 > 2);
validity.push(order & 0b11 > 1);
order >>= 2;
}
Some(validity.freeze())
} else {
while order != 0 {
values.push(order & 0b11 > 2);
order >>= 2;
}
None
};

let values = values.freeze();

fn unique(&self) -> Self {
Self::unique_fold(self, [].iter())
BooleanArray::new(fst.data_type().clone(), values, validity)
}

fn unique_sorted(&self) -> Self {
Self::unique_fold(self, [].iter())
fn unique<'a>(&'a self, range: Self::Range<'a>) -> Self {
Self::unique_fold(self, [].iter(), range)
}

fn n_unique(&self) -> usize {
fn n_unique<'a>(&'a self, _: Self::Range<'a>) -> usize {
if self.len() == 0 {
return 0;
}
Expand All @@ -106,8 +118,8 @@ impl UniqueKernel for BooleanArray {
}

#[inline]
fn n_unique_non_null(&self) -> usize {
self.n_unique() - usize::from(self.null_count() > 0)
fn n_unique_non_null<'a>(&'a self, range: Self::Range<'a>) -> usize {
self.n_unique(range) - usize::from(self.null_count() > 0)
}
}

Expand All @@ -122,7 +134,7 @@ fn test_boolean_distinct_count() {
<Option<Vec<bool>>>::map($validity, |v| Bitmap::from_iter(v));
let arr =
BooleanArray::new(ArrowDataType::Boolean, Bitmap::from_iter($values), validity);
assert_eq!(arr.n_unique(), $dc);
assert_eq!(arr.n_unique(()), $dc);
};
}

Expand Down
54 changes: 50 additions & 4 deletions crates/polars-compute/src/unique/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use arrow::array::Array;

/// Kernel to calculate the number of unique elements
pub trait UniqueKernel: Array {
/// Kernel to calculate the number of unique elements where the elements are already sorted.
pub trait SortedUniqueKernel: Array {
/// Calculate the set of unique elements in `fst` and `others` and fold the result into one
/// array.
fn unique_fold<'a>(fst: &'a Self, others: impl Iterator<Item = &'a Self>) -> Self;
Expand All @@ -10,8 +10,24 @@ pub trait UniqueKernel: Array {
/// `self`.
fn unique(&self) -> Self;

/// Calculate the set of unique elements in [`Self`] where `self` is sorted.
fn unique_sorted(&self) -> Self;
/// Calculate the number of unique elements in [`Self`]
///
/// A null is also considered a unique value
fn n_unique(&self) -> usize;

/// Calculate the number of unique non-null elements in [`Self`]
fn n_unique_non_null(&self) -> usize;
}

/// Kernel to calculate the number of unique elements
pub trait HashSetUniqueKernel: Array {
/// Calculate the set of unique elements in `fst` and `others` and fold the result into one
/// array.
fn unique_fold<'a>(fst: &'a Self, others: impl Iterator<Item = &'a Self>) -> Self;

/// Calculate the set of unique elements in [`Self`] where we have no further information about
/// `self`.
fn unique(&self) -> Self;

/// Calculate the number of unique elements in [`Self`]
///
Expand All @@ -22,4 +38,34 @@ pub trait UniqueKernel: Array {
fn n_unique_non_null(&self) -> usize;
}

/// Kernel to calculate the unique elements, and the number of unique elements, where elements
/// are in a small range of values.
///
/// Every method takes a [`Self::Range`] value in addition to the array(s) it works on.
pub trait RangedUniqueKernel: Array {
/// The scalar type accepted by [`Self::to_value`].
type Scalar<'a>;
/// The range information that is passed to every method of this kernel.
type Range<'a>;

/// Map an optional scalar (`None` stands for a null) and a `range` onto a `u8`.
///
/// NOTE(review): the implementations visible in this change use the returned value as a bit
/// index into a "seen" bitset -- confirm that this is the intended contract for implementors.
fn to_value<'a>(scalar: Option<Self::Scalar<'a>>, range: Self::Range<'a>) -> u8;

/// Calculate the set of unique elements in `fst` and `others` and fold the result into one
/// array.
fn unique_fold<'a>(
fst: &'a Self,
others: impl Iterator<Item = &'a Self>,
range: Self::Range<'a>,
) -> Self;

/// Calculate the set of unique elements in [`Self`], given the `range` of its values.
fn unique<'a>(&'a self, range: Self::Range<'a>) -> Self;

/// Calculate the number of unique elements in [`Self`]
///
/// A null is also considered a unique value
fn n_unique<'a>(&'a self, range: Self::Range<'a>) -> usize;

/// Calculate the number of unique non-null elements in [`Self`]
fn n_unique_non_null<'a>(&'a self, range: Self::Range<'a>) -> usize;
}

mod boolean;
mod primitive;
134 changes: 134 additions & 0 deletions crates/polars-compute/src/unique/primitive.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
use std::ops::{Add, Sub, RangeInclusive};

use arrow::array::PrimitiveArray;
use arrow::bitmap::MutableBitmap;
use arrow::types::NativeType;
use num_traits::FromPrimitive;
// use polars_utils::total_ord::TotalOrd;

use super::RangedUniqueKernel;

const SEEN_ALL: u128 = !0;
const SEEN_NONE_MASK: u128 = 1;

fn append_arr<T: NativeType>(arr: &PrimitiveArray<T>, seen: &mut u128, range: (RangeInclusive<T>, bool))
where
T: Add<T, Output = T> + Sub<T, Output = T> + FromPrimitive
{
for v in arr {
*seen |= 1 << <PrimitiveArray<T>>::to_value(v.copied(), range.clone());

if *seen == SEEN_ALL {
break;
}
}
}

/// Ranged unique kernel for primitive arrays, backed by a 128-bit "seen" bitset.
///
/// The `Range` is `(min..=max, has_null_slot)`:
///
/// - when `has_null_slot` is `true`, bit 0 of the bitset stands for null and the value
///   `min + i` is stored in bit `i + 1`;
/// - when `has_null_slot` is `false`, the value `min + i` is stored in bit `i`.
///
/// Because the bitset is a `u128`, every bit index produced by `to_value` must be below 128.
/// NOTE(review): nothing in this impl checks that; it is presumably the caller's job to only
/// select this kernel when `max - min` is small enough -- confirm at the call site.
impl<T: NativeType> RangedUniqueKernel for PrimitiveArray<T>
where
    T: Add<T, Output = T> + Sub<T, Output = T> + FromPrimitive,
{
    type Scalar<'a> = T;
    type Range<'a> = (RangeInclusive<T>, bool);

    /// Map `scalar` to its bit index in the "seen" bitset.
    ///
    /// A null maps to bit 0; a value maps to its offset from the start of the range, moved up
    /// by one when bit 0 is reserved for null.
    #[inline(always)]
    fn to_value<'a>(scalar: Option<Self::Scalar<'a>>, range: Self::Range<'a>) -> u8 {
        match scalar {
            None => {
                // A null is only representable when bit 0 is reserved for it. Without the
                // reserved slot, bit 0 means "range start" and the null would be miscounted
                // as that value.
                debug_assert!(range.1);
                0
            },
            // Only the lowest byte of the offset is used: the offset is expected to be small.
            Some(v) => (v - *range.0.start()).to_le_bytes()[0] + u8::from(range.1),
        }
    }

    /// Calculate the unique elements over `fst` and all of `others`.
    ///
    /// The result holds the null first (if a null was seen), followed by the seen values in
    /// ascending bit order, i.e. by increasing offset from the start of the range.
    fn unique_fold<'a>(
        fst: &'a Self,
        others: impl Iterator<Item = &'a Self>,
        range: Self::Range<'a>,
    ) -> Self {
        let mut seen = 0u128;

        append_arr(fst, &mut seen, range.clone());
        for arr in others {
            if seen == SEEN_ALL {
                break;
            }

            append_arr(arr, &mut seen, range.clone());
        }

        let (bounds, has_null_slot) = range;
        let num_values = seen.count_ones() as usize;
        let mut values = Vec::with_capacity(num_values);

        // A validity bitmap is only needed when a null was actually seen.
        let mut validity = (has_null_slot && (seen & SEEN_NONE_MASK) != 0)
            .then(|| MutableBitmap::with_capacity(num_values));
        if let Some(validity) = validity.as_mut() {
            values.push(T::zeroed());
            validity.push(false);
        }

        // Drop the null slot so that bit `i` now stands for `start + i`.
        if has_null_slot {
            seen >>= 1;
        }

        let mut offset = 0u8;
        while seen != 0 {
            let shift = seen.trailing_zeros();
            offset += shift as u8;
            values.push(
                *bounds.start()
                    + T::from_u8(offset).expect("a bit offset below 128 is representable in `T`"),
            );
            if let Some(validity) = validity.as_mut() {
                validity.push(true);
            }

            // Shift in two steps: `shift + 1` is 128 when only the top bit is set, and a
            // single shift by 128 overflows (panic in debug, endless loop in release).
            seen >>= shift;
            seen >>= 1;
            offset += 1;
        }

        PrimitiveArray::new(
            fst.data_type().clone(),
            values.into(),
            validity.map(|bits| bits.freeze()),
        )
    }

    /// Calculate the unique elements of `self`.
    fn unique<'a>(&'a self, range: Self::Range<'a>) -> Self {
        Self::unique_fold(self, [].iter(), range)
    }

    /// Count the unique elements of `self`; a null counts as one element.
    fn n_unique<'a>(&'a self, range: Self::Range<'a>) -> usize {
        let mut seen = 0u128;
        append_arr(self, &mut seen, range);
        seen.count_ones() as usize
    }

    /// Count the unique non-null elements of `self`.
    fn n_unique_non_null<'a>(&'a self, range: Self::Range<'a>) -> usize {
        let has_null_slot = range.1;

        let mut seen = 0u128;
        append_arr(self, &mut seen, range);

        // Bit 0 only stands for null when a null slot is reserved; otherwise it is the first
        // value of the range and has to be counted.
        if has_null_slot {
            seen &= !SEEN_NONE_MASK;
        }
        seen.count_ones() as usize
    }
}
Loading

0 comments on commit cc0a3d1

Please sign in to comment.