-
-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
perf: add RangedUniqueKernel for primitive array
This PR adds a unique-value kernel for `PrimitiveArray` that is selected based on the array's metadata. When the difference between the metadata min and max values is small enough, a different kernel is used that does not require sorting the data first. This is mostly to show how the new metadata can be used to select a different kernel.

For a microbenchmark on a debug build, we see the following results:

```python
import polars as pl
import numpy as np
from timeit import timeit

xs = list(np.random.randint(5, 100, size = 500000))
df = pl.DataFrame({ "x": xs, }, schema = { "x": pl.Int32 })

def rand_unique():
    df.select(pl.col.x.unique())

t = timeit(rand_unique, number = 100)
print(f'Before Time = {t}')

df.select(xmin = pl.col.x.min(), xmax = pl.col.x.max())

t = timeit(rand_unique, number = 100)
print(f'After Time = {t}')
```

```
Before Time = 3.450812840001163
After Time = 1.3185648359976767
```

This is a ~2.6x improvement. I feel like this can be improved further if needed.

For now this is a draft PR, as I still need to:
- Restructure the `RangedUniqueKernel` trait
- Disable this for float types
- Loading branch information
1 parent
4731834
commit cc0a3d1
Showing
4 changed files
with
283 additions
and
73 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
use std::ops::{Add, Sub, RangeInclusive}; | ||
|
||
use arrow::array::PrimitiveArray; | ||
use arrow::bitmap::MutableBitmap; | ||
use arrow::types::NativeType; | ||
use num_traits::FromPrimitive; | ||
// use polars_utils::total_ord::TotalOrd; | ||
|
||
use super::RangedUniqueKernel; | ||
|
||
// Bitset value with all 128 slots set; once `seen` equals it, scanning stops early.
const SEEN_ALL: u128 = !0; | |
// Bit 0 of the bitset; it is read as the null slot when the range's flag (`range.1`) is set.
const SEEN_NONE_MASK: u128 = 1; | |
|
||
fn append_arr<T: NativeType>(arr: &PrimitiveArray<T>, seen: &mut u128, range: (RangeInclusive<T>, bool)) | ||
where | ||
T: Add<T, Output = T> + Sub<T, Output = T> + FromPrimitive | ||
{ | ||
for v in arr { | ||
*seen |= 1 << <PrimitiveArray<T>>::to_value(v.copied(), range.clone()); | ||
|
||
if *seen == SEEN_ALL { | ||
break; | ||
} | ||
} | ||
} | ||
|
||
impl<T: NativeType> RangedUniqueKernel for PrimitiveArray<T> | ||
where | ||
T: Add<T, Output = T> + Sub<T, Output = T> + FromPrimitive | ||
{ | ||
type Scalar<'a> = T; | ||
type Range<'a> = (RangeInclusive<T>, bool); | ||
|
||
#[inline(always)] | ||
fn to_value<'a>(scalar: Option<Self::Scalar<'a>>, range: Self::Range<'a>) -> u8 { | ||
// debug_assert!({ | ||
// (*range.0.end() - *range.0.start()).to_le_bytes()[0] < 128 + u8::from(range.1) | ||
// }); | ||
// debug_assert!({ | ||
// let mut is_zero = true; | ||
// for b in (*range.0.end() - *range.0.start()).to_le_bytes().as_ref().iter().skip(1) { | ||
// is_zero &= *b == 0; | ||
// } | ||
// is_zero | ||
// }); | ||
// | ||
match scalar { | ||
None => { | ||
debug_assert!(!range.1); | ||
0 | ||
}, | ||
Some(v) => { | ||
// debug_assert!(<T as TotalOrd>::tot_le(&v, range.0.end())); | ||
// debug_assert!(<T as TotalOrd>::tot_ge(&v, range.0.start())); | ||
|
||
(v - *range.0.start()).to_le_bytes()[0] + u8::from(range.1) | ||
} | ||
} | ||
} | ||
|
||
fn unique_fold<'a>( | ||
fst: &'a Self, | ||
others: impl Iterator<Item = &'a Self>, | ||
range: Self::Range<'a>, | ||
) -> Self { | ||
let mut seen = 0u128; | ||
|
||
append_arr(fst, &mut seen, range.clone()); | ||
for arr in others { | ||
if seen == SEEN_ALL { | ||
break; | ||
} | ||
|
||
append_arr(arr, &mut seen, range.clone()); | ||
} | ||
|
||
let num_values = seen.count_ones() as usize; | ||
let mut values = Vec::with_capacity(num_values); | ||
|
||
let (values, validity) = if range.1 && seen & SEEN_NONE_MASK != 0 { | ||
let mut validity = MutableBitmap::with_capacity(num_values); | ||
|
||
values.push(T::zeroed()); | ||
validity.push(false); | ||
seen >>= 1; | ||
|
||
let mut offset = 0u8; | ||
while seen != 0 { | ||
let shift = seen.trailing_zeros(); | ||
offset += shift as u8; | ||
values.push(*range.0.start() + T::from_u8(offset).unwrap()); | ||
validity.push(true); | ||
|
||
seen >>= shift + 1; | ||
offset += 1; | ||
} | ||
|
||
(values, Some(validity.freeze())) | ||
} else { | ||
seen >>= u8::from(range.1); | ||
|
||
let mut offset = 0u8; | ||
while seen != 0 { | ||
let shift = seen.trailing_zeros(); | ||
offset += shift as u8; | ||
values.push(*range.0.start() + T::from_u8(offset).unwrap()); | ||
|
||
seen >>= shift + 1; | ||
offset += 1; | ||
} | ||
|
||
(values, None) | ||
}; | ||
|
||
PrimitiveArray::new(fst.data_type().clone(), values.into(), validity) | ||
} | ||
|
||
fn unique<'a>(&'a self, range: Self::Range<'a>) -> Self { | ||
Self::unique_fold(self, [].iter(), range) | ||
} | ||
|
||
fn n_unique<'a>(&'a self, range: Self::Range<'a>) -> usize { | ||
let mut seen = 0u128; | ||
append_arr(self, &mut seen, range.clone()); | ||
seen.count_ones() as usize | ||
} | ||
|
||
fn n_unique_non_null<'a>(&'a self, range: Self::Range<'a>) -> usize { | ||
let mut seen = 0u128; | ||
append_arr(self, &mut seen, range.clone()); | ||
seen &= !SEEN_NONE_MASK; | ||
seen.count_ones() as usize | ||
} | ||
} |
Oops, something went wrong.