Skip to content

Commit

Permalink
perf: improve join-asof materialization (pola-rs#14884)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Mar 6, 2024
1 parent ac0131a commit 72a6f89
Show file tree
Hide file tree
Showing 6 changed files with 145 additions and 112 deletions.
2 changes: 0 additions & 2 deletions crates/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ round_series = []
checked_arithmetic = []
is_first_distinct = []
is_last_distinct = []
asof_join = []
dot_product = []
row_hash = []
reinterpret = []
Expand Down Expand Up @@ -133,7 +132,6 @@ docs-selection = [
"checked_arithmetic",
"is_first_distinct",
"is_last_distinct",
"asof_join",
"dot_product",
"row_hash",
"rolling_window",
Expand Down
29 changes: 29 additions & 0 deletions crates/polars-core/src/utils/flatten.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use arrow::bitmap::MutableBitmap;
use polars_utils::sync::SyncPtr;

use super::*;
Expand Down Expand Up @@ -89,3 +90,31 @@ fn flatten_par_impl<T: Send + Sync + Copy>(
}
out
}

/// Flattens a slice of nullable-index buffers into a single
/// `PrimitiveArray<IdxSize>` that carries a validity bitmap.
///
/// Two independent tasks are run via `POOL.join`:
/// * the values of all buffers are concatenated with `flatten_par`;
/// * a validity bitmap is built that starts all-valid and has the bit
///   cleared at every position whose entry reports `is_null_idx()`.
///
/// The flattened values are then reinterpreted as `IdxSize` through
/// `bytemuck::cast_vec` and combined with the bitmap.
///
/// NOTE(review): this presumes `flatten_par` keeps the buffers in their
/// original order so that positions in the values and in the bitmap line
/// up — confirm against `flatten_par`.
pub fn flatten_nullable<S: AsRef<[NullableIdxSize]> + Send + Sync>(
    bufs: &[S],
) -> PrimitiveArray<IdxSize> {
    let flatten_values = || flatten_par(bufs);

    let build_validity = || {
        let total_len: usize = bufs.iter().map(|buf| buf.as_ref().len()).sum();

        // Start with every slot marked valid; only null slots are touched below.
        let mut validity = MutableBitmap::with_capacity(total_len);
        validity.extend_constant(total_len, true);

        bufs.iter()
            .flat_map(|buf| buf.as_ref().iter())
            .enumerate()
            .filter(|(_, id)| id.is_null_idx())
            .for_each(|(pos, _)| {
                // SAFETY: `pos` enumerates exactly the elements whose lengths
                // were summed into `total_len`, and the bitmap was extended
                // to `total_len` bits above, so `pos < total_len`.
                unsafe { validity.set_bit_unchecked(pos, false) };
            });

        validity.freeze()
    };

    let (values, validity) = POOL.join(flatten_values, build_validity);
    PrimitiveArray::from_vec(bytemuck::cast_vec::<_, IdxSize>(values))
        .with_validity(Some(validity))
}
2 changes: 1 addition & 1 deletion crates/polars-ops/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ top_k = []
pivot = ["polars-core/reinterpret"]
cross_join = []
chunked_ids = []
asof_join = ["polars-core/asof_join"]
asof_join = []
semi_anti_join = []
array_any_all = ["dtype-array"]
array_count = ["dtype-array"]
Expand Down
Loading

0 comments on commit 72a6f89

Please sign in to comment.