From 1e86dc9c187c67749eb6db4f78fbc16667bc789c Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Wed, 19 Jun 2024 13:30:45 +0200 Subject: [PATCH] feat: Allow (non-)coalescing in join_asof (#17066) --- py-polars/polars/dataframe/frame.py | 11 +++++++++++ py-polars/polars/lazyframe/frame.py | 11 +++++++++++ py-polars/src/lazyframe/mod.rs | 9 ++++++++- 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 10acc4d3a5bfb..8912ae73be84a 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -6216,6 +6216,7 @@ def join_asof( tolerance: str | int | float | timedelta | None = None, allow_parallel: bool = True, force_parallel: bool = False, + coalesce: bool | None = None, ) -> DataFrame: """ Perform an asof join. @@ -6292,6 +6293,15 @@ def join_asof( force_parallel Force the physical plan to evaluate the computation of both DataFrames up to the join in parallel. + coalesce + Coalescing behavior (merging of join columns). + + - None: -> join specific. + - True: -> Always coalesce join columns. + - False: -> Never coalesce join columns. + + Note that joining on any other expressions than `col` + will turn off coalescing. Examples -------- @@ -6509,6 +6519,7 @@ def join_asof( tolerance=tolerance, allow_parallel=allow_parallel, force_parallel=force_parallel, + coalesce=coalesce, ) .collect(_eager=True) ) diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 5bc86aba553bd..b520be9b1a80c 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -3733,6 +3733,7 @@ def join_asof( tolerance: str | int | float | timedelta | None = None, allow_parallel: bool = True, force_parallel: bool = False, + coalesce: bool | None = None, ) -> Self: """ Perform an asof join. 
@@ -3809,6 +3810,15 @@ def join_asof( force_parallel Force the physical plan to evaluate the computation of both DataFrames up to the join in parallel. + coalesce + Coalescing behavior (merging of join columns). + + - None: -> join specific. + - True: -> Always coalesce join columns. + - False: -> Never coalesce join columns. + + Note that joining on any other expressions than `col` + will turn off coalescing. Examples @@ -3899,6 +3909,7 @@ def join_asof( strategy, tolerance_num, tolerance_str, + coalesce=coalesce, ) ) diff --git a/py-polars/src/lazyframe/mod.rs b/py-polars/src/lazyframe/mod.rs index 9b0ea118c8060..9de61cd2b0226 100644 --- a/py-polars/src/lazyframe/mod.rs +++ b/py-polars/src/lazyframe/mod.rs @@ -892,7 +892,7 @@ impl PyLazyFrame { } #[cfg(feature = "asof_join")] - #[pyo3(signature = (other, left_on, right_on, left_by, right_by, allow_parallel, force_parallel, suffix, strategy, tolerance, tolerance_str))] + #[pyo3(signature = (other, left_on, right_on, left_by, right_by, allow_parallel, force_parallel, suffix, strategy, tolerance, tolerance_str, coalesce))] fn join_asof( &self, other: Self, @@ -906,7 +906,13 @@ impl PyLazyFrame { strategy: Wrap<AsofStrategy>, tolerance: Option<Wrap<AnyValue<'_>>>, tolerance_str: Option<String>, + coalesce: Option<bool>, ) -> PyResult<Self> { + let coalesce = match coalesce { + None => JoinCoalesce::JoinSpecific, + Some(true) => JoinCoalesce::CoalesceColumns, + Some(false) => JoinCoalesce::KeepColumns, + }; let ldf = self.ldf.clone(); let other = other.ldf; let left_on = left_on.inner; @@ -918,6 +924,7 @@ impl PyLazyFrame { .right_on([right_on]) .allow_parallel(allow_parallel) .force_parallel(force_parallel) + .coalesce(coalesce) .how(JoinType::AsOf(AsOfOptions { strategy: strategy.0, left_by: left_by.map(strings_to_smartstrings),