Skip to content

Commit

Permalink
depr(python): Deprecate default coalesce behavior of left join (#16532)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego authored May 28, 2024
1 parent ac8d61a commit 44b0771
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 18 deletions.
2 changes: 1 addition & 1 deletion py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6496,7 +6496,7 @@ def join(
│ 3 ┆ 8.0 ┆ c ┆ null ┆ null │
└──────┴──────┴──────┴───────┴───────────┘
>>> df.join(other_df, on="ham", how="left")
>>> df.join(other_df, on="ham", how="left", coalesce=True)
shape: (3, 4)
┌─────┬─────┬─────┬───────┐
│ foo ┆ bar ┆ ham ┆ apple │
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -6245,7 +6245,7 @@ def interpolate(self, method: InterpolationMethod = "linear") -> Self:
... ) # Interpolate from this to the new grid
>>> df_new_grid = pl.DataFrame({"grid_points": range(1, 11)})
>>> df_new_grid.join(
... df_original_grid, on="grid_points", how="left"
... df_original_grid, on="grid_points", how="left", coalesce=True
... ).with_columns(pl.col("values").interpolate())
shape: (10, 2)
┌─────────────┬────────┐
Expand Down
25 changes: 15 additions & 10 deletions py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3980,7 +3980,7 @@ def join(
│ null ┆ null ┆ null ┆ z ┆ d │
│ 3 ┆ 8.0 ┆ c ┆ null ┆ null │
└──────┴──────┴──────┴───────┴───────────┘
>>> lf.join(other_lf, on="ham", how="left").collect()
>>> lf.join(other_lf, on="ham", how="left", coalesce=True).collect()
shape: (3, 4)
┌─────┬─────┬─────┬───────┐
│ foo ┆ bar ┆ ham ┆ apple │
Expand Down Expand Up @@ -4021,8 +4021,21 @@ def join(
"Use of `how='outer'` should be replaced with `how='full'`.",
version="0.20.29",
)
elif how == "outer_coalesce":
coalesce = True
how = "full"
issue_deprecation_warning(
"Use of `how='outer_coalesce'` should be replaced with `how='full', coalesce=True`.",
version="0.20.29",
)
elif how == "left" and coalesce is None:
issue_deprecation_warning(
"The default coalesce behavior of left join will change to `False` in the next breaking release."
" Pass `coalesce=True` to keep the current behavior and silence this warning.",
version="0.20.30",
)

if how == "cross":
elif how == "cross":
return self._from_pyldf(
self._ldf.join(
other._ldf,
Expand All @@ -4048,14 +4061,6 @@ def join(
msg = "must specify `on` OR `left_on` and `right_on`"
raise ValueError(msg)

if how == "outer_coalesce":
coalesce = True
how = "full"
issue_deprecation_warning(
"Use of `how='outer_coalesce'` should be replaced with `how='full', coalesce=True`.",
version="0.20.29",
)

return self._from_pyldf(
self._ldf.join(
other._ldf,
Expand Down
4 changes: 3 additions & 1 deletion py-polars/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,9 @@ filterwarnings = [
# https://github.com/pola-rs/polars/issues/14466
"ignore:unclosed file.*:ResourceWarning",
"ignore:the 'pyxlsb' engine is deprecated.*:DeprecationWarning",
"ignore:Use of `how='outer(_coalesce)?'` should be replaced with `how='full'.*:DeprecationWarning",
# TODO: Remove when behavior is updated
# https://github.com/pola-rs/polars/issues/13441
"ignore:.*default coalesce behavior of left join.*:DeprecationWarning",
]
xfail_strict = true

Expand Down
6 changes: 4 additions & 2 deletions py-polars/tests/unit/datatypes/test_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,13 @@ def test_list_empty_group_by_result_3521() -> None:

# Calculate n_unique after dropping nulls
# This will panic on polars version 0.13.38 and 0.13.39
assert (
result = (
left.join(right, on="join_column", how="left")
.group_by("group_by_column")
.agg(pl.col("n_unique_column").drop_nulls())
).to_dict(as_series=False) == {"group_by_column": [1], "n_unique_column": [[]]}
)
expected = {"group_by_column": [1], "n_unique_column": [[]]}
assert result.to_dict(as_series=False) == expected


def test_list_fill_null() -> None:
Expand Down
13 changes: 10 additions & 3 deletions py-polars/tests/unit/operations/test_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -833,7 +833,7 @@ def test_full_outer_join_coalesce_different_names_13450() -> None:
}
)

out = df1.join(df2, left_on="L1", right_on="L3", how="outer_coalesce")
out = df1.join(df2, left_on="L1", right_on="L3", how="full", coalesce=True)
assert_frame_equal(out, expected)


Expand Down Expand Up @@ -993,7 +993,7 @@ def test_join_coalesce(how: JoinStrategy) -> None:
assert out.columns == ["a", "b", "c"]


@pytest.mark.parametrize("how", ["left", "inner", "full", "outer"])
@pytest.mark.parametrize("how", ["left", "inner", "full"])
def test_join_empties(how: JoinStrategy) -> None:
df1 = pl.DataFrame({"col1": [], "col2": [], "col3": []})
df2 = pl.DataFrame({"col2": [], "col4": [], "col5": []})
Expand All @@ -1006,4 +1006,11 @@ def test_join_raise_on_redundant_keys() -> None:
left = pl.DataFrame({"a": [1, 2, 3], "b": [3, 4, 5], "c": [5, 6, 7]})
right = pl.DataFrame({"a": [2, 3, 4], "c": [4, 5, 6]})
with pytest.raises(pl.InvalidOperationError, match="already joined on"):
left.join(right, on=["a", "a"], how="outer_coalesce")
left.join(right, on=["a", "a"], how="full", coalesce=True)


def test_left_join_coalesce_default_deprecation_message() -> None:
    """Left join without an explicit `coalesce` must emit the coalesce deprecation.

    The warning is matched on its message text so that an unrelated
    deprecation raised somewhere along the join path cannot satisfy the test.
    """
    left = pl.DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]})
    right = pl.DataFrame({"a": [2, 3, 4], "c": [4, 5, 6]})
    # `match` is applied with `re.search` against the warning message.
    with pytest.deprecated_call(match="default coalesce behavior of left join"):
        left.join(right, on="a", how="left")

0 comments on commit 44b0771

Please sign in to comment.