Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FIX-#7381: Fix Series binary operators ignoring fill_value #7394

Merged
merged 9 commits into from
Sep 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions modin/core/storage_formats/base/doc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,14 @@ def doc_binary_method(operation, sign, self_on_right=False, op_type="arithmetic"
fill_value : float or None
Value to fill missing elements during frame alignment.
""",
"series_comparison": """
level : int or label
In case of MultiIndex match index values on the passed level.
fill_value : float or None
Value to fill missing elements during frame alignment.
axis : {{0, 1}}
Unused. Parameter needed for compatibility with DataFrame.
""",
}

verbose_substitution = (
Expand Down
76 changes: 76 additions & 0 deletions modin/core/storage_formats/base/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,18 @@ def combine_first(self, other, **kwargs): # noqa: PR02
def eq(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.DataFrame.eq)(self, other=other, **kwargs)

@doc_utils.doc_binary_method(
operation="equality comparison", sign="==", op_type="series_comparison"
)
def series_eq(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.Series.eq)(
self,
other=other,
squeeze_self=True,
squeeze_other=kwargs.pop("squeeze_other", False),
**kwargs,
)

@doc_utils.add_refer_to("DataFrame.equals")
def equals(self, other): # noqa: PR01, RT01
return BinaryDefault.register(pandas.DataFrame.equals)(self, other=other)
Expand Down Expand Up @@ -685,24 +697,76 @@ def divmod(self, other, **kwargs):
def ge(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.DataFrame.ge)(self, other=other, **kwargs)

@doc_utils.doc_binary_method(
operation="greater than or equal comparison",
sign=">=",
op_type="series_comparison",
)
def series_ge(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.Series.ge)(
self,
other=other,
squeeze_self=True,
squeeze_other=kwargs.pop("squeeze_other", False),
**kwargs,
)

@doc_utils.doc_binary_method(
operation="greater than comparison", sign=">", op_type="comparison"
)
def gt(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.DataFrame.gt)(self, other=other, **kwargs)

@doc_utils.doc_binary_method(
operation="greater than comparison", sign=">", op_type="series_comparison"
)
def series_gt(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.Series.gt)(
self,
other=other,
squeeze_self=True,
squeeze_other=kwargs.pop("squeeze_other", False),
**kwargs,
)

@doc_utils.doc_binary_method(
operation="less than or equal comparison", sign="<=", op_type="comparison"
)
def le(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.DataFrame.le)(self, other=other, **kwargs)

@doc_utils.doc_binary_method(
operation="less than or equal comparison",
sign="<=",
op_type="series_comparison",
)
def series_le(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.Series.le)(
self,
other=other,
squeeze_self=True,
squeeze_other=kwargs.pop("squeeze_other", False),
**kwargs,
)

@doc_utils.doc_binary_method(
operation="less than comparison", sign="<", op_type="comparison"
)
def lt(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.DataFrame.lt)(self, other=other, **kwargs)

@doc_utils.doc_binary_method(
operation="less than", sign="<", op_type="series_comparison"
)
def series_lt(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.Series.lt)(
self,
other=other,
squeeze_self=True,
squeeze_other=kwargs.pop("squeeze_other", False),
**kwargs,
)

@doc_utils.doc_binary_method(operation="modulo", sign="%")
def mod(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.DataFrame.mod)(self, other=other, **kwargs)
Expand Down Expand Up @@ -818,6 +882,18 @@ def dot(self, other, **kwargs): # noqa: PR02
def ne(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.DataFrame.ne)(self, other=other, **kwargs)

@doc_utils.doc_binary_method(
operation="not equal comparison", sign="!=", op_type="series_comparison"
)
def series_ne(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.Series.ne)(
self,
other=other,
squeeze_self=True,
squeeze_other=kwargs.pop("squeeze_other", False),
**kwargs,
)

@doc_utils.doc_binary_method(operation="exponential power", sign="**")
def pow(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.DataFrame.pow)(self, other=other, **kwargs)
Expand Down
40 changes: 40 additions & 0 deletions modin/core/storage_formats/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,26 @@ def caller(df, *args, **kwargs):
return caller


def _series_logical_binop(func):
"""
Build a callable function to pass to Binary.register for Series logical operators.

Parameters
----------
func : callable
Binary operator method of pandas.Series to be applied.

Returns
-------
callable
"""
return lambda x, y, **kwargs: func(
x.squeeze(axis=1),
y.squeeze(axis=1) if kwargs.pop("squeeze_other", False) else y,
**kwargs,
).to_frame()


@_inherit_docstrings(BaseQueryCompiler)
class PandasQueryCompiler(BaseQueryCompiler, QueryCompilerCaster):
"""
Expand Down Expand Up @@ -522,6 +542,26 @@ def to_numpy(self, **kwargs):
sort=False,
)

# Series logical operators take an additional fill_value flag that dataframe does not
series_eq = Binary.register(
_series_logical_binop(pandas.Series.eq), infer_dtypes="bool"
)
series_ge = Binary.register(
_series_logical_binop(pandas.Series.ge), infer_dtypes="bool"
)
series_gt = Binary.register(
_series_logical_binop(pandas.Series.gt), infer_dtypes="bool"
)
series_le = Binary.register(
_series_logical_binop(pandas.Series.le), infer_dtypes="bool"
)
series_lt = Binary.register(
_series_logical_binop(pandas.Series.lt), infer_dtypes="bool"
)
series_ne = Binary.register(
_series_logical_binop(pandas.Series.ne), infer_dtypes="bool"
)

# Needed for numpy API
_logical_and = Binary.register(
lambda df, other, *args, **kwargs: pandas.DataFrame(
Expand Down
11 changes: 11 additions & 0 deletions modin/pandas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,17 @@ def _binary_op(self, op, other, **kwargs) -> Self:
]
if op in exclude_list:
kwargs.pop("axis")
# Series logical operations take an additional fill_value argument that DF does not
series_specialize_list = [
"eq",
"ge",
"gt",
"le",
"lt",
"ne",
]
if not self._is_dataframe and op in series_specialize_list:
op = "series_" + op
new_query_compiler = getattr(self._query_compiler, op)(other, **kwargs)
return self._create_or_update_from_compiler(new_query_compiler)

Expand Down
Loading
Loading