Skip to content

Commit

Permalink
fix typo and add solution for series + index
Browse files Browse the repository at this point in the history
Signed-off-by: Rehan Durrani <[email protected]>
  • Loading branch information
RehanSD committed Jun 7, 2022
1 parent 3ce19ea commit 0e4a521
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 20 deletions.
29 changes: 19 additions & 10 deletions modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,17 +425,21 @@ def groupby(
# groupby takes place.
drop = False
# Check that there is no ambiguity in the parameter we were given.
_by_check = by if is_list_like(by) else [by]
for k in _by_check:
if k in self.index.names and k in self.axes[axis]:
level_name, index_name = "an index", "a column"
if axis == 1:
level_name, index_name = index_name, level_name
raise ValueError(
f"{k} is both {level_name} level and {index_name} label, which is ambiguous."
)
# We don't need to check if `by` is a Series or Index, since those
# won't be referencing labels
if not isinstance(by, (pandas.Series, Series, pandas.Index)):
_by_check = by if is_list_like(by) else [by]
for k in _by_check:
if not isinstance(k, (Series, pandas.Series, pandas.Index)):
if k in self.index.names and k in self.axes[axis ^ 1]:
level_name, index_name = "an index", "a column"
if axis == 1:
level_name, index_name = index_name, level_name
raise ValueError(
f"{k} is both {level_name} level and {index_name} label, which is ambiguous."
)
if (
not isinstance(by, (pandas.Series, Series))
not isinstance(by, (pandas.Series, Series, pandas.Index))
and is_list_like(by)
and len(by) == 1
):
Expand All @@ -452,6 +456,11 @@ def groupby(
level, by = by, None
elif level is None:
by = self.__getitem__(by)._query_compiler
elif isinstance(by, (pandas.Series, pandas.Index)):
if isinstance(by, pandas.Index) and len(by) != len(self.axes[axis]):
raise ValueError("Grouper and axis must be same length")
idx_name = by.name
by = Series(by)._query_compiler
elif isinstance(by, Series):
drop = by._parent is self
idx_name = by.name
Expand Down
54 changes: 44 additions & 10 deletions modin/pandas/test/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2076,15 +2076,16 @@ def test_by_in_index_and_columns():
modin_df,
pandas_df,
lambda df: df.groupby(by="a").count(),
raising_exceptions=True,
check_exception_type=True,
)
eval_general(
modin_df,
pandas_df,
lambda df: df.groupby(by=["a", "b"]).count(),
raising_exceptions=True,
check_exception_type=True,
)
eval_general(
modin_df,
pandas_df,
lambda df: df.groupby(by=[df["b"], "a"]).count(),
)
pandas_df = pandas.DataFrame(
[[1, 2, 3]], index=pd.Index([(0, 1)], names=["a", "b"]), columns=["a", "b", "c"]
Expand All @@ -2094,20 +2095,53 @@ def test_by_in_index_and_columns():
modin_df,
pandas_df,
lambda df: df.groupby(by="a").count(),
raising_exceptions=True,
check_exception_type=True,
)
eval_general(
modin_df,
pandas_df,
lambda df: df.groupby(by=["a", "c"]).count(),
raising_exceptions=True,
check_exception_type=True,
)
eval_general(
modin_df,
pandas_df,
lambda df: df.groupby(by=["a", "b"]).count(),
raising_exceptions=True,
check_exception_type=True,
)


def test_by_series():
    """Check ``groupby`` when ``by`` is a Series holding label-like values.

    The frame under test has an index named "a" and a column also named "a".
    The Series passed as ``by`` contains the strings "a" / "b" as *values*,
    so the test confirms that both implementations treat those values as
    grouping keys and behave the same way (results are compared through
    ``eval_general``).
    """
    pandas_df = pandas.DataFrame(
        [[1, 2, 3]], index=pd.Index([0], name="a"), columns=["a", "b", "c"]
    )
    modin_df = from_pandas(pandas_df)

    def make_appropriately_typed_series(df, values=None):
        """Return a Series from either pandas or modin.pandas depending on type of `df`.

        Parameters
        ----------
        df : DataFrame
            Frame whose type decides which Series class is built.
        values : list-like, optional
            Values for the Series; defaults to ``["a"]``.
        """
        # A None sentinel avoids sharing one mutable list between calls.
        if values is None:
            values = ["a"]
        if isinstance(df, pd.DataFrame):
            return pd.Series(values)
        return pandas.Series(values)

    # Series with as many values as the frame has rows.
    eval_general(
        modin_df,
        pandas_df,
        lambda df: df.groupby(by=make_appropriately_typed_series(df)).count(),
    )
    # Series with more values than the frame has rows.
    eval_general(
        modin_df,
        pandas_df,
        lambda df: df.groupby(
            by=make_appropriately_typed_series(df, ["a", "b"])
        ).count(),
    )


def test_by_index():
    """Check ``groupby`` when ``by`` is an Index holding label-like values.

    The frame has an index named "a" and a column named "a"; the Index
    objects passed as ``by`` carry the strings "a" / "b" as values. Each
    case is run through ``eval_general`` against both frames.
    """
    reference_df = pandas.DataFrame(
        [[1, 2, 3]], index=pd.Index([0], name="a"), columns=["a", "b", "c"]
    )
    modin_df = from_pandas(reference_df)

    def count_grouped_by(labels):
        """Build the operation that groups a frame by ``pd.Index(labels)``."""
        return lambda df: df.groupby(by=pd.Index(labels)).count()

    # First an Index matching the single-row frame, then a longer one.
    for labels in (["a"], ["a", "b"]):
        eval_general(modin_df, reference_df, count_grouped_by(labels))

0 comments on commit 0e4a521

Please sign in to comment.