Fix typo and add handling for Series and Index `by` arguments in groupby

Signed-off-by: Rehan Durrani <[email protected]>
modin-project · Jun 7, 2022 · 0e4a521 · 0e4a521
1 parent 3ce19ea
commit 0e4a521
Show file tree

Hide file tree

Showing 2 changed files with 63 additions and 20 deletions.
diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
@@ -425,17 +425,21 @@ def groupby(
         # groupby takes place.
         drop = False
         # Check that there is no ambiguity in the parameter we were given.
-        _by_check = by if is_list_like(by) else [by]
-        for k in _by_check:
-            if k in self.index.names and k in self.axes[axis]:
-                level_name, index_name = "an index", "a column"
-                if axis == 1:
-                    level_name, index_name = index_name, level_name
-                raise ValueError(
-                    f"{k} is both {level_name} level and {index_name} label, which is ambiguous."
-                )
+        # We don't need to check if `by` is a Series or Index, since those
+        # won't be referencing labels
+        if not isinstance(by, (pandas.Series, Series, pandas.Index)):
+            _by_check = by if is_list_like(by) else [by]
+            for k in _by_check:
+                if not isinstance(k, (Series, pandas.Series, pandas.Index)):
+                    if k in self.index.names and k in self.axes[axis ^ 1]:
+                        level_name, index_name = "an index", "a column"
+                        if axis == 1:
+                            level_name, index_name = index_name, level_name
+                        raise ValueError(
+                            f"{k} is both {level_name} level and {index_name} label, which is ambiguous."
+                        )
         if (
-            not isinstance(by, (pandas.Series, Series))
+            not isinstance(by, (pandas.Series, Series, pandas.Index))
             and is_list_like(by)
             and len(by) == 1
         ):
@@ -452,6 +456,11 @@ def groupby(
                 level, by = by, None
             elif level is None:
                 by = self.__getitem__(by)._query_compiler
+        elif isinstance(by, (pandas.Series, pandas.Index)):
+            if isinstance(by, pandas.Index) and len(by) != len(self.axes[axis]):
+                raise ValueError("Grouper and axis must be same length")
+            idx_name = by.name
+            by = Series(by)._query_compiler
         elif isinstance(by, Series):
             drop = by._parent is self
             idx_name = by.name

diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
@@ -2076,15 +2076,16 @@ def test_by_in_index_and_columns():
         modin_df,
         pandas_df,
         lambda df: df.groupby(by="a").count(),
-        raising_exceptions=True,
-        check_exception_type=True,
     )
     eval_general(
         modin_df,
         pandas_df,
         lambda df: df.groupby(by=["a", "b"]).count(),
-        raising_exceptions=True,
-        check_exception_type=True,
+    )
+    eval_general(
+        modin_df,
+        pandas_df,
+        lambda df: df.groupby(by=[df["b"], "a"]).count(),
     )
     pandas_df = pandas.DataFrame(
         [[1, 2, 3]], index=pd.Index([(0, 1)], names=["a", "b"]), columns=["a", "b", "c"]
@@ -2094,20 +2095,53 @@ def test_by_in_index_and_columns():
         modin_df,
         pandas_df,
         lambda df: df.groupby(by="a").count(),
-        raising_exceptions=True,
-        check_exception_type=True,
     )
     eval_general(
         modin_df,
         pandas_df,
         lambda df: df.groupby(by=["a", "c"]).count(),
-        raising_exceptions=True,
-        check_exception_type=True,
     )
     eval_general(
         modin_df,
         pandas_df,
         lambda df: df.groupby(by=["a", "b"]).count(),
-        raising_exceptions=True,
-        check_exception_type=True,
+    )
+
+
+def test_by_series():
+    """Run ``groupby(by=<Series>).count()`` on a Modin frame and its pandas twin.
+
+    The frame has an index named "a" and a column labelled "a". The Series
+    passed as ``by`` holds the strings "a" / "b" as its data, once with a
+    single element and once with two elements. Both frames are handed to
+    ``eval_general`` together with the operation.
+    """
+    pandas_df = pandas.DataFrame(
+        [[1, 2, 3]], index=pd.Index([0], name="a"), columns=["a", "b", "c"]
+    )
+    modin_df = from_pandas(pandas_df)

+    def make_appropriately_typed_series(df, values=None):
+        """Return a Series from either pandas or modin.pandas depending on type of `df`."""
+        # A None sentinel replaces the former mutable list default, so no
+        # list object is shared between calls.
+        if values is None:
+            values = ["a"]
+        # NOTE(review): `pd` is presumably modin.pandas here - confirm against
+        # the module imports, which are outside this hunk.
+        if isinstance(df, pd.DataFrame):
+            return pd.Series(values)
+        return pandas.Series(values)

+    eval_general(
+        modin_df,
+        pandas_df,
+        lambda df: df.groupby(by=make_appropriately_typed_series(df)).count(),
+    )
+    eval_general(
+        modin_df,
+        pandas_df,
+        lambda df: df.groupby(
+            by=make_appropriately_typed_series(df, ["a", "b"])
+        ).count(),
+    )
+
+
+def test_by_index():
+    """Run ``groupby(by=<Index>).count()`` through ``eval_general``.
+
+    Two Index values are tried in order against a one-row frame whose index
+    is named "a": a single-element Index ``["a"]`` and a two-element Index
+    ``["a", "b"]``.
+    """
+    row_index = pd.Index([0], name="a")
+    pandas_df = pandas.DataFrame(
+        [[1, 2, 3]], index=row_index, columns=["a", "b", "c"]
+    )
+    modin_df = from_pandas(pandas_df)
+
+    def count_grouped_by(labels):
+        """Build the operation that groups a frame by ``pd.Index(labels)`` and counts."""
+        return lambda df: df.groupby(by=pd.Index(labels)).count()
+
+    for labels in (["a"], ["a", "b"]):
+        eval_general(modin_df, pandas_df, count_grouped_by(labels))