diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index 48df124c32b..e1acc608cea 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -461,7 +461,7 @@ def __getitem__(self, key):
             )
         return SeriesGroupBy(
             self._df[key],
-            drop=False,
+            drop=True,
             **kwargs,
         )
 
@@ -1223,6 +1223,21 @@ def __init__(
         )
         self._squeeze = True
 
+    def _default_to_pandas(self, f, *args, **kwargs):
+        """
+        Run the parent fallback and squeeze a non-Series result along axis 1.
+
+        The result of the parent's ``_default_to_pandas`` is returned as-is when it
+        is already a ``Series`` or when ``self._squeeze`` is falsy; otherwise it is
+        returned after ``squeeze(axis=1)``.
+        """
+        intermediate = super(SeriesGroupBy, self)._default_to_pandas(
+            f, *args, **kwargs
+        )
+        if not isinstance(intermediate, Series) and self._squeeze:
+            return intermediate.squeeze(axis=1)
+        return intermediate
+
     @property
     def ndim(self):
         """
@@ -1276,6 +1291,23 @@ def _iter(self):
                 for k in (sorted(group_ids) if self._sort else group_ids)
             )
 
+    def aggregate(self, func=None, *args, **kwargs):
+        """
+        Aggregate via the parent class, disabling `_squeeze` for list/dict `func`.
+
+        When `func` is a list or a dict, ``self._squeeze`` is switched off for the
+        duration of the parent's ``aggregate`` call; the previous flag value is
+        restored afterwards, even if the call raises.
+        """
+        previous_squeeze = self._squeeze
+        if isinstance(func, (list, dict)):
+            self._squeeze = False
+        try:
+            return super(SeriesGroupBy, self).aggregate(func, *args, **kwargs)
+        finally:
+            self._squeeze = previous_squeeze
+
+    agg = aggregate
 
 if IsExperimental.get():
     from modin.experimental.cloud.meta_magic import make_wrapped_class