diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index 48df124c32b..e1acc608cea 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -461,7 +461,7 @@ def __getitem__(self, key):
             )
         return SeriesGroupBy(
             self._df[key],
-            drop=False,
+            drop=True,
             **kwargs,
         )
 
@@ -1223,6 +1223,12 @@ def __init__(
         )
         self._squeeze = True
 
+    def _default_to_pandas(self, f, *args, **kwargs):
+        """Call the parent method; squeeze a non-Series result if `_squeeze` is set."""
+        result = super(SeriesGroupBy, self)._default_to_pandas(f, *args, **kwargs)
+        needs_squeeze = self._squeeze and not isinstance(result, Series)
+        return result.squeeze(axis=1) if needs_squeeze else result
+
     @property
     def ndim(self):
         """
@@ -1276,6 +1282,14 @@ def _iter(self):
                 for k in (sorted(group_ids) if self._sort else group_ids)
             )
 
+    def aggregate(self, func=None, *args, **kwargs):
+        self._squeeze = self._squeeze and not isinstance(func, (list, dict))
+        try:  # a list/dict `func` keeps squeezing off for the duration of the call
+            return super(SeriesGroupBy, self).aggregate(func, *args, **kwargs)
+        finally:
+            self._squeeze = True  # re-enable squeezing even if aggregation raises
+
+    agg = aggregate  # `agg` is an alias bound to the `aggregate` override above
 
 if IsExperimental.get():
     from modin.experimental.cloud.meta_magic import make_wrapped_class
diff --git a/modin/pandas/test/utils.py b/modin/pandas/test/utils.py
index 21565286309..9287607f8a8 100644
--- a/modin/pandas/test/utils.py
+++ b/modin/pandas/test/utils.py
@@ -615,6 +615,7 @@ def df_equals(df1, df2):
     elif isinstance(df1, np.recarray) and isinstance(df2, np.recarray):
         np.testing.assert_array_equal(df1, df2)
     else:
+        raise ValueError(f"type {type(df1)} not equal type {type(df2)}\n\n{df1}\n\n{df2}")
         if df1 != df2:
             np.testing.assert_almost_equal(df1, df2)