diff --git a/modin/experimental/core/storage_formats/pandas/small_query_compiler.py b/modin/experimental/core/storage_formats/pandas/small_query_compiler.py index d6767478926..75f2130cea4 100644 --- a/modin/experimental/core/storage_formats/pandas/small_query_compiler.py +++ b/modin/experimental/core/storage_formats/pandas/small_query_compiler.py @@ -18,6 +18,8 @@ queries for small data and empty ``PandasDataFrame``. """ +from typing import Optional + import numpy as np import pandas from pandas.core.dtypes.common import is_list_like, is_scalar @@ -587,7 +589,11 @@ def caller(query_compiler, *args, **kwargs): args = try_cast_to_pandas(args, squeeze=squeeze_args) kwargs = try_cast_to_pandas(kwargs, squeeze=squeeze_kwargs) result = func(df, *args, **kwargs) + inplace_method = kwargs.get("inplace", False) + if in_place: + inplace_method = in_place + if inplace_method: result = df if not (return_modin or isinstance(result, (pandas.Series, pandas.DataFrame))): return result @@ -692,7 +698,6 @@ def setitem_bool(self, row_loc, col_loc, item): cumprod = _register_default_pandas(pandas.DataFrame.cumprod) cumsum = _register_default_pandas(pandas.DataFrame.cumsum) delitem = _register_default_pandas(_delitem) - describe = _register_default_pandas(pandas.DataFrame.describe) df_update = _register_default_pandas( pandas.DataFrame.update, in_place=True, df_copy=True ) @@ -855,7 +860,7 @@ def setitem_bool(self, row_loc, col_loc, item): ) isna = _register_default_pandas(pandas.DataFrame.isna) join = _register_default_pandas(pandas.DataFrame.join) - kurt = _register_default_pandas(pandas.DataFrame.kurt) + kurt = _register_default_pandas(pandas.DataFrame.kurt, return_modin=False) last_valid_index = _register_default_pandas( pandas.DataFrame.last_valid_index, return_modin=False ) @@ -866,7 +871,7 @@ def setitem_bool(self, row_loc, col_loc, item): max = _register_default_pandas(pandas.DataFrame.max) map = _register_default_pandas(pandas.DataFrame.map) mean = 
_register_default_pandas(pandas.DataFrame.mean, return_modin=False) - median = _register_default_pandas(pandas.DataFrame.median) + median = _register_default_pandas(pandas.DataFrame.median, return_modin=False) melt = _register_default_pandas(pandas.DataFrame.melt) memory_usage = _register_default_pandas(pandas.DataFrame.memory_usage) merge = _register_default_pandas(pandas.DataFrame.merge) @@ -964,7 +969,7 @@ def setitem_bool(self, row_loc, col_loc, item): series_view = _register_default_pandas(pandas.Series.view, is_series=True) set_index_from_columns = _register_default_pandas(pandas.DataFrame.set_index) setitem = _register_default_pandas(_setitem) - skew = _register_default_pandas(pandas.DataFrame.skew) + skew = _register_default_pandas(pandas.DataFrame.skew, return_modin=False) sort_index = _register_default_pandas(_sort_index) sort_columns_by_row_values = _register_default_pandas( lambda df, columns, **kwargs: df.sort_values(by=columns, axis=1, **kwargs) @@ -1057,6 +1062,13 @@ def clip(self, lower, upper, **kwargs): self, lower, upper, **kwargs ) + def describe(self, percentiles: np.ndarray): + return _register_default_pandas(pandas.DataFrame.describe)( + self, + percentiles=percentiles, + include="all", + ) + def dot(self, other, squeeze_self=None, squeeze_other=None): other = try_cast_to_pandas(other) if squeeze_other: @@ -1223,3 +1235,24 @@ def getitem_column_array(self, key, numeric=False, ignore_order=False): def is_series_like(self): return len(self._modin_frame.columns) == 1 or len(self._modin_frame.index) == 1 + + def support_materialization_in_worker_process(self) -> bool: + """ + Whether it's possible to call function `to_pandas` during the pickling process, at the moment of recreating the object. + + Returns + ------- + bool + """ + return True + + def get_pandas_backend(self) -> Optional[str]: + """ + Get backend stored in `_modin_frame`. + + Returns + ------- + str | None + Backend name. 
+ """ + return None diff --git a/modin/pandas/series.py b/modin/pandas/series.py index 749cf0f6a50..4df41168b5a 100644 --- a/modin/pandas/series.py +++ b/modin/pandas/series.py @@ -31,10 +31,7 @@ from pandas.io.formats.info import SeriesInfo from pandas.util._validators import validate_bool_kwarg -from modin.config import PersistentPickle, UsePlainPandasQueryCompiler -from modin.experimental.core.storage_formats.pandas.small_query_compiler import ( - PlainPandasQueryCompiler, -) +from modin.config import PersistentPickle from modin.logging import disable_logging from modin.pandas.io import from_pandas, to_pandas from modin.utils import ( @@ -147,32 +144,18 @@ def __init__( name = MODIN_UNNAMED_SERIES_LABEL if isinstance(data, pandas.Series) and data.name is not None: name = data.name - if UsePlainPandasQueryCompiler.get(): - query_compiler = PlainPandasQueryCompiler( - pandas.DataFrame( - pandas.Series( - data=data, - index=index, - dtype=dtype, - name=name, - copy=copy, - fastpath=fastpath, - ) + query_compiler = from_pandas( + pandas.DataFrame( + pandas.Series( + data=data, + index=index, + dtype=dtype, + name=name, + copy=copy, + fastpath=fastpath, ) ) - else: - query_compiler = from_pandas( - pandas.DataFrame( - pandas.Series( - data=data, - index=index, - dtype=dtype, - name=name, - copy=copy, - fastpath=fastpath, - ) - ) - )._query_compiler + )._query_compiler self._query_compiler = query_compiler.columnarize() if name is not None: self.name = name