Skip to content

Commit

Permalink
PR comments + changes
Browse files Browse the repository at this point in the history
  • Loading branch information
arunjose696 committed Jun 10, 2024
1 parent 625da37 commit da02e5f
Show file tree
Hide file tree
Showing 10 changed files with 58 additions and 123 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -667,7 +667,7 @@ jobs:
- run: python -m pytest modin/tests/pandas/dataframe/test_reduce.py
- run: python -m pytest modin/tests/pandas/dataframe/test_udf.py
- run: python -m pytest modin/tests/pandas/dataframe/test_window.py
- uses: codecov/codecov-action@v2
- uses: ./.github/actions/upload-coverage

merge-coverage-artifacts:
needs: [test-internals, test-api-and-no-engine, test-defaults, test-all-unidist, test-all, test-experimental, test-sanity]
Expand Down
5 changes: 2 additions & 3 deletions modin/config/envvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -850,16 +850,15 @@ def _check_vars() -> None:

class NativeDataframeMode(EnvironmentVariable, type=str):
"""
The mode of execution used for handling dataframes in Modin
The mode of execution used for handling dataframes in Modin.
When the env variable is set to None the PandasQueryCompiler would be used
which would lead to modin executing dataframes in distributed fashion.
which would lead to Modin executing dataframes in distributed fashion.
When set to Native_pandas NativeQueryCompiler is used which handles the
dataframes without distributing, falling back to native pandas functions.
In future more execution modes can be added for single node execution so
keeping the parameter as string.
"""

varname = "MODIN_NATIVE_DATAFRAME_MODE"
Expand Down
11 changes: 0 additions & 11 deletions modin/core/storage_formats/base/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -4574,17 +4574,6 @@ def frame_has_dtypes_cache(self) -> bool:
"""
return self._modin_frame.has_dtypes_cache

def has_dtypes_cache(self) -> bool:
"""
Check if the dtypes cache exists for the underlying modin frame.
Returns
-------
bool
True for base class as dtypes are always present
"""
return True

def get_index_name(self, axis=0):
"""
Get index name of specified axis.
Expand Down
31 changes: 0 additions & 31 deletions modin/core/storage_formats/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,37 +365,6 @@ def copy(self):

# END Copy

def has_materialized_dtypes(self):
"""
Check if the undelying modin frame has materialized dtypes
Returns
-------
bool
True if if the undelying modin frame and False otherwise.
"""
return self._modin_frame.has_materialized_dtypes

def set_frame_dtypes_cache(self, dtypes):
"""
Set dtypes cache for the underlying modin frame.
Parameters
----------
dtypes : pandas.Series, ModinDtypes, callable or None
"""
self._modin_frame.set_dtypes_cache(dtypes)

def has_dtypes_cache(self) -> bool:
"""
Check if the dtypes cache exists for the underlying modin frame.
Returns
-------
bool
"""
return self._modin_frame.has_dtypes_cache

# Append/Concat/Join (Not Merge)
# The append/concat/join operations should ideally never trigger remote
# compute. These operations should only ever be manipulations of the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -294,8 +294,6 @@ def groupby_callable(
groupby_obj = df.groupby(by=by, axis=axis, **groupby_kwargs)
if agg_name == "agg":
if isinstance(agg_func, dict):
# Related to pandas issue when dict with list of funcs as value is passed in agg_func
# https://github.com/pandas-dev/pandas/issues/39103
agg_func = {

Check warning on line 297 in modin/experimental/core/storage_formats/pandas/native_query_compiler.py

View check run for this annotation

Codecov / codecov/patch

modin/experimental/core/storage_formats/pandas/native_query_compiler.py#L294-L297

Added lines #L294 - L297 were not covered by tests
k: v[0] if isinstance(v, list) and len(v) == 1 else v
for k, v in agg_func.items()
Expand All @@ -314,12 +312,6 @@ def groupby_callable(
return groupby_callable


def _take_2d(df, index=None, columns=None): # noqa: GL08
columns = columns if columns is not None else slice(None)
index = index if index is not None else slice(None)
return df.iloc[index, columns]


def _register_binary(op):
"""
Build function that apply specified binary method of the passed frame.
Expand All @@ -346,7 +338,6 @@ def binary_operator(df, other, **kwargs):

if squeeze_self:
df = df.squeeze(axis=1)

result = getattr(df, op)(other, **kwargs)
if (

Check warning on line 342 in modin/experimental/core/storage_formats/pandas/native_query_compiler.py

View check run for this annotation

Codecov / codecov/patch

modin/experimental/core/storage_formats/pandas/native_query_compiler.py#L339-L342

Added lines #L339 - L342 were not covered by tests
not isinstance(result, pandas.Series)
Expand Down Expand Up @@ -727,6 +718,7 @@ def setitem_bool(self, row_loc, col_loc, item):
pandas.DataFrame.update, in_place=True, df_copy=True
)
diff = _register_default_pandas(pandas.DataFrame.diff)
dot = _register_default_pandas(_register_binary("dot"))
drop = _register_default_pandas(_drop)
dropna = _register_default_pandas(pandas.DataFrame.dropna) # axis values switched?
dt_ceil = _register_default_pandas(_dt_func_map("ceil"))
Expand Down Expand Up @@ -859,7 +851,6 @@ def setitem_bool(self, row_loc, col_loc, item):
groupby_quantile = _register_default_pandas(_groupby("quantile"))
groupby_rank = _register_default_pandas(_groupby("rank"))
groupby_shift = _register_default_pandas(_groupby("shift"))
groupby_size = _register_default_pandas(_groupby("size"))
groupby_skew = _register_default_pandas(_groupby("skew"))
groupby_std = _register_default_pandas(_groupby("std"))
groupby_sum = _register_default_pandas(_groupby("sum"))
Expand Down Expand Up @@ -988,9 +979,6 @@ def setitem_bool(self, row_loc, col_loc, item):
rtruediv = _register_default_pandas(_register_binary("rtruediv"))
searchsorted = _register_default_pandas(pandas.Series.searchsorted, is_series=True)
sem = _register_default_pandas(pandas.DataFrame.sem)
series_update = _register_default_pandas(
pandas.Series.update, is_series=True, in_place=True, df_copy=True
)
series_view = _register_default_pandas(pandas.Series.view, is_series=True)
set_index_from_columns = _register_default_pandas(pandas.DataFrame.set_index)
setitem = _register_default_pandas(_setitem)
Expand Down Expand Up @@ -1054,7 +1042,6 @@ def setitem_bool(self, row_loc, col_loc, item):
sub = _register_default_pandas(_register_binary("sub"))
sum = _register_default_pandas(pandas.DataFrame.sum)
sum_min_count = _register_default_pandas(pandas.DataFrame.sum)
take_2d = _register_default_pandas(_take_2d)
to_datetime = _register_default_pandas(_to_datetime)
to_numeric = _register_default_pandas(_to_numeric)
to_numpy = _register_default_pandas(pandas.DataFrame.to_numpy, return_modin=False)
Expand Down Expand Up @@ -1094,24 +1081,14 @@ def describe(self, percentiles: np.ndarray):
include="all",
)

def dot(self, other, squeeze_self=None, squeeze_other=None):
other = try_cast_to_pandas(other)
if squeeze_other:
other = other.squeeze()
if squeeze_self:
result = self._modin_frame.squeeze(axis=1).dot(other)
else:
result = self._modin_frame.dot(other)
if isinstance(result, pandas.Series):
if result.name is None:
result.name = "__reduced__"
result = result.to_frame()
if is_list_like(result):
result = pandas.DataFrame(result)
else:
result = pandas.DataFrame([result])

return self.__constructor__(result)
def series_update(self, other, **kwargs):
return _register_default_pandas(_register_binary("update"), in_place=True)(

Check warning on line 1085 in modin/experimental/core/storage_formats/pandas/native_query_compiler.py

View check run for this annotation

Codecov / codecov/patch

modin/experimental/core/storage_formats/pandas/native_query_compiler.py#L1085

Added line #L1085 was not covered by tests
self,
other=other,
squeeze_self=True,
squeeze_other=True,
**kwargs,
)

def expanding_cov(
self,
Expand All @@ -1134,8 +1111,6 @@ def expanding_cov(
else other.to_pandas()
)
)
# expanding_rank = _register_default_pandas(_register_expanding(pandas.core.window.expanding.Expanding.rank))

return _register_default_pandas(

Check warning on line 1114 in modin/experimental/core/storage_formats/pandas/native_query_compiler.py

View check run for this annotation

Codecov / codecov/patch

modin/experimental/core/storage_formats/pandas/native_query_compiler.py#L1114

Added line #L1114 was not covered by tests
_register_expanding(pandas.core.window.expanding.Expanding.cov)
)(
Expand Down Expand Up @@ -1185,6 +1160,31 @@ def expanding_corr(
**kwargs,
)

def groupby_size(
self,
by,
axis,
groupby_kwargs,
agg_args,
agg_kwargs,
drop=False,
):
result = _register_default_pandas(_groupby("size"))(

Check warning on line 1172 in modin/experimental/core/storage_formats/pandas/native_query_compiler.py

View check run for this annotation

Codecov / codecov/patch

modin/experimental/core/storage_formats/pandas/native_query_compiler.py#L1172

Added line #L1172 was not covered by tests
self,
by=by,
axis=axis,
groupby_kwargs=groupby_kwargs,
agg_args=agg_args,
agg_kwargs=agg_kwargs,
drop=drop,
method="size",
)
if not groupby_kwargs.get("as_index", False):

Check warning on line 1182 in modin/experimental/core/storage_formats/pandas/native_query_compiler.py

View check run for this annotation

Codecov / codecov/patch

modin/experimental/core/storage_formats/pandas/native_query_compiler.py#L1182

Added line #L1182 was not covered by tests
# Renaming 'MODIN_UNNAMED_SERIES_LABEL' to a proper name

result.columns = result.columns[:-1].append(pandas.Index(["size"]))
return result

Check warning on line 1186 in modin/experimental/core/storage_formats/pandas/native_query_compiler.py

View check run for this annotation

Codecov / codecov/patch

modin/experimental/core/storage_formats/pandas/native_query_compiler.py#L1185-L1186

Added lines #L1185 - L1186 were not covered by tests

def get_axis(self, axis):
return self._modin_frame.index if axis == 0 else self._modin_frame.columns

Check warning on line 1189 in modin/experimental/core/storage_formats/pandas/native_query_compiler.py

View check run for this annotation

Codecov / codecov/patch

modin/experimental/core/storage_formats/pandas/native_query_compiler.py#L1189

Added line #L1189 was not covered by tests

Expand Down
1 change: 1 addition & 0 deletions modin/pandas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ def __init__(
name = MODIN_UNNAMED_SERIES_LABEL
if isinstance(data, pandas.Series) and data.name is not None:
name = data.name

query_compiler = from_pandas(
pandas.DataFrame(
pandas.Series(
Expand Down
25 changes: 4 additions & 21 deletions modin/tests/pandas/dataframe/test_default.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import contextlib
import io
import warnings

Expand Down Expand Up @@ -88,11 +87,7 @@
)
def test_ops_defaulting_to_pandas(op, make_args):
modin_df = pd.DataFrame(test_data_diff_dtype).drop(["str_col", "bool_col"], axis=1)
with (
warns_that_defaulting_to_pandas()
if not NativeDataframeMode.get()
else contextlib.nullcontext()
):
with warns_that_defaulting_to_pandas():
operation = getattr(modin_df, op)
if make_args is not None:
operation(**make_args(modin_df))
Expand All @@ -106,23 +101,15 @@ def test_ops_defaulting_to_pandas(op, make_args):

def test_style():
data = test_data_values[0]
with (
warns_that_defaulting_to_pandas()
if not NativeDataframeMode.get()
else contextlib.nullcontext()
):
with warns_that_defaulting_to_pandas():
pd.DataFrame(data).style


def test_to_timestamp():
idx = pd.date_range("1/1/2012", periods=5, freq="M")
df = pd.DataFrame(np.random.randint(0, 100, size=(len(idx), 4)), index=idx)

with (
warns_that_defaulting_to_pandas()
if not NativeDataframeMode.get()
else contextlib.nullcontext()
):
with warns_that_defaulting_to_pandas():
df.to_period().to_timestamp()


Expand Down Expand Up @@ -151,11 +138,7 @@ def test_asfreq():
index = pd.date_range("1/1/2000", periods=4, freq="min")
series = pd.Series([0.0, None, 2.0, 3.0], index=index)
df = pd.DataFrame({"s": series})
with (
warns_that_defaulting_to_pandas()
if not NativeDataframeMode.get()
else contextlib.nullcontext()
):
with warns_that_defaulting_to_pandas():
# We are only testing that this defaults to pandas, so we will just check for
# the warning
df.asfreq(freq="30S")
Expand Down
7 changes: 1 addition & 6 deletions modin/tests/pandas/dataframe/test_join_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import contextlib
import warnings

import matplotlib
Expand Down Expand Up @@ -611,11 +610,7 @@ def test_sort_multiindex(sort_remaining):
setattr(df, index, new_index)

for kwargs in [{"level": 0}, {"axis": 0}, {"axis": 1}]:
with (
warns_that_defaulting_to_pandas()
if not NativeDataframeMode.get()
else contextlib.nullcontext()
):
with warns_that_defaulting_to_pandas():
df_equals(
modin_df.sort_index(sort_remaining=sort_remaining, **kwargs),
pandas_df.sort_index(sort_remaining=sort_remaining, **kwargs),
Expand Down
16 changes: 3 additions & 13 deletions modin/tests/pandas/test_expanding.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,12 @@
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import contextlib

import numpy as np
import pandas
import pytest

import modin.pandas as pd
from modin.config import NativeDataframeMode, NPartitions
from modin.config import NPartitions
from modin.tests.test_utils import warns_that_defaulting_to_pandas

from .utils import (
Expand Down Expand Up @@ -69,11 +67,7 @@ def test_dataframe(data, min_periods, axis, method, kwargs):
@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("method", ["corr", "cov"])
def test_dataframe_corr_cov(data, min_periods, axis, method):
with (
warns_that_defaulting_to_pandas()
if not NativeDataframeMode.get()
else contextlib.nullcontext()
):
with warns_that_defaulting_to_pandas():
eval_general(
*create_test_dfs(data),
lambda df: getattr(
Expand All @@ -85,11 +79,7 @@ def test_dataframe_corr_cov(data, min_periods, axis, method):
@pytest.mark.parametrize("method", ["corr", "cov"])
def test_dataframe_corr_cov_with_self(method):
mdf, pdf = create_test_dfs(test_data["float_nan_data"])
with (
warns_that_defaulting_to_pandas()
if not NativeDataframeMode.get()
else contextlib.nullcontext()
):
with warns_that_defaulting_to_pandas():
eval_general(
mdf,
pdf,
Expand Down
15 changes: 12 additions & 3 deletions modin/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import contextlib
import json
from textwrap import dedent, indent
from unittest.mock import Mock, patch
Expand All @@ -21,6 +22,7 @@

import modin.pandas as pd
import modin.utils
from modin.config import NativeDataframeMode
from modin.error_message import ErrorMessage
from modin.tests.pandas.utils import create_test_dfs

Expand Down Expand Up @@ -263,10 +265,17 @@ def warns_that_defaulting_to_pandas(prefix=None, suffix=None):
Returns
-------
pytest.recwarn.WarningsChecker
A WarningsChecker checking for a UserWarning saying that Modin is
defaulting to Pandas.
pytest.recwarn.WarningsChecker or contextlib.nullcontext
If Modin is not operating in MODIN_NATIVE_DATAFRAME_MODE,a WarningsChecker
is returned whic will check for a UserWarning indicating that Modin
is defaulting to Pandas. If MODIN_NATIVE_DATAFRAME_MODE is set, a
nullcontext is returned to avoid warning about the default to Pandas,
as this occurs due user selecting of MODIN_NATIVE_DATAFRAME_MODE.
"""
if NativeDataframeMode.get():
return contextlib.nullcontext()

match = "[Dd]efaulting to pandas"
if prefix:
# Message may be separated by newlines
Expand Down

0 comments on commit da02e5f

Please sign in to comment.