Skip to content

Commit

Permalink
renaming PlainPandasQueryCompiler to NativeDataframeMode
Browse files Browse the repository at this point in the history
  • Loading branch information
arunjose696 committed Jun 5, 2024
1 parent c486925 commit 1984aa1
Show file tree
Hide file tree
Showing 11 changed files with 118 additions and 74 deletions.
17 changes: 1 addition & 16 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,7 @@ jobs:
matrix:
python-version: ["3.9"]
env:
MODIN_USE_PLAIN_PANDAS_QUERY_COMPILER: "True"
MODIN_NATIVE_DATAFRAME_MODE: "Native_pandas"
name: test-small-query-compiler python ${{matrix.python-version}})
steps:
- uses: actions/checkout@v4
Expand All @@ -667,21 +667,6 @@ jobs:
- run: python -m pytest modin/tests/pandas/dataframe/test_reduce.py
- run: python -m pytest modin/tests/pandas/dataframe/test_udf.py
- run: python -m pytest modin/tests/pandas/dataframe/test_window.py
- run: python -m pytest modin/tests/pandas/extensions/test_dataframe_extensions.py
- run: python -m pytest modin/tests/pandas/extensions/test_pd_extensions.py
- run: python -m pytest modin/tests/pandas/extensions/test_series_extensions.py
- run: python -m pytest modin/tests/pandas/integrations/test_lazy_import.py
- run: python -m pytest modin/tests/pandas/internals/test_benchmark_mode.py
- run: python -m pytest modin/tests/pandas/internals/test_repartition.py
- run: python -m pytest modin/tests/pandas/test_api.py
- run: python -m pytest modin/tests/pandas/test_concat.py
- run: python -m pytest modin/tests/pandas/test_expanding.py
- run: python -m pytest modin/tests/pandas/test_general.py
- run: python -m pytest modin/tests/pandas/test_groupby.py
- run: python -m pytest modin/tests/pandas/test_io.py
- run: python -m pytest modin/tests/pandas/test_reshape.py
- run: python -m pytest modin/tests/pandas/test_rolling.py
- run: python -m pytest modin/tests/pandas/test_series.py
- uses: codecov/codecov-action@v2

merge-coverage-artifacts:
Expand Down
4 changes: 2 additions & 2 deletions modin/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
Memory,
MinPartitionSize,
ModinNumpy,
NativeDataframeMode,
NPartitions,
PersistentPickle,
ProgressBar,
Expand All @@ -51,7 +52,6 @@
TestReadFromPostgres,
TestReadFromSqlServer,
TrackFileLeaks,
UsePlainPandasQueryCompiler,
)
from modin.config.pubsub import Parameter, ValueSource, context

Expand All @@ -67,7 +67,7 @@
"CpuCount",
"GpuCount",
"Memory",
"UsePlainPandasQueryCompiler",
"NativeDataframeMode",
# Ray specific
"IsRayCluster",
"RayRedisAddress",
Expand Down
20 changes: 16 additions & 4 deletions modin/config/envvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -848,11 +848,23 @@ def _check_vars() -> None:
)


class UsePlainPandasQueryCompiler(EnvironmentVariable, type=bool):
"""Set to true to use implementation of PlainPandasQueryCompiler."""
class NativeDataframeMode(EnvironmentVariable, type=str):
    """
    Select the mode of execution used for handling dataframes in Modin.

    When the variable is unset (``None``), ``PandasQueryCompiler`` is used and
    Modin executes dataframes in a distributed fashion. When it is set to
    ``Native_pandas``, ``NativeQueryCompiler`` is used, which handles dataframes
    without distributing them by falling back to native pandas functions.

    The parameter is kept as a string so that more single-node execution modes
    can be added in the future.
    """

    # NOTE(review): ``NativeQueryCompiler.__init__`` compares ``get()`` against
    # "Native_Pandas" (capital "P") while the choice below is spelled
    # "Native_pandas" -- confirm the config layer normalizes the two to agree.
    varname = "MODIN_NATIVE_DATAFRAME_MODE"
    choices = ("Native_pandas",)
    default = None


_check_vars()
11 changes: 5 additions & 6 deletions modin/core/execution/dispatching/factories/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,10 @@
import pandas
from pandas.util._decorators import doc

from modin.config import UsePlainPandasQueryCompiler

from modin.config import NativeDataframeMode
from modin.core.io import BaseIO
from modin.experimental.core.storage_formats.pandas.small_query_compiler import (
PlainPandasQueryCompiler,
from modin.experimental.core.storage_formats.pandas.native_query_compiler import (
NativeQueryCompiler,
)

Check notice

Code scanning / CodeQL

Cyclic import Note

Import of module
modin.experimental.core.storage_formats.pandas.native_query_compiler
begins an import cycle.
from modin.utils import get_current_execution

Expand Down Expand Up @@ -173,9 +172,9 @@ def prepare(cls):
method="io.from_pandas",
)
def _from_pandas(cls, df):
if UsePlainPandasQueryCompiler.get():
if NativeDataframeMode.get():
df_copy = df.copy()
return PlainPandasQueryCompiler(df_copy)
return NativeQueryCompiler(df_copy)
return cls.io_cls.from_pandas(df)

@classmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
# governing permissions and limitations under the License.

"""
Module contains ``PlainPandasQueryCompiler`` class.
Module contains ``NativeQueryCompiler`` class.
``PlainPandasQueryCompiler`` is responsible for compiling efficient DataFrame algebra
``NativeQueryCompiler`` is responsible for compiling efficient DataFrame algebra
queries for small data and empty ``PandasDataFrame``.
"""

Expand All @@ -24,7 +24,7 @@
import pandas
from pandas.core.dtypes.common import is_list_like, is_scalar

from modin.config.envvars import UsePlainPandasQueryCompiler
from modin.config.envvars import NativeDataframeMode
from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler

Check notice

Code scanning / CodeQL

Cyclic import Note

Import of module
modin.core.storage_formats.base.query_compiler
begins an import cycle.
from modin.utils import (
MODIN_UNNAMED_SERIES_LABEL,
Expand Down Expand Up @@ -608,7 +608,7 @@ def caller(query_compiler, *args, **kwargs):


@_inherit_docstrings(BaseQueryCompiler)
class PlainPandasQueryCompiler(BaseQueryCompiler):
class NativeQueryCompiler(BaseQueryCompiler):
"""
Query compiler for the pandas storage format.
Expand All @@ -623,7 +623,7 @@ class PlainPandasQueryCompiler(BaseQueryCompiler):
"""

def __init__(self, pandas_frame):
assert UsePlainPandasQueryCompiler.get()
assert NativeDataframeMode.get() == "Native_Pandas"
if hasattr(pandas_frame, "_to_pandas"):
pandas_frame = pandas_frame._to_pandas()
if is_scalar(pandas_frame):
Expand All @@ -636,6 +636,59 @@ def __init__(self, pandas_frame):
def execute(self):
    # Intentionally a no-op: this method performs no work and returns None.
    pass

@property
def frame_has_materialized_dtypes(self) -> bool:
    """
    Check if the underlying dataframe has materialized dtypes.

    Returns
    -------
    bool
        Always ``True`` for this query compiler.
    """
    return True

def set_frame_dtypes_cache(self, dtypes):
    """
    Set dtypes cache for the underlying dataframe.

    This implementation is a no-op: `dtypes` is accepted and ignored.

    Parameters
    ----------
    dtypes : pandas.Series, ModinDtypes, callable or None
    """
    pass

def set_frame_index_cache(self, index):
    """
    Set index cache for the underlying dataframe.

    This implementation is a no-op: `index` is accepted and ignored.

    Parameters
    ----------
    index : sequence, callable or None
    """
    pass

@property
def frame_has_index_cache(self) -> bool:
    """
    Check if the index cache exists for the underlying dataframe.

    Returns
    -------
    bool
        Always ``True`` for this query compiler.
    """
    return True

@property
def frame_has_dtypes_cache(self) -> bool:
    """
    Check if the dtypes cache exists for the underlying dataframe.

    Returns
    -------
    bool
        Always ``True`` for this query compiler.
    """
    return True

def take_2d_positional(self, index=None, columns=None):
index = slice(None) if index is None else index
columns = slice(None) if columns is None else columns
Expand Down
6 changes: 3 additions & 3 deletions modin/tests/pandas/dataframe/test_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import pytest

import modin.pandas as pd
from modin.config import NPartitions, StorageFormat, UsePlainPandasQueryCompiler
from modin.config import NativeDataframeMode, NPartitions, StorageFormat
from modin.core.dataframe.pandas.partitioning.axis_partition import (
PandasDataframeAxisPartition,
)
Expand Down Expand Up @@ -211,8 +211,8 @@ def operation(df):
reason="Modin on this engine doesn't create virtual partitions.",
)
@pytest.mark.skipif(
UsePlainPandasQueryCompiler.get(),
reason="PlainPandasQueryCompiler does not contain partitions.",
NativeDataframeMode.get() is not None,
reason="NativeQueryCompiler does not contain partitions.",
)
@pytest.mark.parametrize(
"left_virtual,right_virtual", [(True, False), (False, True), (True, True)]
Expand Down
27 changes: 12 additions & 15 deletions modin/tests/pandas/dataframe/test_default.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from numpy.testing import assert_array_equal

import modin.pandas as pd
from modin.config import Engine, NPartitions, StorageFormat, UsePlainPandasQueryCompiler
from modin.config import Engine, NativeDataframeMode, NPartitions, StorageFormat
from modin.pandas.io import to_pandas
from modin.tests.pandas.utils import (
axis_keys,
Expand Down Expand Up @@ -90,7 +90,7 @@ def test_ops_defaulting_to_pandas(op, make_args):
modin_df = pd.DataFrame(test_data_diff_dtype).drop(["str_col", "bool_col"], axis=1)
with (
warns_that_defaulting_to_pandas()
if not UsePlainPandasQueryCompiler.get()
if not NativeDataframeMode.get()
else contextlib.nullcontext()
):
operation = getattr(modin_df, op)
Expand All @@ -108,7 +108,7 @@ def test_style():
data = test_data_values[0]
with (
warns_that_defaulting_to_pandas()
if not UsePlainPandasQueryCompiler.get()
if not NativeDataframeMode.get()
else contextlib.nullcontext()
):
pd.DataFrame(data).style
Expand All @@ -120,7 +120,7 @@ def test_to_timestamp():

with (
warns_that_defaulting_to_pandas()
if not UsePlainPandasQueryCompiler.get()
if not NativeDataframeMode.get()
else contextlib.nullcontext()
):
df.to_period().to_timestamp()
Expand All @@ -137,8 +137,8 @@ def test_to_numpy(data):


@pytest.mark.skipif(
UsePlainPandasQueryCompiler.get(),
reason="PlainPandasQueryCompiler does not contain partitions.",
NativeDataframeMode.get() is not None,
reason="NativeQueryCompiler does not contain partitions.",
)
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_partition_to_numpy(data):
Expand All @@ -153,7 +153,7 @@ def test_asfreq():
df = pd.DataFrame({"s": series})
with (
warns_that_defaulting_to_pandas()
if not UsePlainPandasQueryCompiler.get()
if not NativeDataframeMode.get()
else contextlib.nullcontext()
):
# We are only testing that this defaults to pandas, so we will just check for
Expand Down Expand Up @@ -315,7 +315,7 @@ def test_corr_min_periods(self, min_periods):
{"a": [1, np.nan, 3, 4, 5, 6], "b": [1, 2, 1, 4, 5, np.nan]}
)
modin_df = pd.concat([modin_df.iloc[:3], modin_df.iloc[3:]])
if not UsePlainPandasQueryCompiler.get():
if not NativeDataframeMode.get():
assert modin_df._query_compiler._modin_frame._partitions.shape == (2, 1)
eval_general(
modin_df, pandas_df, lambda df: df.corr(min_periods=min_periods)
Expand All @@ -335,8 +335,8 @@ def test_corr_non_numeric(self, numeric_only):
reason="doesn't make sense for non-partitioned executions",
)
@pytest.mark.skipif(
UsePlainPandasQueryCompiler.get(),
reason="PlainPandasQueryCompiler does not contain partitions.",
NativeDataframeMode.get() is not None,
reason="NativeQueryCompiler does not contain partitions.",
)
def test_corr_nans_in_different_partitions(self):
# NaN in the first partition
Expand Down Expand Up @@ -628,10 +628,7 @@ def test_pivot(data, index, columns, values, request):
or "default-one_column-several_columns_index" in request.node.callspec.id
or "default-one_column-one_column_index" in request.node.callspec.id
or (
(
current_execution in ("BaseOnPython",)
or UsePlainPandasQueryCompiler.get()
)
(current_execution in ("BaseOnPython",) or NativeDataframeMode.get())
and index is lib.no_default
)
):
Expand Down Expand Up @@ -1012,7 +1009,7 @@ def test_resampler_functions_with_arg(rule, axis, method_arg):
marks=pytest.mark.xfail(
condition=Engine.get() in ("Ray", "Unidist", "Dask", "Python")
and StorageFormat.get() != "Base"
and not UsePlainPandasQueryCompiler.get(),
and NativeDataframeMode.get() is None,
reason="https://github.com/modin-project/modin/issues/6399",
),
),
Expand Down
10 changes: 5 additions & 5 deletions modin/tests/pandas/dataframe/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from pandas._testing import ensure_clean

import modin.pandas as pd
from modin.config import MinPartitionSize, NPartitions, UsePlainPandasQueryCompiler
from modin.config import MinPartitionSize, NativeDataframeMode, NPartitions
from modin.pandas.indexing import is_range_like
from modin.pandas.testing import assert_index_equal
from modin.tests.pandas.utils import (
Expand Down Expand Up @@ -584,8 +584,8 @@ def test_loc_setting_single_categorical_column():


@pytest.mark.skipif(
UsePlainPandasQueryCompiler.get(),
reason="PlainPandasQueryCompiler does not currently support IO functions.",
NativeDataframeMode.get() is not None,
reason="NativeQueryCompiler does not currently support IO functions.",
)
def test_loc_multi_index():
modin_df = pd.read_csv(
Expand Down Expand Up @@ -2241,8 +2241,8 @@ def test___setitem__partitions_aligning():


@pytest.mark.skipif(
UsePlainPandasQueryCompiler.get(),
reason="PlainPandasQueryCompiler does not currently support IO functions.",
NativeDataframeMode.get() is not None,
reason="NativeQueryCompiler does not currently support IO functions.",
)
def test___setitem__with_mismatched_partitions():
with ensure_clean(".csv") as fname:
Expand Down
Loading

0 comments on commit 1984aa1

Please sign in to comment.