Skip to content

Commit

Permalink
FEAT-#7308: Interoperability between query compilers (#7376)
Browse files Browse the repository at this point in the history
Co-authored-by: Anatoly Myachev <[email protected]>
Co-authored-by: Igoshev, Iaroslav <[email protected]>
Signed-off-by: arunjose696 <[email protected]>
  • Loading branch information
3 people committed Sep 2, 2024
1 parent 5f4d401 commit cf5d638
Show file tree
Hide file tree
Showing 15 changed files with 2,502 additions and 7 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,14 @@ jobs:
- run: python -m pytest modin/tests/pandas/dataframe/test_reduce.py
- run: python -m pytest modin/tests/pandas/dataframe/test_udf.py
- run: python -m pytest modin/tests/pandas/dataframe/test_window.py
- run: python -m pytest modin/tests/pandas/native_df_mode/test_binary.py
- run: python -m pytest modin/tests/pandas/native_df_mode/test_default.py
- run: python -m pytest modin/tests/pandas/native_df_mode/test_indexing.py
- run: python -m pytest modin/tests/pandas/native_df_mode/test_iter.py
- run: python -m pytest modin/tests/pandas/native_df_mode/test_join_sort.py
- run: python -m pytest modin/tests/pandas/native_df_mode/test_map_metadata.py
- run: python -m pytest modin/tests/pandas/native_df_mode/test_pickle.py
- run: python -m pytest modin/tests/pandas/native_df_mode/test_window.py
- uses: ./.github/actions/upload-coverage

merge-coverage-artifacts:
Expand Down
5 changes: 2 additions & 3 deletions modin/core/storage_formats/pandas/native_query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
import pandas
from pandas.core.dtypes.common import is_list_like, is_scalar

from modin.config.envvars import NativeDataframeMode
from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler
from modin.core.storage_formats.pandas.query_compiler_caster import QueryCompilerCaster
from modin.utils import (
MODIN_UNNAMED_SERIES_LABEL,
_inherit_docstrings,
Expand Down Expand Up @@ -565,7 +565,7 @@ def caller(query_compiler, *args, **kwargs):


@_inherit_docstrings(BaseQueryCompiler)
class NativeQueryCompiler(BaseQueryCompiler):
class NativeQueryCompiler(BaseQueryCompiler, QueryCompilerCaster):
"""
Query compiler for the pandas storage format.
Expand All @@ -585,7 +585,6 @@ class NativeQueryCompiler(BaseQueryCompiler):
_shape_hint: Optional[str]

def __init__(self, pandas_frame, shape_hint: Optional[str] = None):
assert NativeDataframeMode.get() == "Pandas"
if hasattr(pandas_frame, "_to_pandas"):
pandas_frame = pandas_frame._to_pandas()
if is_scalar(pandas_frame):
Expand Down
3 changes: 2 additions & 1 deletion modin/core/storage_formats/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
extract_dtype,
)
from modin.core.storage_formats import BaseQueryCompiler
from modin.core.storage_formats.pandas.query_compiler_caster import QueryCompilerCaster
from modin.error_message import ErrorMessage
from modin.logging import get_logger
from modin.utils import (
Expand Down Expand Up @@ -253,7 +254,7 @@ def caller(df, *args, **kwargs):


@_inherit_docstrings(BaseQueryCompiler)
class PandasQueryCompiler(BaseQueryCompiler):
class PandasQueryCompiler(BaseQueryCompiler, QueryCompilerCaster):
"""
Query compiler for the pandas storage format.
Expand Down
159 changes: 159 additions & 0 deletions modin/core/storage_formats/pandas/query_compiler_caster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership. The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Module contains ``QueryCompilerCaster`` class.
``QueryCompilerCaster`` is used for automatically casting query compiler
arguments to the type of the current query compiler for query compiler class functions.
This ensures compatibility between different query compiler classes.
"""

import functools
import inspect
from types import FunctionType, MethodType
from typing import Any, Dict, Tuple, TypeVar

from pandas.core.indexes.frozen import FrozenList

from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler

Fn = TypeVar("Fn", bound=Any)


class QueryCompilerCaster:
    """
    Mixin that adds query compiler argument casting to its subclasses.

    Deriving from this class is enough: the callables of the derived class are
    wrapped so that query compiler arguments are converted to the type of the
    query compiler the method is called on.
    """

    # ``__init_subclass__`` is implicitly a class method, so no decorator is needed.
    def __init_subclass__(
        cls,
        **kwargs: Dict,
    ) -> None:
        """
        Wrap the members of a newly created subclass with argument casting.

        Python calls this hook by itself each time a class inherits from
        ``QueryCompilerCaster``; the new class is handed to
        ``apply_argument_cast``.

        Parameters
        ----------
        **kwargs : dict
            Keyword arguments of the class statement, forwarded unchanged to
            the next ``__init_subclass__`` in the MRO.
        """
        super().__init_subclass__(**kwargs)
        apply_argument_cast(cls)


def cast_nested_args_to_current_qc_type(arguments, current_qc):
    """
    Cast every query compiler nested in `arguments` to the type of `current_qc`.

    Containers are walked recursively. Lists and dicts are updated in place and
    the very same object is returned. Tuples and ``FrozenList`` objects cannot be
    updated in place, so an object of the same type is rebuilt when (and only
    when) at least one of their items had to be replaced.

    Parameters
    ----------
    arguments : tuple, list, dict or FrozenList
        Positional or keyword arguments of a query compiler method.
        Any other object is returned untouched.
    current_qc : BaseQueryCompiler
        Query compiler whose type the other query compilers are converted to.

    Returns
    -------
    tuple, list, dict or FrozenList
        `arguments` with all query compilers casted to the type of `current_qc`.
    """
    immutable_types = (FrozenList, tuple)
    container_types = (list, dict) + immutable_types

    def cast_item(item):
        """Recurse into a container or convert a single foreign query compiler."""
        if isinstance(item, container_types):
            # The result must be stored by the caller: immutable containers
            # come back as new objects when something inside them was casted.
            return cast_nested_args_to_current_qc_type(item, current_qc)
        if not isinstance(item, BaseQueryCompiler):
            return item
        current_qc_type = type(current_qc)
        if isinstance(item, current_qc_type):
            return item
        # The conversion goes through a pandas object. The frame of the current
        # query compiler is handed over as ``data_cls``.
        # NOTE(review): this is the frame object rather than its class; confirm
        # that every ``from_pandas`` implementation accepts that.
        data_cls = current_qc._modin_frame
        return current_qc_type.from_pandas(item.to_pandas(), data_cls)

    # ``FrozenList`` derives from ``list``, so this check has to come first.
    if isinstance(arguments, immutable_types):
        items = [cast_item(item) for item in arguments]
        if all(new is old for new, old in zip(items, arguments)):
            # Nothing was replaced, no need to rebuild the container
            return arguments
        args_type = type(arguments)
        if hasattr(args_type, "_make"):
            # namedtuple constructors take the fields as separate arguments
            return args_type._make(items)
        return args_type(items)
    if isinstance(arguments, list):
        for i, item in enumerate(arguments):
            arguments[i] = cast_item(item)
    elif isinstance(arguments, dict):
        # Only values of existing keys are reassigned, which is safe while iterating
        for key, value in arguments.items():
            arguments[key] = cast_item(value)
    return arguments


def apply_argument_cast(obj: Fn) -> Fn:
    """
    Cast all arguments that are query compilers to the current query compiler.

    For a class, every function, class method and static method reachable from
    it is replaced on the class by a casting wrapper. For a ``classmethod`` or
    ``staticmethod`` object, the underlying function is wrapped and the same
    kind of descriptor is returned. Any other callable is wrapped directly.

    Parameters
    ----------
    obj : function, classmethod, staticmethod or type
        Object to add the argument casting to.

    Returns
    -------
    function, classmethod, staticmethod or type
        Returns decorated function which does argument casting; a class is
        modified in place and returned itself.
    """
    if isinstance(obj, type):
        all_attrs = dict(inspect.getmembers(obj))

        # ``inspect.getmembers`` goes through ``getattr`` and therefore returns
        # class and static methods already unwrapped; the attributes defined on
        # the class itself are taken from ``vars`` to keep the raw descriptors.
        all_attrs.update(vars(obj))
        # Only abstract base classes carry this attribute, hence the default
        all_attrs.pop("__abstractmethods__", None)

        for attr_name, attr_value in all_attrs.items():
            if isinstance(attr_value, MethodType) and attr_value.__self__ is obj:
                # An inherited class method shows up as a method bound to `obj`.
                # Wrap the underlying function and keep it a class method, so
                # it still receives the class it is actually called on.
                wrapped = classmethod(apply_argument_cast(attr_value.__func__))
            elif isinstance(
                attr_value, (FunctionType, MethodType, classmethod, staticmethod)
            ):
                wrapped = apply_argument_cast(attr_value)
            else:
                continue
            setattr(obj, attr_name, wrapped)
        return obj  # type: ignore [return-value]
    elif isinstance(obj, classmethod):
        return classmethod(apply_argument_cast(obj.__func__))  # type: ignore [return-value, arg-type]
    elif isinstance(obj, staticmethod):
        return staticmethod(apply_argument_cast(obj.__func__))

    @functools.wraps(obj)
    def cast_args(*args: Tuple, **kwargs: Dict) -> Any:
        """
        Add casting for query compiler arguments.

        Parameters
        ----------
        *args : tuple
            The function arguments.
        **kwargs : dict
            The function keyword arguments.

        Returns
        -------
        Any
            Whatever the wrapped callable returns.
        """
        # Static methods and keyword-only calls come without positional
        # arguments; there is no current query compiler to cast to then.
        if args and isinstance(args[0], BaseQueryCompiler):
            current_qc = args[0]
            kwargs = cast_nested_args_to_current_qc_type(kwargs, current_qc)
            args = cast_nested_args_to_current_qc_type(args, current_qc)
        return obj(*args, **kwargs)

    return cast_args
5 changes: 2 additions & 3 deletions modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2993,9 +2993,8 @@ def _create_or_update_from_compiler(
DataFrame or None
None if update was done, ``DataFrame`` otherwise.
"""
assert (
isinstance(new_query_compiler, type(self._query_compiler))
or type(new_query_compiler) in self._query_compiler.__class__.__bases__
assert isinstance(
new_query_compiler, self._query_compiler.__class__.__bases__
), "Invalid Query Compiler object: {}".format(type(new_query_compiler))
if not inplace:
return self.__constructor__(query_compiler=new_query_compiler)
Expand Down
12 changes: 12 additions & 0 deletions modin/tests/pandas/native_df_mode/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership. The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.
Loading

0 comments on commit cf5d638

Please sign in to comment.