Skip to content

Commit

Permalink
fixed test_map_metadata by adding set_frame_dtypes_cache and has_mate…
Browse files Browse the repository at this point in the history
…rialized_dtypes to query compiler layer as in the code in multiple places the methods of private _modin_frame were used
  • Loading branch information
arunjose696 committed May 23, 2024
1 parent 3aa554c commit d406414
Show file tree
Hide file tree
Showing 15 changed files with 109 additions and 70 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ jobs:
unidist: ${{ steps.filter.outputs.unidist }}
engines: ${{ steps.engines.outputs.engines }}
experimental: ${{ steps.experimental.outputs.experimental }}
test-small-query-compiler: ${{ steps.filter.outputs.test-small-query-compiler }}
steps:
- uses: actions/checkout@v4
- uses: dorny/paths-filter@v3
Expand Down Expand Up @@ -636,8 +637,8 @@ jobs:
- run: python -m pytest modin/tests/experimental/spreadsheet/test_general.py

test-small-query-compiler:
needs: [changes, lint-flake8, lint-black, test-api, test-headers]
if: ${{ needs.changes.outputs.test-small-query-compiler == 'true' }}
needs: [ lint-flake8, execution-filter]
if: ${{ needs.execution-filter.outputs.test-small-query-compiler == 'true' }}
runs-on: ubuntu-latest
defaults:
run:
Expand Down
4 changes: 1 addition & 3 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,7 @@ def noop_decorator(*args, **kwargs):
if not hasattr(sys.modules["unidist"].core.base, "object_ref"):
sys.modules["unidist"].core.base.object_ref = type("object_ref", (object,), {})
if not hasattr(sys.modules["unidist"].core.base.object_ref, "ObjectRef"):
sys.modules["unidist"].core.base.object_ref.ObjectRef = type(
"ObjectRef", (object,), {}
)
sys.modules["unidist"].core.base.object_ref.ObjectRef = type("ObjectRef", (object,), {})

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
import modin
Expand Down
1 change: 0 additions & 1 deletion modin/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@
TestReadFromSqlServer,
TrackFileLeaks,
UsePlainPandasQueryCompiler,

)
from modin.config.pubsub import Parameter, ValueSource, context

Expand Down
7 changes: 2 additions & 5 deletions modin/core/dataframe/algebra/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,10 @@ def maybe_compute_dtypes_common_cast(
The dtypes of the operands are supposed to be known.
"""
if not trigger_computations:
if not first._modin_frame.has_materialized_dtypes:
if not first.has_materialized_dtypes():

Check warning on line 73 in modin/core/dataframe/algebra/binary.py

View check run for this annotation

Codecov / codecov/patch

modin/core/dataframe/algebra/binary.py#L73

Added line #L73 was not covered by tests
return None

if (
isinstance(second, type(first))
and not second._modin_frame.has_materialized_dtypes
):
if isinstance(second, type(first)) and not second.has_materialized_dtypes():

Check warning on line 76 in modin/core/dataframe/algebra/binary.py

View check run for this annotation

Codecov / codecov/patch

modin/core/dataframe/algebra/binary.py#L76

Added line #L76 was not covered by tests
return None

dtypes_first = first._modin_frame.dtypes.to_dict()
Expand Down
2 changes: 1 addition & 1 deletion modin/core/dataframe/algebra/tree_reduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def caller(
_axis = kwargs.get("axis") if axis is None else axis

new_dtypes = None
if compute_dtypes and query_compiler._modin_frame.has_materialized_dtypes:
if compute_dtypes and query_compiler.has_materialized_dtypes():

Check warning on line 70 in modin/core/dataframe/algebra/tree_reduce.py

View check run for this annotation

Codecov / codecov/patch

modin/core/dataframe/algebra/tree_reduce.py#L70

Added line #L70 was not covered by tests
new_dtypes = str(compute_dtypes(query_compiler.dtypes, *args, **kwargs))

return query_compiler.__constructor__(
Expand Down
21 changes: 21 additions & 0 deletions modin/core/storage_formats/base/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -4521,6 +4521,27 @@ def has_multiindex(self, axis=0):
assert axis == 1
return isinstance(self.columns, pandas.MultiIndex)

def has_materialized_dtypes(self):
"""
Check if the undelying modin frame has materialized dtypes
Returns
-------
bool
True if if the undelying modin frame and False otherwise.
"""
return True

def set_frame_dtypes_cache(self, dtypes):
"""
Set dtypes cache for the underlying modin frame.
Parameters
----------
dtypes : pandas.Series, ModinDtypes, callable or None
"""
pass

def get_index_name(self, axis=0):
"""
Get index name of specified axis.
Expand Down
2 changes: 1 addition & 1 deletion modin/core/storage_formats/pandas/aggregations.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def corr_method(
np.repeat(pandas.api.types.pandas_dtype("float"), len(new_columns)),
index=new_columns,
)
elif numeric_only and qc._modin_frame.has_materialized_dtypes:
elif numeric_only and qc.has_materialized_dtypes():

Check warning on line 74 in modin/core/storage_formats/pandas/aggregations.py

View check run for this annotation

Codecov / codecov/patch

modin/core/storage_formats/pandas/aggregations.py#L74

Added line #L74 was not covered by tests
old_dtypes = qc._modin_frame.dtypes

new_columns = old_dtypes[old_dtypes.map(is_numeric_dtype)].index
Expand Down
34 changes: 26 additions & 8 deletions modin/core/storage_formats/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,27 @@ def copy(self):

# END Copy

def has_materialized_dtypes(self):
"""
Check if the undelying modin frame has materialized dtypes
Returns
-------
bool
True if if the undelying modin frame and False otherwise.
"""
return self.has_materialized_dtypes()

Check warning on line 377 in modin/core/storage_formats/pandas/query_compiler.py

View check run for this annotation

Codecov / codecov/patch

modin/core/storage_formats/pandas/query_compiler.py#L377

Added line #L377 was not covered by tests

def set_frame_dtypes_cache(self, dtypes):
"""
Set dtypes cache for the underlying modin frame.
Parameters
----------
dtypes : pandas.Series, ModinDtypes, callable or None
"""
self.set_frame_dtypes_cache(dtypes)

Check warning on line 387 in modin/core/storage_formats/pandas/query_compiler.py

View check run for this annotation

Codecov / codecov/patch

modin/core/storage_formats/pandas/query_compiler.py#L387

Added line #L387 was not covered by tests

# Append/Concat/Join (Not Merge)
# The append/concat/join operations should ideally never trigger remote
# compute. These operations should only ever be manipulations of the
Expand Down Expand Up @@ -580,10 +601,7 @@ def reindex(self, axis, labels, **kwargs):
new_index, indexer = (self.index, None) if axis else self.index.reindex(labels)
new_columns, _ = self.columns.reindex(labels) if axis else (self.columns, None)
new_dtypes = None
if (
self._modin_frame.has_materialized_dtypes
and kwargs.get("method", None) is None
):
if self.has_materialized_dtypes() and kwargs.get("method", None) is None:

Check warning on line 604 in modin/core/storage_formats/pandas/query_compiler.py

View check run for this annotation

Codecov / codecov/patch

modin/core/storage_formats/pandas/query_compiler.py#L604

Added line #L604 was not covered by tests
# For columns, defining types is easier because we don't have to calculate the common
# type, since the entire column is filled. A simple `reindex` covers our needs.
# For rows, we can avoid calculating common types if we know that no new strings of
Expand Down Expand Up @@ -2650,7 +2668,7 @@ def fillna(df):
}
return df.fillna(value=func_dict, **kwargs)

if self._modin_frame.has_materialized_dtypes:
if self.has_materialized_dtypes():

Check warning on line 2671 in modin/core/storage_formats/pandas/query_compiler.py

View check run for this annotation

Codecov / codecov/patch

modin/core/storage_formats/pandas/query_compiler.py#L2671

Added line #L2671 was not covered by tests
dtypes = self._modin_frame.dtypes
value_dtypes = pandas.DataFrame(
{k: [v] for (k, v) in value.items()}
Expand All @@ -2663,7 +2681,7 @@ def fillna(df):
new_dtypes = dtypes

else:
if self._modin_frame.has_materialized_dtypes:
if self.has_materialized_dtypes():

Check warning on line 2684 in modin/core/storage_formats/pandas/query_compiler.py

View check run for this annotation

Codecov / codecov/patch

modin/core/storage_formats/pandas/query_compiler.py#L2684

Added line #L2684 was not covered by tests
dtype = pandas.Series(value).dtype
if all(
find_common_type([t, dtype]) == t for t in self._modin_frame.dtypes
Expand Down Expand Up @@ -2898,7 +2916,7 @@ def _set_item(df, row_loc): # pragma: no cover
df.loc[row_loc.squeeze(axis=1), col_loc] = item
return df

if self._modin_frame.has_materialized_dtypes and is_scalar(item):
if self.has_materialized_dtypes() and is_scalar(item):

Check warning on line 2919 in modin/core/storage_formats/pandas/query_compiler.py

View check run for this annotation

Codecov / codecov/patch

modin/core/storage_formats/pandas/query_compiler.py#L2919

Added line #L2919 was not covered by tests
new_dtypes = self.dtypes.copy()
old_dtypes = new_dtypes[col_loc]
item_type = extract_dtype(item)
Expand Down Expand Up @@ -4607,7 +4625,7 @@ def iloc_mut(partition, row_internal_indices, col_internal_indices, item):
# compute dtypes only if assigning entire columns
isinstance(row_numeric_index, slice)
and row_numeric_index == slice(None)
and self._modin_frame.has_materialized_dtypes
and self.has_materialized_dtypes()
):
new_dtypes = self.dtypes.copy()
new_dtypes.iloc[col_numeric_index] = broadcasted_dtypes.values
Expand Down
2 changes: 1 addition & 1 deletion modin/pandas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1075,7 +1075,7 @@ def astype(

if not copy:
# If the new types match the old ones, then copying can be avoided
if self._query_compiler._modin_frame.has_materialized_dtypes:
if self._query_compiler.has_materialized_dtypes():
frame_dtypes = self._query_compiler._modin_frame.dtypes
if isinstance(dtype, dict):
for col in dtype:
Expand Down
3 changes: 1 addition & 2 deletions modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,7 @@ def __init__(
# use this list to update inplace when there is a shallow copy.
self._siblings = []
if isinstance(data, (DataFrame, Series)):
query_compiler = data._query_compiler.copy()
self._query_compiler = query_compiler
self._query_compiler = data._query_compiler.copy()
if index is not None and any(i not in data.index for i in index):
raise NotImplementedError(
"Passing non-existant columns or index values to constructor not"
Expand Down
1 change: 0 additions & 1 deletion modin/pandas/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@
from pandas.io.parsers import TextFileReader
from pandas.io.parsers.readers import _c_parser_defaults


from modin.config import ModinNumpy, UsePlainPandasQueryCompiler
from modin.error_message import ErrorMessage
from modin.experimental.core.storage_formats.pandas.small_query_compiler import (
Expand Down
8 changes: 1 addition & 7 deletions modin/pandas/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,7 @@
from pandas._typing import AggFuncType, AggFuncTypeBase, AggFuncTypeDict, IndexLabel
from pandas.util._decorators import doc



from modin.experimental.core.storage_formats.pandas.small_query_compiler import (
SmallQueryCompiler,
)

from modin.utils import hashable
from modin.utils import hashable

_doc_binary_operation = """
Return {operation} of {left} and `{right}` (binary operator `{bin_op}`).
Expand Down
Loading

0 comments on commit d406414

Please sign in to comment.