REFACTOR: Add mypy checks for modin.core.dataframe.algebra #5167

Open · wants to merge 3 commits into base: master
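The diff annotates modin.core.dataframe.algebra and its default-to-pandas builders; the remaining files only pick up formatting churn. The PR page does not show how the new checks are wired into CI, so below is a minimal sketch of running mypy over just the annotated package via its documented Python API; the --disallow-untyped-defs flag is an assumption of the sketch, not something the PR confirms.

# Hedged sketch: programmatic mypy run scoped to the annotated package.
from mypy import api

report, errors, exit_status = api.run(
    ["--disallow-untyped-defs", "modin/core/dataframe/algebra"]
)
print(report)
assert exit_status == 0, "mypy reported typing errors"

The command-line equivalent is `mypy --disallow-untyped-defs modin/core/dataframe/algebra` from the repository root.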
7 changes: 1 addition & 6 deletions asv_bench/benchmarks/benchmarks.py
@@ -43,12 +43,7 @@ class BaseTimeGroupBy:
     def setup(self, shape, ngroups=5, groupby_ncols=1):
         ngroups = translator_groupby_ngroups(ngroups, shape)
         self.df, self.groupby_columns = generate_dataframe(
-            "int",
-            *shape,
-            RAND_LOW,
-            RAND_HIGH,
-            groupby_ncols,
-            count_groups=ngroups,
+            "int", *shape, RAND_LOW, RAND_HIGH, groupby_ncols, count_groups=ngroups,
         )


7 changes: 1 addition & 6 deletions asv_bench/benchmarks/hdk/benchmarks.py
@@ -442,12 +442,7 @@ class BaseTimeGroupBy:
     def setup(self, shape, ngroups=5, groupby_ncols=1):
         ngroups = translator_groupby_ngroups(ngroups, shape)
         self.df, self.groupby_columns = generate_dataframe(
-            "int",
-            *shape,
-            RAND_LOW,
-            RAND_HIGH,
-            groupby_ncols,
-            count_groups=ngroups,
+            "int", *shape, RAND_LOW, RAND_HIGH, groupby_ncols, count_groups=ngroups,
         )
         # correct while we use 'col*' like name for non-groupby columns
         # and 'groupby_col*' like name for groupby columns
7 changes: 1 addition & 6 deletions asv_bench/benchmarks/hdk/io.py
@@ -54,10 +54,5 @@ def setup(self, cache, shape):
         self.filename, self.names, self.dtype = cache[file_id]

     def time_read_csv_names(self, cache, shape):
-        df = IMPL.read_csv(
-            self.filename,
-            names=self.names,
-            header=0,
-            dtype=self.dtype,
-        )
+        df = IMPL.read_csv(self.filename, names=self.names, header=0, dtype=self.dtype,)
         trigger_import(df)
6 changes: 1 addition & 5 deletions asv_bench/benchmarks/io/parquet.py
@@ -44,8 +44,4 @@ def setup(self, test_filenames, shape):
         self.shape_id = get_shape_id(shape)

     def time_read_parquet(self, test_filenames, shape):
-        execute(
-            IMPL.read_parquet(
-                test_filenames[self.shape_id],
-            )
-        )
+        execute(IMPL.read_parquet(test_filenames[self.shape_id],))
13 changes: 2 additions & 11 deletions asv_bench/benchmarks/utils/common.py
@@ -244,11 +244,7 @@ def gen_true_false_int_data(nrows, ncols, rand_low, rand_high):


 def gen_data(
-    data_type: str,
-    nrows: int,
-    ncols: int,
-    rand_low: int,
-    rand_high: int,
+    data_type: str, nrows: int, ncols: int, rand_low: int, rand_high: int,
 ) -> dict:
     """
     Generate data with caching.
@@ -501,12 +497,7 @@ def execute(
         return

     # compatibility with old Modin versions
-    all(
-        map(
-            lambda partition: partition.drain_call_queue() or True,
-            partitions,
-        )
-    )
+    all(map(lambda partition: partition.drain_call_queue() or True, partitions,))
     if ASV_USE_ENGINE == "ray":
         from ray import wait
5 changes: 1 addition & 4 deletions asv_bench/benchmarks/utils/data_shapes.py
@@ -156,10 +156,7 @@
             "hdk.TimeReadCsvNames",
         ],
     ),
-    (
-        HDK_BINARY_OP_DATA_SIZE[ASV_DATASET_SIZE],
-        ["hdk.TimeMerge", "hdk.TimeAppend"],
-    ),
+    (HDK_BINARY_OP_DATA_SIZE[ASV_DATASET_SIZE], ["hdk.TimeMerge", "hdk.TimeAppend"],),
     (
         HDK_SERIES_DATA_SIZE[ASV_DATASET_SIZE],
         ["hdk.TimeBinaryOpSeries", "hdk.TimeValueCountsSeries"],
23 changes: 5 additions & 18 deletions modin/_compat/core/py36/base_io.py
@@ -206,16 +206,9 @@ def read_json(
         returns=_doc_returns_qc,
     )
     def read_feather(
-        cls,
-        path,
-        columns=None,
-        use_threads=True,
+        cls, path, columns=None, use_threads=True,
     ):  # noqa: PR01
-        return cls._read_feather(
-            path=path,
-            columns=columns,
-            use_threads=use_threads,
-        )
+        return cls._read_feather(path=path, columns=columns, use_threads=use_threads,)

     @classmethod
     @_inherit_docstrings(pandas.read_stata, apilink="pandas.read_stata")
@@ -258,13 +251,10 @@ def read_stata(
         returns=_doc_returns_qc,
     )
     def read_pickle(
-        cls,
-        filepath_or_buffer,
-        compression="infer",
+        cls, filepath_or_buffer, compression="infer",
     ):  # noqa: PR01
         return cls._read_pickle(
-            filepath_or_buffer=filepath_or_buffer,
-            compression=compression,
+            filepath_or_buffer=filepath_or_buffer, compression=compression,
         )

     @classmethod
@@ -306,8 +296,5 @@ def to_pickle(
         protocol: int = 4,  # older pandas only supports protocol <= 4
     ):  # noqa: PR01, D200
         return cls._to_pickle(
-            obj,
-            filepath_or_buffer,
-            compression=compression,
-            protocol=protocol,
+            obj, filepath_or_buffer, compression=compression, protocol=protocol,
         )
8 changes: 2 additions & 6 deletions modin/_compat/pandas_api/classes.py
@@ -16,9 +16,7 @@
 from modin._compat import PandasCompatVersion

 if PandasCompatVersion.CURRENT == PandasCompatVersion.PY36:
-    from .py36 import (
-        Python36CompatibleBasePandasDataset as BasePandasDatasetCompat,
-    )
+    from .py36 import Python36CompatibleBasePandasDataset as BasePandasDatasetCompat
     from .py36 import Python36CompatibleDataFrame as DataFrameCompat
     from .py36 import Python36CompatibleSeries as SeriesCompat
     from .py36 import Python36CompatibleDataFrameGroupBy as DataFrameGroupByCompat
@@ -27,9 +25,7 @@
     from .py36 import Python36CompatibleRolling as RollingCompat
     from .py36 import Python36CompatibleResampler as ResamplerCompat
 elif PandasCompatVersion.CURRENT == PandasCompatVersion.LATEST:
-    from .latest import (
-        LatestCompatibleBasePandasDataset as BasePandasDatasetCompat,
-    )
+    from .latest import LatestCompatibleBasePandasDataset as BasePandasDatasetCompat
     from .latest import LatestCompatibleDataFrame as DataFrameCompat
     from .latest import LatestCompatibleSeries as SeriesCompat
     from .latest import LatestCompatibleDataFrameGroupBy as DataFrameGroupByCompat
22 changes: 3 additions & 19 deletions modin/_compat/pandas_api/latest/base.py
@@ -224,11 +224,7 @@ def rank(
         )

     def reindex(
-        self,
-        index=None,
-        columns=None,
-        copy=True,
-        **kwargs,
+        self, index=None, columns=None, copy=True, **kwargs,
     ):
         return self._reindex(index=index, columns=columns, copy=copy, **kwargs)

@@ -345,13 +341,7 @@ def set_axis(self, labels, axis=0, inplace=no_default, *, copy=no_default):
         )

     def sem(
-        self,
-        axis=None,
-        skipna=True,
-        level=None,
-        ddof=1,
-        numeric_only=None,
-        **kwargs,
+        self, axis=None, skipna=True, level=None, ddof=1, numeric_only=None, **kwargs,
     ):
         return self._sem(
             axis=axis,
@@ -402,13 +392,7 @@ def skew(
         )

     def std(
-        self,
-        axis=None,
-        skipna=True,
-        level=None,
-        ddof=1,
-        numeric_only=None,
-        **kwargs,
+        self, axis=None, skipna=True, level=None, ddof=1, numeric_only=None, **kwargs,
     ):
         return self._std(
             axis=axis,
13 changes: 2 additions & 11 deletions modin/_compat/pandas_api/py36/base.py
@@ -160,18 +160,9 @@ def rank(
         )

     def reindex(
-        self,
-        index=None,
-        columns=None,
-        copy=True,
-        **kwargs,
+        self, index=None, columns=None, copy=True, **kwargs,
     ):
-        return self._reindex(
-            index=index,
-            columns=columns,
-            copy=copy,
-            **kwargs,
-        )
+        return self._reindex(index=index, columns=columns, copy=copy, **kwargs,)

     def resample(
         self,
5 changes: 1 addition & 4 deletions modin/_compat/pandas_api/py36/io.py
@@ -155,10 +155,7 @@ def read_parquet(path, engine: str = "auto", columns=None, **kwargs):

     return DataFrame(
         query_compiler=FactoryDispatcher.read_parquet(
-            path=path,
-            engine=engine,
-            columns=columns,
-            **kwargs,
+            path=path, engine=engine, columns=columns, **kwargs,
         )
     )
7 changes: 1 addition & 6 deletions modin/_compat/pandas_api/py36/series.py
@@ -61,12 +61,7 @@ def idxmin(self, axis=0, skipna=True, *args, **kwargs):
         return self._idxmin(axis=axis, skipna=skipna)

     def kurt(
-        self,
-        axis=None,
-        skipna=None,
-        level=None,
-        numeric_only=None,
-        **kwargs,
+        self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs,
     ):  # noqa: PR01, RT01, D200
         if axis not in (None, 0, "index", "rows"):
             raise ValueError(f"No axis named {axis} for object type Series")
7 changes: 1 addition & 6 deletions modin/_compat/pandas_api/py36/utils.py
@@ -30,12 +30,7 @@ def create_stat_method(name):
     """

     def stat_method(
-        self,
-        axis=None,
-        skipna=None,
-        level=None,
-        numeric_only=None,
-        **kwargs,
+        self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs,
     ):
         return self._stat_operation(name, axis, skipna, level, numeric_only, **kwargs)
1 change: 0 additions & 1 deletion modin/_compat/pandas_api/py36/window.py
@@ -39,7 +39,6 @@ def __init__(
             axis,
         )

-
 @append_to_docstring("Compatibility layer for 'Python 3.6 pandas' for Rolling.")
 @_inherit_docstrings(pandas.core.window.rolling.Rolling)
 class Python36CompatibleRolling(BaseCompatibleRolling):
10 changes: 3 additions & 7 deletions modin/conftest.py
@@ -372,18 +372,15 @@ def TestReadCSVFixture():
     # each xdist worker spawned in separate process with separate namespace and dataset
     pytest.csvs_names = {file_id: get_unique_filename() for file_id in files_ids}
     # test_read_csv_col_handling, test_read_csv_parsing
-    _make_csv_file(filenames)(
-        filename=pytest.csvs_names["test_read_csv_regular"],
-    )
+    _make_csv_file(filenames)(filename=pytest.csvs_names["test_read_csv_regular"],)
     # test_read_csv_parsing
     _make_csv_file(filenames)(
         filename=pytest.csvs_names["test_read_csv_yes_no"],
         additional_col_values=["Yes", "true", "No", "false"],
     )
     # test_read_csv_col_handling
     _make_csv_file(filenames)(
-        filename=pytest.csvs_names["test_read_csv_blank_lines"],
-        add_blank_lines=True,
+        filename=pytest.csvs_names["test_read_csv_blank_lines"], add_blank_lines=True,
     )
     # test_read_csv_nans_handling
     _make_csv_file(filenames)(
@@ -393,8 +390,7 @@ def TestReadCSVFixture():
     )
     # test_read_csv_error_handling
     _make_csv_file(filenames)(
-        filename=pytest.csvs_names["test_read_csv_bad_lines"],
-        add_bad_lines=True,
+        filename=pytest.csvs_names["test_read_csv_bad_lines"], add_bad_lines=True,
     )

     yield
17 changes: 12 additions & 5 deletions modin/core/dataframe/algebra/binary.py
@@ -13,6 +13,7 @@

 """Module houses builder class for Binary operator."""

+from typing import Any, Callable
 import numpy as np
 import pandas

@@ -23,7 +24,9 @@ class Binary(Operator):
     """Builder class for Binary operator."""

     @classmethod
-    def register(cls, func, join_type="outer", labels="replace"):
+    def register(
+        cls, func: Callable, join_type: str = "outer", labels: str = "replace"
+    ) -> Callable:
         """
         Build template binary operator.

@@ -44,8 +47,13 @@ def caller(
         """

         def caller(
-            query_compiler, other, broadcast=False, *args, dtypes=None, **kwargs
-        ):
+            query_compiler: Any,
+            other: Any,
+            broadcast: bool = False,
+            *args: Any,
+            dtypes: Any | None = None,
+            **kwargs: Any
+        ) -> Any:
             """
             Apply binary `func` to passed operands.

@@ -112,8 +120,7 @@ def caller(
                 )
             else:
                 new_modin_frame = query_compiler._modin_frame.map(
-                    lambda df: func(df, other, *args, **kwargs),
-                    dtypes=dtypes,
+                    lambda df: func(df, other, *args, **kwargs), dtypes=dtypes,
                 )
             return query_compiler.__constructor__(new_modin_frame)
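A portability note on the annotated `caller` above: `dtypes: Any | None` is PEP 604 union syntax, and since annotations in a def are evaluated at definition time unless evaluation is deferred, the interpreter only accepts it on Python 3.10+. A sketch of the two portable spellings (my illustration, not part of the diff):

# With deferred evaluation (PEP 563, Python 3.7+) the `Any | None`
# spelling merely parses as a string, so it is safe pre-3.10;
# typing.Optional[Any] needs no deferral at all.
from __future__ import annotations

from typing import Any, Optional


def caller_pep604(dtypes: Any | None = None) -> Any:
    """Needs Python 3.10+, or the __future__ import above on 3.7-3.9."""
    return dtypes


def caller_portable(dtypes: Optional[Any] = None) -> Any:
    """Evaluates on any supported interpreter, no deferral required."""
    return dtypes

Since this same PR touches the modin/_compat/pandas_api/py36 modules, Optional[Any] may be the safer spelling here.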
5 changes: 3 additions & 2 deletions modin/core/dataframe/algebra/default2pandas/binary.py
@@ -13,6 +13,7 @@

 """Module houses default binary functions builder class."""

+from typing import Any, Callable
 from .default import DefaultMethod

 import pandas
@@ -23,7 +24,7 @@ class BinaryDefault(DefaultMethod):
     """Build default-to-pandas methods which executes binary functions."""

     @classmethod
-    def build_default_to_pandas(cls, fn, fn_name):
+    def build_default_to_pandas(cls, fn: Callable, fn_name: str) -> Callable:
         """
         Build function that do fallback to pandas for passed binary `fn`.

@@ -41,7 +42,7 @@ def build_default_to_pandas(cls, fn, fn_name):
         to the casted to pandas frame.
         """

-        def bin_ops_wrapper(df, other, *args, **kwargs):
+        def bin_ops_wrapper(df: Any, other: Any, *args: Any, **kwargs: Any) -> None:
             """Apply specified binary function to the passed operands."""
             squeeze_other = kwargs.pop("broadcast", False) or kwargs.pop(
                 "squeeze_other", False
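One further observation on `build_default_to_pandas`: both `fn: Callable` and the `-> Callable` return use the bare generic, which mypy reads as Callable[..., Any] and rejects once disallow_any_generics is enabled. A hypothetical tightened signature, with a stand-in body rather than the real fallback machinery:

from typing import Any, Callable


# Sketch only: explicit Callable parameters survive stricter mypy flags.
def build_default_to_pandas_typed(
    fn: Callable[..., Any], fn_name: str
) -> Callable[..., Any]:
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        return fn(*args, **kwargs)

    wrapper.__name__ = fn_name
    return wrapper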
5 changes: 4 additions & 1 deletion modin/core/dataframe/algebra/default2pandas/cat.py
@@ -14,13 +14,16 @@
 """Module houses default applied-on-category functions builder class."""

 from .series import SeriesDefault
+import pandas


 class CatDefault(SeriesDefault):
     """Builder for default-to-pandas methods which is executed under category accessor."""

     @classmethod
-    def frame_wrapper(cls, df):
+    def frame_wrapper(
+        cls, df: pandas.DataFrame
+    ) -> pandas.core.arrays.categorical.CategoricalAccessor:
         """
         Get category accessor of the passed frame.
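For context on the new return annotation: pandas.core.arrays.categorical.CategoricalAccessor is the type behind the `.cat` namespace of a categorical Series, which is what `frame_wrapper` produces from the passed frame. A small standalone illustration, not taken from the diff:

import pandas as pd
from pandas.core.arrays.categorical import CategoricalAccessor

s = pd.Series(["a", "b", "a"], dtype="category")
accessor = s.cat  # the .cat namespace is a CategoricalAccessor
assert isinstance(accessor, CategoricalAccessor)
print(accessor.categories)      # Index(['a', 'b'], dtype='object')
print(accessor.codes.tolist())  # [0, 1, 0]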