From 3d90bc83be20d9ab60ba0878543fdccde8ca6749 Mon Sep 17 00:00:00 2001
From: Mahesh Vashishtha
Date: Fri, 18 Nov 2022 03:12:29 -0800
Subject: [PATCH 01/13] FIX-#5234: Use query compiler str_repeat. (#5235)

Signed-off-by: mvashishtha
---
 modin/pandas/series_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modin/pandas/series_utils.py b/modin/pandas/series_utils.py
index ae01e884835..368f77684fd 100644
--- a/modin/pandas/series_utils.py
+++ b/modin/pandas/series_utils.py
@@ -331,7 +331,7 @@ def partition(self, sep=" ", expand=True):
         )
 
     def repeat(self, repeats):
-        return self._default_to_pandas(pandas.Series.str.repeat, repeats)
+        return Series(query_compiler=self._query_compiler.str_repeat(repeats))
 
     def rpartition(self, sep=" ", expand=True):
         if sep is not None and len(sep) == 0:

From c51ab405efec920dbb4baa2e2389409df04e8d43 Mon Sep 17 00:00:00 2001
From: Andrey
Date: Fri, 18 Nov 2022 17:27:27 +0100
Subject: [PATCH 02/13] FIX-#5187: Fixed RecursionError in OmnisciLaunchParameters.get() (#5199)

Signed-off-by: Andrey Pavlenko
---
 modin/config/envvars.py           | 16 ++++++++++++++--
 modin/config/test/test_envvars.py | 31 ++++++++++++++++++++++++++++++-
 2 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/modin/config/envvars.py b/modin/config/envvars.py
index 2c38f0f283a..e287bf41187 100644
--- a/modin/config/envvars.py
+++ b/modin/config/envvars.py
@@ -491,7 +491,7 @@ class PersistentPickle(EnvironmentVariable, type=bool):
 
 class HdkLaunchParameters(EnvironmentVariable, type=dict):
     """
-    Additional command line options for the OmniSci engine.
+    Additional command line options for the HDK engine.
 
    Please visit OmniSci documentation for the description of available parameters:
     https://docs.omnisci.com/installation-and-configuration/config-parameters#configuration-parameters-for-omniscidb
@@ -523,8 +523,20 @@ def get(cls) -> dict:
         if (
             OmnisciLaunchParameters.varname in os.environ
             and HdkLaunchParameters.varname not in os.environ
         ):
-            return OmnisciLaunchParameters.get()
+            return OmnisciLaunchParameters._get()
+        else:
+            return HdkLaunchParameters._get()
+
+    @classmethod
+    def _get(cls) -> dict:
+        """
+        Get the resulting command-line options.
+
+        Returns
+        -------
+        dict
+            Decoded and verified config value.
+        """
         custom_parameters = super().get()
         result = cls.default.copy()
         result.update(
diff --git a/modin/config/test/test_envvars.py b/modin/config/test/test_envvars.py
index b01864afd63..01ed1c9304f 100644
--- a/modin/config/test/test_envvars.py
+++ b/modin/config/test/test_envvars.py
@@ -13,7 +13,7 @@
 
 import os
 
 import pytest
-
+import modin.config as cfg
 from modin.config.envvars import EnvironmentVariable, _check_vars, ExactStr
 
 
@@ -60,3 +60,32 @@ def test_custom_set(make_custom_envvar, set_custom_envvar):
 def test_custom_help(make_custom_envvar):
     assert "MODIN_CUSTOM" in make_custom_envvar.get_help()
     assert "custom var" in make_custom_envvar.get_help()
+
+
+def test_hdk_envvar():
+    os.environ[
+        cfg.OmnisciLaunchParameters.varname
+    ] = "enable_union=2,enable_thrift_logs=3"
+    params = cfg.OmnisciLaunchParameters.get()
+    assert params["enable_union"] == 2
+    assert params["enable_thrift_logs"] == 3
+
+    params = cfg.HdkLaunchParameters.get()
+    assert params["enable_union"] == 2
+    assert params["enable_thrift_logs"] == 3
+
+    os.environ[cfg.HdkLaunchParameters.varname] = "enable_union=4,enable_thrift_logs=5"
+    del cfg.HdkLaunchParameters._value
+    params = cfg.HdkLaunchParameters.get()
+    assert params["enable_union"] == 4
+    assert params["enable_thrift_logs"] == 5
+
+    params = cfg.OmnisciLaunchParameters.get()
+    assert params["enable_union"] == 2
+    assert params["enable_thrift_logs"] == 3
+
+    del os.environ[cfg.OmnisciLaunchParameters.varname]
+    del cfg.OmnisciLaunchParameters._value
+    params = cfg.OmnisciLaunchParameters.get()
+    assert params["enable_union"] == 4
+    assert params["enable_thrift_logs"] == 5

From 7af1b9d2569fb74e4b934dfb0ceafad7be565ea1 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev
Date: Sat, 19 Nov 2022 10:55:17 +0100
Subject: [PATCH 03/13] FIX-#5240: fix dask[complete] syntax in conda environment files (#5241)

Signed-off-by: Myachev
---
 environment-dev.yml               | 2 +-
 requirements/environment-py36.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/environment-dev.yml b/environment-dev.yml
index d10d7b351ba..1a3623f4377 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -5,7 +5,7 @@ dependencies:
   - pandas==1.5.1
   - numpy>=1.18.5
   - pyarrow>=4.0.1
-  - dask[complete]>=2.22.0
+  - dask>=2.22.0
   - distributed>=2.22.0
   - fsspec
   - xarray
diff --git a/requirements/environment-py36.yml b/requirements/environment-py36.yml
index b5f88a4bf89..3303ad03dd6 100644
--- a/requirements/environment-py36.yml
+++ b/requirements/environment-py36.yml
@@ -5,7 +5,7 @@ dependencies:
   - pandas
   - numpy>=1.18.5
   - pyarrow>=4.0.1
-  - dask[complete]>=2.22.0,<2021.3.0
+  - dask>=2.22.0,<2021.3.0
   - distributed>=2.22.0,<2021.3.0
   - pickle5 # for dask to correctly serialize nested functions
   - fsspec
From 47794730fdb6abdc29b308aa3e9cdbe9cc19ee8d Mon Sep 17 00:00:00 2001
From: Mahesh Vashishtha
Date: Mon, 21 Nov 2022 00:30:43 -0800
Subject: [PATCH 04/13] TEST-#5123: Add CodeQL workflow for GitHub code scanning (#5222)

Signed-off-by: mvashishtha
Co-authored-by: LGTM Migrator
Co-authored-by: Karthik Velayutham
---
 .github/workflows/codeql.yml               | 40 ++++++++++++++++++++++
 .github/workflows/codeql/codeql-config.yml |  7 ++++
 modin/pandas/test/dataframe/test_binary.py |  2 ++
 modin/pandas/test/test_io.py               |  3 +-
 modin/pandas/test/test_series.py           | 25 ++++++++------
 modin/test/test_partition_api.py           |  2 +-
 6 files changed, 67 insertions(+), 12 deletions(-)
 create mode 100644 .github/workflows/codeql.yml
 create mode 100644 .github/workflows/codeql/codeql-config.yml

diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
new file mode 100644
index 00000000000..cf55eb2f384
--- /dev/null
+++ b/.github/workflows/codeql.yml
@@ -0,0 +1,40 @@
+name: "CodeQL"
+
+on:
+  push:
+    branches: [ "master" ]
+  pull_request:
+    branches: [ "master" ]
+
+jobs:
+  analyze:
+    name: Analyze
+    runs-on: ubuntu-latest
+    permissions:
+      actions: read
+      contents: read
+      security-events: write
+
+    strategy:
+      fail-fast: false
+      matrix:
+        language: [ python ]
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Initialize CodeQL
+        uses: github/codeql-action/init@v2
+        with:
+          languages: ${{ matrix.language }}
+          queries: +security-and-quality
+          config-file: ./.github/workflows/codeql/codeql-config.yml
+
+      - name: Autobuild
+        uses: github/codeql-action/autobuild@v2
+
+      - name: Perform CodeQL Analysis
+        uses: github/codeql-action/analyze@v2
+        with:
+          category: "/language:${{ matrix.language }}"
diff --git a/.github/workflows/codeql/codeql-config.yml b/.github/workflows/codeql/codeql-config.yml
new file mode 100644
index 00000000000..ea1991a4d82
--- /dev/null
+++ b/.github/workflows/codeql/codeql-config.yml
@@ -0,0 +1,7 @@
+name: "Modin CodeQL config"
+
+paths:
+  - modin/**
+paths-ignore:
+  - modin/experimental/cloud/** # TODO: fix module-level cyclic error, see #5228
+  - modin/experimental/core/execution/native/implementations/hdk_on_native/test/** # TODO: fix unhashable list error, see #5227
diff --git a/modin/pandas/test/dataframe/test_binary.py b/modin/pandas/test/dataframe/test_binary.py
index 23e0e90a4b3..c86d47e0a3f 100644
--- a/modin/pandas/test/dataframe/test_binary.py
+++ b/modin/pandas/test/dataframe/test_binary.py
@@ -280,6 +280,8 @@ def test_mismatched_row_partitions(is_idx_aligned, op_type, is_more_other_partit
     elif op_type == "ser_ser_different_name":
         modin_res = modin_df2.a / modin_df1.b
         pandas_res = pandas_df2.a / pandas_df1.b
+    else:
+        raise Exception(f"op_type: {op_type} not supported in test")
     df_equals(modin_res, pandas_res)
 
 
diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index 768a3ca4ebf..aa84983c21c 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -201,7 +201,8 @@ def eval_to_file(modin_obj, pandas_obj, fn, extension, **fn_kwargs):
                 last_exception = err
                 continue
             break
-    else:
+    # If we do have an exception that's valid let's raise it
+    if last_exception:
         raise last_exception
 
     getattr(pandas_obj, fn)(unique_filename_pandas, **fn_kwargs)
diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index 22e551d6a7a..6f086e6e6e6 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -1135,8 +1135,9 @@ def test_array(data):
 
 
 @pytest.mark.xfail(reason="Using pandas Series.")
-def test_between():
-    modin_series = create_test_series()
+@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
+def test_between(data):
+    modin_series = create_test_series(data)
 
     with pytest.raises(NotImplementedError):
         modin_series.between(None, None)
@@ -1577,8 +1578,9 @@ def test_matmul(data):
 
 
 @pytest.mark.xfail(reason="Using pandas Series.")
-def test_drop():
-    modin_series = create_test_series()
+@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
+def test_drop(data):
+    modin_series = create_test_series(data)
 
     with pytest.raises(NotImplementedError):
         modin_series.drop(None, None, None, None)
@@ -1879,8 +1881,9 @@ def test_fillna(data, reindex, limit):
 
 
 @pytest.mark.xfail(reason="Using pandas Series.")
-def test_filter():
-    modin_series = create_test_series()
+@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
+def test_filter(data):
+    modin_series = create_test_series(data)
 
     with pytest.raises(NotImplementedError):
         modin_series.filter(None, None, None)
@@ -2400,8 +2403,9 @@ def test_ne(data):
 
 
 @pytest.mark.xfail(reason="Using pandas Series.")
-def test_nlargest():
-    modin_series = create_test_series()
+@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
+def test_nlargest(data):
+    modin_series = create_test_series(data)
 
     with pytest.raises(NotImplementedError):
         modin_series.nlargest(None)
@@ -2877,8 +2881,9 @@ def test_reset_index(data, drop, name, inplace):
 
 
 @pytest.mark.xfail(reason="Using pandas Series.")
-def test_reshape():
-    modin_series = create_test_series()
+@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
+def test_reshape(data):
+    modin_series = create_test_series(data)
 
     with pytest.raises(NotImplementedError):
         modin_series.reshape(None)
diff --git a/modin/test/test_partition_api.py b/modin/test/test_partition_api.py
index 5f4d8a44bf6..0b5e7edf7fd 100644
--- a/modin/test/test_partition_api.py
+++ b/modin/test/test_partition_api.py
@@ -130,7 +130,7 @@ def test_from_partitions(axis, index, columns, row_lengths, column_widths):
         if axis == 0
         else [num_cols, num_cols]
     )
-
+    futures = []
     if Engine.get() == "Ray":
         if axis is None:
             futures = [[put_func(df1), put_func(df2)]]
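One change in PATCH 04 above replaces a `for ... else` in `eval_to_file` with an explicit check of the captured exception. The subtlety is that a loop's `else` suite runs only when the loop finishes without hitting `break`, which code scanners often flag as error-prone. A toy demonstration of that semantics (not Modin code):

    for n in [1, 3, 5]:
        if n % 2 == 0:
            break
    else:
        # Runs here because no `break` fired; with an even number present
        # this suite would be skipped entirely.
        print("no even number found")
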
From 5acf539fa4f205e383b91ca3baeed8cd94217392 Mon Sep 17 00:00:00 2001
From: Mahesh Vashishtha
Date: Mon, 21 Nov 2022 01:40:57 -0800
Subject: [PATCH 05/13] FIX-#5236: Allow binary operations with custom classes. (#5237)

Signed-off-by: mvashishtha
---
 modin/pandas/base.py                       | 68 +++++++++++-----------
 modin/pandas/test/dataframe/test_binary.py | 17 ++++++
 modin/pandas/test/test_series.py           | 11 ++++
 modin/pandas/test/utils.py                 | 11 ++++
 4 files changed, 72 insertions(+), 35 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index ce517932c7b..2771d105483 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -245,47 +245,42 @@ def _validate_other(
         TypeError
             If any validation checks fail.
         """
-        # We skip dtype checking if the other is a scalar.
-        if is_scalar(other):
+        if isinstance(other, BasePandasDataset):
+            return other._query_compiler
+        if not is_list_like(other):
+            # We skip dtype checking if the other is a scalar. Note that pandas
+            # is_scalar can be misleading as it is False for almost all objects,
+            # even when those objects should be treated as scalars. See e.g.
+            # https://github.com/modin-project/modin/issues/5236. Therefore, we
+            # detect scalars by checking that `other` is neither a list-like nor
+            # another BasePandasDataset.
             return other
         axis = self._get_axis_number(axis) if axis is not None else 1
         result = other
-        if isinstance(other, BasePandasDataset):
-            return other._query_compiler
-        elif is_list_like(other):
-            if axis == 0:
-                if len(other) != len(self._query_compiler.index):
-                    raise ValueError(
-                        f"Unable to coerce to Series, length must be {len(self._query_compiler.index)}: "
-                        + f"given {len(other)}"
-                    )
-            else:
-                if len(other) != len(self._query_compiler.columns):
-                    raise ValueError(
-                        f"Unable to coerce to Series, length must be {len(self._query_compiler.columns)}: "
-                        + f"given {len(other)}"
-                    )
-            if hasattr(other, "dtype"):
-                other_dtypes = [other.dtype] * len(other)
-            elif is_dict_like(other):
-                other_dtypes = [
-                    type(other[label])
-                    for label in self._query_compiler.get_axis(axis)
-                    # The binary operation is applied for intersection of axis labels
-                    # and dictionary keys. So filtering out extra keys.
-                    if label in other
-                ]
-            else:
-                other_dtypes = [type(x) for x in other]
+        if axis == 0:
+            if len(other) != len(self._query_compiler.index):
+                raise ValueError(
+                    f"Unable to coerce to Series, length must be {len(self._query_compiler.index)}: "
+                    + f"given {len(other)}"
+                )
         else:
-            other_dtypes = [
-                type(other)
-                for _ in range(
-                    len(self._query_compiler.index)
-                    if axis
-                    else len(self._query_compiler.columns)
+            if len(other) != len(self._query_compiler.columns):
+                raise ValueError(
+                    f"Unable to coerce to Series, length must be {len(self._query_compiler.columns)}: "
+                    + f"given {len(other)}"
                 )
+        if hasattr(other, "dtype"):
+            other_dtypes = [other.dtype] * len(other)
+        elif is_dict_like(other):
+            other_dtypes = [
+                type(other[label])
+                for label in self._query_compiler.get_axis(axis)
+                # The binary operation is applied for intersection of axis labels
+                # and dictionary keys. So filtering out extra keys.
+                if label in other
             ]
+        else:
+            other_dtypes = [type(x) for x in other]
         if compare_index:
             if not self.index.equals(other.index):
                 raise TypeError("Cannot perform operation with non-equal index")
@@ -304,6 +299,9 @@ def _validate_other(
             if label in other
         ]
 
+        # TODO(https://github.com/modin-project/modin/issues/5239):
+        # this spuriously rejects other that is a list including some
+        # custom type that can be added to self's elements.
         if not all(
             (is_numeric_dtype(self_dtype) and is_numeric_dtype(other_dtype))
             or (is_object_dtype(self_dtype) and is_object_dtype(other_dtype))
diff --git a/modin/pandas/test/dataframe/test_binary.py b/modin/pandas/test/dataframe/test_binary.py
index c86d47e0a3f..1334941895e 100644
--- a/modin/pandas/test/dataframe/test_binary.py
+++ b/modin/pandas/test/dataframe/test_binary.py
@@ -16,6 +16,7 @@
 import matplotlib
 
 import modin.pandas as pd
+from modin._compat import PandasCompatVersion
 from modin.core.dataframe.pandas.partitioning.axis_partition import (
     PandasDataframeAxisPartition,
 )
@@ -27,6 +28,7 @@
     test_data,
     create_test_dfs,
     default_to_pandas_ignore_string,
+    CustomIntegerForAddition,
 )
 from modin.config import Engine, NPartitions
 from modin.test.test_utils import warns_that_defaulting_to_pandas
@@ -336,3 +338,18 @@ def test_add_string_to_df():
     modin_df, pandas_df = create_test_dfs(["a", "b"])
     eval_general(modin_df, pandas_df, lambda df: "string" + df)
     eval_general(modin_df, pandas_df, lambda df: df + "string")
+
+
+@pytest.mark.xfail(
+    PandasCompatVersion.CURRENT == PandasCompatVersion.PY36,
+    reason="Seems to be a bug in pandas 1.1.5. pandas throws ValueError "
+    + "for this particular dataframe.",
+)
+def test_add_custom_class():
+    # see https://github.com/modin-project/modin/issues/5236
+    # Test that we can add any object that is addable to pandas object data
+    # via "+".
+    eval_general(
+        *create_test_dfs(test_data["int_data"]),
+        lambda df: df + CustomIntegerForAddition(4),
+    )
diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index 6f086e6e6e6..39bf634ef34 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -74,6 +74,7 @@
     test_data_large_categorical_series_keys,
     test_data_large_categorical_series_values,
     default_to_pandas_ignore_string,
+    CustomIntegerForAddition,
 )
 from modin.config import NPartitions
 
@@ -635,6 +636,16 @@ def test_add_suffix(data):
     )
 
 
+def test_add_custom_class():
+    # see https://github.com/modin-project/modin/issues/5236
+    # Test that we can add any object that is addable to pandas object data
+    # via "+".
+    eval_general(
+        *create_test_series(test_data["int_data"]),
+        lambda df: df + CustomIntegerForAddition(4),
+    )
+
+
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
 @pytest.mark.parametrize("func", agg_func_values, ids=agg_func_keys)
 def test_agg(data, func):
diff --git a/modin/pandas/test/utils.py b/modin/pandas/test/utils.py
index 21565286309..54774ed95ef 100644
--- a/modin/pandas/test/utils.py
+++ b/modin/pandas/test/utils.py
@@ -479,6 +479,17 @@
 time_parsing_csv_path = "modin/pandas/test/data/test_time_parsing.csv"
 
 
+class CustomIntegerForAddition:
+    def __init__(self, value: int):
+        self.value = value
+
+    def __add__(self, other):
+        return self.value + other
+
+    def __radd__(self, other):
+        return other + self.value
+
+
 def categories_equals(left, right):
     assert (left.ordered and right.ordered) or (not left.ordered and not right.ordered)
     assert_extension_array_equal(left, right)
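A rough usage sketch of what PATCH 05 above enables; it mirrors the new tests and assumes a working Modin installation with an engine configured. `CustomIntegerForAddition` is the test helper added to `modin/pandas/test/utils.py`:

    import modin.pandas as pd
    from modin.pandas.test.utils import CustomIntegerForAddition

    s = pd.Series([1, 2, 3])
    # Previously this path could fail because pandas' is_scalar() returns
    # False for arbitrary objects; now anything that is neither list-like
    # nor a Modin object is treated as a scalar, so __radd__ applies.
    print(s + CustomIntegerForAddition(4))
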
From 073dffc31a628978e4dd09b40a6a0ac648f20426 Mon Sep 17 00:00:00 2001
From: Billy2551
Date: Mon, 21 Nov 2022 09:19:01 -0800
Subject: [PATCH 06/13] FIX-#4636: allows `read_parquet` to detect column partitioning in non-local filesystems (#5192)

Signed-off-by: Bill Wang
---
 .../core/io/column_stores/parquet_dispatcher.py | 16 +++++++++++++---
 modin/core/storage_formats/pandas/parsers.py    |  2 +-
 modin/pandas/test/test_io.py                    | 15 +++++++++++++++
 3 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/modin/core/io/column_stores/parquet_dispatcher.py b/modin/core/io/column_stores/parquet_dispatcher.py
index 29f959ab54e..e89f15918bd 100644
--- a/modin/core/io/column_stores/parquet_dispatcher.py
+++ b/modin/core/io/column_stores/parquet_dispatcher.py
@@ -609,18 +609,28 @@ def _read(cls, path, engine, columns, **kwargs):
         )
         from modin.pandas.io import PQ_INDEX_REGEX
 
-        if isinstance(path, str) and os.path.isdir(path):
+        if isinstance(path, str):
+            if os.path.isdir(path):
+                path_generator = os.walk(path)
+            else:
+                storage_options = kwargs.get("storage_options")
+                if storage_options is not None:
+                    fs, fs_path = url_to_fs(path, **storage_options)
+                else:
+                    fs, fs_path = url_to_fs(path)
+                path_generator = fs.walk(fs_path)
             partitioned_columns = set()
             # We do a tree walk of the path directory because partitioned
             # parquet directories have a unique column at each directory level.
             # Thus, we can use os.walk(), which does a dfs search, to walk
             # through the different columns that the data is partitioned on
-            for (_, dir_names, files) in os.walk(path):
+            for (_, dir_names, files) in path_generator:
                 if dir_names:
                     partitioned_columns.add(dir_names[0].split("=")[0])
                 if files:
                     # Metadata files, git files, .DSStore
-                    if files[0][0] == ".":
+                    # TODO: fix conditional for column partitioning, see issue #4637
+                    if len(files[0]) > 0 and files[0][0] == ".":
                         continue
                     break
             partitioned_columns = list(partitioned_columns)
diff --git a/modin/core/storage_formats/pandas/parsers.py b/modin/core/storage_formats/pandas/parsers.py
index e08f4da8451..2dc4a12adb1 100644
--- a/modin/core/storage_formats/pandas/parsers.py
+++ b/modin/core/storage_formats/pandas/parsers.py
@@ -705,7 +705,7 @@ def _read_row_group_chunk(
         )
 
     def parse(files_for_parser, engine, **kwargs):
        columns = kwargs.get("columns", None)
-        storage_options = kwargs.pop("storage_options", {}) or {}
+        storage_options = kwargs.get("storage_options", {})
         chunks = []
         # `single_worker_read` just passes in a string path
         if isinstance(files_for_parser, str):
diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index aa84983c21c..aab496bbb63 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -1729,6 +1729,21 @@ def test_read_parquet_2462(self, engine):
 
         df_equals(test_df, read_df)
 
+    @pytest.mark.skipif(
+        PandasCompatVersion.CURRENT == PandasCompatVersion.PY36,
+        reason="storage_options not supported for older pandas",
+    )
+    def test_read_parquet_s3_with_column_partitioning(self, engine):
+        # This test case comes from
+        # https://github.com/modin-project/modin/issues/4636
+        dataset_url = "s3://modin-datasets/modin-bugs/modin_bug_5159_parquet/df.parquet"
+        eval_io(
+            fn_name="read_parquet",
+            path=dataset_url,
+            engine=engine,
+            storage_options={"anon": True},
+        )
+
 
 class TestJson:
     @pytest.mark.parametrize("lines", [False, True])
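PATCH 06 above makes partition-column discovery work on remote filesystems by walking the path through fsspec when it is not a local directory. A hedged sketch of that dispatch, assuming fsspec (plus a driver such as s3fs for s3:// URLs) is installed; `iter_partition_dirs` is an illustrative name, not Modin's API:

    import os
    from fsspec.core import url_to_fs

    def iter_partition_dirs(path, storage_options=None):
        if os.path.isdir(path):
            return os.walk(path)
        fs, fs_path = url_to_fs(path, **(storage_options or {}))
        # fs.walk yields (root, dirs, files) tuples like os.walk, so
        # partition directories such as "col=value" are detected the same way.
        return fs.walk(fs_path)
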
From e639418584d45d571126ceba29847e278bec943b Mon Sep 17 00:00:00 2001
From: Andrey
Date: Tue, 22 Nov 2022 19:05:03 +0100
Subject: [PATCH 07/13] FIX-#4100: Fall back to Pandas on row drop (#4937)

Co-authored-by: Iaroslav Igoshev
Signed-off-by: Andrey Pavlenko
---
 .../hdk_on_native/test/test_dataframe.py  | 22 +++++++++++++++++++
 .../storage_formats/hdk/query_compiler.py  |  3 ++-
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py b/modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py
index fb1f44fa164..cd604f2cd55 100644
--- a/modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py
+++ b/modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py
@@ -2197,6 +2197,28 @@ def drop_rename_concat(df1, df2, lib, **kwargs):
             force_arrow_execute=True,
         )
 
+    def test_drop_row(self):
+        def drop_row(df, **kwargs):
+            return df.drop(labels=1)
+
+        run_and_compare(
+            drop_row,
+            data=self.data1,
+            force_lazy=False,
+        )
+
+    def test_series_pop(self):
+        def pop(df, **kwargs):
+            col = df["a"]
+            col.pop(0)
+            return col
+
+        run_and_compare(
+            pop,
+            data=self.data1,
+            force_lazy=False,
+        )
+
     def test_empty_transform(self):
         def apply(df, **kwargs):
             return df + 1
diff --git a/modin/experimental/core/storage_formats/hdk/query_compiler.py b/modin/experimental/core/storage_formats/hdk/query_compiler.py
index 7ac735b4a02..cbf0d95b2b5 100644
--- a/modin/experimental/core/storage_formats/hdk/query_compiler.py
+++ b/modin/experimental/core/storage_formats/hdk/query_compiler.py
@@ -556,7 +556,8 @@ def concat(self, axis, other, **kwargs):
         return self.__constructor__(new_modin_frame)
 
     def drop(self, index=None, columns=None):
-        assert index is None, "Only column drop is supported"
+        if index is not None:
+            raise NotImplementedError("Row drop")
         return self.__constructor__(
             self._modin_frame.take_2d_labels_or_positional(
                 row_labels=index, col_labels=self.columns.drop(columns)

From 7b1ae5a17c81b248b40359bc95acd41d778cce6a Mon Sep 17 00:00:00 2001
From: Jonathan Shi
Date: Wed, 23 Nov 2022 00:15:12 -0800
Subject: [PATCH 08/13] FIX-#5138: df_categories_equals typo (#5250)

Signed-off-by: Jonathan Shi
---
 modin/pandas/test/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modin/pandas/test/utils.py b/modin/pandas/test/utils.py
index 54774ed95ef..412ebed29bd 100644
--- a/modin/pandas/test/utils.py
+++ b/modin/pandas/test/utils.py
@@ -500,7 +500,7 @@ def df_categories_equals(df1, df2):
     if isinstance(df1, pandas.CategoricalDtype):
         return categories_equals(df1, df2)
     elif isinstance(getattr(df1, "dtype"), pandas.CategoricalDtype) and isinstance(
-        getattr(df1, "dtype"), pandas.CategoricalDtype
+        getattr(df2, "dtype"), pandas.CategoricalDtype
     ):
         return categories_equals(df1.dtype, df2.dtype)
     else:

From a0e5a650b54cc14451dddc8edaf40731ecc212e9 Mon Sep 17 00:00:00 2001
From: Andrey
Date: Wed, 23 Nov 2022 14:49:47 +0100
Subject: [PATCH 09/13] FIX-#4859: Add support for PyArrow Dictionary Arrays to type mapping (#4864)

Signed-off-by: Andrey Pavlenko
---
 modin/core/dataframe/pandas/dataframe/dataframe.py |  2 ++
 .../hdk_on_native/test/test_dataframe.py           | 11 +++++++++++
 2 files changed, 13 insertions(+)

diff --git a/modin/core/dataframe/pandas/dataframe/dataframe.py b/modin/core/dataframe/pandas/dataframe/dataframe.py
index 6b7292a46a9..3f31942162f 100644
--- a/modin/core/dataframe/pandas/dataframe/dataframe.py
+++ b/modin/core/dataframe/pandas/dataframe/dataframe.py
@@ -3095,6 +3095,8 @@ def _arrow_type_to_dtype(cls, arrow_type):
         except NotImplementedError:
             if pyarrow.types.is_time(arrow_type):
                 res = np.dtype(datetime.time)
+            elif pyarrow.types.is_dictionary(arrow_type):
+                res = pandas.CategoricalDtype
             else:
                 raise
 
diff --git a/modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py b/modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py
index cd604f2cd55..21999c86d28 100644
--- a/modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py
+++ b/modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py
@@ -2388,5 +2388,16 @@ def set_axis(df, **kwargs):
     )
 
 
+class TestFromArrow:
+    def test_dict(self):
+        indices = pyarrow.array([0, 1, 0, 1, 2, 0, None, 2])
+        dictionary = pyarrow.array(["first", "second", "third"])
+        dict_array = pyarrow.DictionaryArray.from_arrays(indices, dictionary)
+        at = pyarrow.table({"col": dict_array})
+        pdf = at.to_pandas()
+        mdf = from_arrow(at)
+        df_equals(mdf, pdf)
+
+
 if __name__ == "__main__":
     pytest.main(["-v", __file__])
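The mapping added in PATCH 09 above is easy to see in plain pyarrow: dictionary-encoded columns are exactly the ones that convert to pandas Categorical, which is why `is_dictionary` types are reported as `pandas.CategoricalDtype`. A standalone illustration:

    import pyarrow

    indices = pyarrow.array([0, 1, 0, None])
    dictionary = pyarrow.array(["low", "high"])
    col = pyarrow.DictionaryArray.from_arrays(indices, dictionary)
    table = pyarrow.table({"col": col})
    print(pyarrow.types.is_dictionary(table.schema.field("col").type))  # True
    print(table.to_pandas()["col"].dtype)  # category
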
From 317bd75d0677c0ae1a400f7313953ee6638185ac Mon Sep 17 00:00:00 2001
From: Rehan Sohail Durrani
Date: Wed, 23 Nov 2022 15:42:09 -0800
Subject: [PATCH 10/13] FIX-#5252: Disable notebook tests until access control issues are resolved for `modin-test` bucket (#5257)

Signed-off-by: Rehan Durrani
---
 .../jupyter/execution/hdk_on_native/test/test_notebooks.py | 4 ++++
 .../execution/pandas_on_dask/test/test_notebooks.py        | 7 +++++++
 .../jupyter/execution/pandas_on_ray/test/test_notebooks.py | 7 +++++++
 3 files changed, 18 insertions(+)

diff --git a/examples/tutorial/jupyter/execution/hdk_on_native/test/test_notebooks.py b/examples/tutorial/jupyter/execution/hdk_on_native/test/test_notebooks.py
index 01e928f6bb1..fd755504955 100644
--- a/examples/tutorial/jupyter/execution/hdk_on_native/test/test_notebooks.py
+++ b/examples/tutorial/jupyter/execution/hdk_on_native/test/test_notebooks.py
@@ -13,6 +13,7 @@
 
 import os
 import sys
+import pytest
 
 import nbformat
 
@@ -44,6 +45,9 @@ def test_exercise_1():
 
 
 # this notebook works "as is"
+# GH #5252: Access to the modin-test bucket has changed, so we cannot currently run this test.
+# We will need to come back and unskip this test once the access control issue is resolved.
+@pytest.mark.skip(reason="Bucket cannot currently be accessed.")
 def test_exercise_2():
     modified_notebook_path = os.path.join(local_notebooks_dir, "exercise_2_test.ipynb")
     nb = nbformat.read(
diff --git a/examples/tutorial/jupyter/execution/pandas_on_dask/test/test_notebooks.py b/examples/tutorial/jupyter/execution/pandas_on_dask/test/test_notebooks.py
index 1d723e0c6b5..effc8d37adf 100644
--- a/examples/tutorial/jupyter/execution/pandas_on_dask/test/test_notebooks.py
+++ b/examples/tutorial/jupyter/execution/pandas_on_dask/test/test_notebooks.py
@@ -13,6 +13,7 @@
 
 import os
 import sys
+import pytest
 
 import nbformat
 
@@ -46,6 +47,9 @@ def test_exercise_1():
 
 
 # this notebook works "as is" but for testing purposes we can use smaller dataset
+# GH #5252: Access to the modin-test bucket has changed, so we cannot currently run this test.
+# We will need to come back and unskip this test once the access control issue is resolved.
+@pytest.mark.skip(reason="Bucket cannot currently be accessed.")
 def test_exercise_2():
     modified_notebook_path = os.path.join(local_notebooks_dir, "exercise_2_test.ipynb")
     nb = nbformat.read(
@@ -99,6 +103,9 @@ def sq_mad_func(self, axis=None, skipna=True, level=None, **kwargs):
 
 
 # this notebook works "as is" but for testing purposes we can use smaller dataset
+# GH #5252: Access to the modin-test bucket has changed, so we cannot currently run this test.
+# We will need to come back and unskip this test once the access control issue is resolved.
+@pytest.mark.skip(reason="Bucket cannot currently be accessed.")
 def test_exercise_4():
     modified_notebook_path = os.path.join(local_notebooks_dir, "exercise_4_test.ipynb")
     nb = nbformat.read(
diff --git a/examples/tutorial/jupyter/execution/pandas_on_ray/test/test_notebooks.py b/examples/tutorial/jupyter/execution/pandas_on_ray/test/test_notebooks.py
index 1504143e486..8c6803254e1 100644
--- a/examples/tutorial/jupyter/execution/pandas_on_ray/test/test_notebooks.py
+++ b/examples/tutorial/jupyter/execution/pandas_on_ray/test/test_notebooks.py
@@ -13,6 +13,7 @@
 
 import os
 import sys
+import pytest
 
 import nbformat
 
@@ -47,6 +48,9 @@ def test_exercise_1():
 
 
 # this notebook works "as is" but for testing purposes we can use smaller dataset
+# GH #5252: Access to the modin-test bucket has changed, so we cannot currently run this test.
+# We will need to come back and unskip this test once the access control issue is resolved.
+@pytest.mark.skip(reason="Bucket cannot currently be accessed.")
 def test_exercise_2():
     modified_notebook_path = os.path.join(local_notebooks_dir, "exercise_2_test.ipynb")
     nb = nbformat.read(
@@ -103,6 +107,9 @@ def sq_mad_func(self, axis=None, skipna=True, level=None, **kwargs):
 
 
 # this notebook works "as is" but for testing purposes we can use smaller dataset
+# GH #5252: Access to the modin-test bucket has changed, so we cannot currently run this test.
+# We will need to come back and unskip this test once the access control issue is resolved.
+@pytest.mark.skip(reason="Bucket cannot currently be accessed.")
 def test_exercise_4():
     modified_notebook_path = os.path.join(local_notebooks_dir, "exercise_4_test.ipynb")
     nb = nbformat.read(
From 6d5545f4a132f0efce02db66a6f5d515d4000812 Mon Sep 17 00:00:00 2001
From: Mahesh Vashishtha
Date: Thu, 24 Nov 2022 09:35:58 -0800
Subject: [PATCH 11/13] FIX-#5232: Stop changing original series names during binary ops. (#5249)

Signed-off-by: mvashishtha
Co-authored-by: Anatoly Myachev
---
 modin/pandas/series.py           | 15 +++++++++------
 modin/pandas/test/test_series.py | 11 +++++++++++
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index c378ff1bc44..75fa19cc3a8 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -2409,12 +2409,15 @@ def _prepare_inter_op(self, other):
             Prepared `other`.
         """
         if isinstance(other, Series):
-            # NB: deep=False is important for performance bc it retains obj.index._id
-            new_self = self.copy(deep=False)
-            new_other = other.copy(deep=False)
-            if self.name == other.name:
-                new_self.name = new_other.name = self.name
-            else:
+            names_different = self.name != other.name
+            # NB: if we don't need a rename, do the interaction with shallow
+            # copies so that we preserve obj.index._id. It's fine to work
+            # with shallow copies because we'll discard the copies but keep
+            # the result after the interaction operation. We can't do a rename
+            # on shallow copies because we'll mutate the original objects.
+            new_self = self.copy(deep=names_different)
+            new_other = other.copy(deep=names_different)
+            if names_different:
                 new_self.name = new_other.name = MODIN_UNNAMED_SERIES_LABEL
         else:
             new_self = self
diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index 39bf634ef34..6d2860c83e7 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -620,6 +620,17 @@ def test_add(data):
     inter_df_math_helper(modin_series, pandas_series, "add")
 
 
+def test_add_does_not_change_original_series_name():
+    # See https://github.com/modin-project/modin/issues/5232
+    s1 = pd.Series(1, name=1)
+    s2 = pd.Series(2, name=2)
+    original_s1 = s1.copy(deep=True)
+    original_s2 = s2.copy(deep=True)
+    _ = s1 + s2
+    df_equals(s1, original_s1)
+    df_equals(s2, original_s2)
+
+
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
 def test_add_prefix(data):
     modin_series, pandas_series = create_test_series(data)
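A usage sketch mirroring the regression test in PATCH 11 above (requires a configured Modin engine): because the rename now happens on deep copies, a binary operation no longer leaks the unnamed-series label back into its operands.

    import modin.pandas as pd

    s1 = pd.Series([1], name="left")
    s2 = pd.Series([2], name="right")
    _ = s1 + s2                # differently named operands
    print(s1.name, s2.name)    # still: left right
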
From 8f6e642d8738d627c48eea3937b1baefab9de991 Mon Sep 17 00:00:00 2001
From: Iaroslav Igoshev
Date: Fri, 25 Nov 2022 11:41:44 +0100
Subject: [PATCH 12/13] FEAT-#5253: Upgrade pandas to 1.5.2 (#5254)

* Set use-only-tar-bz2 to false

Signed-off-by: Igoshev, Iaroslav
---
 .github/workflows/ci-notebooks.yml      |  4 +-
 .github/workflows/ci.yml                | 53 ++++++++++++++++++-------
 .github/workflows/fuzzydata-test.yml    |  4 +-
 .github/workflows/push-to-master.yml    | 12 ++++--
 .github/workflows/push.yml              | 29 ++++++++++----
 environment-dev.yml                     |  2 +-
 modin/pandas/__init__.py                |  2 +-
 requirements-dev.txt                    |  2 +-
 requirements/env_hdk.yml                |  2 +-
 requirements/requirements-no-engine.yml |  2 +-
 setup.py                                |  2 +-
 11 files changed, 81 insertions(+), 33 deletions(-)

diff --git a/.github/workflows/ci-notebooks.yml b/.github/workflows/ci-notebooks.yml
index 18ef4b6101d..04ef8dfa813 100644
--- a/.github/workflows/ci-notebooks.yml
+++ b/.github/workflows/ci-notebooks.yml
@@ -44,7 +44,9 @@ jobs:
           environment-file: requirements/env_hdk.yml
           python-version: 3.8
          channel-priority: strict
-          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
+          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
+          use-only-tar-bz2: false
         if: matrix.execution == 'hdk_on_native'
       - name: Cache datasets
         uses: actions/cache@v2
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 7e03bf16cef..65d87928426 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -192,7 +192,9 @@ jobs:
           environment-file: environment-dev.yml
           python-version: 3.8
           channel-priority: strict
-          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
+          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
+          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
@@ -230,7 +232,9 @@ jobs:
           environment-file: environment-dev.yml
           python-version: 3.8
           channel-priority: strict
-          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
+          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
+          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
@@ -308,7 +312,9 @@ jobs:
           environment-file: environment-dev.yml
           python-version: 3.8
           channel-priority: strict
-          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
+          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
+          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
@@ -360,7 +366,9 @@ jobs:
           environment-file: environment-dev.yml
           python-version: 3.8
           channel-priority: strict
-          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
+          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
+          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
@@ -417,7 +425,9 @@ jobs:
           activate-environment: modin_on_hdk
           environment-file: requirements/env_hdk.yml
           python-version: 3.8
-          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
+          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
+          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
@@ -549,7 +559,9 @@ jobs:
           environment-file: environment-dev.yml
           python-version: ${{matrix.python-version}}
           channel-priority: strict
-          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
+          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
+          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
@@ -628,7 +640,9 @@ jobs:
           environment-file: environment-dev.yml
           python-version: 3.8
           channel-priority: strict
-          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
+          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
+          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
@@ -675,7 +689,9 @@ jobs:
           environment-file: requirements/environment-py36.yml
           python-version: 3.6
           channel-priority: strict
-          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
+          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
+          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
@@ -736,7 +752,9 @@ jobs:
           environment-file: requirements/environment-py36.yml
           python-version: 3.6
           channel-priority: strict
-          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
+          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
+          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
@@ -805,7 +823,9 @@ jobs:
           environment-file: environment-dev.yml
           python-version: 3.8
           channel-priority: strict
-          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
+          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
+          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
@@ -873,8 +893,9 @@ jobs:
           environment-file: environment-dev.yml
           python-version: ${{matrix.python-version}}
           channel-priority: strict
-          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
-          auto-update-conda: true # this enable `use-only-tar-bz2` feature on Windows
+          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
+          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
+          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
@@ -920,7 +941,9 @@ jobs:
           environment-file: environment-dev.yml
           python-version: ${{matrix.python-version}}
           channel-priority: strict
-          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
+          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
+          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
@@ -963,7 +986,9 @@ jobs:
           environment-file: environment-dev.yml
           python-version: ${{matrix.python-version}}
           channel-priority: strict
-          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
+          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
+          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
diff --git a/.github/workflows/fuzzydata-test.yml b/.github/workflows/fuzzydata-test.yml
index 35579e2f6b1..6d481990315 100644
--- a/.github/workflows/fuzzydata-test.yml
+++ b/.github/workflows/fuzzydata-test.yml
@@ -45,7 +45,9 @@ jobs:
           environment-file: environment-dev.yml
           python-version: 3.8
           channel-priority: strict
-          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
+          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
+          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
diff --git a/.github/workflows/push-to-master.yml b/.github/workflows/push-to-master.yml
index 9df09f0aa1a..6bc2fde5a72 100644
--- a/.github/workflows/push-to-master.yml
+++ b/.github/workflows/push-to-master.yml
@@ -20,7 +20,9 @@ jobs:
           environment-file: requirements/requirements-no-engine.yml
           python-version: 3.8
           channel-priority: strict
-          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
+          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
+          use-only-tar-bz2: false
      - name: install Ray nightly build
        run: pip install https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-2.0.0.dev0-cp38-cp38-manylinux2014_x86_64.whl
      - name: Conda environment
@@ -76,7 +78,9 @@ jobs:
           environment-file: environment-dev.yml
           python-version: 3.8
           channel-priority: strict
-          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
+          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
+          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
@@ -125,7 +129,9 @@ jobs:
           activate-environment: modin
           python-version: ${{matrix.python-version}}
           channel-priority: strict
-          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
+          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
+          use-only-tar-bz2: false
      - run: pip install -r requirements-dev.txt
      # Use a ray master commit that includes the fix here: https://github.com/ray-project/ray/pull/16278
      # Can be changed after a Ray version > 1.4 is released.
diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml
index eb3ea952f2e..4990927826e 100644
--- a/.github/workflows/push.yml
+++ b/.github/workflows/push.yml
@@ -28,7 +28,9 @@ jobs:
           environment-file: environment-dev.yml
           python-version: 3.8
           channel-priority: strict
-          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
+          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
+          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
@@ -76,7 +78,9 @@ jobs:
           environment-file: environment-dev.yml
           python-version: 3.8
           channel-priority: strict
-          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
+          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
+          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
@@ -122,7 +126,9 @@ jobs:
           activate-environment: modin_on_hdk
           environment-file: requirements/env_hdk.yml
           python-version: 3.8
-          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
+          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
+          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
@@ -177,7 +183,9 @@ jobs:
           environment-file: environment-dev.yml
           python-version: ${{matrix.python-version}}
           channel-priority: strict
-          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
+          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
+          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
@@ -276,8 +284,9 @@ jobs:
           environment-file: environment-dev.yml
           python-version: ${{matrix.python-version}}
           channel-priority: strict
-          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
-          auto-update-conda: true # this enable `use-only-tar-bz2` feature on Windows
+          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
+          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
+          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
@@ -322,7 +331,9 @@ jobs:
           environment-file: environment-dev.yml
           python-version: ${{matrix.python-version}}
           channel-priority: strict
-          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
+          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
+          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
@@ -364,7 +375,9 @@ jobs:
           environment-file: environment-dev.yml
           python-version: ${{matrix.python-version}}
           channel-priority: strict
-          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
+          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
+          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
diff --git a/environment-dev.yml b/environment-dev.yml
index 1a3623f4377..4e7d94c748b 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -2,7 +2,7 @@ name: modin
 channels:
   - conda-forge
 dependencies:
-  - pandas==1.5.1
+  - pandas==1.5.2
   - numpy>=1.18.5
   - pyarrow>=4.0.1
   - dask>=2.22.0
diff --git a/modin/pandas/__init__.py b/modin/pandas/__init__.py
index 55181935aa9..f2b6ee66e97 100644
--- a/modin/pandas/__init__.py
+++ b/modin/pandas/__init__.py
@@ -30,7 +30,7 @@
         f"Starting Modin in compatibility mode to support legacy pandas version {__pandas_version__}"
     )
 elif PandasCompatVersion.CURRENT == PandasCompatVersion.LATEST:
-    __pandas_version__ = "1.5.1"
+    __pandas_version__ = "1.5.2"
 
     if pandas.__version__ != __pandas_version__:
         warnings.warn(
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 3ae9aa82427..8ece14557f5 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,4 +1,4 @@
-pandas==1.5.1
+pandas==1.5.2
 numpy>=1.18.5
 pyarrow>=4.0.1
 dask[complete]>=2.22.0
diff --git a/requirements/env_hdk.yml b/requirements/env_hdk.yml
index 434d00690bf..b8be4a82fd9 100644
--- a/requirements/env_hdk.yml
+++ b/requirements/env_hdk.yml
@@ -2,7 +2,7 @@ name: modin_on_hdk
 channels:
   - conda-forge
 dependencies:
-  - pandas==1.5.1
+  - pandas==1.5.2
   - pyarrow=6
   - numpy>=1.18.5
   - fsspec
diff --git a/requirements/requirements-no-engine.yml b/requirements/requirements-no-engine.yml
index 9fcfd23c661..fad72dc9da8 100644
--- a/requirements/requirements-no-engine.yml
+++ b/requirements/requirements-no-engine.yml
@@ -1,7 +1,7 @@
 channels:
   - conda-forge
 dependencies:
-  - pandas==1.5.1
+  - pandas==1.5.2
   - numpy>=1.18.5
   - pyarrow>=4.0.1
   - fsspec
diff --git a/setup.py b/setup.py
index ec355e46f82..dd6c464f153 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 import versioneer
 import sys
 
-PANDAS_VERSION = "1.5.1" if sys.version_info >= (3, 8) else "1.1.5"
+PANDAS_VERSION = "1.5.2" if sys.version_info >= (3, 8) else "1.1.5"
 
 with open("README.md", "r", encoding="utf-8") as fh:
     long_description = fh.read()
""" + for idx, part in enumerate(partitions): + if hasattr(part, "force_materialization"): + partitions[idx] = part.force_materialization() assert all( [len(partition.list_of_blocks) == 1 for partition in partitions] ), "Implementation assumes that each partition contains a signle block."