Commit

merging
christinafan committed Mar 2, 2024
2 parents 4aa3691 + a966395 commit 8bd2d09
Showing 418 changed files with 16,101 additions and 11,886 deletions.
2 changes: 2 additions & 0 deletions .github/actions/run-core-tests/group_2/action.yml
@@ -20,3 +20,5 @@ runs:
modin/pandas/test/dataframe/test_pickle.py
echo "::endgroup::"
shell: bash -l {0}
- run: MODIN_RANGE_PARTITIONING=1 python -m pytest modin/pandas/test/dataframe/test_join_sort.py -k "merge"
shell: bash -l {0}
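The new step above runs the merge tests against the range-partitioning implementation by setting an environment variable. As a minimal sketch, the same flag can be used for an ad-hoc session outside of CI, assuming it is picked up when Modin is first used, as in the workflow step:

# sketch: enable range-partitioning merge for a throwaway session
MODIN_RANGE_PARTITIONING=1 python -c "
import modin.pandas as pd
left = pd.DataFrame({'key': [1, 2, 3], 'a': [10, 20, 30]})
right = pd.DataFrame({'key': [2, 3, 4], 'b': ['x', 'y', 'z']})
print(left.merge(right, on='key'))
"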
2 changes: 1 addition & 1 deletion .github/actions/run-core-tests/group_3/action.yml
@@ -19,6 +19,6 @@ runs:
shell: bash -l {0}
- run: |
echo "::group::Running experimental groupby tests (group 3)..."
MODIN_EXPERIMENTAL_GROUPBY=1 ${{ inputs.runner }} ${{ inputs.parallel }} modin/pandas/test/test_groupby.py
MODIN_RANGE_PARTITIONING_GROUPBY=1 ${{ inputs.runner }} ${{ inputs.parallel }} modin/pandas/test/test_groupby.py
echo "::endgroup::"
shell: bash -l {0}
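This hunk renames the flag from MODIN_EXPERIMENTAL_GROUPBY to MODIN_RANGE_PARTITIONING_GROUPBY. A hedged local equivalent of the updated step, assuming the composite action's runner and parallel inputs resolve to a plain pytest invocation:

# run the groupby suite against the range-partitioning groupby implementation
MODIN_RANGE_PARTITIONING_GROUPBY=1 python -m pytest modin/pandas/test/test_groupby.py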
24 changes: 15 additions & 9 deletions .github/workflows/ci-notebooks.yml
@@ -8,6 +8,7 @@ on:
- setup.cfg
- setup.py
- requirements/env_hdk.yml
- requirements/env_unidist_linux.yml
concurrency:
# Cancel other jobs in the same branch. We don't care whether CI passes
# on old commits.
@@ -28,12 +29,17 @@
steps:
- uses: actions/checkout@v3
- uses: ./.github/actions/python-only
if: matrix.execution != 'hdk_on_native'
if: matrix.execution != 'hdk_on_native' && matrix.execution != 'pandas_on_unidist'
- uses: ./.github/actions/mamba-env
with:
environment-file: requirements/env_hdk.yml
activate-environment: modin_on_hdk
if: matrix.execution == 'hdk_on_native'
- uses: ./.github/actions/mamba-env
with:
environment-file: requirements/env_unidist_linux.yml
activate-environment: modin_on_unidist
if: matrix.execution == 'pandas_on_unidist'
- name: Cache datasets
uses: actions/cache@v2
with:
@@ -43,29 +49,29 @@
# replace modin with . in the tutorial requirements file for `pandas_on_ray` and
# `pandas_on_dask` since we need Modin built from sources
- run: sed -i 's/modin/./g' examples/tutorial/jupyter/execution/${{ matrix.execution }}/requirements.txt
if: matrix.execution != 'hdk_on_native'
if: matrix.execution != 'hdk_on_native' && matrix.execution != 'pandas_on_unidist'
# install dependencies required for notebooks execution for `pandas_on_ray` and `pandas_on_dask`
# Override modin-spreadsheet install for now
- run: |
pip install -r examples/tutorial/jupyter/execution/${{ matrix.execution }}/requirements.txt
pip install git+https://github.com/modin-project/modin-spreadsheet.git@49ffd89f683f54c311867d602c55443fb11bf2a5
if: matrix.execution != 'hdk_on_native'
# Build Modin from sources for `hdk_on_native`
if: matrix.execution != 'hdk_on_native' && matrix.execution != 'pandas_on_unidist'
# Build Modin from sources for `hdk_on_native` and `pandas_on_unidist`
- run: pip install -e .
if: matrix.execution == 'hdk_on_native'
if: matrix.execution == 'hdk_on_native' || matrix.execution == 'pandas_on_unidist'
# install test dependencies
# NOTE: If you are changing the set of packages installed here, make sure that
# the dev requirements match them.
- run: pip install pytest pytest-cov black flake8 flake8-print flake8-no-implicit-concat
if: matrix.execution != 'hdk_on_native'
if: matrix.execution != 'hdk_on_native' && matrix.execution != 'pandas_on_unidist'
- run: pip install flake8-print jupyter nbformat nbconvert
if: matrix.execution == 'hdk_on_native'
if: matrix.execution == 'hdk_on_native' || matrix.execution == 'pandas_on_unidist'
- run: pip list
if: matrix.execution != 'hdk_on_native'
if: matrix.execution != 'hdk_on_native' && matrix.execution != 'pandas_on_unidist'
- run: |
conda info
conda list
if: matrix.execution == 'hdk_on_native'
if: matrix.execution == 'hdk_on_native' || matrix.execution == 'pandas_on_unidist'
# setup kernel configuration for `pandas_on_unidist` execution with mpi backend
- run: python examples/tutorial/jupyter/execution/${{ matrix.execution }}/setup_kernel.py
if: matrix.execution == 'pandas_on_unidist'
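The notebook CI now covers a pandas_on_unidist execution alongside hdk_on_native. A rough local mirror of the new steps, assuming conda/mamba is available and the repository root is the working directory:

# sketch of the pandas_on_unidist notebook setup, per the workflow above
mamba env create -f requirements/env_unidist_linux.yml
conda activate modin_on_unidist
pip install -e .                 # Modin built from sources, as in CI
pip install flake8-print jupyter nbformat nbconvert
# register the MPI-backed Jupyter kernel for the tutorial notebooks
python examples/tutorial/jupyter/execution/pandas_on_unidist/setup_kernel.py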
15 changes: 12 additions & 3 deletions .github/workflows/ci-required.yml
@@ -66,8 +66,6 @@ jobs:
asv_bench/benchmarks/__init__.py asv_bench/benchmarks/io/__init__.py \
asv_bench/benchmarks/scalability/__init__.py \
modin/core/io \
modin/experimental/core/execution/ray/implementations/pandas_on_ray \
modin/experimental/core/execution/ray/implementations/pyarrow_on_ray \
modin/pandas/series.py \
modin/core/execution/python \
modin/pandas/dataframe.py \
@@ -91,7 +89,6 @@ jobs:
python scripts/doc_checker.py modin/experimental/pandas/io.py \
modin/experimental/pandas/__init__.py
- run: python scripts/doc_checker.py modin/core/storage_formats/base
- run: python scripts/doc_checker.py modin/experimental/core/storage_formats/pyarrow
- run: python scripts/doc_checker.py modin/core/storage_formats/pandas
- run: |
python scripts/doc_checker.py \
@@ -108,3 +105,15 @@
- run: python scripts/doc_checker.py modin/experimental/core/execution/native/implementations/hdk_on_native/interchange/dataframe_protocol
- run: python scripts/doc_checker.py modin/experimental/batch/pipeline.py
- run: python scripts/doc_checker.py modin/logging

lint-black-isort:
name: lint (black and isort)
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: ./.github/actions/python-only
- run: pip install black>=24.1.0 isort>=5.12
# NOTE: keep the black command here in sync with the pre-commit hook in
# /contributing/pre-commit
- run: black --check --diff modin/ asv_bench/benchmarks scripts/doc_checker.py
- run: isort . --check-only
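The new lint job pins black>=24.1.0 and isort>=5.12 and only checks formatting. A local run of the same checks might look like this (versions quoted so the shell does not treat ">=" as a redirection; keep the black paths in sync with the pre-commit hook, as the comment in the job notes):

pip install "black>=24.1.0" "isort>=5.12"
black --check --diff modin/ asv_bench/benchmarks scripts/doc_checker.py
isort . --check-only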
103 changes: 42 additions & 61 deletions .github/workflows/ci.yml
@@ -26,17 +26,6 @@ env:
MODIN_GITHUB_CI: true

jobs:
lint-black:
name: lint (black)
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: ./.github/actions/python-only
- run: pip install black
# NOTE: keep the black command here in sync with the pre-commit hook in
# /contributing/pre-commit
- run: black --check --diff modin/ asv_bench/benchmarks scripts/doc_checker.py

lint-mypy:
name: lint (mypy)
runs-on: ubuntu-latest
@@ -77,7 +66,7 @@ jobs:
- uses: ./.github/actions/upload-coverage

test-clean-install:
needs: [lint-flake8, lint-black]
needs: [lint-flake8]
strategy:
matrix:
os:
@@ -92,14 +81,20 @@
- uses: actions/checkout@v3
- uses: ./.github/actions/python-only
- run: python -m pip install -e ".[all]"
- name: Ensure all engines start up
- name: Ensure Ray and Dask engines start up
run: |
MODIN_ENGINE=dask python -c "import modin.pandas as pd; print(pd.DataFrame([1,2,3]))"
MODIN_ENGINE=ray python -c "import modin.pandas as pd; print(pd.DataFrame([1,2,3]))"
- name: Ensure MPI engine start up
# Install a working MPI implementation beforehand so mpi4py can link to it
run: |
sudo apt install libmpich-dev
python -m pip install -e ".[mpi]"
MODIN_ENGINE=unidist UNIDIST_BACKEND=mpi mpiexec -n 1 python -c "import modin.pandas as pd; print(pd.DataFrame([1,2,3]))"
if: matrix.os == 'ubuntu'

test-internals:
needs: [lint-flake8, lint-black]
needs: [lint-flake8]
runs-on: ubuntu-latest
defaults:
run:
@@ -124,7 +119,7 @@
- uses: ./.github/actions/upload-coverage

test-defaults:
needs: [lint-flake8, lint-black]
needs: [lint-flake8]
runs-on: ubuntu-latest
defaults:
run:
@@ -155,7 +150,7 @@
- uses: ./.github/actions/upload-coverage

test-hdk:
needs: [lint-flake8, lint-black]
needs: [lint-flake8]
runs-on: ubuntu-latest
defaults:
run:
@@ -193,6 +188,7 @@
- run: python -m pytest modin/pandas/test/dataframe/test_binary.py
- run: python -m pytest modin/pandas/test/dataframe/test_reduce.py
- run: python -m pytest modin/pandas/test/dataframe/test_join_sort.py
- run: MODIN_RANGE_PARTITIONING=1 python -m pytest modin/pandas/test/dataframe/test_join_sort.py -k "merge"
- run: python -m pytest modin/pandas/test/test_general.py
- run: python -m pytest modin/pandas/test/dataframe/test_indexing.py
- run: python -m pytest modin/pandas/test/test_series.py
@@ -212,7 +208,7 @@

test-asv-benchmarks:
if: github.event_name == 'pull_request'
needs: [lint-flake8, lint-black]
needs: [lint-flake8]
runs-on: ubuntu-latest
defaults:
run:
@@ -256,11 +252,6 @@
MODIN_ASV_USE_IMPL=pandas asv run --quick --strict --show-stderr --launch-method=spawn \
-b ^benchmarks -b ^io | tee benchmarks.log
# HDK: ERR_OUT_OF_CPU_MEM: Not enough host memory to execute the query (MODIN#4270)
# just disable test for testing - it works well in a machine with more memory
sed -i 's/def time_groupby_agg_nunique(self, \*args, \*\*kwargs):/# def time_groupby_agg_nunique(self, *args, **kwargs):/g' benchmarks/hdk/benchmarks.py
sed -i 's/execute(self.df.groupby(by=self.groupby_columns).agg("nunique"))/# execute(self.df.groupby(by=self.groupby_columns).agg("nunique"))/g' benchmarks/hdk/benchmarks.py
# Otherwise, ASV considers that the environment has already been created, although ASV command is run for another config,
# which requires the creation of a completely new environment. This step will be required after removing the manual environment setup step.
rm -f -R .asv/env/
@@ -322,7 +313,7 @@
"${{ steps.filter.outputs.ray }}" "${{ steps.filter.outputs.dask }}" >> $GITHUB_OUTPUT
test-all-unidist:
needs: [lint-flake8, lint-black, execution-filter]
needs: [lint-flake8, execution-filter]
if: github.event_name == 'push' || needs.execution-filter.outputs.unidist == 'true'
runs-on: ubuntu-latest
defaults:
@@ -353,7 +344,7 @@
- uses: actions/checkout@v3
- uses: ./.github/actions/mamba-env
with:
environment-file: requirements/env_unidist.yml
environment-file: requirements/env_unidist_linux.yml
activate-environment: modin_on_unidist
python-version: ${{matrix.python-version}}
- name: Install HDF5
@@ -376,8 +367,18 @@
- run: ./.github/workflows/sql_server/set_up_sql_server.sh
# need an extra argument "genv" to set environment variables for mpiexec. We need
# these variables to test writing to the mock s3 filesystem.
- run: mpiexec -n 1 -genv AWS_ACCESS_KEY_ID foobar_key -genv AWS_SECRET_ACCESS_KEY foobar_secret python -m pytest modin/pandas/test/test_io.py --verbose
- run: mpiexec -n 1 python -m pytest modin/experimental/pandas/test/test_io_exp.py
- uses: nick-fields/retry@v2
# to avoid issues with non-stable `to_csv` tests for unidist on MPI backend.
# for details see: https://github.com/modin-project/modin/pull/6776
with:
timeout_minutes: 15
max_attempts: 3
command: |
conda run --no-capture-output -n modin_on_unidist mpiexec -n 1 -genv AWS_ACCESS_KEY_ID foobar_key \
-genv AWS_SECRET_ACCESS_KEY foobar_secret python -m pytest modin/pandas/test/test_io.py --verbose
- run: |
mpiexec -n 1 -genv AWS_ACCESS_KEY_ID foobar_key -genv AWS_SECRET_ACCESS_KEY foobar_secret \
python -m pytest modin/experimental/pandas/test/test_io_exp.py
- run: mpiexec -n 1 python -m pytest modin/experimental/sql/test/test_sql.py
- run: mpiexec -n 1 python -m pytest modin/test/interchange/dataframe_protocol/test_general.py
- run: mpiexec -n 1 python -m pytest modin/test/interchange/dataframe_protocol/pandas/test_protocol.py
@@ -387,7 +388,7 @@
- uses: ./.github/actions/upload-coverage

test-all:
needs: [lint-flake8, lint-black, execution-filter]
needs: [lint-flake8, execution-filter]
strategy:
matrix:
os:
@@ -521,7 +522,7 @@
if: matrix.os == 'windows'

test-sanity:
needs: [lint-flake8, lint-black, execution-filter]
needs: [lint-flake8, execution-filter]
if: github.event_name == 'pull_request'
strategy:
matrix:
@@ -560,7 +561,7 @@
- uses: actions/checkout@v3
- uses: ./.github/actions/mamba-env
with:
environment-file: ${{ matrix.execution.name == 'unidist' && 'requirements/env_unidist.yml' || 'environment-dev.yml' }}
environment-file: ${{ matrix.os == 'ubuntu' && matrix.execution.name == 'unidist' && 'requirements/env_unidist_linux.yml' || matrix.os == 'windows' && matrix.execution.name == 'unidist' && 'requirements/env_unidist_win.yml' || 'environment-dev.yml' }}
activate-environment: ${{ matrix.execution.name == 'unidist' && 'modin_on_unidist' || 'modin' }}
python-version: ${{matrix.python-version}}
- name: Install HDF5
@@ -584,6 +585,7 @@
- run: MODIN_BENCHMARK_MODE=True ${{ matrix.execution.shell-ex }} modin/pandas/test/internals/test_benchmark_mode.py
- run: ${{ matrix.execution.shell-ex }} $PARALLEL modin/pandas/test/internals/test_repartition.py
- run: ${{ matrix.execution.shell-ex }} $PARALLEL modin/test/test_partition_api.py
- run: ${{ matrix.execution.shell-ex }} modin/pandas/api/extensions/test
- name: xgboost tests
run: |
# TODO(https://github.com/modin-project/modin/issues/5194): Uncap xgboost
@@ -630,6 +632,15 @@
if: matrix.os != 'windows'
- run: ${{ matrix.execution.shell-ex }} $PARALLEL modin/numpy/test
- run: ${{ matrix.execution.shell-ex }} -m "not exclude_in_sanity" modin/pandas/test/test_io.py --verbose
if: matrix.execution.name != 'unidist'
- uses: nick-fields/retry@v2
# to avoid issues with non-stable `to_csv` tests for unidist on MPI backend.
# for details see: https://github.com/modin-project/modin/pull/6776
with:
timeout_minutes: 15
max_attempts: 3
command: conda run --no-capture-output -n modin_on_unidist ${{ matrix.execution.shell-ex }} -m "not exclude_in_sanity" modin/pandas/test/test_io.py --verbose
if: matrix.execution.name == 'unidist'
- run: ${{ matrix.execution.shell-ex }} modin/experimental/pandas/test/test_io_exp.py
- run: ${{ matrix.execution.shell-ex }} $PARALLEL modin/test/interchange/dataframe_protocol/test_general.py
- run: ${{ matrix.execution.shell-ex }} $PARALLEL modin/test/interchange/dataframe_protocol/pandas/test_protocol.py
@@ -644,7 +655,7 @@
- uses: ./.github/actions/upload-coverage

test-experimental:
needs: [lint-flake8, lint-black]
needs: [lint-flake8]
runs-on: ubuntu-latest
defaults:
run:
@@ -672,38 +683,8 @@
- run: python -m pytest modin/pandas/test/test_io.py --verbose
- uses: ./.github/actions/upload-coverage

test-pyarrow:
needs: [lint-flake8, lint-black]
runs-on: ubuntu-latest
defaults:
run:
shell: bash -l {0}
strategy:
matrix:
python-version: ["3.9"]
env:
MODIN_STORAGE_FORMAT: pyarrow
MODIN_EXPERIMENTAL: "True"
name: test (pyarrow, python ${{matrix.python-version}})
services:
moto:
image: motoserver/moto
ports:
- 5000:5000
env:
AWS_ACCESS_KEY_ID: foobar_key
AWS_SECRET_ACCESS_KEY: foobar_secret
steps:
- uses: actions/checkout@v3
- uses: ./.github/actions/mamba-env
with:
environment-file: environment-dev.yml
python-version: ${{matrix.python-version}}
- run: sudo apt update && sudo apt install -y libhdf5-dev
- run: python -m pytest modin/pandas/test/test_io.py::TestCsv --verbose

test-spreadsheet:
needs: [lint-flake8, lint-black]
needs: [lint-flake8]
runs-on: ubuntu-latest
defaults:
run: