From eb78929b84409851e1fead5b089c796ff026a80a Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Tue, 5 Sep 2023 10:17:29 +0800 Subject: [PATCH] Upgrade dependencies, remove support for python 3.7 and add python 3.11 Signed-off-by: Simon Zhao --- .github/workflows/azureml-cpu-nightly.yml | 2 +- .github/workflows/azureml-gpu-nightly.yml | 2 +- .github/workflows/azureml-spark-nightly.yml | 2 +- .github/workflows/azureml-unit-tests.yml | 2 +- .github/workflows/sarplus.yml | 6 +- recommenders/models/rlrmc/RLRMCdataset.py | 4 +- setup.py | 79 +++++++++---------- .../submit_groupwise_azureml_pytest.py | 35 ++++---- .../evaluation/test_python_evaluation.py | 2 +- .../evaluation/test_spark_evaluation.py | 10 +-- 10 files changed, 70 insertions(+), 74 deletions(-) diff --git a/.github/workflows/azureml-cpu-nightly.yml b/.github/workflows/azureml-cpu-nightly.yml index ec4c809bd..337035242 100644 --- a/.github/workflows/azureml-cpu-nightly.yml +++ b/.github/workflows/azureml-cpu-nightly.yml @@ -67,7 +67,7 @@ jobs: strategy: max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration matrix: - python-version: ['"python=3.7"', '"python=3.8"', '"python=3.9"'] + python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"'] test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }} steps: - name: Check out repository code diff --git a/.github/workflows/azureml-gpu-nightly.yml b/.github/workflows/azureml-gpu-nightly.yml index 08a7f00cc..a342b45c8 100644 --- a/.github/workflows/azureml-gpu-nightly.yml +++ b/.github/workflows/azureml-gpu-nightly.yml @@ -67,7 +67,7 @@ jobs: strategy: max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration matrix: - python-version: ['"python=3.7"', '"python=3.8"', '"python=3.9"'] + python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"'] test-group: ${{ 
fromJSON(needs.get-test-groups.outputs.test_groups) }} steps: - name: Check out repository code diff --git a/.github/workflows/azureml-spark-nightly.yml b/.github/workflows/azureml-spark-nightly.yml index 6079a5032..b784a3bea 100644 --- a/.github/workflows/azureml-spark-nightly.yml +++ b/.github/workflows/azureml-spark-nightly.yml @@ -66,7 +66,7 @@ jobs: strategy: max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration matrix: - python-version: ['"python=3.7"', '"python=3.8"', '"python=3.9"'] + python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"'] test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }} steps: - name: Check out repository code diff --git a/.github/workflows/azureml-unit-tests.yml b/.github/workflows/azureml-unit-tests.yml index f068f46f1..977961a1e 100644 --- a/.github/workflows/azureml-unit-tests.yml +++ b/.github/workflows/azureml-unit-tests.yml @@ -54,7 +54,7 @@ jobs: strategy: max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration matrix: - python-version: ['"python=3.7"', '"python=3.8"', '"python=3.9"'] + python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"'] test-group: ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }} steps: - name: Check out repository code diff --git a/.github/workflows/sarplus.yml b/.github/workflows/sarplus.yml index e5a25fa14..d2a04044b 100644 --- a/.github/workflows/sarplus.yml +++ b/.github/workflows/sarplus.yml @@ -36,10 +36,10 @@ jobs: # Test pysarplus with different versions of Python. # Package pysarplus and upload as GitHub workflow artifact when merged into # the main branch. 
- runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 strategy: matrix: - python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"] + python-version: ["3.8", "3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v3 @@ -111,7 +111,7 @@ jobs: scala: # Test sarplus with different versions of Databricks and Synapse runtime - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 strategy: matrix: include: diff --git a/recommenders/models/rlrmc/RLRMCdataset.py b/recommenders/models/rlrmc/RLRMCdataset.py index 6b1329d1d..7670105b3 100644 --- a/recommenders/models/rlrmc/RLRMCdataset.py +++ b/recommenders/models/rlrmc/RLRMCdataset.py @@ -68,8 +68,8 @@ def _data_processing(self, train, validation=None, test=None, mean_center=True): """ # Data processing and reindexing code is adopted from https://github.com/Microsoft/Recommenders/blob/main/recommenders/models/ncf/dataset.py # If validation dataset is None - df = train if validation is None else train.append(validation) - df = df if test is None else df.append(test) + df = train if validation is None else pd.concat([train, validation]) + df = df if test is None else pd.concat([df, test]) # Reindex user and item index if self.user_idx is None: diff --git a/setup.py b/setup.py index d84c85d81..3b7890a0a 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ import sys import time -# Workround for enabling editable user pip installs +# Workaround for enabling editable user pip installs site.ENABLE_USER_SITE = "--user" in sys.argv[1:] # Version @@ -27,55 +27,49 @@ version += ".post" + str(int(time.time())) install_requires = [ - "numpy>=1.19", # 1.19 required by tensorflow 2.6 - "pandas>1.0.3,<2", - "scipy>=1.0.0,<1.11.0", # FIXME: We limit <1.11.0 until #1954 is fixed - "tqdm>=4.31.1,<5", - "matplotlib>=2.2.2,<4", - "scikit-learn>=0.22.1,<1.0.3", - "numba>=0.38.1,<1", - "lightfm>=1.15,<2", - "lightgbm>=2.2.1", - "memory_profiler>=0.54.0,<1", - "nltk>=3.4,<4", - "seaborn>=0.8.1,<1", - "transformers>=2.5.0,<5", - "category_encoders>=1.3.0,<2", - 
"jinja2>=2,<3.1", - "requests>=2.0.0,<3", - "cornac>=1.1.2,<1.15.2;python_version<='3.7'", - "cornac>=1.15.2,<2;python_version>='3.8'", # After 1.15.2, Cornac requires python 3.8 - "retrying>=1.3.3", - "pandera[strategies]>=0.6.5", # For generating fake datasets - "scikit-surprise>=1.0.6", - "scrapbook>=0.5.0,<1.0.0", - "hyperopt>=0.1.2,<1", - "ipykernel>=4.6.1,<7", - "jupyter>=1,<2", - "locust>=1,<2", - "papermill>=2.1.2,<3", + "pandas>1.5.2,<3", # requires numpy + "scikit-learn>=1.1.3,<2", # requires scipy + "numba>=0.57.0,<1", + "lightfm>=1.17,<2", + "lightgbm>=3.3.2,<5", + "memory-profiler>=0.61.0,<1", + "nltk>=3.8.1,<4", # requires tqdm + "seaborn>=0.12.0,<1", # requires matplotlib + "transformers>=4.27.0,<5", # requires pyyaml, tqdm + "category-encoders>=2.6.0,<3", + "jinja2>=3.1.0,<3.2", + "cornac>=1.15.2,<2", # requires tqdm + "retrying>=1.3.4", + "pandera[strategies]>=0.15.0", # For generating fake datasets + "scikit-surprise>=1.1.3", + "scrapbook>=0.5.0,<1.0.0", # requires tqdm, papermill + "hyperopt>=0.2.7,<1", + "notebook>=7.0.0,<8", # requires jupyter, ipykernel + "locust>=2.12.2,<3", + # hypothesis 6.83.1 introduced a non-existent attribute '_deferred_pprinters' of IPython.lib.pretty in + # https://github.com/HypothesisWorks/hypothesis/commit/5ea8e0c3e6da1cd9fb3f302124dc74791c14db11 + "hypothesis<6.83.1", ] # shared dependencies extras_require = { "gpu": [ - "nvidia-ml-py3>=7.352.0", - # TensorFlow compiled with CUDA 11.2, cudnn 8.1 - "tensorflow~=2.6.1;python_version=='3.6'", - "tensorflow~=2.7.0;python_version>='3.7'", + "nvidia-ml-py>=11.510.69", + # TensorFlow compiled with CUDA 11.8, cudnn 8.6 + "tensorflow>=2.12.0", "tf-slim>=1.1.0", - "torch>=1.8", # for CUDA 11 support - "fastai>=1.0.46,<2", + "torch>=2.0.1", + "fastai>=2.7.11,<3", ], "spark": [ - "pyarrow>=0.12.1,<7.0.0", - "pyspark>=2.4.5,<3.3.0", + "pyarrow>=10.0.1", + "pyspark>=3.4.0", ], "dev": [ - "black>=18.6b4,<21", - "pytest>=3.6.4", - "pytest-cov>=2.12.1", - "pytest-mock>=3.6.1", # 
for access to mock fixtures in pytest + "black>=23.3.0,<24", + "pytest>=7.2.1", + "pytest-cov>=4.1.0", + "pytest-mock>=3.10.0", # for access to mock fixtures in pytest ], } # For the brave of heart @@ -116,9 +110,10 @@ "Topic :: Scientific/Engineering :: Artificial Intelligence", "Topic :: Software Development :: Libraries :: Python Modules", "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Operating System :: POSIX :: Linux", ], extras_require=extras_require, @@ -126,7 +121,7 @@ "machine learning python spark gpu", install_requires=install_requires, package_dir={"recommenders": "recommenders"}, - python_requires=">=3.6, <3.10", + python_requires=">=3.8, <3.12", packages=find_packages( where=".", exclude=["contrib", "docs", "examples", "scenarios", "tests", "tools"], diff --git a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py index 79a189ccc..0ddc782e2 100644 --- a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py +++ b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py @@ -37,7 +37,6 @@ """ import argparse import logging -import glob from azureml.core.authentication import AzureCliAuthentication from azureml.core import Workspace @@ -146,8 +145,7 @@ def setup_persistent_compute_target(workspace, cluster_name, vm_size, max_nodes) def create_run_config( cpu_cluster, - docker_proc_type, - workspace, + docker_image, add_gpu_dependencies, add_spark_dependencies, conda_pkg_jdk, @@ -166,8 +164,7 @@ def create_run_config( the following: - Reco_cpu_test - Reco_gpu_test - docker_proc_type (str) : processor type, cpu or gpu - workspace : workspace reference + docker_image (str) : docker image for cpu or gpu add_gpu_dependencies (bool) : True if gpu packages should be added to the conda 
environment, else False add_spark_dependencies (bool) : True if PySpark packages should be @@ -181,7 +178,20 @@ def create_run_config( run_azuremlcompute = RunConfiguration() run_azuremlcompute.target = cpu_cluster run_azuremlcompute.environment.docker.enabled = True - run_azuremlcompute.environment.docker.base_image = docker_proc_type + # See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-train-with-custom-image?view=azureml-api-1#use-a-custom-dockerfile-optional + run_azuremlcompute.environment.docker.base_image = None + run_azuremlcompute.environment.docker.base_dockerfile = f""" + FROM {docker_image} + # Install system-level deps for scipy. See + # https://docs.scipy.org/doc/scipy/dev/contributor/building.html + RUN apt-get update && \ + apt-get install -y \ + gfortran \ + libopenblas-dev \ + liblapack-dev \ + pkg-config + RUN apt-get install -y git + """ # Use conda_dependencies.yml to create a conda environment in # the Docker image for execution @@ -425,13 +435,11 @@ def create_arg_parser(): args = create_arg_parser() if args.dockerproc == "cpu": - from azureml.core.runconfig import DEFAULT_CPU_IMAGE - - docker_proc_type = DEFAULT_CPU_IMAGE + # https://github.com/Azure/AzureML-Containers/blob/master/base/cpu/openmpi4.1.0-ubuntu22.04 + docker_image = "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04" else: - from azureml.core.runconfig import DEFAULT_GPU_IMAGE - - docker_proc_type = DEFAULT_GPU_IMAGE + # https://github.com/Azure/AzureML-Containers/blob/master/base/gpu/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04 + docker_image = "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04" cli_auth = AzureCliAuthentication() @@ -452,8 +460,7 @@ def create_arg_parser(): run_config = create_run_config( cpu_cluster=cpu_cluster, - docker_proc_type=docker_proc_type, - workspace=workspace, + docker_image=docker_image, add_gpu_dependencies=args.add_gpu_dependencies, add_spark_dependencies=args.add_spark_dependencies, 
conda_pkg_jdk=args.conda_pkg_jdk, diff --git a/tests/unit/recommenders/evaluation/test_python_evaluation.py b/tests/unit/recommenders/evaluation/test_python_evaluation.py index e5837fc66..cd54ec36b 100644 --- a/tests/unit/recommenders/evaluation/test_python_evaluation.py +++ b/tests/unit/recommenders/evaluation/test_python_evaluation.py @@ -6,7 +6,7 @@ import pytest from unittest.mock import Mock from sklearn.preprocessing import minmax_scale -from pandas.util.testing import assert_frame_equal +from pandas.testing import assert_frame_equal from recommenders.utils.constants import ( DEFAULT_USER_COL, diff --git a/tests/unit/recommenders/evaluation/test_spark_evaluation.py b/tests/unit/recommenders/evaluation/test_spark_evaluation.py index 9cf35ee3e..1b740a91c 100644 --- a/tests/unit/recommenders/evaluation/test_spark_evaluation.py +++ b/tests/unit/recommenders/evaluation/test_spark_evaluation.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd import pytest -from pandas.util.testing import assert_frame_equal +from pandas.testing import assert_frame_equal from recommenders.evaluation.python_evaluation import ( precision_at_k, @@ -441,7 +441,7 @@ def test_item_novelty(spark_diversity_data, target_metrics): ) actual = evaluator.historical_item_novelty().toPandas() assert_frame_equal( - target_metrics["item_novelty"], actual, check_exact=False, check_less_precise=4 + target_metrics["item_novelty"], actual, check_exact=False ) assert np.all(actual["item_novelty"].values >= 0) # Test that novelty is zero when data includes only one item @@ -482,7 +482,6 @@ def test_user_diversity(spark_diversity_data, target_metrics): target_metrics["user_diversity"], actual, check_exact=False, - check_less_precise=4, ) @@ -510,7 +509,6 @@ def test_user_item_serendipity(spark_diversity_data, target_metrics): target_metrics["user_item_serendipity"], actual, check_exact=False, - check_less_precise=4, ) @@ -529,7 +527,6 @@ def test_user_serendipity(spark_diversity_data, 
target_metrics): target_metrics["user_serendipity"], actual, check_exact=False, - check_less_precise=4, ) @@ -562,7 +559,6 @@ def test_user_diversity_item_feature_vector(spark_diversity_data, target_metrics target_metrics["user_diversity_item_feature_vector"], actual, check_exact=False, - check_less_precise=4, ) @@ -599,7 +595,6 @@ def test_user_item_serendipity_item_feature_vector( target_metrics["user_item_serendipity_item_feature_vector"], actual, check_exact=False, - check_less_precise=4, ) @@ -620,7 +615,6 @@ def test_user_serendipity_item_feature_vector(spark_diversity_data, target_metri target_metrics["user_serendipity_item_feature_vector"], actual, check_exact=False, - check_less_precise=4, )