From ffac856156996011328841dff8fb292bc5de053f Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Tue, 6 Jun 2023 19:49:59 +0800
Subject: [PATCH 01/61] Add support for Python 3.10 and 3.11

---
 setup.py | 81 +++++++++++++++++++++++++++-----------------------------
 1 file changed, 39 insertions(+), 42 deletions(-)

diff --git a/setup.py b/setup.py
index 080dad2cb..0db28d225 100644
--- a/setup.py
+++ b/setup.py
@@ -8,7 +8,7 @@
 import sys
 import time
 
-# workround for enabling editable user pip installs
+# workaround for enabling editable user pip installs
 site.ENABLE_USER_SITE = "--user" in sys.argv[1:]
 
 # version
@@ -27,61 +27,57 @@
     version += ".post" + str(int(time.time()))
 
 install_requires = [
-    "numpy>=1.19",  # 1.19 required by tensorflow 2.6
-    "pandas>1.0.3,<2",
-    "scipy>=1.0.0,<2",
-    "tqdm>=4.31.1,<5",
-    "matplotlib>=2.2.2,<4",
-    "scikit-learn>=0.22.1,<1.0.3",
-    "numba>=0.38.1,<1",
-    "lightfm>=1.15,<2",
-    "lightgbm>=2.2.1",
-    "memory_profiler>=0.54.0,<1",
-    "nltk>=3.4,<4",
-    "seaborn>=0.8.1,<1",
-    "transformers>=2.5.0,<5",
-    "bottleneck>=1.2.1,<2",
-    "category_encoders>=1.3.0,<2",
-    "jinja2>=2,<3.1",
-    "pyyaml>=5.4.1,<6",
-    "requests>=2.0.0,<3",
-    "cornac>=1.1.2,<2",
-    "retrying>=1.3.3",
-    "pandera[strategies]>=0.6.5",  # For generating fake datasets
-    "scikit-surprise>=1.0.6",
+    "pandas>1.5.2,<2.1",  # requires numpy
+    "tqdm>=4.65.0,<5",
+    "matplotlib>=3.6.0,<4",
+    "scikit-learn>=1.1.3,<2",  # 1.0.2 may not support Python 3.10.  requires scipy
+    "numba>=0.57.0,<1",
+    "lightfm>=1.17,<2",
+    "lightgbm>=3.3.2,<4",
+    "memory_profiler>=0.61.0,<1",
+    "nltk>=3.8.1,<4",
+    "seaborn>=0.12.0,<1",
+    "transformers>=4.26.0,<5",  # requires pyyaml
+    "bottleneck>=1.3.7,<2",
+    "category_encoders>=2.6.0,<2",
+    "jinja2>=3.1.0,<3.2",
+    "cornac>=1.15.2,<2",
+    "retrying>=1.3.4",
+    "pandera[strategies]>=0.15.0",  # For generating fake datasets
+    "scikit-surprise>=1.1.3",
     "scrapbook>=0.5.0,<1.0.0",
 ]
 
 # shared dependencies
 extras_require = {
     "examples": [
-        "azure.mgmt.cosmosdb>=0.8.0,<1",
-        "hyperopt>=0.1.2,<1",
-        "ipykernel>=4.6.1,<7",
-        "jupyter>=1,<2",
-        "locust>=1,<2",
-        "papermill>=2.1.2,<3",
+        "azure-mgmt-cosmosdb>=9.0.0,<10",
+        "hyperopt>=0.2.7,<1",
+        "ipykernel>=6.20.1,<7",
+        "notebook>=6.5.4,<8",
+        "locust>=2.15.1,<3",
+        "papermill>=2.4.0,<3",
     ],
     "gpu": [
-        "nvidia-ml-py3>=7.352.0",
+        "nvidia-ml-py3>=11.510.69",
         # TensorFlow compiled with CUDA 11.2, cudnn 8.1
         "tensorflow~=2.6.1;python_version=='3.6'",
         "tensorflow~=2.7.0;python_version>='3.7'",
         "tf-slim>=1.1.0",
-        "torch>=1.8",  # for CUDA 11 support
-        "fastai>=1.0.46,<2",
+        "torch>=2.0.1",
+        "fastai>=2.7.11,<3",
     ],
     "spark": [
-        "databricks_cli>=0.8.6,<1",
-        "pyarrow>=0.12.1,<7.0.0",
-        "pyspark>=2.4.5,<3.3.0",
+        "databricks_cli>=0.17.7,<1",
+        "pyarrow>=10.0.1",
+        "pyspark>=3.0.1,<=3.4.0",
     ],
     "dev": [
-        "black>=18.6b4,<21",
-        "pytest>=3.6.4",
-        "pytest-cov>=2.12.1",
-        "pytest-mock>=3.6.1",  # for access to mock fixtures in pytest
-        "pytest-rerunfailures>=10.2",  # to mark flaky tests
+        "black>=23.3.0,<24",
+        "pytest>=7.2.1",
+        "pytest-cov>=4.1.0",
+        "pytest-mock>=3.10.0",  # for access to mock fixtures in pytest
+        "pytest-rerunfailures>=11.1.2",  # to mark flaky tests
     ],
 }
 # for the brave of heart
@@ -124,9 +120,10 @@
         "Topic :: Scientific/Engineering :: Artificial Intelligence",
         "Topic :: Software Development :: Libraries :: Python Modules",
         "License :: OSI Approved :: MIT License",
-        "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
         "Operating System :: Microsoft :: Windows",
         "Operating System :: POSIX :: Linux",
         "Operating System :: MacOS",
@@ -136,7 +133,7 @@
     "machine learning python spark gpu",
     install_requires=install_requires,
     package_dir={"recommenders": "recommenders"},
-    python_requires=">=3.6, <3.10",
+    python_requires=">=3.8, <3.12",
     packages=find_packages(where=".", exclude=["contrib", "docs", "examples", "scenarios", "tests", "tools"]),
     setup_requires=["numpy>=1.15"]
 )

From ffd8b9e13dafdb96ac52a34f0e614c2059b7dae5 Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Wed, 7 Jun 2023 09:57:41 +0800
Subject: [PATCH 02/61] Correct upper bound version for category_encoders

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 0db28d225..3a49bf185 100644
--- a/setup.py
+++ b/setup.py
@@ -39,7 +39,7 @@
     "seaborn>=0.12.0,<1",
     "transformers>=4.26.0,<5",  # requires pyyaml
     "bottleneck>=1.3.7,<2",
-    "category_encoders>=2.6.0,<2",
+    "category_encoders>=2.6.0,<3",
     "jinja2>=3.1.0,<3.2",
     "cornac>=1.15.2,<2",
     "retrying>=1.3.4",

From 644aa5d70468d391bfe696293727a5ae6eb17391 Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Wed, 7 Jun 2023 10:25:15 +0800
Subject: [PATCH 03/61] Add tests for Python 3.10 and 3.11

---
 .github/workflows/azureml-cpu-nightly.yml   | 2 +-
 .github/workflows/azureml-gpu-nightly.yml   | 2 +-
 .github/workflows/azureml-spark-nightly.yml | 2 +-
 .github/workflows/azureml-unit-tests.yml    | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/azureml-cpu-nightly.yml b/.github/workflows/azureml-cpu-nightly.yml
index 36e333551..e52058a2c 100644
--- a/.github/workflows/azureml-cpu-nightly.yml
+++ b/.github/workflows/azureml-cpu-nightly.yml
@@ -67,7 +67,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.7"', '"python=3.8"', '"python=3.9"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
         test-group:  ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/.github/workflows/azureml-gpu-nightly.yml b/.github/workflows/azureml-gpu-nightly.yml
index dc4601f10..94ac06a00 100644
--- a/.github/workflows/azureml-gpu-nightly.yml
+++ b/.github/workflows/azureml-gpu-nightly.yml
@@ -67,7 +67,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.7"', '"python=3.8"', '"python=3.9"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
         test-group:  ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/.github/workflows/azureml-spark-nightly.yml b/.github/workflows/azureml-spark-nightly.yml
index 74695b1ce..66f981abc 100644
--- a/.github/workflows/azureml-spark-nightly.yml
+++ b/.github/workflows/azureml-spark-nightly.yml
@@ -66,7 +66,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.7"', '"python=3.8"', '"python=3.9"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
         test-group:  ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/.github/workflows/azureml-unit-tests.yml b/.github/workflows/azureml-unit-tests.yml
index 64761d52c..2e28740b8 100644
--- a/.github/workflows/azureml-unit-tests.yml
+++ b/.github/workflows/azureml-unit-tests.yml
@@ -54,7 +54,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.7"', '"python=3.8"', '"python=3.9"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
         test-group:  ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code

From 900345083e4629954b16e03849fb06bc34a2d9e8 Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Wed, 7 Jun 2023 10:54:19 +0800
Subject: [PATCH 04/61] Remove dependencies that others require

---
 setup.py | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/setup.py b/setup.py
index 3a49bf185..3a5835391 100644
--- a/setup.py
+++ b/setup.py
@@ -28,24 +28,22 @@
 
 install_requires = [
     "pandas>1.5.2,<2.1",  # requires numpy
-    "tqdm>=4.65.0,<5",
-    "matplotlib>=3.6.0,<4",
-    "scikit-learn>=1.1.3,<2",  # 1.0.2 may not support Python 3.10.  requires scipy
+    "scikit-learn>=1.1.3,<2",  # requires scipy
     "numba>=0.57.0,<1",
     "lightfm>=1.17,<2",
     "lightgbm>=3.3.2,<4",
-    "memory_profiler>=0.61.0,<1",
-    "nltk>=3.8.1,<4",
-    "seaborn>=0.12.0,<1",
-    "transformers>=4.26.0,<5",  # requires pyyaml
+    "memory-profiler>=0.61.0,<1",
+    "nltk>=3.8.1,<4",  # requires tqdm
+    "seaborn>=0.12.0,<1",  # requires matplotlib
+    "transformers>=4.26.0,<5",  # requires pyyaml, tqdm
     "bottleneck>=1.3.7,<2",
-    "category_encoders>=2.6.0,<3",
+    "category-encoders>=2.6.0,<3",
     "jinja2>=3.1.0,<3.2",
-    "cornac>=1.15.2,<2",
+    "cornac>=1.15.2,<2",  # requires tqdm
     "retrying>=1.3.4",
     "pandera[strategies]>=0.15.0",  # For generating fake datasets
     "scikit-surprise>=1.1.3",
-    "scrapbook>=0.5.0,<1.0.0",
+    "scrapbook>=0.5.0,<1.0.0",  # requires tqdm, papermill
 ]
 
 # shared dependencies
@@ -53,10 +51,8 @@
     "examples": [
         "azure-mgmt-cosmosdb>=9.0.0,<10",
         "hyperopt>=0.2.7,<1",
-        "ipykernel>=6.20.1,<7",
         "notebook>=6.5.4,<8",
         "locust>=2.15.1,<3",
-        "papermill>=2.4.0,<3",
     ],
     "gpu": [
         "nvidia-ml-py3>=11.510.69",
@@ -68,7 +64,7 @@
         "fastai>=2.7.11,<3",
     ],
     "spark": [
-        "databricks_cli>=0.17.7,<1",
+        "databricks-cli>=0.17.7,<1",
         "pyarrow>=10.0.1",
         "pyspark>=3.0.1,<=3.4.0",
     ],

From 793ec8785a647bd97c03d1f979e9b831922f9a1b Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Fri, 9 Jun 2023 11:44:20 +0800
Subject: [PATCH 05/61] Update nvidia-ml-py and tensorflow version

---
 setup.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/setup.py b/setup.py
index c07825fcc..1d9228d85 100644
--- a/setup.py
+++ b/setup.py
@@ -54,10 +54,9 @@
         "locust>=2.15.1,<3",
     ],
     "gpu": [
-        "nvidia-ml-py3>=11.510.69",
-        # TensorFlow compiled with CUDA 11.2, cudnn 8.1
-        "tensorflow~=2.6.1;python_version=='3.6'",
-        "tensorflow~=2.7.0;python_version>='3.7'",
+        "nvidia-ml-py>=11.510.69",
+        # TensorFlow compiled with CUDA 11.8, cudnn 8.6.0.163
+        "tensorflow~=2.12.0",
         "tf-slim>=1.1.0",
         "torch>=2.0.1",
         "fastai>=2.7.11,<3",

From 1556eb4c4ba528014bfff200c6730cc44f10952e Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Fri, 9 Jun 2023 12:22:27 +0800
Subject: [PATCH 06/61] Install system level dependencies for scipy

---
 .github/actions/azureml-test/action.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.github/actions/azureml-test/action.yml b/.github/actions/azureml-test/action.yml
index 72b7e7dea..73c65a041 100644
--- a/.github/actions/azureml-test/action.yml
+++ b/.github/actions/azureml-test/action.yml
@@ -68,6 +68,11 @@ inputs:
 runs:
   using: "composite"
   steps:
+    - name: Install system-level dependencies
+      shell: bash
+      run: |
+        # for scipy, see https://docs.scipy.org/doc/scipy/dev/contributor/building.html
+        sudo apt install -y gcc g++ gfortran libopenblas-dev liblapack-dev pkg-config
     - name: Setup python
       uses: actions/setup-python@v4
       with:

From 890a3fe14875418633d92878007e90b7cd2ab9dc Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Fri, 9 Jun 2023 14:58:32 +0800
Subject: [PATCH 07/61] Support from Python 3.8 to 3.11

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 398941ea3..3c0c40d0d 100644
--- a/setup.py
+++ b/setup.py
@@ -123,7 +123,7 @@
     "machine learning python spark gpu",
     install_requires=install_requires,
     package_dir={"recommenders": "recommenders"},
-    python_requires=">=3.6, <3.10",
+    python_requires=">=3.8, <3.12",
     packages=find_packages(
         where=".",
         exclude=["contrib", "docs", "examples", "scenarios", "tests", "tools"],

From c7bb84627e913091ac3af46a4ece820903b11a55 Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Fri, 9 Jun 2023 14:59:55 +0800
Subject: [PATCH 08/61] Remove unused system deps

---
 .github/actions/azureml-test/action.yml | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/.github/actions/azureml-test/action.yml b/.github/actions/azureml-test/action.yml
index 73c65a041..72b7e7dea 100644
--- a/.github/actions/azureml-test/action.yml
+++ b/.github/actions/azureml-test/action.yml
@@ -68,11 +68,6 @@ inputs:
 runs:
   using: "composite"
   steps:
-    - name: Install system-level dependencies
-      shell: bash
-      run: |
-        # for scipy, see https://docs.scipy.org/doc/scipy/dev/contributor/building.html
-        sudo apt install -y gcc g++ gfortran libopenblas-dev liblapack-dev pkg-config
     - name: Setup python
       uses: actions/setup-python@v4
       with:

From 7f2e29896715107ce1c61701b78657944cfbe1e4 Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Fri, 9 Jun 2023 15:43:14 +0800
Subject: [PATCH 09/61] Drop Python 3.11 because some packages do not support
 3.11

---
 .github/workflows/azureml-cpu-nightly.yml   | 2 +-
 .github/workflows/azureml-gpu-nightly.yml   | 2 +-
 .github/workflows/azureml-spark-nightly.yml | 2 +-
 .github/workflows/azureml-unit-tests.yml    | 2 +-
 setup.py                                    | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/azureml-cpu-nightly.yml b/.github/workflows/azureml-cpu-nightly.yml
index e52058a2c..5010b35ad 100644
--- a/.github/workflows/azureml-cpu-nightly.yml
+++ b/.github/workflows/azureml-cpu-nightly.yml
@@ -67,7 +67,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
         test-group:  ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/.github/workflows/azureml-gpu-nightly.yml b/.github/workflows/azureml-gpu-nightly.yml
index 94ac06a00..d7b485528 100644
--- a/.github/workflows/azureml-gpu-nightly.yml
+++ b/.github/workflows/azureml-gpu-nightly.yml
@@ -67,7 +67,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
         test-group:  ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/.github/workflows/azureml-spark-nightly.yml b/.github/workflows/azureml-spark-nightly.yml
index 66f981abc..b0d085fcf 100644
--- a/.github/workflows/azureml-spark-nightly.yml
+++ b/.github/workflows/azureml-spark-nightly.yml
@@ -66,7 +66,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
         test-group:  ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/.github/workflows/azureml-unit-tests.yml b/.github/workflows/azureml-unit-tests.yml
index 2e28740b8..89854ef99 100644
--- a/.github/workflows/azureml-unit-tests.yml
+++ b/.github/workflows/azureml-unit-tests.yml
@@ -54,7 +54,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
         test-group:  ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/setup.py b/setup.py
index 3c0c40d0d..1107dada1 100644
--- a/setup.py
+++ b/setup.py
@@ -123,7 +123,7 @@
     "machine learning python spark gpu",
     install_requires=install_requires,
     package_dir={"recommenders": "recommenders"},
-    python_requires=">=3.8, <3.12",
+    python_requires=">=3.8, <3.11",
     packages=find_packages(
         where=".",
         exclude=["contrib", "docs", "examples", "scenarios", "tests", "tools"],

From 09069f7ce8b23dc91d70bea577bb15603f07e823 Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Sat, 10 Jun 2023 09:47:02 +0800
Subject: [PATCH 10/61] Install dependencies for scipy in docker image

---
 .../submit_groupwise_azureml_pytest.py        | 36 +++++++++++--------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
index 190089054..4a34127e5 100644
--- a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
+++ b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
@@ -37,7 +37,6 @@
 """
 import argparse
 import logging
-import glob
 
 from azureml.core.authentication import AzureCliAuthentication
 from azureml.core import Workspace
@@ -146,8 +145,7 @@ def setup_persistent_compute_target(workspace, cluster_name, vm_size, max_nodes)
 
 def create_run_config(
     cpu_cluster,
-    docker_proc_type,
-    workspace,
+    docker_image,
     add_gpu_dependencies,
     add_spark_dependencies,
     conda_pkg_jdk,
@@ -166,8 +164,7 @@ def create_run_config(
                                                 the following:
                                                 - Reco_cpu_test
                                                 - Reco_gpu_test
-            docker_proc_type (str)          : processor type, cpu or gpu
-            workspace                       : workspace reference
+            docker_image (str)              : docker image for cpu or gpu
             add_gpu_dependencies (bool)     : True if gpu packages should be
                                         added to the conda environment, else False
             add_spark_dependencies (bool)   : True if PySpark packages should be
@@ -181,7 +178,21 @@ def create_run_config(
     run_azuremlcompute = RunConfiguration()
     run_azuremlcompute.target = cpu_cluster
     run_azuremlcompute.environment.docker.enabled = True
-    run_azuremlcompute.environment.docker.base_image = docker_proc_type
+    # See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-train-with-custom-image?view=azureml-api-1#use-a-custom-dockerfile-optional
+    run_azuremlcompute.environment.docker.base_image = None
+    run_azuremlcompute.environment.docker.base_dockerfile = f"""
+    FROM {docker_image}
+    # Install system-level deps for scipy.  See
+    # https://docs.scipy.org/doc/scipy/dev/contributor/building.html
+    RUN apt-get update && \
+        apt-get install -y \
+        gcc \
+        g++ \
+        gfortran \
+        libopenblas-dev \
+        liblapack-dev \
+        pkg-config
+    """
 
     # Use conda_dependencies.yml to create a conda environment in
     # the Docker image for execution
@@ -425,13 +436,11 @@ def create_arg_parser():
     args = create_arg_parser()
 
     if args.dockerproc == "cpu":
-        from azureml.core.runconfig import DEFAULT_CPU_IMAGE
-
-        docker_proc_type = DEFAULT_CPU_IMAGE
+        # https://github.com/Azure/AzureML-Containers/blob/master/base/cpu/openmpi4.1.0-ubuntu22.04
+        docker_image = "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04"
     else:
-        from azureml.core.runconfig import DEFAULT_GPU_IMAGE
-
-        docker_proc_type = DEFAULT_GPU_IMAGE
+        # https://github.com/Azure/AzureML-Containers/blob/master/base/gpu/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04
+        docker_image = "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04"
 
     cli_auth = AzureCliAuthentication()
 
@@ -452,8 +461,7 @@ def create_arg_parser():
 
     run_config = create_run_config(
         cpu_cluster=cpu_cluster,
-        docker_proc_type=docker_proc_type,
-        workspace=workspace,
+        docker_image=docker_image,
         add_gpu_dependencies=args.add_gpu_dependencies,
         add_spark_dependencies=args.add_spark_dependencies,
         conda_pkg_jdk=args.conda_pkg_jdk,

From 643fed667ecddd715ecaaac62a88edb0849ad87a Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Sat, 10 Jun 2023 10:23:17 +0800
Subject: [PATCH 11/61] Change docker image

---
 .../azureml_tests/submit_groupwise_azureml_pytest.py  | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
index 4a34127e5..af7f931b8 100644
--- a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
+++ b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
@@ -435,12 +435,15 @@ def create_arg_parser():
     logger = logging.getLogger("submit_groupwise_azureml_pytest.py")
     args = create_arg_parser()
 
+    # See Dockerfiles at https://github.com/Azure/AzureML-Containers/tree/master/base
     if args.dockerproc == "cpu":
-        # https://github.com/Azure/AzureML-Containers/blob/master/base/cpu/openmpi4.1.0-ubuntu22.04
-        docker_image = "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04"
+        from azureml.core.runconfig import DEFAULT_CPU_IMAGE
+
+        docker_image = DEFAULT_CPU_IMAGE
     else:
-        # https://github.com/Azure/AzureML-Containers/blob/master/base/gpu/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04
-        docker_image = "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04"
+        from azureml.core.runconfig import DEFAULT_GPU_IMAGE
+
+        docker_image = DEFAULT_GPU_IMAGE
 
     cli_auth = AzureCliAuthentication()
 

From db4c9c3729e6bdb9577647ad8bb7da31d9dfd230 Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Sat, 10 Jun 2023 16:31:17 +0800
Subject: [PATCH 12/61] Add pip==20.1.1

---
 tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
index af7f931b8..194490129 100644
--- a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
+++ b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
@@ -202,6 +202,7 @@ def create_run_config(
 
     conda_dep = CondaDependencies()
     conda_dep.add_conda_package(conda_pkg_python)
+    conda_dep.add_conda_package("pip==20.1.1")
     conda_dep.add_pip_package(
         "pymanopt@https://github.com/pymanopt/pymanopt/archive/fb36a272cdeecb21992cfd9271eb82baafeb316d.zip"
     )

From 9e05e4fa5d339f0ff04c1ad585366a5476c8a199 Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Sat, 10 Jun 2023 16:39:17 +0800
Subject: [PATCH 13/61] Correct conda package format

---
 tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
index 194490129..2f407fbf8 100644
--- a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
+++ b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
@@ -202,7 +202,7 @@ def create_run_config(
 
     conda_dep = CondaDependencies()
     conda_dep.add_conda_package(conda_pkg_python)
-    conda_dep.add_conda_package("pip==20.1.1")
+    conda_dep.add_conda_package("pip=20.1.1")
     conda_dep.add_pip_package(
         "pymanopt@https://github.com/pymanopt/pymanopt/archive/fb36a272cdeecb21992cfd9271eb82baafeb316d.zip"
     )

From e1d6acf75807ac04678d2ecb54d608eec628bf55 Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Sat, 10 Jun 2023 17:32:30 +0800
Subject: [PATCH 14/61] Remove pip downgrade code

---
 tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
index 2f407fbf8..af7f931b8 100644
--- a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
+++ b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
@@ -202,7 +202,6 @@ def create_run_config(
 
     conda_dep = CondaDependencies()
     conda_dep.add_conda_package(conda_pkg_python)
-    conda_dep.add_conda_package("pip=20.1.1")
     conda_dep.add_pip_package(
         "pymanopt@https://github.com/pymanopt/pymanopt/archive/fb36a272cdeecb21992cfd9271eb82baafeb316d.zip"
     )

From b71c4ed66991f8eddfd80a8afbff05b34591c7ee Mon Sep 17 00:00:00 2001
From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com>
Date: Sun, 11 Jun 2023 09:21:31 +0800
Subject: [PATCH 15/61] Use docker images for ubuntu 22.04

---
 setup.py                                           |  1 +
 .../submit_groupwise_azureml_pytest.py             | 14 +++++---------
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/setup.py b/setup.py
index 1107dada1..e8353577c 100644
--- a/setup.py
+++ b/setup.py
@@ -114,6 +114,7 @@
         "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
         "Operating System :: Microsoft :: Windows",
         "Operating System :: POSIX :: Linux",
         "Operating System :: MacOS",
diff --git a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
index af7f931b8..030877356 100644
--- a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
+++ b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
@@ -186,12 +186,11 @@ def create_run_config(
     # https://docs.scipy.org/doc/scipy/dev/contributor/building.html
     RUN apt-get update && \
         apt-get install -y \
-        gcc \
-        g++ \
         gfortran \
         libopenblas-dev \
         liblapack-dev \
         pkg-config
+    RUN apt-get install -y git
     """
 
     # Use conda_dependencies.yml to create a conda environment in
@@ -435,15 +434,12 @@ def create_arg_parser():
     logger = logging.getLogger("submit_groupwise_azureml_pytest.py")
     args = create_arg_parser()
 
-    # See Dockerfiles at https://github.com/Azure/AzureML-Containers/tree/master/base
     if args.dockerproc == "cpu":
-        from azureml.core.runconfig import DEFAULT_CPU_IMAGE
-
-        docker_image = DEFAULT_CPU_IMAGE
+        # https://github.com/Azure/AzureML-Containers/blob/master/base/cpu/openmpi4.1.0-ubuntu22.04
+        docker_image = "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04"
     else:
-        from azureml.core.runconfig import DEFAULT_GPU_IMAGE
-
-        docker_image = DEFAULT_GPU_IMAGE
+        # https://github.com/Azure/AzureML-Containers/blob/master/base/gpu/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04
+        docker_image = "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04"
 
     cli_auth = AzureCliAuthentication()
 

From d80002ed494947747a104a98139e488eb7782470 Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Sat, 2 Sep 2023 09:52:19 +0800
Subject: [PATCH 16/61] Replace pandas.util.testing with pandas.testing

---
 tests/unit/recommenders/evaluation/test_python_evaluation.py | 2 +-
 tests/unit/recommenders/evaluation/test_spark_evaluation.py  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/unit/recommenders/evaluation/test_python_evaluation.py b/tests/unit/recommenders/evaluation/test_python_evaluation.py
index e5837fc66..cd54ec36b 100644
--- a/tests/unit/recommenders/evaluation/test_python_evaluation.py
+++ b/tests/unit/recommenders/evaluation/test_python_evaluation.py
@@ -6,7 +6,7 @@
 import pytest
 from unittest.mock import Mock
 from sklearn.preprocessing import minmax_scale
-from pandas.util.testing import assert_frame_equal
+from pandas.testing import assert_frame_equal
 
 from recommenders.utils.constants import (
     DEFAULT_USER_COL,
diff --git a/tests/unit/recommenders/evaluation/test_spark_evaluation.py b/tests/unit/recommenders/evaluation/test_spark_evaluation.py
index 9cf35ee3e..c45f095ba 100644
--- a/tests/unit/recommenders/evaluation/test_spark_evaluation.py
+++ b/tests/unit/recommenders/evaluation/test_spark_evaluation.py
@@ -4,7 +4,7 @@
 import numpy as np
 import pandas as pd
 import pytest
-from pandas.util.testing import assert_frame_equal
+from pandas.testing import assert_frame_equal
 
 from recommenders.evaluation.python_evaluation import (
     precision_at_k,

From e084412a5ec5ef3e5fab2f13832395a18c996a5c Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Sat, 2 Sep 2023 10:52:54 +0800
Subject: [PATCH 17/61] Remove nonexistent argument check_less_precise of
 assert_frame_equal()

---
 .../unit/recommenders/evaluation/test_spark_evaluation.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/tests/unit/recommenders/evaluation/test_spark_evaluation.py b/tests/unit/recommenders/evaluation/test_spark_evaluation.py
index c45f095ba..1b740a91c 100644
--- a/tests/unit/recommenders/evaluation/test_spark_evaluation.py
+++ b/tests/unit/recommenders/evaluation/test_spark_evaluation.py
@@ -441,7 +441,7 @@ def test_item_novelty(spark_diversity_data, target_metrics):
     )
     actual = evaluator.historical_item_novelty().toPandas()
     assert_frame_equal(
-        target_metrics["item_novelty"], actual, check_exact=False, check_less_precise=4
+        target_metrics["item_novelty"], actual, check_exact=False
     )
     assert np.all(actual["item_novelty"].values >= 0)
     # Test that novelty is zero when data includes only one item
@@ -482,7 +482,6 @@ def test_user_diversity(spark_diversity_data, target_metrics):
         target_metrics["user_diversity"],
         actual,
         check_exact=False,
-        check_less_precise=4,
     )
 
 
@@ -510,7 +509,6 @@ def test_user_item_serendipity(spark_diversity_data, target_metrics):
         target_metrics["user_item_serendipity"],
         actual,
         check_exact=False,
-        check_less_precise=4,
     )
 
 
@@ -529,7 +527,6 @@ def test_user_serendipity(spark_diversity_data, target_metrics):
         target_metrics["user_serendipity"],
         actual,
         check_exact=False,
-        check_less_precise=4,
     )
 
 
@@ -562,7 +559,6 @@ def test_user_diversity_item_feature_vector(spark_diversity_data, target_metrics
         target_metrics["user_diversity_item_feature_vector"],
         actual,
         check_exact=False,
-        check_less_precise=4,
     )
 
 
@@ -599,7 +595,6 @@ def test_user_item_serendipity_item_feature_vector(
         target_metrics["user_item_serendipity_item_feature_vector"],
         actual,
         check_exact=False,
-        check_less_precise=4,
     )
 
 
@@ -620,7 +615,6 @@ def test_user_serendipity_item_feature_vector(spark_diversity_data, target_metri
         target_metrics["user_serendipity_item_feature_vector"],
         actual,
         check_exact=False,
-        check_less_precise=4,
     )
 
 

From a6054048bd0576be0728b021c40bf672883efa07 Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Mon, 4 Sep 2023 09:12:01 +0800
Subject: [PATCH 18/61] Remove tests for sarplus for Python 3.7

---
 .github/workflows/sarplus.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/sarplus.yml b/.github/workflows/sarplus.yml
index e5a25fa14..05c0923a2 100644
--- a/.github/workflows/sarplus.yml
+++ b/.github/workflows/sarplus.yml
@@ -36,10 +36,10 @@ jobs:
     # Test pysarplus with different versions of Python.
     # Package pysarplus and upload as GitHub workflow artifact when merged into
     # the main branch.
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"]
+        python-version: ["3.8", "3.9", "3.10"]
     steps:
       - uses: actions/checkout@v3
 

From 40361f4b007979b62e422414f48af39dfac58907 Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Mon, 4 Sep 2023 09:13:28 +0800
Subject: [PATCH 19/61] Fixed error: 'DataFrame' object has no attribute
 'append'

---
 recommenders/models/rlrmc/RLRMCdataset.py |  4 ++--
 setup.py                                  | 10 +++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/recommenders/models/rlrmc/RLRMCdataset.py b/recommenders/models/rlrmc/RLRMCdataset.py
index 6b1329d1d..7670105b3 100644
--- a/recommenders/models/rlrmc/RLRMCdataset.py
+++ b/recommenders/models/rlrmc/RLRMCdataset.py
@@ -68,8 +68,8 @@ def _data_processing(self, train, validation=None, test=None, mean_center=True):
         """
         # Data processing and reindexing code is adopted from https://github.com/Microsoft/Recommenders/blob/main/recommenders/models/ncf/dataset.py
         # If validation dataset is None
-        df = train if validation is None else train.append(validation)
-        df = df if test is None else df.append(test)
+        df = train if validation is None else pd.concat([train, validation])
+        df = df if test is None else pd.concat([df, test])
 
         # Reindex user and item index
         if self.user_idx is None:
diff --git a/setup.py b/setup.py
index 1524611dc..15829f73d 100644
--- a/setup.py
+++ b/setup.py
@@ -27,15 +27,15 @@
     version += ".post" + str(int(time.time()))
 
 install_requires = [
-    "pandas>1.5.2,<2.1",  # requires numpy
+    "pandas>1.5.2,<1.6",  # requires numpy
     "scikit-learn>=1.1.3,<2",  # requires scipy
     "numba>=0.57.0,<1",
     "lightfm>=1.17,<2",
-    "lightgbm>=3.3.2,<4",
+    "lightgbm>=3.3.2,<5",
     "memory-profiler>=0.61.0,<1",
     "nltk>=3.8.1,<4",  # requires tqdm
     "seaborn>=0.12.0,<1",  # requires matplotlib
-    "transformers>=4.26.0,<5",  # requires pyyaml, tqdm
+    "transformers>=4.27.0,<5",  # requires pyyaml, tqdm
     "category-encoders>=2.6.0,<3",
     "jinja2>=3.1.0,<3.2",
     "cornac>=1.15.2,<2",  # requires tqdm
@@ -44,8 +44,8 @@
     "scikit-surprise>=1.1.3",
     "scrapbook>=0.5.0,<1.0.0",  # requires tqdm, papermill
     "hyperopt>=0.2.7,<1",
-    "notebook>=6.5.4,<8",  # requires jupyter, ipykernel
-    "locust>=2.15.1,<3",
+    "notebook>=7.0.0,<8",  # requires jupyter, ipykernel
+    "locust>=2.12.2,<3",
 ]
 
 # shared dependencies

From 9364c9b7305fa0a8073816cee3c4a157afe94b65 Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Mon, 4 Sep 2023 14:52:43 +0800
Subject: [PATCH 20/61] Add hypothesis<6.83.1

---
 setup.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/setup.py b/setup.py
index 15829f73d..93266abcc 100644
--- a/setup.py
+++ b/setup.py
@@ -46,6 +46,9 @@
     "hyperopt>=0.2.7,<1",
     "notebook>=7.0.0,<8",  # requires jupyter, ipykernel
     "locust>=2.12.2,<3",
+    # 6.83.1 introduced a non-existent attribute '_deferred_pprinters' of IPython.lib.pretty in
+    # https://github.com/HypothesisWorks/hypothesis/commit/5ea8e0c3e6da1cd9fb3f302124dc74791c14db11
+    "hypothesis<6.83.1",
 ]
 
 # shared dependencies

From be86a29bc2a0e67e45de9689369395965e08f5f2 Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Mon, 4 Sep 2023 15:40:10 +0800
Subject: [PATCH 21/61] Use ubuntu-22.04 instead of latest

Co-authored-by: Miguel Fierro <3491412+miguelgfierro@users.noreply.github.com>
---
 .github/workflows/sarplus.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/sarplus.yml b/.github/workflows/sarplus.yml
index 05c0923a2..e0e483ab5 100644
--- a/.github/workflows/sarplus.yml
+++ b/.github/workflows/sarplus.yml
@@ -36,7 +36,7 @@ jobs:
     # Test pysarplus with different versions of Python.
     # Package pysarplus and upload as GitHub workflow artifact when merged into
     # the main branch.
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     strategy:
       matrix:
         python-version: ["3.8", "3.9", "3.10"]

From 0641d9531384f2f30b2c16cb368e6b57f32cd829 Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Mon, 4 Sep 2023 15:42:13 +0800
Subject: [PATCH 22/61] Update comments

---
 setup.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 93266abcc..f8e0f5bef 100644
--- a/setup.py
+++ b/setup.py
@@ -27,7 +27,8 @@
     version += ".post" + str(int(time.time()))
 
 install_requires = [
-    "pandas>1.5.2,<1.6",  # requires numpy
+    # requires numpy; pandas>=2.0 removes DataFrame.append(), which is used in scrapbook.models
+    "pandas>1.5.2,<1.6",
     "scikit-learn>=1.1.3,<2",  # requires scipy
     "numba>=0.57.0,<1",
     "lightfm>=1.17,<2",
@@ -46,7 +47,7 @@
     "hyperopt>=0.2.7,<1",
     "notebook>=7.0.0,<8",  # requires jupyter, ipykernel
     "locust>=2.12.2,<3",
-    # 6.83.1 introduced a non-existent attribute '_deferred_pprinters' of IPython.lib.pretty in
+    # hypothesis 6.83.1 introduced a reference to the non-existent attribute '_deferred_pprinters' of IPython.lib.pretty in
     # https://github.com/HypothesisWorks/hypothesis/commit/5ea8e0c3e6da1cd9fb3f302124dc74791c14db11
     "hypothesis<6.83.1",
 ]

From 60e847a12120b068e3319c4f72dd3d6bfefcd139 Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Mon, 4 Sep 2023 16:56:56 +0800
Subject: [PATCH 23/61] Add python 3.11

Co-authored-by: Miguel Fierro <3491412+miguelgfierro@users.noreply.github.com>
---
 .github/workflows/azureml-cpu-nightly.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/azureml-cpu-nightly.yml b/.github/workflows/azureml-cpu-nightly.yml
index 14f45531c..337035242 100644
--- a/.github/workflows/azureml-cpu-nightly.yml
+++ b/.github/workflows/azureml-cpu-nightly.yml
@@ -67,7 +67,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
         test-group:  ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code

From 313de47909fa18d5746fde038e9c5e3dba9af751 Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Mon, 4 Sep 2023 16:59:58 +0800
Subject: [PATCH 24/61] Add python 3.11

Co-authored-by: Miguel Fierro <3491412+miguelgfierro@users.noreply.github.com>
---
 .github/workflows/azureml-gpu-nightly.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/azureml-gpu-nightly.yml b/.github/workflows/azureml-gpu-nightly.yml
index 1b5e7ce74..a342b45c8 100644
--- a/.github/workflows/azureml-gpu-nightly.yml
+++ b/.github/workflows/azureml-gpu-nightly.yml
@@ -67,7 +67,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
         test-group:  ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code

From 91be6aec94e4edf978b60582a578b7d2e975a24c Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Mon, 4 Sep 2023 17:00:27 +0800
Subject: [PATCH 25/61] Add python 3.11

Co-authored-by: Miguel Fierro <3491412+miguelgfierro@users.noreply.github.com>
---
 .github/workflows/azureml-spark-nightly.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/azureml-spark-nightly.yml b/.github/workflows/azureml-spark-nightly.yml
index 6a12190c0..b784a3bea 100644
--- a/.github/workflows/azureml-spark-nightly.yml
+++ b/.github/workflows/azureml-spark-nightly.yml
@@ -66,7 +66,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
         test-group:  ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code

From 015ce4ae90bfdbccb47566f8701b75eaf3e07918 Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Mon, 4 Sep 2023 17:01:11 +0800
Subject: [PATCH 26/61] Add python 3.11

Co-authored-by: Miguel Fierro <3491412+miguelgfierro@users.noreply.github.com>
---
 .github/workflows/azureml-unit-tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/azureml-unit-tests.yml b/.github/workflows/azureml-unit-tests.yml
index 92c59f7ef..977961a1e 100644
--- a/.github/workflows/azureml-unit-tests.yml
+++ b/.github/workflows/azureml-unit-tests.yml
@@ -54,7 +54,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
         test-group:  ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code

From 76901c6e1ee74921db93e373d452a00b8acc06b3 Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Mon, 4 Sep 2023 17:02:06 +0800
Subject: [PATCH 27/61] Add python 3.11

Co-authored-by: Miguel Fierro <3491412+miguelgfierro@users.noreply.github.com>
---
 .github/workflows/sarplus.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/sarplus.yml b/.github/workflows/sarplus.yml
index e0e483ab5..66dd7d829 100644
--- a/.github/workflows/sarplus.yml
+++ b/.github/workflows/sarplus.yml
@@ -39,7 +39,7 @@ jobs:
     runs-on: ubuntu-22.04
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10"]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
     steps:
       - uses: actions/checkout@v3
 

From 22ac9e2590b239c9f0d8a0269bb457de82a8cb25 Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Mon, 4 Sep 2023 17:07:24 +0800
Subject: [PATCH 28/61] Add python 3.11

---
 setup.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index f8e0f5bef..384d34cf5 100644
--- a/setup.py
+++ b/setup.py
@@ -8,7 +8,7 @@
 import sys
 import time
 
-# workaround for enabling editable user pip installs
+# Workaround for enabling editable user pip installs
 site.ENABLE_USER_SITE = "--user" in sys.argv[1:]
 
 # Version
@@ -111,10 +111,10 @@
         "Topic :: Scientific/Engineering :: Artificial Intelligence",
         "Topic :: Software Development :: Libraries :: Python Modules",
         "License :: OSI Approved :: MIT License",
-        "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
         "Operating System :: POSIX :: Linux",
     ],
     extras_require=extras_require,
@@ -122,7 +122,7 @@
     "machine learning python spark gpu",
     install_requires=install_requires,
     package_dir={"recommenders": "recommenders"},
-    python_requires=">=3.8, <3.11",
+    python_requires=">=3.8, <=3.11",
     packages=find_packages(
         where=".",
         exclude=["contrib", "docs", "examples", "scenarios", "tests", "tools"],

From c3a70302f3b6f530d6737f097d7fc5a494790e70 Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Mon, 4 Sep 2023 19:20:23 +0800
Subject: [PATCH 29/61] Remove python 3.11

---
 .github/workflows/azureml-cpu-nightly.yml   | 2 +-
 .github/workflows/azureml-gpu-nightly.yml   | 2 +-
 .github/workflows/azureml-spark-nightly.yml | 2 +-
 .github/workflows/azureml-unit-tests.yml    | 2 +-
 .github/workflows/sarplus.yml               | 2 +-
 setup.py                                    | 3 +--
 6 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/azureml-cpu-nightly.yml b/.github/workflows/azureml-cpu-nightly.yml
index 337035242..14f45531c 100644
--- a/.github/workflows/azureml-cpu-nightly.yml
+++ b/.github/workflows/azureml-cpu-nightly.yml
@@ -67,7 +67,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
         test-group:  ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/.github/workflows/azureml-gpu-nightly.yml b/.github/workflows/azureml-gpu-nightly.yml
index a342b45c8..1b5e7ce74 100644
--- a/.github/workflows/azureml-gpu-nightly.yml
+++ b/.github/workflows/azureml-gpu-nightly.yml
@@ -67,7 +67,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
         test-group:  ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/.github/workflows/azureml-spark-nightly.yml b/.github/workflows/azureml-spark-nightly.yml
index b784a3bea..6a12190c0 100644
--- a/.github/workflows/azureml-spark-nightly.yml
+++ b/.github/workflows/azureml-spark-nightly.yml
@@ -66,7 +66,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
         test-group:  ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/.github/workflows/azureml-unit-tests.yml b/.github/workflows/azureml-unit-tests.yml
index 977961a1e..92c59f7ef 100644
--- a/.github/workflows/azureml-unit-tests.yml
+++ b/.github/workflows/azureml-unit-tests.yml
@@ -54,7 +54,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
         test-group:  ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/.github/workflows/sarplus.yml b/.github/workflows/sarplus.yml
index 66dd7d829..e0e483ab5 100644
--- a/.github/workflows/sarplus.yml
+++ b/.github/workflows/sarplus.yml
@@ -39,7 +39,7 @@ jobs:
     runs-on: ubuntu-22.04
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.8", "3.9", "3.10"]
     steps:
       - uses: actions/checkout@v3
 
diff --git a/setup.py b/setup.py
index 384d34cf5..166049dca 100644
--- a/setup.py
+++ b/setup.py
@@ -114,7 +114,6 @@
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
-        "Programming Language :: Python :: 3.11",
         "Operating System :: POSIX :: Linux",
     ],
     extras_require=extras_require,
@@ -122,7 +121,7 @@
     "machine learning python spark gpu",
     install_requires=install_requires,
     package_dir={"recommenders": "recommenders"},
-    python_requires=">=3.8, <=3.11",
+    python_requires=">=3.8, <=3.10",
     packages=find_packages(
         where=".",
         exclude=["contrib", "docs", "examples", "scenarios", "tests", "tools"],

From 15fbf90eb993b17f592d8c980dece4de1bbdb2ad Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Thu, 22 Feb 2024 11:37:49 +0800
Subject: [PATCH 30/61] Pin pip=20.1.1

---
 tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
index 86d0c80ab..af8b22b19 100644
--- a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
+++ b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
@@ -197,12 +197,14 @@ def create_run_config(
     apt-get clean -y && \
     rm -rf /var/lib/apt/lists/*
 # Conda Environment
+# Pin pip=20.1.1 due to the issue: No module named 'ruamel'
+# See https://learn.microsoft.com/en-us/python/api/overview/azure/ml/install?view=azure-ml-py#troubleshooting
 ENV MINICONDA_VERSION py38_23.3.1-0
 ENV PATH /opt/miniconda/bin:$PATH
 ENV CONDA_PACKAGE 23.5.0
 RUN wget -qO /tmp/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh && \
     bash /tmp/miniconda.sh -bf -p /opt/miniconda && \
-    conda install conda=${CONDA_PACKAGE} -y && \
+    conda install -y conda=${CONDA_PACKAGE} python=3.8 pip=20.1.1 && \
     conda update --all -c conda-forge -y && \
     conda clean -ay && \
     rm -rf /opt/miniconda/pkgs && \

From a7f8346b3ef4842c7e6c7056f3e62e8141922845 Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Thu, 22 Feb 2024 13:21:34 +0800
Subject: [PATCH 31/61] Update dep versions

---
 setup.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/setup.py b/setup.py
index f3dd7854f..f6875cdba 100644
--- a/setup.py
+++ b/setup.py
@@ -27,19 +27,19 @@
     version += ".post" + str(int(time.time()))
 
 install_requires = [
-    "pandas>1.5.2,<1.6",  # requires numpy
+    "pandas>1.5.3,<3.0.0",  # requires numpy
     "scikit-learn>=1.1.3,<2",  # requires scipy
     "numba>=0.57.0,<1",
     "lightfm>=1.17,<2",
-    "lightgbm>=3.3.2,<5",
+    "lightgbm>=4.0.0,<5",
     "memory-profiler>=0.61.0,<1",
     "nltk>=3.8.1,<4",  # requires tqdm
-    "seaborn>=0.12.0,<1",  # requires matplotlib
+    "seaborn>=0.13.0,<1",  # requires matplotlib
     "transformers>=4.27.0,<5",  # requires pyyaml, tqdm
     "category-encoders>=2.6.0,<3",
     "jinja2>=3.1.0,<3.2",
     "cornac>=1.15.2,<2",  # requires tqdm
-    "retrying>=1.3.4",
+    "retrying>=1.3.4,<2",
     "pandera[strategies]>=0.6.5,<0.18;python_version<='3.8'",  # For generating fake datasets
     "pandera[strategies]>=0.15.0;python_version>='3.9'",
     "scikit-surprise>=1.1.3",
@@ -51,22 +51,22 @@
 # shared dependencies
 extras_require = {
     "gpu": [
-        "nvidia-ml-py>=11.510.69",
+        "nvidia-ml-py>=11.525.84",
         "tensorflow>=2.8.4,!=2.9.0.*,!=2.9.1,!=2.9.2,!=2.10.0.*,<3",
-        "tf-slim>=1.1.0",
-        "torch>=2.0.1",
+        "tf-slim>=1.1.0",  # No python_requires in its setup.py
+        "torch>=2.0.1,<3",
         "fastai>=2.7.11,<3",
     ],
     "spark": [
         "pyarrow>=10.0.1",
-        "pyspark>=3.0.1,<=3.4.0",
+        "pyspark>=3.3.0,<=4",
     ],
     "dev": [
-        "black>=23.3.0,<24",
+        "black>=23.3.0",
         "pytest>=7.2.1",
         "pytest-cov>=4.1.0",
         "pytest-mock>=3.10.0",  # for access to mock fixtures in pytest
-        "packaging>=20.9",     # for version comparison in test_dependency_security.py
+        "packaging>=22.0",     # for version comparison in test_dependency_security.py
     ],
 }
 # For the brave of heart

From 9f9c81531b86211af0227e166851821959c0486b Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Thu, 22 Feb 2024 14:53:47 +0800
Subject: [PATCH 32/61] Fix pandas import

---
 setup.py                                                    | 2 +-
 tests/unit/recommenders/evaluation/test_spark_evaluation.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index f6875cdba..cc784959f 100644
--- a/setup.py
+++ b/setup.py
@@ -27,7 +27,7 @@
     version += ".post" + str(int(time.time()))
 
 install_requires = [
-    "pandas>1.5.3,<3.0.0",  # requires numpy
+    "pandas>2.0.0,<3.0.0",  # requires numpy
     "scikit-learn>=1.1.3,<2",  # requires scipy
     "numba>=0.57.0,<1",
     "lightfm>=1.17,<2",
diff --git a/tests/unit/recommenders/evaluation/test_spark_evaluation.py b/tests/unit/recommenders/evaluation/test_spark_evaluation.py
index 278a2e287..55c064e8b 100644
--- a/tests/unit/recommenders/evaluation/test_spark_evaluation.py
+++ b/tests/unit/recommenders/evaluation/test_spark_evaluation.py
@@ -5,7 +5,7 @@
 import pytest
 import numpy as np
 import pandas as pd
-from pandas.util.testing import assert_frame_equal
+from pandas.testing import assert_frame_equal
 
 from recommenders.evaluation.python_evaluation import (
     precision_at_k,

From 2fdf5901aaaabf59c687163e46dbfe8ff86826c7 Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Thu, 22 Feb 2024 20:30:38 +0800
Subject: [PATCH 33/61] Set scipy <1.11.0 and sort dependencies alphabetically

---
 setup.py | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/setup.py b/setup.py
index cc784959f..7a558ca73 100644
--- a/setup.py
+++ b/setup.py
@@ -27,35 +27,36 @@
     version += ".post" + str(int(time.time()))
 
 install_requires = [
-    "pandas>2.0.0,<3.0.0",  # requires numpy
-    "scikit-learn>=1.1.3,<2",  # requires scipy
-    "numba>=0.57.0,<1",
+    "category-encoders>=2.6.0,<3",
+    "cornac>=1.15.2,<2",  # requires tqdm
+    "hyperopt>=0.2.7,<1",
+    "jinja2>=3.1.0,<3.2",
     "lightfm>=1.17,<2",
     "lightgbm>=4.0.0,<5",
+    "locust>=2.12.2,<3",
     "memory-profiler>=0.61.0,<1",
     "nltk>=3.8.1,<4",  # requires tqdm
-    "seaborn>=0.13.0,<1",  # requires matplotlib
-    "transformers>=4.27.0,<5",  # requires pyyaml, tqdm
-    "category-encoders>=2.6.0,<3",
-    "jinja2>=3.1.0,<3.2",
-    "cornac>=1.15.2,<2",  # requires tqdm
-    "retrying>=1.3.4,<2",
+    "notebook>=7.0.0,<8",  # requires jupyter, ipykernel
+    "numba>=0.57.0,<1",
+    "pandas>2.0.0,<3.0.0",  # requires numpy
     "pandera[strategies]>=0.6.5,<0.18;python_version<='3.8'",  # For generating fake datasets
     "pandera[strategies]>=0.15.0;python_version>='3.9'",
+    "retrying>=1.3.4,<2",
+    "scikit-learn>=1.1.3,<2",  # requires scipy
     "scikit-surprise>=1.1.3",
-    "hyperopt>=0.2.7,<1",
-    "notebook>=7.0.0,<8",  # requires jupyter, ipykernel
-    "locust>=2.12.2,<3",
+    "scipy>=1.7.2,<1.11.0",  # FIXME: We limit <1.11.0 until #1954 is fixed
+    "seaborn>=0.13.0,<1",  # requires matplotlib
+    "transformers>=4.27.0,<5",  # requires pyyaml, tqdm
 ]
 
 # shared dependencies
 extras_require = {
     "gpu": [
+        "fastai>=2.7.11,<3",
         "nvidia-ml-py>=11.525.84",
         "tensorflow>=2.8.4,!=2.9.0.*,!=2.9.1,!=2.9.2,!=2.10.0.*,<3",
         "tf-slim>=1.1.0",  # No python_requires in its setup.py
         "torch>=2.0.1,<3",
-        "fastai>=2.7.11,<3",
     ],
     "spark": [
         "pyarrow>=10.0.1",
@@ -63,10 +64,10 @@
     ],
     "dev": [
         "black>=23.3.0",
+        "packaging>=22.0",     # for version comparison in test_dependency_security.py
         "pytest>=7.2.1",
         "pytest-cov>=4.1.0",
         "pytest-mock>=3.10.0",  # for access to mock fixtures in pytest
-        "packaging>=22.0",     # for version comparison in test_dependency_security.py
     ],
 }
 # For the brave of heart

From b2fef7a9d4705200e9be8c89a7290b1d36204acb Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Thu, 22 Feb 2024 22:04:48 +0800
Subject: [PATCH 34/61] Fix error caused by changes in scikit-learn

---
 recommenders/models/tfidf/tfidf_utils.py | 4 ++--
 setup.py                                 | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/recommenders/models/tfidf/tfidf_utils.py b/recommenders/models/tfidf/tfidf_utils.py
index 24575121c..6a6d22389 100644
--- a/recommenders/models/tfidf/tfidf_utils.py
+++ b/recommenders/models/tfidf/tfidf_utils.py
@@ -115,7 +115,7 @@ def clean_dataframe(self, df, cols_to_clean, new_col_name="cleaned_text"):
         return df
 
     def tokenize_text(
-        self, df_clean, text_col="cleaned_text", ngram_range=(1, 3), min_df=0
+        self, df_clean, text_col="cleaned_text", ngram_range=(1, 3), min_df=0.0
     ):
         """Tokenize the input text.
         For more details on the TfidfVectorizer, see https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html
@@ -124,7 +124,7 @@ def tokenize_text(
             df_clean (pandas.DataFrame): Dataframe with cleaned text in the new column.
             text_col (str): Name of column containing the cleaned text.
             ngram_range (tuple of int): The lower and upper boundary of the range of n-values for different n-grams to be extracted.
-            min_df (int): When building the vocabulary ignore terms that have a document frequency strictly lower than the given threshold.
+            min_df (float): When building the vocabulary ignore terms that have a document frequency strictly lower than the given threshold.
 
         Returns:
             TfidfVectorizer, pandas.Series:
diff --git a/setup.py b/setup.py
index 7a558ca73..72fc94819 100644
--- a/setup.py
+++ b/setup.py
@@ -42,7 +42,7 @@
     "pandera[strategies]>=0.6.5,<0.18;python_version<='3.8'",  # For generating fake datasets
     "pandera[strategies]>=0.15.0;python_version>='3.9'",
     "retrying>=1.3.4,<2",
-    "scikit-learn>=1.1.3,<2",  # requires scipy
+    "scikit-learn>=1.2.0,<2",  # requires scipy, and introduce breaking change affects feature_extraction.text.TfidfVectorizer.min_df
     "scikit-surprise>=1.1.3",
     "scipy>=1.7.2,<1.11.0",  # FIXME: We limit <1.11.0 until #1954 is fixed
     "seaborn>=0.13.0,<1",  # requires matplotlib

From 9a225d5464cc2bc9961dce41d4014dba8574deb8 Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Fri, 23 Feb 2024 11:01:33 +0800
Subject: [PATCH 35/61] Replace CollabDataBunch with CollabDataLoaders

---
 examples/00_quick_start/fastai_movielens.ipynb | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/examples/00_quick_start/fastai_movielens.ipynb b/examples/00_quick_start/fastai_movielens.ipynb
index 517673178..f32c93be6 100644
--- a/examples/00_quick_start/fastai_movielens.ipynb
+++ b/examples/00_quick_start/fastai_movielens.ipynb
@@ -46,7 +46,7 @@
                 "import fastai\n",
                 "from tempfile import TemporaryDirectory\n",
                 "\n",
-                "from fastai.collab import collab_learner, CollabDataBunch, load_learner\n",
+                "from fastai.collab import collab_learner, CollabDataLoaders, load_learner\n",
                 "\n",
                 "from recommenders.utils.constants import (\n",
                 "    DEFAULT_USER_COL as USER, \n",
@@ -258,11 +258,11 @@
             "outputs": [],
             "source": [
                 "with Timer() as preprocess_time:\n",
-                "    data = CollabDataBunch.from_df(train_valid_df, \n",
-                "                                   user_name=USER, \n",
-                "                                   item_name=ITEM, \n",
-                "                                   rating_name=RATING, \n",
-                "                                   valid_pct=0)\n"
+                "    data = CollabDataLoaders.from_df(train_valid_df, \n",
+                "                                     user_name=USER, \n",
+                "                                     item_name=ITEM, \n",
+                "                                     rating_name=RATING, \n",
+                "                                     valid_pct=0)\n"
             ]
         },
         {

From 5484d9bced6f1eae0d0e467a28f0db081550e7b0 Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Fri, 23 Feb 2024 12:17:37 +0800
Subject: [PATCH 36/61] Replace max_lr with lr_max

---
 examples/00_quick_start/fastai_movielens.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/00_quick_start/fastai_movielens.ipynb b/examples/00_quick_start/fastai_movielens.ipynb
index f32c93be6..227effc97 100644
--- a/examples/00_quick_start/fastai_movielens.ipynb
+++ b/examples/00_quick_start/fastai_movielens.ipynb
@@ -432,7 +432,7 @@
             ],
             "source": [
                 "with Timer() as train_time:\n",
-                "    learn.fit_one_cycle(EPOCHS, max_lr=5e-3)\n",
+                "    learn.fit_one_cycle(EPOCHS, lr_max=5e-3)\n",
                 "\n",
                 "print(\"Took {} seconds for training.\".format(train_time))"
             ]

From 69444044e4beda8b8d86d3c38decbed58172dfe3 Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Fri, 23 Feb 2024 15:02:15 +0800
Subject: [PATCH 37/61] Correct usage of load_learner in fastai

---
 examples/00_quick_start/fastai_movielens.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/00_quick_start/fastai_movielens.ipynb b/examples/00_quick_start/fastai_movielens.ipynb
index 227effc97..0417e59eb 100644
--- a/examples/00_quick_start/fastai_movielens.ipynb
+++ b/examples/00_quick_start/fastai_movielens.ipynb
@@ -478,7 +478,7 @@
             "metadata": {},
             "outputs": [],
             "source": [
-                "learner = load_learner(tmp.name, \"movielens_model.pkl\")"
+                "learner = load_learner(model_path)"
             ]
         },
         {

From 0e69106333c11b84a7d69e159191a514f3996b71 Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Fri, 23 Feb 2024 17:24:23 +0800
Subject: [PATCH 38/61] Replace learner.data.train_ds.x.classes.values() with
 learner.dls.classes.values()

---
 examples/00_quick_start/fastai_movielens.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/00_quick_start/fastai_movielens.ipynb b/examples/00_quick_start/fastai_movielens.ipynb
index 0417e59eb..b475d09cf 100644
--- a/examples/00_quick_start/fastai_movielens.ipynb
+++ b/examples/00_quick_start/fastai_movielens.ipynb
@@ -494,7 +494,7 @@
             "metadata": {},
             "outputs": [],
             "source": [
-                "total_users, total_items = learner.data.train_ds.x.classes.values()\n",
+                "total_users, total_items = learner.dls.classes.values()\n",
                 "total_items = total_items[1:]\n",
                 "total_users = total_users[1:]"
             ]

From 22ef9b9902525053b82c586cb2bbd73f6511696d Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Fri, 23 Feb 2024 18:22:38 +0800
Subject: [PATCH 39/61] Replace learner.data.train_ds.x.classes.values() with
 learner.dls.classes.values() in fastai_utils

---
 recommenders/models/fastai/fastai_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recommenders/models/fastai/fastai_utils.py b/recommenders/models/fastai/fastai_utils.py
index ab756c7e8..60500b1e7 100644
--- a/recommenders/models/fastai/fastai_utils.py
+++ b/recommenders/models/fastai/fastai_utils.py
@@ -51,7 +51,7 @@ def score(
         pandas.DataFrame: Result of recommendation
     """
     # replace values not known to the model with NaN
-    total_users, total_items = learner.data.train_ds.x.classes.values()
+    total_users, total_items = learner.dls.classes.values()
     test_df.loc[~test_df[user_col].isin(total_users), user_col] = np.nan
     test_df.loc[~test_df[item_col].isin(total_items), item_col] = np.nan
 

From a5fea78dd48952193d70abd221ac0c1b1709e572 Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Fri, 23 Feb 2024 20:02:16 +0800
Subject: [PATCH 40/61] Upgrade fastai code

---
 recommenders/models/fastai/fastai_utils.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/recommenders/models/fastai/fastai_utils.py b/recommenders/models/fastai/fastai_utils.py
index 60500b1e7..ccc7232d0 100644
--- a/recommenders/models/fastai/fastai_utils.py
+++ b/recommenders/models/fastai/fastai_utils.py
@@ -6,6 +6,7 @@
 import pandas as pd
 import fastai
 import fastprogress
+import torch
 from fastprogress.fastprogress import force_console_behavior
 
 from recommenders.utils import constants as cc
@@ -56,11 +57,11 @@ def score(
     test_df.loc[~test_df[item_col].isin(total_items), item_col] = np.nan
 
     # map ids to embedding ids
-    u = learner.get_idx(test_df[user_col], is_item=False)
-    m = learner.get_idx(test_df[item_col], is_item=True)
+    u = learner._get_idx(test_df[user_col], is_item=False)
+    m = learner._get_idx(test_df[item_col], is_item=True)
 
     # score the pytorch model
-    pred = learner.model.forward(u, m)
+    pred = learner.model.forward(torch.column_stack(u, m))
     scores = pd.DataFrame(
         {user_col: test_df[user_col], item_col: test_df[item_col], prediction_col: pred}
     )

From dccac172e9692f42b51cf970d1da2d4555d3ec9d Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Fri, 23 Feb 2024 21:19:56 +0800
Subject: [PATCH 41/61] Correct the usage of torch.column_stack()

---
 recommenders/models/fastai/fastai_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recommenders/models/fastai/fastai_utils.py b/recommenders/models/fastai/fastai_utils.py
index ccc7232d0..3188cb460 100644
--- a/recommenders/models/fastai/fastai_utils.py
+++ b/recommenders/models/fastai/fastai_utils.py
@@ -61,7 +61,7 @@ def score(
     m = learner._get_idx(test_df[item_col], is_item=True)
 
     # score the pytorch model
-    pred = learner.model.forward(torch.column_stack(u, m))
+    pred = learner.model.forward(torch.column_stack((u, m)))
     scores = pd.DataFrame(
         {user_col: test_df[user_col], item_col: test_df[item_col], prediction_col: pred}
     )

From d3b0ad7e71b2f88981cb30281dd0b7eab4eb8de7 Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Sat, 24 Feb 2024 09:16:59 +0800
Subject: [PATCH 42/61] Correct conversion from tensor to numpy

---
 recommenders/models/fastai/fastai_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recommenders/models/fastai/fastai_utils.py b/recommenders/models/fastai/fastai_utils.py
index 3188cb460..e5dc502aa 100644
--- a/recommenders/models/fastai/fastai_utils.py
+++ b/recommenders/models/fastai/fastai_utils.py
@@ -61,7 +61,7 @@ def score(
     m = learner._get_idx(test_df[item_col], is_item=True)
 
     # score the pytorch model
-    pred = learner.model.forward(torch.column_stack((u, m)))
+    pred = learner.model.forward(torch.column_stack((u, m))).detach().numpy()
     scores = pd.DataFrame(
         {user_col: test_df[user_col], item_col: test_df[item_col], prediction_col: pred}
     )

From d249bfe6aa5814d7219160ce0b850dd7b5dbe15b Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Sat, 24 Feb 2024 11:33:27 +0800
Subject: [PATCH 43/61] Remove duplicate dependencies jinja2 and packaging
 required by other packages

---
 setup.py | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/setup.py b/setup.py
index 72fc94819..57d22117e 100644
--- a/setup.py
+++ b/setup.py
@@ -27,16 +27,15 @@
     version += ".post" + str(int(time.time()))
 
 install_requires = [
-    "category-encoders>=2.6.0,<3",
-    "cornac>=1.15.2,<2",  # requires tqdm
+    "category-encoders>=2.6.0,<3",  # requires packaging
+    "cornac>=1.15.2,<2",  # requires packaging, tqdm
     "hyperopt>=0.2.7,<1",
-    "jinja2>=3.1.0,<3.2",
-    "lightfm>=1.17,<2",
+    "lightfm>=1.17,<2",  # requires requests
     "lightgbm>=4.0.0,<5",
-    "locust>=2.12.2,<3",
+    "locust>=2.12.2,<3",  # requires jinja2
     "memory-profiler>=0.61.0,<1",
     "nltk>=3.8.1,<4",  # requires tqdm
-    "notebook>=7.0.0,<8",  # requires jupyter, ipykernel
+    "notebook>=7.0.0,<8",  # requires ipykernel, jinja2, jupyter, nbconvert, nbformat, packaging, requests
     "numba>=0.57.0,<1",
     "pandas>2.0.0,<3.0.0",  # requires numpy
     "pandera[strategies]>=0.6.5,<0.18;python_version<='3.8'",  # For generating fake datasets
@@ -44,9 +43,9 @@
     "retrying>=1.3.4,<2",
     "scikit-learn>=1.2.0,<2",  # requires scipy, and introduce breaking change affects feature_extraction.text.TfidfVectorizer.min_df
     "scikit-surprise>=1.1.3",
-    "scipy>=1.7.2,<1.11.0",  # FIXME: We limit <1.11.0 until #1954 is fixed
-    "seaborn>=0.13.0,<1",  # requires matplotlib
-    "transformers>=4.27.0,<5",  # requires pyyaml, tqdm
+    "scipy>=1.10.1,<1.11.0",  # FIXME: We limit <1.11.0 until #1954 is fixed
+    "seaborn>=0.13.0,<1",  # requires matplotlib, packaging
+    "transformers>=4.27.0,<5",  # requires packaging, pyyaml, requests, tqdm
 ]
 
 # shared dependencies
@@ -64,7 +63,6 @@
     ],
     "dev": [
         "black>=23.3.0",
-        "packaging>=22.0",     # for version comparison in test_dependency_security.py
         "pytest>=7.2.1",
         "pytest-cov>=4.1.0",
         "pytest-mock>=3.10.0",  # for access to mock fixtures in pytest

From 547ab6663628742efa5bab7924f964a35f9a04fb Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Sat, 24 Feb 2024 11:36:19 +0800
Subject: [PATCH 44/61] Try Python 3.11

---
 .github/workflows/azureml-cpu-nightly.yml   | 2 +-
 .github/workflows/azureml-gpu-nightly.yml   | 2 +-
 .github/workflows/azureml-spark-nightly.yml | 2 +-
 .github/workflows/azureml-unit-tests.yml    | 2 +-
 .github/workflows/sarplus.yml               | 2 +-
 setup.py                                    | 1 +
 6 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/azureml-cpu-nightly.yml b/.github/workflows/azureml-cpu-nightly.yml
index f6c3837b8..93e414564 100644
--- a/.github/workflows/azureml-cpu-nightly.yml
+++ b/.github/workflows/azureml-cpu-nightly.yml
@@ -67,7 +67,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
         test-group:  ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/.github/workflows/azureml-gpu-nightly.yml b/.github/workflows/azureml-gpu-nightly.yml
index 0f122bda5..3b9f6d6b4 100644
--- a/.github/workflows/azureml-gpu-nightly.yml
+++ b/.github/workflows/azureml-gpu-nightly.yml
@@ -67,7 +67,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
         test-group:  ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/.github/workflows/azureml-spark-nightly.yml b/.github/workflows/azureml-spark-nightly.yml
index 30f88a52d..8f28be6f2 100644
--- a/.github/workflows/azureml-spark-nightly.yml
+++ b/.github/workflows/azureml-spark-nightly.yml
@@ -66,7 +66,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
         test-group:  ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/.github/workflows/azureml-unit-tests.yml b/.github/workflows/azureml-unit-tests.yml
index 385bf005a..b39268318 100644
--- a/.github/workflows/azureml-unit-tests.yml
+++ b/.github/workflows/azureml-unit-tests.yml
@@ -56,7 +56,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"']
+        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
         test-group:  ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
diff --git a/.github/workflows/sarplus.yml b/.github/workflows/sarplus.yml
index 0e743d792..90d03fef6 100644
--- a/.github/workflows/sarplus.yml
+++ b/.github/workflows/sarplus.yml
@@ -39,7 +39,7 @@ jobs:
     runs-on: ubuntu-22.04
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10"]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
     steps:
       - uses: actions/checkout@v4
 
diff --git a/setup.py b/setup.py
index 57d22117e..9d606c8dc 100644
--- a/setup.py
+++ b/setup.py
@@ -110,6 +110,7 @@
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
         "Operating System :: POSIX :: Linux",
     ],
     extras_require=extras_require,

From ed3b632ef0f1406204ce080cf99b320c3e8680b4 Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Sat, 24 Feb 2024 13:41:29 +0800
Subject: [PATCH 45/61] Allow Python 3.11 for sarplus

---
 contrib/sarplus/python/setup.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/contrib/sarplus/python/setup.py b/contrib/sarplus/python/setup.py
index 4009ec751..f755f5310 100644
--- a/contrib/sarplus/python/setup.py
+++ b/contrib/sarplus/python/setup.py
@@ -42,6 +42,7 @@ def __str__(self):
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
         "Intended Audience :: Developers",
         "Intended Audience :: Science/Research",
         "Topic :: Scientific/Engineering :: Mathematics",
@@ -49,7 +50,7 @@ def __str__(self):
     setup_requires=["pytest-runner"],
     install_requires=DEPENDENCIES,
     tests_require=["pytest"],
-    python_requires=">=3.6,<3.11",
+    python_requires=">=3.6,<3.12",
     packages=["pysarplus"],
     package_data={"": ["VERSION"]},
     ext_modules=[

From c8d90f755b08a18ec3f03554c6b92feadc7ec50b Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sat, 24 Feb 2024 19:23:32 +0100
Subject: [PATCH 46/61] Rerun and fix fastai movielens notebook

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 .../00_quick_start/fastai_movielens.ipynb     | 353 ++++++++++--------
 1 file changed, 205 insertions(+), 148 deletions(-)

diff --git a/examples/00_quick_start/fastai_movielens.ipynb b/examples/00_quick_start/fastai_movielens.ipynb
index b475d09cf..944b92623 100644
--- a/examples/00_quick_start/fastai_movielens.ipynb
+++ b/examples/00_quick_start/fastai_movielens.ipynb
@@ -27,17 +27,21 @@
                     "name": "stdout",
                     "output_type": "stream",
                     "text": [
-                        "System version: 3.6.11 | packaged by conda-forge | (default, Aug  5 2020, 20:09:42) \n",
-                        "[GCC 7.5.0]\n",
-                        "Pandas version: 0.25.3\n",
-                        "Fast AI version: 1.0.46\n",
-                        "Torch version: 1.4.0\n",
-                        "Cuda Available: False\n",
+                        "System version: 3.9.16 (main, May 15 2023, 23:46:34) \n",
+                        "[GCC 11.2.0]\n",
+                        "Pandas version: 1.5.3\n",
+                        "Fast AI version: 2.7.11\n",
+                        "Torch version: 1.13.1+cu117\n",
+                        "CUDA Available: True\n",
                         "CuDNN Enabled: True\n"
                     ]
                 }
             ],
             "source": [
+                "# Suppress all warnings\n",
+                "import warnings\n",
+                "warnings.filterwarnings(\"ignore\")\n",
+                "\n",
                 "import os\n",
                 "import sys\n",
                 "import numpy as np\n",
@@ -67,7 +71,7 @@
                 "print(\"Pandas version: {}\".format(pd.__version__))\n",
                 "print(\"Fast AI version: {}\".format(fastai.__version__))\n",
                 "print(\"Torch version: {}\".format(torch.__version__))\n",
-                "print(\"Cuda Available: {}\".format(torch.cuda.is_available()))\n",
+                "print(\"CUDA Available: {}\".format(torch.cuda.is_available()))\n",
                 "print(\"CuDNN Enabled: {}\".format(torch.backends.cudnn.enabled))"
             ]
         },
@@ -80,7 +84,7 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 3,
+            "execution_count": 2,
             "metadata": {
                 "tags": [
                     "parameters"
@@ -101,14 +105,14 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 4,
+            "execution_count": 3,
             "metadata": {},
             "outputs": [
                 {
                     "name": "stderr",
                     "output_type": "stream",
                     "text": [
-                        "100%|██████████| 4.81k/4.81k [00:01<00:00, 4.49kKB/s]\n"
+                        "100%|██████████| 4.81k/4.81k [00:01<00:00, 3.52kKB/s]\n"
                     ]
                 },
                 {
@@ -132,10 +136,10 @@
                             "  <thead>\n",
                             "    <tr style=\"text-align: right;\">\n",
                             "      <th></th>\n",
-                            "      <th>UserId</th>\n",
-                            "      <th>MovieId</th>\n",
-                            "      <th>Rating</th>\n",
-                            "      <th>Timestamp</th>\n",
+                            "      <th>userID</th>\n",
+                            "      <th>itemID</th>\n",
+                            "      <th>rating</th>\n",
+                            "      <th>timestamp</th>\n",
                             "    </tr>\n",
                             "  </thead>\n",
                             "  <tbody>\n",
@@ -179,15 +183,15 @@
                             "</div>"
                         ],
                         "text/plain": [
-                            "  UserId MovieId  Rating  Timestamp\n",
-                            "0    196     242     3.0  881250949\n",
-                            "1    186     302     3.0  891717742\n",
-                            "2     22     377     1.0  878887116\n",
-                            "3    244      51     2.0  880606923\n",
-                            "4    166     346     1.0  886397596"
+                            "  userID itemID  rating  timestamp\n",
+                            "0    196    242     3.0  881250949\n",
+                            "1    186    302     3.0  891717742\n",
+                            "2     22    377     1.0  878887116\n",
+                            "3    244     51     2.0  880606923\n",
+                            "4    166    346     1.0  886397596"
                         ]
                     },
-                    "execution_count": 4,
+                    "execution_count": 3,
                     "metadata": {},
                     "output_type": "execute_result"
                 }
@@ -207,7 +211,7 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 5,
+            "execution_count": 4,
             "metadata": {},
             "outputs": [],
             "source": [
@@ -224,7 +228,7 @@
         },
         {
             "cell_type": "code",
-            "execution_count": null,
+            "execution_count": 5,
             "metadata": {},
             "outputs": [],
             "source": [
@@ -276,37 +280,73 @@
                             "<table border=\"1\" class=\"dataframe\">\n",
                             "  <thead>\n",
                             "    <tr style=\"text-align: right;\">\n",
-                            "      <th>UserId</th>\n",
-                            "      <th>MovieId</th>\n",
-                            "      <th>target</th>\n",
+                            "      <th></th>\n",
+                            "      <th>userID</th>\n",
+                            "      <th>itemID</th>\n",
+                            "      <th>rating</th>\n",
                             "    </tr>\n",
                             "  </thead>\n",
                             "  <tbody>\n",
                             "    <tr>\n",
-                            "      <td>543</td>\n",
-                            "      <td>1555</td>\n",
-                            "      <td>3.0</td>\n",
+                            "      <th>0</th>\n",
+                            "      <td>104</td>\n",
+                            "      <td>840</td>\n",
+                            "      <td>1.0</td>\n",
                             "    </tr>\n",
                             "    <tr>\n",
-                            "      <td>90</td>\n",
-                            "      <td>945</td>\n",
-                            "      <td>5.0</td>\n",
+                            "      <th>1</th>\n",
+                            "      <td>881</td>\n",
+                            "      <td>112</td>\n",
+                            "      <td>2.0</td>\n",
+                            "    </tr>\n",
+                            "    <tr>\n",
+                            "      <th>2</th>\n",
+                            "      <td>746</td>\n",
+                            "      <td>506</td>\n",
+                            "      <td>3.0</td>\n",
                             "    </tr>\n",
                             "    <tr>\n",
-                            "      <td>292</td>\n",
-                            "      <td>515</td>\n",
+                            "      <th>3</th>\n",
+                            "      <td>104</td>\n",
+                            "      <td>257</td>\n",
                             "      <td>4.0</td>\n",
                             "    </tr>\n",
                             "    <tr>\n",
-                            "      <td>303</td>\n",
-                            "      <td>1092</td>\n",
-                            "      <td>1.0</td>\n",
+                            "      <th>4</th>\n",
+                            "      <td>511</td>\n",
+                            "      <td>1527</td>\n",
+                            "      <td>4.0</td>\n",
                             "    </tr>\n",
                             "    <tr>\n",
+                            "      <th>5</th>\n",
                             "      <td>497</td>\n",
-                            "      <td>946</td>\n",
+                            "      <td>763</td>\n",
+                            "      <td>3.0</td>\n",
+                            "    </tr>\n",
+                            "    <tr>\n",
+                            "      <th>6</th>\n",
+                            "      <td>407</td>\n",
+                            "      <td>869</td>\n",
+                            "      <td>3.0</td>\n",
+                            "    </tr>\n",
+                            "    <tr>\n",
+                            "      <th>7</th>\n",
+                            "      <td>291</td>\n",
+                            "      <td>924</td>\n",
+                            "      <td>4.0</td>\n",
+                            "    </tr>\n",
+                            "    <tr>\n",
+                            "      <th>8</th>\n",
+                            "      <td>109</td>\n",
+                            "      <td>94</td>\n",
                             "      <td>4.0</td>\n",
                             "    </tr>\n",
+                            "    <tr>\n",
+                            "      <th>9</th>\n",
+                            "      <td>82</td>\n",
+                            "      <td>597</td>\n",
+                            "      <td>3.0</td>\n",
+                            "    </tr>\n",
                             "  </tbody>\n",
                             "</table>"
                         ],
@@ -369,6 +409,33 @@
             "execution_count": 10,
             "metadata": {},
             "outputs": [
+                {
+                    "data": {
+                        "text/html": [
+                            "\n",
+                            "<style>\n",
+                            "    /* Turns off some styling */\n",
+                            "    progress {\n",
+                            "        /* gets rid of default border in Firefox and Opera. */\n",
+                            "        border: none;\n",
+                            "        /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
+                            "        background-size: auto;\n",
+                            "    }\n",
+                            "    progress:not([value]), progress:not([value])::-webkit-progress-bar {\n",
+                            "        background: repeating-linear-gradient(45deg, #7e7e7e, #7e7e7e 10px, #5c5c5c 10px, #5c5c5c 20px);\n",
+                            "    }\n",
+                            "    .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n",
+                            "        background: #F44336;\n",
+                            "    }\n",
+                            "</style>\n"
+                        ],
+                        "text/plain": [
+                            "<IPython.core.display.HTML object>"
+                        ]
+                    },
+                    "metadata": {},
+                    "output_type": "display_data"
+                },
                 {
                     "data": {
                         "text/html": [
@@ -383,34 +450,34 @@
                             "  </thead>\n",
                             "  <tbody>\n",
                             "    <tr>\n",
+                            "      <td>0</td>\n",
+                            "      <td>0.961789</td>\n",
+                            "      <td>None</td>\n",
+                            "      <td>00:09</td>\n",
+                            "    </tr>\n",
+                            "    <tr>\n",
                             "      <td>1</td>\n",
-                            "      <td>0.985993</td>\n",
-                            "      <td></td>\n",
-                            "      <td>00:05</td>\n",
+                            "      <td>0.863359</td>\n",
+                            "      <td>None</td>\n",
+                            "      <td>00:08</td>\n",
                             "    </tr>\n",
                             "    <tr>\n",
                             "      <td>2</td>\n",
-                            "      <td>0.885496</td>\n",
-                            "      <td></td>\n",
-                            "      <td>00:05</td>\n",
+                            "      <td>0.750853</td>\n",
+                            "      <td>None</td>\n",
+                            "      <td>00:07</td>\n",
                             "    </tr>\n",
                             "    <tr>\n",
                             "      <td>3</td>\n",
-                            "      <td>0.777637</td>\n",
-                            "      <td></td>\n",
-                            "      <td>00:05</td>\n",
+                            "      <td>0.637868</td>\n",
+                            "      <td>None</td>\n",
+                            "      <td>00:08</td>\n",
                             "    </tr>\n",
                             "    <tr>\n",
                             "      <td>4</td>\n",
-                            "      <td>0.628971</td>\n",
-                            "      <td></td>\n",
-                            "      <td>00:05</td>\n",
-                            "    </tr>\n",
-                            "    <tr>\n",
-                            "      <td>5</td>\n",
-                            "      <td>0.532328</td>\n",
-                            "      <td></td>\n",
-                            "      <td>00:06</td>\n",
+                            "      <td>0.526907</td>\n",
+                            "      <td>None</td>\n",
+                            "      <td>00:09</td>\n",
                             "    </tr>\n",
                             "  </tbody>\n",
                             "</table>"
@@ -426,7 +493,7 @@
                     "name": "stdout",
                     "output_type": "stream",
                     "text": [
-                        "Took 29.5549 seconds for training.\n"
+                        "Took 51.5260 seconds for training.\n"
                     ]
                 }
             ],
@@ -446,7 +513,7 @@
         },
         {
             "cell_type": "code",
-            "execution_count": null,
+            "execution_count": 11,
             "metadata": {},
             "outputs": [],
             "source": [
@@ -456,7 +523,7 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 11,
+            "execution_count": 12,
             "metadata": {},
             "outputs": [],
             "source": [
@@ -474,7 +541,7 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 12,
+            "execution_count": 13,
             "metadata": {},
             "outputs": [],
             "source": [
@@ -490,7 +557,7 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 13,
+            "execution_count": 14,
             "metadata": {},
             "outputs": [],
             "source": [
@@ -508,7 +575,7 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 14,
+            "execution_count": 15,
             "metadata": {},
             "outputs": [],
             "source": [
@@ -525,7 +592,7 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 15,
+            "execution_count": 16,
             "metadata": {},
             "outputs": [],
             "source": [
@@ -545,7 +612,7 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 16,
+            "execution_count": 17,
             "metadata": {
                 "scrolled": false
             },
@@ -564,14 +631,14 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 17,
+            "execution_count": 18,
             "metadata": {},
             "outputs": [
                 {
                     "name": "stdout",
                     "output_type": "stream",
                     "text": [
-                        "Took 1.9734 seconds for 1511060 predictions.\n"
+                        "Took 5.1570 seconds for 1511060 predictions.\n"
                     ]
                 }
             ],
@@ -595,7 +662,7 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 18,
+            "execution_count": 19,
             "metadata": {},
             "outputs": [],
             "source": [
@@ -606,7 +673,7 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 19,
+            "execution_count": 20,
             "metadata": {},
             "outputs": [],
             "source": [
@@ -617,7 +684,7 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 20,
+            "execution_count": 21,
             "metadata": {},
             "outputs": [],
             "source": [
@@ -628,7 +695,7 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 21,
+            "execution_count": 22,
             "metadata": {},
             "outputs": [],
             "source": [
@@ -639,27 +706,27 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 22,
+            "execution_count": 23,
             "metadata": {},
             "outputs": [
                 {
                     "name": "stdout",
                     "output_type": "stream",
                     "text": [
-                        "Model:\tCollabLearner\n",
-                        "Top K:\t10\n",
-                        "MAP:\t0.026115\n",
-                        "NDCG:\t0.155065\n",
-                        "Precision@K:\t0.136691\n",
-                        "Recall@K:\t0.054940\n"
+                        "Model:\t\tLearner\n",
+                        "Top K:\t\t10\n",
+                        "MAP:\t\t0.024119\n",
+                        "NDCG:\t\t0.152808\n",
+                        "Precision@K:\t0.139130\n",
+                        "Recall@K:\t0.054943\n"
                     ]
                 }
             ],
             "source": [
-                "print(\"Model:\\t\" + learn.__class__.__name__,\n",
-                "      \"Top K:\\t%d\" % TOP_K,\n",
-                "      \"MAP:\\t%f\" % eval_map,\n",
-                "      \"NDCG:\\t%f\" % eval_ndcg,\n",
+                "print(\"Model:\\t\\t\" + learn.__class__.__name__,\n",
+                "      \"Top K:\\t\\t%d\" % TOP_K,\n",
+                "      \"MAP:\\t\\t%f\" % eval_map,\n",
+                "      \"NDCG:\\t\\t%f\" % eval_ndcg,\n",
                 "      \"Precision@K:\\t%f\" % eval_precision,\n",
                 "      \"Recall@K:\\t%f\" % eval_recall, sep='\\n')"
             ]
@@ -673,7 +740,7 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 23,
+            "execution_count": 24,
             "metadata": {},
             "outputs": [],
             "source": [
@@ -693,18 +760,18 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 24,
+            "execution_count": 25,
             "metadata": {},
             "outputs": [
                 {
                     "name": "stdout",
                     "output_type": "stream",
                     "text": [
-                        "Model:\tCollabLearner\n",
-                        "RMSE:\t0.902379\n",
-                        "MAE:\t0.712163\n",
-                        "Explained variance:\t0.346523\n",
-                        "R squared:\t0.345672\n"
+                        "Model:\t\t\tLearner\n",
+                        "RMSE:\t\t\t0.904589\n",
+                        "MAE:\t\t\t0.715827\n",
+                        "Explained variance:\t0.356082\n",
+                        "R squared:\t\t0.355173\n"
                     ]
                 }
             ],
@@ -714,36 +781,35 @@
                 "eval_mae = mae(test_df, scores, col_user=USER, col_item=ITEM, col_rating=RATING, col_prediction=PREDICTION)\n",
                 "eval_exp_var = exp_var(test_df, scores, col_user=USER, col_item=ITEM, col_rating=RATING, col_prediction=PREDICTION)\n",
                 "\n",
-                "print(\"Model:\\t\" + learn.__class__.__name__,\n",
-                "      \"RMSE:\\t%f\" % eval_rmse,\n",
-                "      \"MAE:\\t%f\" % eval_mae,\n",
+                "print(\"Model:\\t\\t\\t\" + learn.__class__.__name__,\n",
+                "      \"RMSE:\\t\\t\\t%f\" % eval_rmse,\n",
+                "      \"MAE:\\t\\t\\t%f\" % eval_mae,\n",
                 "      \"Explained variance:\\t%f\" % eval_exp_var,\n",
-                "      \"R squared:\\t%f\" % eval_r2, sep='\\n')"
+                "      \"R squared:\\t\\t%f\" % eval_r2, sep='\\n')"
             ]
         },
         {
             "cell_type": "markdown",
             "metadata": {},
             "source": [
-                "That RMSE is actually quite good when compared to these benchmarks: https://www.librec.net/release/v1.3/example.html"
+                "That RMSE is competitive in comparison with other models."
             ]
         },
         {
             "cell_type": "code",
-            "execution_count": 25,
+            "execution_count": 26,
             "metadata": {},
             "outputs": [
                 {
                     "data": {
-                        "application/scrapbook.scrap.json+json": {
-                            "data": 0.02611475567509659,
+                        "application/notebook_utils.json+json": {
+                            "data": 0.024118782738867094,
                             "encoder": "json",
-                            "name": "map",
-                            "version": 1
+                            "name": "map"
                         }
                     },
                     "metadata": {
-                        "scrapbook": {
+                        "notebook_utils": {
                             "data": true,
                             "display": false,
                             "name": "map"
@@ -753,15 +819,14 @@
                 },
                 {
                     "data": {
-                        "application/scrapbook.scrap.json+json": {
-                            "data": 0.15506533130248687,
+                        "application/notebook_utils.json+json": {
+                            "data": 0.1528081472533914,
                             "encoder": "json",
-                            "name": "ndcg",
-                            "version": 1
+                            "name": "ndcg"
                         }
                     },
                     "metadata": {
-                        "scrapbook": {
+                        "notebook_utils": {
                             "data": true,
                             "display": false,
                             "name": "ndcg"
@@ -771,15 +836,14 @@
                 },
                 {
                     "data": {
-                        "application/scrapbook.scrap.json+json": {
-                            "data": 0.13669141039236482,
+                        "application/notebook_utils.json+json": {
+                            "data": 0.13913043478260873,
                             "encoder": "json",
-                            "name": "precision",
-                            "version": 1
+                            "name": "precision"
                         }
                     },
                     "metadata": {
-                        "scrapbook": {
+                        "notebook_utils": {
                             "data": true,
                             "display": false,
                             "name": "precision"
@@ -789,15 +853,14 @@
                 },
                 {
                     "data": {
-                        "application/scrapbook.scrap.json+json": {
-                            "data": 0.05493986799753499,
+                        "application/notebook_utils.json+json": {
+                            "data": 0.05494302697544413,
                             "encoder": "json",
-                            "name": "recall",
-                            "version": 1
+                            "name": "recall"
                         }
                     },
                     "metadata": {
-                        "scrapbook": {
+                        "notebook_utils": {
                             "data": true,
                             "display": false,
                             "name": "recall"
@@ -807,15 +870,14 @@
                 },
                 {
                     "data": {
-                        "application/scrapbook.scrap.json+json": {
-                            "data": 0.9023793356156464,
+                        "application/notebook_utils.json+json": {
+                            "data": 0.9045892929999733,
                             "encoder": "json",
-                            "name": "rmse",
-                            "version": 1
+                            "name": "rmse"
                         }
                     },
                     "metadata": {
-                        "scrapbook": {
+                        "notebook_utils": {
                             "data": true,
                             "display": false,
                             "name": "rmse"
@@ -825,15 +887,14 @@
                 },
                 {
                     "data": {
-                        "application/scrapbook.scrap.json+json": {
-                            "data": 0.7121634655740025,
+                        "application/notebook_utils.json+json": {
+                            "data": 0.7158267242352735,
                             "encoder": "json",
-                            "name": "mae",
-                            "version": 1
+                            "name": "mae"
                         }
                     },
                     "metadata": {
-                        "scrapbook": {
+                        "notebook_utils": {
                             "data": true,
                             "display": false,
                             "name": "mae"
@@ -843,15 +904,14 @@
                 },
                 {
                     "data": {
-                        "application/scrapbook.scrap.json+json": {
-                            "data": 0.34652281723228295,
+                        "application/notebook_utils.json+json": {
+                            "data": 0.3560824305444269,
                             "encoder": "json",
-                            "name": "exp_var",
-                            "version": 1
+                            "name": "exp_var"
                         }
                     },
                     "metadata": {
-                        "scrapbook": {
+                        "notebook_utils": {
                             "data": true,
                             "display": false,
                             "name": "exp_var"
@@ -861,15 +921,14 @@
                 },
                 {
                     "data": {
-                        "application/scrapbook.scrap.json+json": {
-                            "data": 0.3456716162958503,
+                        "application/notebook_utils.json+json": {
+                            "data": 0.35517333876960555,
                             "encoder": "json",
-                            "name": "rsquared",
-                            "version": 1
+                            "name": "rsquared"
                         }
                     },
                     "metadata": {
-                        "scrapbook": {
+                        "notebook_utils": {
                             "data": true,
                             "display": false,
                             "name": "rsquared"
@@ -879,15 +938,14 @@
                 },
                 {
                     "data": {
-                        "application/scrapbook.scrap.json+json": {
-                            "data": 29.554921820759773,
+                        "application/notebook_utils.json+json": {
+                            "data": 51.52598460000445,
                             "encoder": "json",
-                            "name": "train_time",
-                            "version": 1
+                            "name": "train_time"
                         }
                     },
                     "metadata": {
-                        "scrapbook": {
+                        "notebook_utils": {
                             "data": true,
                             "display": false,
                             "name": "train_time"
@@ -897,15 +955,14 @@
                 },
                 {
                     "data": {
-                        "application/scrapbook.scrap.json+json": {
-                            "data": 1.973397959023714,
+                        "application/notebook_utils.json+json": {
+                            "data": 5.156951100005244,
                             "encoder": "json",
-                            "name": "test_time",
-                            "version": 1
+                            "name": "test_time"
                         }
                     },
                     "metadata": {
-                        "scrapbook": {
+                        "notebook_utils": {
                             "data": true,
                             "display": false,
                             "name": "test_time"
@@ -930,7 +987,7 @@
         },
         {
             "cell_type": "code",
-            "execution_count": null,
+            "execution_count": 27,
             "metadata": {},
             "outputs": [],
             "source": [
@@ -946,9 +1003,9 @@
     "metadata": {
         "celltoolbar": "Tags",
         "kernelspec": {
-            "display_name": "Python (reco_gpu)",
+            "display_name": "recommenders",
             "language": "python",
-            "name": "reco_gpu"
+            "name": "python3"
         },
         "language_info": {
             "codemirror_mode": {
@@ -960,7 +1017,7 @@
             "name": "python",
             "nbconvert_exporter": "python",
             "pygments_lexer": "ipython3",
-            "version": "3.6.11"
+            "version": "3.9.16"
         }
     },
     "nbformat": 4,

From d9ec1cd1ccd7fb77b45a0c4d43559041539fc6c3 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sat, 24 Feb 2024 19:32:50 +0100
Subject: [PATCH 47/61] Fixed deprecated attribute in fastai

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 recommenders/models/fastai/fastai_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recommenders/models/fastai/fastai_utils.py b/recommenders/models/fastai/fastai_utils.py
index e5dc502aa..6e805ae17 100644
--- a/recommenders/models/fastai/fastai_utils.py
+++ b/recommenders/models/fastai/fastai_utils.py
@@ -78,7 +78,7 @@ def hide_fastai_progress_bar():
     fastprogress.fastprogress.NO_BAR = True
     fastprogress.fastprogress.WRITER_FN = str
     master_bar, progress_bar = force_console_behavior()
-    fastai.basic_train.master_bar, fastai.basic_train.progress_bar = (
+    fastai.callback.progress.master_bar, fastai.callback.progress.progress_bar = (
         master_bar,
         progress_bar,
     )

From fda52655762580e68e500784c307d7110682e119 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Mon, 4 Mar 2024 17:38:57 +0100
Subject: [PATCH 48/61] Fixing breaking changes in fastai

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 examples/06_benchmarks/benchmark_utils.py | 6 +++---
 examples/06_benchmarks/movielens.ipynb    | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/06_benchmarks/benchmark_utils.py b/examples/06_benchmarks/benchmark_utils.py
index e28fa6ab7..90bcfb822 100644
--- a/examples/06_benchmarks/benchmark_utils.py
+++ b/examples/06_benchmarks/benchmark_utils.py
@@ -15,7 +15,7 @@
 except ImportError:
     pass  # skip this import if we are not in a Spark environment
 try:
-    from fastai.collab import collab_learner, CollabDataBunch
+    from fastai.collab import collab_learner, CollabDataLoaders
 except ImportError:
     pass  # skip this import if we are not in a GPU environment
 
@@ -181,7 +181,7 @@ def prepare_training_fastai(train, test):
     data = train.copy()
     data[DEFAULT_USER_COL] = data[DEFAULT_USER_COL].astype("str")
     data[DEFAULT_ITEM_COL] = data[DEFAULT_ITEM_COL].astype("str")
-    data = CollabDataBunch.from_df(
+    data = CollabDataLoaders.from_df(
         data,
         user_name=DEFAULT_USER_COL,
         item_name=DEFAULT_ITEM_COL,
@@ -196,7 +196,7 @@ def train_fastai(params, data):
         data, n_factors=params["n_factors"], y_range=params["y_range"], wd=params["wd"]
     )
     with Timer() as t:
-        model.fit_one_cycle(cyc_len=params["epochs"], max_lr=params["max_lr"])
+        model.fit_one_cycle(params["epochs"], lr_max=params["lr_max"])
     return model, t
 
 
diff --git a/examples/06_benchmarks/movielens.ipynb b/examples/06_benchmarks/movielens.ipynb
index 2f7a857ce..8c8ee6d2f 100644
--- a/examples/06_benchmarks/movielens.ipynb
+++ b/examples/06_benchmarks/movielens.ipynb
@@ -299,7 +299,7 @@
                 "    \"n_factors\": 40, \n",
                 "    \"y_range\": [0,5.5], \n",
                 "    \"wd\": 1e-1,\n",
-                "    \"max_lr\": 5e-3,\n",
+                "    \"lr_max\": 5e-3,\n",
                 "    \"epochs\": 15\n",
                 "}\n",
                 "\n",

From ac90e543e0b5c77315a452e69ce1b9efb654349e Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Tue, 12 Mar 2024 15:22:23 +0800
Subject: [PATCH 49/61] Upgrade GitHub Action azure/login

---
 .github/actions/azureml-test/action.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/actions/azureml-test/action.yml b/.github/actions/azureml-test/action.yml
index d74b88160..85ae9f84a 100644
--- a/.github/actions/azureml-test/action.yml
+++ b/.github/actions/azureml-test/action.yml
@@ -76,7 +76,7 @@ runs:
       shell: bash
       run: pip install --quiet "azureml-core>1,<2" "azure-cli>2,<3"
     - name: Log in to Azure
-      uses: azure/login@v1
+      uses: azure/login@v2
       with:
         creds: ${{inputs.AZUREML_TEST_CREDENTIALS}}
     - name: Install wheel package

From 1d0fe7d22431b75840c5e725ec44753431fef70b Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Fri, 15 Mar 2024 11:03:48 +0800
Subject: [PATCH 50/61] Update fastai usage in utils

---
 examples/06_benchmarks/benchmark_utils.py  | 6 +++---
 recommenders/models/fastai/fastai_utils.py | 5 ++++-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/examples/06_benchmarks/benchmark_utils.py b/examples/06_benchmarks/benchmark_utils.py
index 90bcfb822..c62518838 100644
--- a/examples/06_benchmarks/benchmark_utils.py
+++ b/examples/06_benchmarks/benchmark_utils.py
@@ -221,9 +221,9 @@ def predict_fastai(model, test):
 
 def recommend_k_fastai(model, test, train, top_k=DEFAULT_K, remove_seen=True):
     with Timer() as t:
-        total_users, total_items = model.data.train_ds.x.classes.values()
-        total_items = total_items[1:]
-        total_users = total_users[1:]
+        total_users, total_items = model.dls.classes.values()
+        total_items = np.array(total_items[1:])
+        total_users = np.array(total_users[1:])
         test_users = test[DEFAULT_USER_COL].unique()
         test_users = np.intersect1d(test_users, total_users)
         users_items = cartesian_product(test_users, total_items)
diff --git a/recommenders/models/fastai/fastai_utils.py b/recommenders/models/fastai/fastai_utils.py
index 6e805ae17..44705924c 100644
--- a/recommenders/models/fastai/fastai_utils.py
+++ b/recommenders/models/fastai/fastai_utils.py
@@ -61,7 +61,10 @@ def score(
     m = learner._get_idx(test_df[item_col], is_item=True)
 
     # score the pytorch model
-    pred = learner.model.forward(torch.column_stack((u, m))).detach().numpy()
+    x = torch.column_stack((u, m))
+    if torch.cuda.is_available()
+        x = x.to('cuda')
+    pred = learner.model.forward(x).detach().cpu().numpy()
     scores = pd.DataFrame(
         {user_col: test_df[user_col], item_col: test_df[item_col], prediction_col: pred}
     )

From 0740b1687a088d4cc2e13a5ea1aba43b9ef2877f Mon Sep 17 00:00:00 2001
From: Jun Ki Min <42475935+loomlike@users.noreply.github.com>
Date: Fri, 15 Mar 2024 10:13:43 -0700
Subject: [PATCH 51/61] change deprecated azureml option (#2069)

Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com>
---
 .../submit_groupwise_azureml_pytest.py           | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
index af8b22b19..adda7e172 100644
--- a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
+++ b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
@@ -29,11 +29,12 @@
 Example:
     Usually, this script is run by a DevOps pipeline. It can also be
     run from cmd line.
-    >>> python tests/ci/refac.py --clustername 'cluster-d3-v2'
-                                 --subid '12345678-9012-3456-abcd-123456789012'
-                                 --pr '666'
-                                 --reponame 'Recommenders'
-                                 --branch 'staging'
+    >>> python tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py \
+            --clustername 'cluster-d3-v2' \
+            --subid '12345678-9012-3456-abcd-123456789012' \
+            --pr '666' \
+            --reponame 'Recommenders' \
+            --branch 'staging'
 """
 import argparse
 import logging
@@ -41,7 +42,7 @@
 from azureml.core.authentication import AzureCliAuthentication
 from azureml.core import Workspace
 from azureml.core import Experiment
-from azureml.core.runconfig import RunConfiguration
+from azureml.core.runconfig import RunConfiguration, DockerConfiguration
 from azureml.core.conda_dependencies import CondaDependencies
 from azureml.core.script_run_config import ScriptRunConfig
 from azureml.core.compute import ComputeTarget, AmlCompute
@@ -175,7 +176,6 @@ def create_run_config(
 
     run_azuremlcompute = RunConfiguration()
     run_azuremlcompute.target = cpu_cluster
-    run_azuremlcompute.environment.docker.enabled = True
     if not add_gpu_dependencies:
         # https://github.com/Azure/AzureML-Containers/blob/master/base/cpu/openmpi4.1.0-ubuntu22.04
         run_azuremlcompute.environment.docker.base_image = "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04"
@@ -292,8 +292,10 @@ def submit_experiment_to_azureml(
         source_directory=".",
         script=test,
         run_config=run_config,
+        docker_runtime_config=DockerConfiguration(use_docker=True),
         arguments=arguments,
     )
+
     run = experiment.submit(script_run_config)
     # waits only for configuration to complete
     run.wait_for_completion(show_output=True, wait_post_processing=True)

From 89cc98514badca994b8e61cdaca895aa0ba7b54d Mon Sep 17 00:00:00 2001
From: Jun Ki Min <42475935+loomlike@users.noreply.github.com>
Date: Fri, 15 Mar 2024 11:35:29 -0700
Subject: [PATCH 52/61] Update SP creation doc

Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com>
---
 tests/README.md | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/tests/README.md b/tests/README.md
index 813b433ba..650990c9e 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -216,8 +216,23 @@ Then, follow the steps below to create the AzureML infrastructure:
 3. Add the subscription ID to GitHub action secrets [here](https://github.com/microsoft/recommenders/settings/secrets/actions). Create a new repository secret called `AZUREML_TEST_SUBID` and add the subscription ID as the value.
 4. Make sure you have installed [Azure CLI](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli), and that you are logged in: `az login`.
 5. Select your subscription: `az account set -s $AZURE_SUBSCRIPTION_ID`.
-5. Create a Service Principal: `az ad sp create-for-rbac --name "recommenders-cicd" --role contributor --scopes /subscriptions/$AZURE_SUBSCRIPTION_ID --sdk-auth`.
-6. Add the output from the Service Principal (should be a JSON blob) as an action secret `AZUREML_TEST_CREDENTIALS`.
+5. Create a Service Principal: `az ad sp create-for-rbac --name $SERVICE_PRINCIPAL_NAME --role contributor --scopes /subscriptions/$AZURE_SUBSCRIPTION_ID --json-auth`. This will output a JSON blob with the credentials of the Service Principal:
+    ```
+    {
+        "clientId": ...,
+        "clientSecret": ...,
+        "subscriptionId": ...,
+        "tenantId": ...,
+        "activeDirectoryEndpointUrl": "https://login.microsoftonline.com",
+        "resourceManagerEndpointUrl": "https://management.azure.com/",
+        "activeDirectoryGraphResourceId": "https://graph.windows.net/",
+        "sqlManagementEndpointUrl": "https://management.core.windows.net:8443/",
+        "galleryEndpointUrl": "https://gallery.azure.com/",
+        "managementEndpointUrl": "https://management.core.windows.net/"
+    }
+    ```
+6. Add the output as github's action secret `AZUREML_TEST_CREDENTIALS` under repository's **Settings > Security > Secrets and variables > Actions**.
+
 
 ## How to execute tests in your local environment
 

From 55433c56241bb7b3d53faf7ee1066b6e12f18092 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Sat, 16 Mar 2024 20:42:36 +0100
Subject: [PATCH 53/61] :memo:

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/README.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/README.md b/tests/README.md
index 650990c9e..a6068daec 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -213,16 +213,16 @@ Then, follow the steps below to create the AzureML infrastructure:
 2. Create two new clusters: `cpu-cluster` and `gpu-cluster`. Go to compute, then compute cluster, then new.
     - Select the CPU VM base. Anything above 64GB of RAM, and 8 cores should be fine.
     - Select the GPU VM base. Anything above 56GB of RAM, and 6 cores, and an NVIDIA K80 should be fine.
-3. Add the subscription ID to GitHub action secrets [here](https://github.com/microsoft/recommenders/settings/secrets/actions). Create a new repository secret called `AZUREML_TEST_SUBID` and add the subscription ID as the value.
+3. Add the subscription ID to GitHub action secrets [here](https://github.com/recommenders-team/recommenders/settings/secrets/actions). Create a new repository secret called `AZUREML_TEST_SUBID` and add the subscription ID as the value.
 4. Make sure you have installed [Azure CLI](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli), and that you are logged in: `az login`.
 5. Select your subscription: `az account set -s $AZURE_SUBSCRIPTION_ID`.
-5. Create a Service Principal: `az ad sp create-for-rbac --name $SERVICE_PRINCIPAL_NAME --role contributor --scopes /subscriptions/$AZURE_SUBSCRIPTION_ID --json-auth`. This will output a JSON blob with the credentials of the Service Principal:
+6. Create a Service Principal: `az ad sp create-for-rbac --name $SERVICE_PRINCIPAL_NAME --role contributor --scopes /subscriptions/$AZURE_SUBSCRIPTION_ID --json-auth`. This will output a JSON blob with the credentials of the Service Principal:
     ```
     {
-        "clientId": ...,
-        "clientSecret": ...,
-        "subscriptionId": ...,
-        "tenantId": ...,
+        "clientId": "XXXXXXXXXXXXXXXXXXXXX",
+        "clientSecret": "XXXXXXXXXXXXXXXXXXXXX",
+        "subscriptionId": "XXXXXXXXXXXXXXXXXXXXX",
+        "tenantId": "XXXXXXXXXXXXXXXXXXXXX",
         "activeDirectoryEndpointUrl": "https://login.microsoftonline.com",
         "resourceManagerEndpointUrl": "https://management.azure.com/",
         "activeDirectoryGraphResourceId": "https://graph.windows.net/",
@@ -231,7 +231,7 @@ Then, follow the steps below to create the AzureML infrastructure:
         "managementEndpointUrl": "https://management.core.windows.net/"
     }
     ```
-6. Add the output as github's action secret `AZUREML_TEST_CREDENTIALS` under repository's **Settings > Security > Secrets and variables > Actions**.
+7. Add the output as a GitHub Actions secret named `AZUREML_TEST_CREDENTIALS` under the repository's **Settings > Security > Secrets and variables > Actions**.
 
 
 ## How to execute tests in your local environment

From 730a5e98d7ad8b2adf5d6a14ce64984c3041a251 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Mon, 18 Mar 2024 20:16:32 +0100
Subject: [PATCH 54/61] Fixing TF to < 2.16

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 9d606c8dc..89d3af503 100644
--- a/setup.py
+++ b/setup.py
@@ -53,7 +53,7 @@
     "gpu": [
         "fastai>=2.7.11,<3",
         "nvidia-ml-py>=11.525.84",
-        "tensorflow>=2.8.4,!=2.9.0.*,!=2.9.1,!=2.9.2,!=2.10.0.*,<3",
+        "tensorflow>=2.8.4,!=2.9.0.*,!=2.9.1,!=2.9.2,!=2.10.0.*,<2.16",
         "tf-slim>=1.1.0",  # No python_requires in its setup.py
         "torch>=2.0.1,<3",
     ],

From 657531ac355d87cc319a849cdd0c7fa18a3ae552 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Mon, 18 Mar 2024 21:06:05 +0100
Subject: [PATCH 55/61] :bug:

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 recommenders/models/fastai/fastai_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/recommenders/models/fastai/fastai_utils.py b/recommenders/models/fastai/fastai_utils.py
index 44705924c..0742857f3 100644
--- a/recommenders/models/fastai/fastai_utils.py
+++ b/recommenders/models/fastai/fastai_utils.py
@@ -62,8 +62,8 @@ def score(
 
     # score the pytorch model
     x = torch.column_stack((u, m))
-    if torch.cuda.is_available()
-        x = x.to('cuda')
+    if torch.cuda.is_available():
+        x = x.to("cuda")
     pred = learner.model.forward(x).detach().cpu().numpy()
     scores = pd.DataFrame(
         {user_col: test_df[user_col], item_col: test_df[item_col], prediction_col: pred}

From e99b8d04f46861daf60181b956a4a42714b52957 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Mon, 18 Mar 2024 22:18:12 +0100
Subject: [PATCH 56/61] model to CUDA as well as data

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 recommenders/models/fastai/fastai_utils.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/recommenders/models/fastai/fastai_utils.py b/recommenders/models/fastai/fastai_utils.py
index 0742857f3..f6b6a8986 100644
--- a/recommenders/models/fastai/fastai_utils.py
+++ b/recommenders/models/fastai/fastai_utils.py
@@ -62,17 +62,22 @@ def score(
 
     # score the pytorch model
     x = torch.column_stack((u, m))
+
     if torch.cuda.is_available():
         x = x.to("cuda")
+        learner.model = learner.model.to("cuda")
+
     pred = learner.model.forward(x).detach().cpu().numpy()
     scores = pd.DataFrame(
         {user_col: test_df[user_col], item_col: test_df[item_col], prediction_col: pred}
     )
     scores = scores.sort_values([user_col, prediction_col], ascending=[True, False])
+
     if top_k is not None:
         top_scores = scores.groupby(user_col).head(top_k).reset_index(drop=True)
     else:
         top_scores = scores
+
     return top_scores
 
 

From 03554deff45d0d18b90c11ea64136f67147e892d Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Tue, 19 Mar 2024 09:25:35 +0800
Subject: [PATCH 57/61] Set tensorflow <= 2.15.0

Signed-off-by: Simon Zhao <simonyansenzhao@gmail.com>
---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 9d606c8dc..75b44f775 100644
--- a/setup.py
+++ b/setup.py
@@ -53,7 +53,7 @@
     "gpu": [
         "fastai>=2.7.11,<3",
         "nvidia-ml-py>=11.525.84",
-        "tensorflow>=2.8.4,!=2.9.0.*,!=2.9.1,!=2.9.2,!=2.10.0.*,<3",
+        "tensorflow>=2.8.4,!=2.9.0.*,!=2.9.1,!=2.9.2,!=2.10.0.*,<=2.15.0",
         "tf-slim>=1.1.0",  # No python_requires in its setup.py
         "torch>=2.0.1,<3",
     ],

From 47281c8a31db03959345c391c8a756ea2ec72475 Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Tue, 19 Mar 2024 10:30:32 +0800
Subject: [PATCH 58/61] Add missing colon

Signed-off-by: Simon Zhao <simonyansenzhao@gmail.com>
---
 recommenders/models/fastai/fastai_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recommenders/models/fastai/fastai_utils.py b/recommenders/models/fastai/fastai_utils.py
index 44705924c..61b576f3b 100644
--- a/recommenders/models/fastai/fastai_utils.py
+++ b/recommenders/models/fastai/fastai_utils.py
@@ -62,7 +62,7 @@ def score(
 
     # score the pytorch model
     x = torch.column_stack((u, m))
-    if torch.cuda.is_available()
+    if torch.cuda.is_available():
         x = x.to('cuda')
     pred = learner.model.forward(x).detach().cpu().numpy()
     scores = pd.DataFrame(

From b255fae97203ee0f82bbc5faba1f971d07a25f80 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Tue, 19 Mar 2024 07:14:39 +0100
Subject: [PATCH 59/61] :memo:

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 89d3af503..c5fc49bb8 100644
--- a/setup.py
+++ b/setup.py
@@ -53,7 +53,7 @@
     "gpu": [
         "fastai>=2.7.11,<3",
         "nvidia-ml-py>=11.525.84",
-        "tensorflow>=2.8.4,!=2.9.0.*,!=2.9.1,!=2.9.2,!=2.10.0.*,<2.16",
+        "tensorflow>=2.8.4,!=2.9.0.*,!=2.9.1,!=2.9.2,!=2.10.0.*,<2.16",  # Fixed TF due to constant security problems and breaking changes #2073
         "tf-slim>=1.1.0",  # No python_requires in its setup.py
         "torch>=2.0.1,<3",
     ],

From 85899cf7f9bad8ad84f405074dbe4bd6acea0371 Mon Sep 17 00:00:00 2001
From: miguelgfierro <miguelgfierro@users.noreply.github.com>
Date: Tue, 19 Mar 2024 09:22:46 +0100
Subject: [PATCH 60/61] Reducing DKN batch size to 200

Signed-off-by: miguelgfierro <miguelgfierro@users.noreply.github.com>
---
 tests/functional/examples/test_notebooks_gpu.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/functional/examples/test_notebooks_gpu.py b/tests/functional/examples/test_notebooks_gpu.py
index 2007cc1a7..05b53c68e 100644
--- a/tests/functional/examples/test_notebooks_gpu.py
+++ b/tests/functional/examples/test_notebooks_gpu.py
@@ -247,7 +247,9 @@ def test_wide_deep_functional(
             os.path.join("tests", "resources", "deeprec", "slirec"),
             10,
             400,
-            {"auc": 0.7183},  # Don't do logloss check as SLi-Rec uses ranking loss, not a point-wise loss
+            {
+                "auc": 0.7183
+            },  # Don't do logloss check as SLi-Rec uses ranking loss, not a point-wise loss
             42,
         )
     ],
@@ -278,7 +280,7 @@ def test_slirec_quickstart_functional(
     results = read_notebook(output_notebook)
 
     assert results["auc"] == pytest.approx(expected_values["auc"], rel=TOL, abs=ABS_TOL)
-    
+
 
 @pytest.mark.gpu
 @pytest.mark.notebooks
@@ -567,7 +569,7 @@ def test_dkn_quickstart_functional(notebooks, output_notebook, kernel_name):
         notebook_path,
         output_notebook,
         kernel_name=kernel_name,
-        parameters=dict(EPOCHS=5, BATCH_SIZE=500),
+        parameters=dict(EPOCHS=5, BATCH_SIZE=200),
     )
     results = read_notebook(output_notebook)
 

From d8e8ac30e8eca6ff33f4c9a90a1fdfdeb36942be Mon Sep 17 00:00:00 2001
From: Simon Zhao <simonyansenzhao@gmail.com>
Date: Tue, 19 Mar 2024 17:17:07 +0800
Subject: [PATCH 61/61] Move learner.model to cuda if cuda is available

Signed-off-by: Simon Zhao <simonyansenzhao@gmail.com>
---
 recommenders/models/fastai/fastai_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/recommenders/models/fastai/fastai_utils.py b/recommenders/models/fastai/fastai_utils.py
index 61b576f3b..062a8e5e7 100644
--- a/recommenders/models/fastai/fastai_utils.py
+++ b/recommenders/models/fastai/fastai_utils.py
@@ -63,7 +63,8 @@ def score(
     # score the pytorch model
     x = torch.column_stack((u, m))
     if torch.cuda.is_available():
-        x = x.to('cuda')
+        x = x.to("cuda")
+        learner.model = learner.model.to("cuda")
     pred = learner.model.forward(x).detach().cpu().numpy()
     scores = pd.DataFrame(
         {user_col: test_df[user_col], item_col: test_df[item_col], prediction_col: pred}