From 82ee6d38cea43e54172782c8200f3bbe42d54fc6 Mon Sep 17 00:00:00 2001 From: Jun Ki Min <42475935+loomlike@users.noreply.github.com> Date: Thu, 21 Dec 2023 10:28:30 -0800 Subject: [PATCH] Fix nightly test errors (#2045) * Revert tests tolerance * Fix notebook parameter parsing * Add notebook utils tests to test groups * Fix notebooks * Fix notebook unit tests * Update evaluation metrics name map. Handle None for exp_var * Fix smoke tests * cleanup * Fix functional test errors * make notebook parameter update function to be private * Fix benchmark notebook bug * fix remaining bugs --------- Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> --- .../00_quick_start/fastai_movielens.ipynb | 8 +- examples/00_quick_start/naml_MIND.ipynb | 30 +--- examples/00_quick_start/ncf_movielens.ipynb | 8 +- examples/00_quick_start/nrms_MIND.ipynb | 1 - examples/00_quick_start/rlrmc_movielens.ipynb | 1 - examples/00_quick_start/sar_movielens.ipynb | 4 +- examples/00_quick_start/sasrec_amazon.ipynb | 10 +- .../sequential_recsys_amazondataset.ipynb | 19 +-- .../00_quick_start/wide_deep_movielens.ipynb | 2 - .../baseline_deep_dive.ipynb | 4 +- .../cornac_bivae_deep_dive.ipynb | 5 +- .../cornac_bpr_deep_dive.ipynb | 4 +- .../lightgcn_deep_dive.ipynb | 4 +- .../ncf_deep_dive.ipynb | 21 ++- examples/06_benchmarks/benchmark_utils.py | 6 +- examples/06_benchmarks/movielens.ipynb | 6 +- recommenders/evaluation/python_evaluation.py | 1 + recommenders/evaluation/spark_evaluation.py | 12 +- recommenders/utils/notebook_utils.py | 47 +++--- tests/ci/azureml_tests/test_groups.py | 4 + .../functional/examples/test_notebooks_gpu.py | 30 ++-- .../examples/test_notebooks_python.py | 1 + tests/smoke/examples/test_notebooks_gpu.py | 38 ++--- tests/unit/examples/test_notebooks_gpu.py | 8 +- tests/unit/examples/test_notebooks_pyspark.py | 8 +- tests/unit/examples/test_notebooks_python.py | 1 + .../evaluation/test_python_evaluation.py | 2 +- .../evaluation/test_spark_evaluation.py | 2 +- .../recommenders/utils/test_notebook_utils.py | 138 ++++++++---------- 29 files changed, 176 insertions(+), 249 deletions(-) diff --git a/examples/00_quick_start/fastai_movielens.ipynb b/examples/00_quick_start/fastai_movielens.ipynb index c5de1996d..517673178 100644 --- a/examples/00_quick_start/fastai_movielens.ipynb +++ b/examples/00_quick_start/fastai_movielens.ipynb @@ -59,7 +59,7 @@ "from recommenders.datasets import movielens\n", "from recommenders.datasets.python_splitters import python_stratified_split\n", "from recommenders.models.fastai.fastai_utils import cartesian_product, score\n", - "from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n", + "from recommenders.evaluation.python_evaluation import map, ndcg_at_k, precision_at_k, recall_at_k\n", "from recommenders.evaluation.python_evaluation import rmse, mae, rsquared, exp_var\n", "from recommenders.utils.notebook_utils import store_metadata\n", "\n", @@ -599,9 +599,9 @@ "metadata": {}, "outputs": [], "source": [ - "eval_map = map_at_k(test_df, top_k_scores, col_user=USER, col_item=ITEM, \n", - " col_rating=RATING, col_prediction=PREDICTION, \n", - " relevancy_method=\"top_k\", k=TOP_K)" + "eval_map = map(test_df, top_k_scores, col_user=USER, col_item=ITEM, \n", + " col_rating=RATING, col_prediction=PREDICTION, \n", + " relevancy_method=\"top_k\", k=TOP_K)" ] }, { diff --git a/examples/00_quick_start/naml_MIND.ipynb b/examples/00_quick_start/naml_MIND.ipynb index b202a390e..c505bd91f 100644 --- 
a/examples/00_quick_start/naml_MIND.ipynb +++ b/examples/00_quick_start/naml_MIND.ipynb @@ -246,34 +246,6 @@ "model = NAMLModel(hparams, iterator, seed=seed)" ] }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "18693it [01:18, 239.62it/s]\n", - "7507it [00:30, 249.74it/s]\n", - "7538it [00:01, 6423.03it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'group_auc': 0.4807, 'mean_mrr': 0.2104, 'ndcg@5': 0.2141, 'ndcg@10': 0.2766}\n" - ] - } - ], - "source": [ - "print(model.run_eval(valid_news_file, valid_behaviors_file))" - ] - }, { "cell_type": "code", "execution_count": 8, @@ -398,7 +370,7 @@ ], "source": [ "%%time\n", - "model.fit(train_news_file, train_behaviors_file,valid_news_file, valid_behaviors_file)" + "model.fit(train_news_file, train_behaviors_file, valid_news_file, valid_behaviors_file)" ] }, { diff --git a/examples/00_quick_start/ncf_movielens.ipynb b/examples/00_quick_start/ncf_movielens.ipynb index 23142e458..2b48e3ce1 100644 --- a/examples/00_quick_start/ncf_movielens.ipynb +++ b/examples/00_quick_start/ncf_movielens.ipynb @@ -56,10 +56,10 @@ "from recommenders.models.ncf.ncf_singlenode import NCF\n", "from recommenders.models.ncf.dataset import Dataset as NCFDataset\n", "from recommenders.datasets import movielens\n", - "from recommenders.utils.notebook_utils import is_jupyter\n", "from recommenders.datasets.python_splitters import python_chrono_split\n", - "from recommenders.evaluation.python_evaluation import (rmse, mae, rsquared, exp_var, map_at_k, ndcg_at_k, precision_at_k, \n", - " recall_at_k, get_top_k_items)\n", + "from recommenders.evaluation.python_evaluation import (\n", + " map, ndcg_at_k, precision_at_k, recall_at_k\n", + ")\n", "from recommenders.utils.notebook_utils import store_metadata\n", "\n", "print(\"System version: {}\".format(sys.version))\n", @@ -334,7 +334,7 @@ } ], "source": [ - "eval_map = map_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", + "eval_map = map(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", "eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", "eval_precision = precision_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", "eval_recall = recall_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", diff --git a/examples/00_quick_start/nrms_MIND.ipynb b/examples/00_quick_start/nrms_MIND.ipynb index 391d18712..e94c48733 100644 --- a/examples/00_quick_start/nrms_MIND.ipynb +++ b/examples/00_quick_start/nrms_MIND.ipynb @@ -99,7 +99,6 @@ "import numpy as np\n", "import zipfile\n", "from tqdm import tqdm\n", - "import scrapbook as sb\n", "from tempfile import TemporaryDirectory\n", "import tensorflow as tf\n", "tf.get_logger().setLevel('ERROR') # only show error messages\n", diff --git a/examples/00_quick_start/rlrmc_movielens.ipynb b/examples/00_quick_start/rlrmc_movielens.ipynb index 6ec3e6a86..34a9deb3f 100644 --- a/examples/00_quick_start/rlrmc_movielens.ipynb +++ b/examples/00_quick_start/rlrmc_movielens.ipynb @@ -40,7 +40,6 @@ "source": [ "import sys\n", "import time\n", - "import numpy as np\n", "import pandas as pd\n", "\n", "from recommenders.datasets.python_splitters import python_random_split\n", diff --git a/examples/00_quick_start/sar_movielens.ipynb b/examples/00_quick_start/sar_movielens.ipynb index 09243e6fd..4eba126ac 100644 --- 
a/examples/00_quick_start/sar_movielens.ipynb +++ b/examples/00_quick_start/sar_movielens.ipynb @@ -68,7 +68,7 @@ "from recommenders.datasets.python_splitters import python_stratified_split\n", "from recommenders.models.sar import SAR\n", "from recommenders.evaluation.python_evaluation import (\n", - " map_at_k,\n", + " map,\n", " ndcg_at_k,\n", " precision_at_k,\n", " recall_at_k,\n", @@ -509,7 +509,7 @@ "outputs": [], "source": [ "# Ranking metrics\n", - "eval_map = map_at_k(test, top_k, col_user=\"userID\", col_item=\"itemID\", col_rating=\"rating\", k=TOP_K)\n", + "eval_map = map(test, top_k, col_user=\"userID\", col_item=\"itemID\", col_rating=\"rating\", k=TOP_K)\n", "eval_ndcg = ndcg_at_k(test, top_k, col_user=\"userID\", col_item=\"itemID\", col_rating=\"rating\", k=TOP_K)\n", "eval_precision = precision_at_k(test, top_k, col_user=\"userID\", col_item=\"itemID\", col_rating=\"rating\", k=TOP_K)\n", "eval_recall = recall_at_k(test, top_k, col_user=\"userID\", col_item=\"itemID\", col_rating=\"rating\", k=TOP_K)\n" diff --git a/examples/00_quick_start/sasrec_amazon.ipynb b/examples/00_quick_start/sasrec_amazon.ipynb index 2e44fbe25..fad74fc69 100644 --- a/examples/00_quick_start/sasrec_amazon.ipynb +++ b/examples/00_quick_start/sasrec_amazon.ipynb @@ -57,23 +57,21 @@ } ], "source": [ - "import re\n", "import sys\n", "import os\n", - "from tempfile import TemporaryDirectory\n", - "import numpy as np\n", "import pandas as pd \n", - "from collections import defaultdict\n", "import tensorflow as tf\n", "tf.get_logger().setLevel('ERROR') # only show error messages\n", "\n", - "from recommenders.utils.timer import Timer\n", "from recommenders.datasets.amazon_reviews import get_review_data\n", "from recommenders.datasets.split_utils import filter_k_core\n", "from recommenders.models.sasrec.model import SASREC\n", "from recommenders.models.sasrec.ssept import SSEPT\n", "from recommenders.models.sasrec.sampler import WarpSampler\n", "from recommenders.models.sasrec.util import SASRecDataSet\n", + "from recommenders.utils.notebook_utils import store_metadata\n", + "from recommenders.utils.timer import Timer\n", + "\n", "\n", "print(f\"System version: {sys.version}\")\n", "print(f\"Tensorflow version: {tf.__version__}\")" @@ -98,7 +96,7 @@ "source": [ "num_epochs = 5\n", "batch_size = 128\n", - "RANDOM_SEED = 100 # Set None for non-deterministic result\n", + "seed = 100 # Set None for non-deterministic result\n", "\n", "# data_dir = os.path.join(\"tests\", \"recsys_data\", \"RecSys\", \"SASRec-tf2\", \"data\")\n", "data_dir = os.path.join(\"..\", \"..\", \"tests\", \"resources\", \"deeprec\", \"sasrec\")\n", diff --git a/examples/00_quick_start/sequential_recsys_amazondataset.ipynb b/examples/00_quick_start/sequential_recsys_amazondataset.ipynb index f563e7bdb..ace5ed39f 100644 --- a/examples/00_quick_start/sequential_recsys_amazondataset.ipynb +++ b/examples/00_quick_start/sequential_recsys_amazondataset.ipynb @@ -63,9 +63,6 @@ "source": [ "import os\n", "import sys\n", - "import logging\n", - "from tempfile import TemporaryDirectory\n", - "import numpy as np\n", "import tensorflow.compat.v1 as tf\n", "tf.get_logger().setLevel('ERROR') # only show error messages\n", "\n", @@ -75,7 +72,6 @@ " prepare_hparams\n", ")\n", "from recommenders.datasets.amazon_reviews import download_and_extract, data_preprocessing\n", - "from recommenders.datasets.download_utils import maybe_download\n", "from recommenders.models.deeprec.models.sequential.sli_rec import SLI_RECModel as SeqModel\n", "#### to use the 
other model, use one of the following lines:\n", "# from recommenders.models.deeprec.models.sequential.asvd import A2SVDModel as SeqModel\n", @@ -92,16 +88,6 @@ "\n" ] }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "## ATTENTION: change to the corresponding config file, e.g., caser.yaml for CaserModel, sum.yaml for SUMModel\n", - "yaml_file = '../../recommenders/models/deeprec/config/sli_rec.yaml' " - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -123,7 +109,10 @@ "BATCH_SIZE = 400\n", "RANDOM_SEED = SEED # Set None for non-deterministic result\n", "\n", - "data_path = os.path.join(\"..\", \"..\", \"tests\", \"resources\", \"deeprec\", \"slirec\")" + "data_path = os.path.join(\"..\", \"..\", \"tests\", \"resources\", \"deeprec\", \"slirec\")\n", + "\n", + "## ATTENTION: change to the corresponding config file, e.g., caser.yaml for CaserModel, sum.yaml for SUMModel\n", + "yaml_file = '../../recommenders/models/deeprec/config/sli_rec.yaml' " ] }, { diff --git a/examples/00_quick_start/wide_deep_movielens.ipynb b/examples/00_quick_start/wide_deep_movielens.ipynb index 60cb1da42..131d245d1 100644 --- a/examples/00_quick_start/wide_deep_movielens.ipynb +++ b/examples/00_quick_start/wide_deep_movielens.ipynb @@ -59,8 +59,6 @@ "import os\n", "import sys\n", "import math\n", - "import itertools\n", - "import numpy as np\n", "import pandas as pd\n", "import sklearn.preprocessing\n", "from tempfile import TemporaryDirectory\n", diff --git a/examples/02_model_collaborative_filtering/baseline_deep_dive.ipynb b/examples/02_model_collaborative_filtering/baseline_deep_dive.ipynb index f8b627a54..6b950bc37 100644 --- a/examples/02_model_collaborative_filtering/baseline_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/baseline_deep_dive.ipynb @@ -78,7 +78,7 @@ " mae,\n", " rsquared,\n", " exp_var,\n", - " map_at_k,\n", + " map,\n", " ndcg_at_k,\n", " precision_at_k,\n", " recall_at_k,\n", @@ -689,7 +689,7 @@ "source": [ "cols[\"col_prediction\"] = \"Count\"\n", "\n", - "eval_map = map_at_k(test, baseline_recommendations, k=TOP_K, **cols)\n", + "eval_map = map(test, baseline_recommendations, k=TOP_K, **cols)\n", "eval_ndcg = ndcg_at_k(test, baseline_recommendations, k=TOP_K, **cols)\n", "eval_precision = precision_at_k(test, baseline_recommendations, k=TOP_K, **cols)\n", "eval_recall = recall_at_k(test, baseline_recommendations, k=TOP_K, **cols)\n", diff --git a/examples/02_model_collaborative_filtering/cornac_bivae_deep_dive.ipynb b/examples/02_model_collaborative_filtering/cornac_bivae_deep_dive.ipynb index c2ff20337..731ab0c12 100644 --- a/examples/02_model_collaborative_filtering/cornac_bivae_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/cornac_bivae_deep_dive.ipynb @@ -48,7 +48,6 @@ "import sys\n", "import torch\n", "import cornac\n", - "import pandas as pd\n", "\n", "from recommenders.datasets import movielens\n", "from recommenders.datasets.python_splitters import python_random_split\n", @@ -56,7 +55,7 @@ "from recommenders.utils.timer import Timer\n", "from recommenders.utils.constants import SEED\n", "from recommenders.evaluation.python_evaluation import (\n", - " map_at_k,\n", + " map,\n", " ndcg_at_k,\n", " precision_at_k,\n", " recall_at_k,\n", @@ -508,7 +507,7 @@ } ], "source": [ - "eval_map = map_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", + "eval_map = map(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", "eval_ndcg = ndcg_at_k(test, all_predictions, 
col_prediction='prediction', k=TOP_K)\n", "eval_precision = precision_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", "eval_recall = recall_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", diff --git a/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb b/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb index fb0253a2f..6a164127b 100644 --- a/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb @@ -49,7 +49,7 @@ "\n", "from recommenders.datasets import movielens\n", "from recommenders.datasets.python_splitters import python_random_split\n", - "from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n", + "from recommenders.evaluation.python_evaluation import map, ndcg_at_k, precision_at_k, recall_at_k\n", "from recommenders.models.cornac.cornac_utils import predict_ranking\n", "from recommenders.utils.timer import Timer\n", "from recommenders.utils.constants import SEED\n", @@ -557,7 +557,7 @@ ], "source": [ "k = 10\n", - "eval_map = map_at_k(test, all_predictions, col_prediction='prediction', k=k)\n", + "eval_map = map(test, all_predictions, col_prediction='prediction', k=k)\n", "eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=k)\n", "eval_precision = precision_at_k(test, all_predictions, col_prediction='prediction', k=k)\n", "eval_recall = recall_at_k(test, all_predictions, col_prediction='prediction', k=k)\n", diff --git a/examples/02_model_collaborative_filtering/lightgcn_deep_dive.ipynb b/examples/02_model_collaborative_filtering/lightgcn_deep_dive.ipynb index 2180a975d..247dd0bd8 100644 --- a/examples/02_model_collaborative_filtering/lightgcn_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/lightgcn_deep_dive.ipynb @@ -62,7 +62,7 @@ "from recommenders.models.deeprec.DataModel.ImplicitCF import ImplicitCF\n", "from recommenders.datasets import movielens\n", "from recommenders.datasets.python_splitters import python_stratified_split\n", - "from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n", + "from recommenders.evaluation.python_evaluation import map, ndcg_at_k, precision_at_k, recall_at_k\n", "from recommenders.utils.constants import SEED as DEFAULT_SEED\n", "from recommenders.models.deeprec.deeprec_utils import prepare_hparams\n", "from recommenders.utils.notebook_utils import store_metadata\n", @@ -640,7 +640,7 @@ } ], "source": [ - "eval_map = map_at_k(test, topk_scores, k=TOP_K)\n", + "eval_map = map(test, topk_scores, k=TOP_K)\n", "eval_ndcg = ndcg_at_k(test, topk_scores, k=TOP_K)\n", "eval_precision = precision_at_k(test, topk_scores, k=TOP_K)\n", "eval_recall = recall_at_k(test, topk_scores, k=TOP_K)\n", diff --git a/examples/02_model_collaborative_filtering/ncf_deep_dive.ipynb b/examples/02_model_collaborative_filtering/ncf_deep_dive.ipynb index 55790696c..76136e8e6 100644 --- a/examples/02_model_collaborative_filtering/ncf_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/ncf_deep_dive.ipynb @@ -63,8 +63,9 @@ "from recommenders.models.ncf.dataset import Dataset as NCFDataset\n", "from recommenders.datasets import movielens\n", "from recommenders.datasets.python_splitters import python_chrono_split\n", - "from recommenders.evaluation.python_evaluation import (rmse, mae, rsquared, exp_var, map_at_k, ndcg_at_k, precision_at_k, \n", - " recall_at_k, 
get_top_k_items)\n", + "from recommenders.evaluation.python_evaluation import (\n", + " map, ndcg_at_k, precision_at_k, recall_at_k\n", + ")\n", "from recommenders.utils.constants import SEED as DEFAULT_SEED\n", "from recommenders.utils.notebook_utils import store_metadata\n", "\n", @@ -428,7 +429,7 @@ "metadata": {}, "outputs": [], "source": [ - "model = NCF (\n", + "model = NCF(\n", " n_users=data.n_users, \n", " n_items=data.n_items,\n", " model_type=\"NeuMF\",\n", @@ -625,8 +626,7 @@ } ], "source": [ - "\n", - "eval_map = map_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", + "eval_map = map(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", "eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", "eval_precision = precision_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", "eval_recall = recall_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", @@ -718,7 +718,7 @@ "metadata": {}, "outputs": [], "source": [ - "model = NCF (\n", + "model = NCF(\n", " n_users=data.n_users, \n", " n_items=data.n_items,\n", " model_type=\"GMF\",\n", @@ -760,7 +760,7 @@ "metadata": {}, "outputs": [], "source": [ - "model = NCF (\n", + "model = NCF(\n", " n_users=data.n_users, \n", " n_items=data.n_items,\n", " model_type=\"MLP\",\n", @@ -771,8 +771,7 @@ " learning_rate=1e-3,\n", " verbose=10,\n", " seed=SEED\n", - ")\n", - "\n" + ")" ] }, { @@ -811,7 +810,7 @@ "metadata": {}, "outputs": [], "source": [ - "model = NCF (\n", + "model = NCF(\n", " n_users=data.n_users, \n", " n_items=data.n_items,\n", " model_type=\"NeuMF\",\n", @@ -905,7 +904,7 @@ } ], "source": [ - "eval_map2 = map_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", + "eval_map2 = map(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", "eval_ndcg2 = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", "eval_precision2 = precision_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", "eval_recall2 = recall_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", diff --git a/examples/06_benchmarks/benchmark_utils.py b/examples/06_benchmarks/benchmark_utils.py index 4b3d8c0d9..e28fa6ab7 100644 --- a/examples/06_benchmarks/benchmark_utils.py +++ b/examples/06_benchmarks/benchmark_utils.py @@ -37,7 +37,7 @@ ) from recommenders.models.cornac.cornac_utils import predict_ranking from recommenders.evaluation.python_evaluation import ( - map_at_k, + map, ndcg_at_k, precision_at_k, recall_at_k, @@ -387,7 +387,7 @@ def ranking_metrics_pyspark(test, predictions, k=DEFAULT_K): test, predictions, k=k, relevancy_method="top_k", **COL_DICT ) return { - "MAP": rank_eval.map_at_k(), + "MAP": rank_eval.map(), "nDCG@k": rank_eval.ndcg_at_k(), "Precision@k": rank_eval.precision_at_k(), "Recall@k": rank_eval.recall_at_k(), @@ -405,7 +405,7 @@ def rating_metrics_python(test, predictions): def ranking_metrics_python(test, predictions, k=DEFAULT_K): return { - "MAP": map_at_k(test, predictions, k=k, **COL_DICT), + "MAP": map(test, predictions, k=k, **COL_DICT), "nDCG@k": ndcg_at_k(test, predictions, k=k, **COL_DICT), "Precision@k": precision_at_k(test, predictions, k=k, **COL_DICT), "Recall@k": recall_at_k(test, predictions, k=k, **COL_DICT), diff --git a/examples/06_benchmarks/movielens.ipynb b/examples/06_benchmarks/movielens.ipynb index 70822b69e..b2526c388 100644 --- a/examples/06_benchmarks/movielens.ipynb +++ b/examples/06_benchmarks/movielens.ipynb @@ -109,10 +109,8 @@ 
"source": [ "import os\n", "import sys\n", - "import json\n", "import numpy as np\n", "import pandas as pd\n", - "import seaborn as sns\n", "import surprise\n", "import cornac\n", "\n", @@ -1208,8 +1206,8 @@ "outputs": [], "source": [ "# Record results for tests - ignore this cell\n", - "for algos in algorithms:\n", - " store_metadata(algos, df_results.loc[df_results[\"Algo\"] == algo, \"nDCG@k\"].values)\n" + "for algo in algorithms:\n", + " store_metadata(algo, df_results.loc[df_results[\"Algo\"] == algo, \"nDCG@k\"].values)\n" ] } ], diff --git a/recommenders/evaluation/python_evaluation.py b/recommenders/evaluation/python_evaluation.py index c0c2af5d4..7569c7246 100644 --- a/recommenders/evaluation/python_evaluation.py +++ b/recommenders/evaluation/python_evaluation.py @@ -802,6 +802,7 @@ def get_top_k_items( recall_at_k.__name__: recall_at_k, ndcg_at_k.__name__: ndcg_at_k, map_at_k.__name__: map_at_k, + map.__name__: map, } diff --git a/recommenders/evaluation/spark_evaluation.py b/recommenders/evaluation/spark_evaluation.py index a05f010b7..b4d6ea689 100644 --- a/recommenders/evaluation/spark_evaluation.py +++ b/recommenders/evaluation/spark_evaluation.py @@ -156,12 +156,14 @@ def exp_var(self): Returns: float: Explained variance (min=0, max=1). """ - var1 = self.y_pred_true.selectExpr("variance(label - prediction)").collect()[0][ - 0 - ] + var1 = self.y_pred_true.selectExpr("variance(label-prediction)").collect()[0][0] var2 = self.y_pred_true.selectExpr("variance(label)").collect()[0][0] - # numpy divide is more tolerant to var2 being zero - return 1 - np.divide(var1, var2) + + if var1 is None or var2 is None: + return -np.inf + else: + # numpy divide is more tolerant to var2 being zero + return 1 - np.divide(var1, var2) class SparkRankingEvaluation: diff --git a/recommenders/utils/notebook_utils.py b/recommenders/utils/notebook_utils.py index 2009dd5fe..148b9ce2b 100644 --- a/recommenders/utils/notebook_utils.py +++ b/recommenders/utils/notebook_utils.py @@ -44,6 +44,28 @@ def is_databricks(): return False +def _update_parameters(parameter_cell_source, new_parameters): + """Replace parameter values in the cell source code.""" + modified_cell_source = parameter_cell_source + for param, new_value in new_parameters.items(): + if ( + isinstance(new_value, str) + and not (new_value.startswith('"') and new_value.endswith('"')) + and not (new_value.startswith("'") and new_value.endswith("'")) + ): + # Check if the new value is a string and surround it with quotes if necessary + new_value = f'"{new_value}"' + + # Define a regular expression pattern to match parameter assignments and ignore comments + pattern = re.compile( + rf"(\b{param})\s*=\s*([^#\n]+)(?:#.*$)?", + re.MULTILINE + ) + modified_cell_source = pattern.sub(rf"\1 = {new_value}", modified_cell_source) + + return modified_cell_source + + def execute_notebook( input_notebook, output_notebook, parameters={}, kernel_name="python3", timeout=2200 ): @@ -74,31 +96,8 @@ def execute_notebook( and "parameters" in cell.metadata["tags"] and cell.cell_type == "code" ): - cell_source = cell.source - modified_cell_source = ( - cell_source # Initialize a variable to hold the modified source - ) - for param, new_value in parameters.items(): - if ( - isinstance(new_value, str) - and not (new_value.startswith('"') and new_value.endswith('"')) - and not (new_value.startswith("'") and new_value.endswith("'")) - ): - # Check if the new value is a string and surround it with quotes if necessary - new_value = f'"{new_value}"' - # # Check if the new 
value is a string and surround it with quotes if necessary - # if isinstance(new_value, str): - # new_value = f'"{new_value}"' - # Define a regular expression pattern to match parameter assignments and ignore comments - pattern = re.compile( - rf"(\b{param})\s*=\s*([^#\n]+)(?:#.*$)?", - re.MULTILINE - # rf"\b{param}\s*=\s*([^\n]+)\b" - ) - modified_cell_source = pattern.sub(rf"\1 = {new_value}", cell_source) - # Update the cell's source within notebook_content - cell.source = modified_cell_source + cell.source = _update_parameters(cell.source, parameters) # Create an execution preprocessor execute_preprocessor = ExecutePreprocessor(timeout=timeout, kernel_name=kernel_name) diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py index c800baa1b..6c44411fe 100644 --- a/tests/ci/azureml_tests/test_groups.py +++ b/tests/ci/azureml_tests/test_groups.py @@ -318,6 +318,10 @@ "tests/unit/examples/test_notebooks_python.py::test_baseline_deep_dive_runs", "tests/unit/examples/test_notebooks_python.py::test_template_runs", "tests/unit/recommenders/utils/test_notebook_utils.py::test_is_jupyter", + "tests/unit/recommenders/utils/test_notebook_utils.py::test_update_parameters", + "tests/unit/recommenders/utils/test_notebook_utils.py::test_notebook_execution", + "tests/unit/recommenders/utils/test_notebook_utils.py::test_notebook_execution_with_parameters", + "tests/unit/recommenders/utils/test_notebook_utils.py::test_notebook_execution_value_error_fails", "tests/unit/examples/test_notebooks_python.py::test_surprise_deep_dive_runs", "tests/unit/examples/test_notebooks_python.py::test_lightgbm", "tests/unit/examples/test_notebooks_python.py::test_cornac_deep_dive_runs", diff --git a/tests/functional/examples/test_notebooks_gpu.py b/tests/functional/examples/test_notebooks_gpu.py index 83fe9861f..2007cc1a7 100644 --- a/tests/functional/examples/test_notebooks_gpu.py +++ b/tests/functional/examples/test_notebooks_gpu.py @@ -196,7 +196,7 @@ def test_xdeepfm_functional( "rsquared": 0.262963, "exp_var": 0.268413, "ndcg_at_k": 0.118114, - "map_at_k": 0.0139213, + "map": 0.0139213, "precision_at_k": 0.107087, "recall_at_k": 0.0328638, }, @@ -225,7 +225,7 @@ def test_wide_deep_functional( "MODEL_DIR": tmp, "EXPORT_DIR_BASE": tmp, "RATING_METRICS": ["rmse", "mae", "rsquared", "exp_var"], - "RANKING_METRICS": ["ndcg_at_k", "map_at_k", "precision_at_k", "recall_at_k"], + "RANKING_METRICS": ["ndcg_at_k", "map", "precision_at_k", "recall_at_k"], "RANDOM_SEED": seed, } execute_notebook( @@ -247,7 +247,7 @@ def test_wide_deep_functional( os.path.join("tests", "resources", "deeprec", "slirec"), 10, 400, - {"auc": 0.7183, "logloss": 0.6045}, + {"auc": 0.7183}, # Don't do logloss check as SLi-Rec uses ranking loss, not a point-wise loss 42, ) ], @@ -278,11 +278,7 @@ def test_slirec_quickstart_functional( results = read_notebook(output_notebook) assert results["auc"] == pytest.approx(expected_values["auc"], rel=TOL, abs=ABS_TOL) - ## disable logloss check, because so far SLi-Rec uses ranking loss, not a point-wise loss - # assert results["logloss"] == pytest.approx( - # expected_values["logloss"], rel=TOL, abs=ABS_TOL - # ) - + @pytest.mark.gpu @pytest.mark.notebooks @@ -444,11 +440,11 @@ def test_lstur_quickstart_functional( assert results["mean_mrr"] == pytest.approx( expected_values["mean_mrr"], rel=TOL, abs=ABS_TOL ) - assert expected_values["ndcg@5"] == pytest.approx( - value["ndcg@5"], rel=TOL, abs=ABS_TOL + assert results["ndcg@5"] == pytest.approx( + expected_values["ndcg@5"], 
rel=TOL, abs=ABS_TOL ) - assert expected_values["ndcg@10"] == pytest.approx( - value["ndcg@10"], rel=TOL, abs=ABS_TOL + assert results["ndcg@10"] == pytest.approx( + expected_values["ndcg@10"], rel=TOL, abs=ABS_TOL ) @@ -609,23 +605,21 @@ def test_cornac_bivae_functional( @pytest.mark.gpu @pytest.mark.notebooks @pytest.mark.parametrize( - "data_dir, num_epochs, batch_size, model_name, expected_values, seed", + "data_dir, num_epochs, batch_size, model_name, expected_values", [ ( os.path.join("tests", "recsys_data", "RecSys", "SASRec-tf2", "data"), 1, 128, "sasrec", - {"ndcg@10": 0.2626, "Hit@10": 0.4244}, - 42, + {"ndcg@10": 0.2297, "Hit@10": 0.3789}, ), ( os.path.join("tests", "recsys_data", "RecSys", "SASRec-tf2", "data"), 1, 128, "ssept", - {"ndcg@10": 0.2626, "Hit@10": 0.4244}, - 42, + {"ndcg@10": 0.2245, "Hit@10": 0.3743}, ), ], ) @@ -638,7 +632,6 @@ def test_sasrec_quickstart_functional( batch_size, model_name, expected_values, - seed, ): notebook_path = notebooks["sasrec_quickstart"] params = { @@ -646,7 +639,6 @@ def test_sasrec_quickstart_functional( "num_epochs": num_epochs, "batch_size": batch_size, "model_name": model_name, - "seed": seed, } execute_notebook( notebook_path, diff --git a/tests/functional/examples/test_notebooks_python.py b/tests/functional/examples/test_notebooks_python.py index 6ce2c6741..014cf5996 100644 --- a/tests/functional/examples/test_notebooks_python.py +++ b/tests/functional/examples/test_notebooks_python.py @@ -237,6 +237,7 @@ def test_lightfm_functional( "expected_values", [({"rmse": 0.4969, "mae": 0.4761})], ) +@pytest.mark.skip(reason="geoimc doesn't work with any officially released pymanopt package") def test_geoimc_functional(notebooks, output_notebook, kernel_name, expected_values): notebook_path = notebooks["geoimc_quickstart"] execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name) diff --git a/tests/smoke/examples/test_notebooks_gpu.py b/tests/smoke/examples/test_notebooks_gpu.py index 6d77fe6b0..f14498ff8 100644 --- a/tests/smoke/examples/test_notebooks_gpu.py +++ b/tests/smoke/examples/test_notebooks_gpu.py @@ -43,9 +43,7 @@ def test_ncf_deep_dive_smoke(notebooks, output_notebook, kernel_name): notebook_path, output_notebook, kernel_name=kernel_name, - parameters=dict( - TOP_K=10, MOVIELENS_DATA_SIZE="100k", EPOCHS=1, BATCH_SIZE=1024 - ), + parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE="100k", EPOCHS=1, BATCH_SIZE=1024), ) results = read_notebook(output_notebook) @@ -91,19 +89,15 @@ def test_xdeepfm_smoke(notebooks, output_notebook, kernel_name): output_notebook, kernel_name=kernel_name, parameters=dict( - EPOCHS_FOR_SYNTHETIC_RUN=1, - EPOCHS_FOR_CRITEO_RUN=1, - BATCH_SIZE_SYNTHETIC=128, - BATCH_SIZE_CRITEO=512, + EPOCHS=1, + BATCH_SIZE=512, RANDOM_SEED=42, ), ) results = read_notebook(output_notebook) - assert results["res_syn"]["auc"] == pytest.approx(0.5043, rel=TOL, abs=ABS_TOL) - assert results["res_syn"]["logloss"] == pytest.approx(0.7046, rel=TOL, abs=ABS_TOL) - assert results["res_real"]["auc"] == pytest.approx(0.7251, rel=TOL, abs=ABS_TOL) - assert results["res_real"]["logloss"] == pytest.approx(0.508, rel=TOL, abs=ABS_TOL) + assert results["auc"] == pytest.approx(0.7251, rel=TOL, abs=ABS_TOL) + assert results["logloss"] == pytest.approx(0.508, rel=TOL, abs=ABS_TOL) @pytest.mark.notebooks @@ -140,14 +134,14 @@ def test_naml_smoke(notebooks, output_notebook, kernel_name): notebook_path, output_notebook, kernel_name=kernel_name, - parameters=dict(epochs=1, seed=42, MIND_type="demo"), + parameters=dict(epochs=1, 
batch_size=64, seed=42, MIND_type="demo"), ) results = read_notebook(output_notebook) - assert results["res_syn"]["group_auc"] == pytest.approx( + assert results["group_auc"] == pytest.approx( 0.5801, rel=TOL, abs=ABS_TOL ) - assert results["res_syn"]["mean_mrr"] == pytest.approx(0.2512, rel=TOL, abs=ABS_TOL) + assert results["mean_mrr"] == pytest.approx(0.2512, rel=TOL, abs=ABS_TOL) @pytest.mark.notebooks @@ -162,10 +156,10 @@ def test_nrms_smoke(notebooks, output_notebook, kernel_name): ) results = read_notebook(output_notebook) - assert results["res_syn"]["group_auc"] == pytest.approx( + assert results["group_auc"] == pytest.approx( 0.5768, rel=TOL, abs=ABS_TOL ) - assert results["res_syn"]["mean_mrr"] == pytest.approx(0.2457, rel=TOL, abs=ABS_TOL) + assert results["mean_mrr"] == pytest.approx(0.2457, rel=TOL, abs=ABS_TOL) @pytest.mark.notebooks @@ -176,14 +170,14 @@ def test_npa_smoke(notebooks, output_notebook, kernel_name): notebook_path, output_notebook, kernel_name=kernel_name, - parameters=dict(epochs=1, seed=42, MIND_type="demo"), + parameters=dict(epochs=1, batch_size=64, seed=42, MIND_type="demo"), ) results = read_notebook(output_notebook) - assert results["res_syn"]["group_auc"] == pytest.approx( + assert results["group_auc"] == pytest.approx( 0.5861, rel=TOL, abs=ABS_TOL ) - assert results["res_syn"]["mean_mrr"] == pytest.approx(0.255, rel=TOL, abs=ABS_TOL) + assert results["mean_mrr"] == pytest.approx(0.255, rel=TOL, abs=ABS_TOL) @pytest.mark.notebooks @@ -194,14 +188,14 @@ def test_lstur_smoke(notebooks, output_notebook, kernel_name): notebook_path, output_notebook, kernel_name=kernel_name, - parameters=dict(epochs=1, seed=40, MIND_type="demo"), + parameters=dict(epochs=1, batch_size=64, seed=40, MIND_type="demo"), ) results = read_notebook(output_notebook) - assert results["res_syn"]["group_auc"] == pytest.approx( + assert results["group_auc"] == pytest.approx( 0.5977, rel=TOL, abs=ABS_TOL ) - assert results["res_syn"]["mean_mrr"] == pytest.approx(0.2618, rel=TOL, abs=ABS_TOL) + assert results["mean_mrr"] == pytest.approx(0.2618, rel=TOL, abs=ABS_TOL) @pytest.mark.notebooks diff --git a/tests/unit/examples/test_notebooks_gpu.py b/tests/unit/examples/test_notebooks_gpu.py index a5e9b47ab..630538b9e 100644 --- a/tests/unit/examples/test_notebooks_gpu.py +++ b/tests/unit/examples/test_notebooks_gpu.py @@ -64,10 +64,8 @@ def test_xdeepfm(notebooks, output_notebook, kernel_name): output_notebook, kernel_name=kernel_name, parameters=dict( - EPOCHS_FOR_SYNTHETIC_RUN=1, - EPOCHS_FOR_CRITEO_RUN=1, - BATCH_SIZE_SYNTHETIC=128, - BATCH_SIZE_CRITEO=512, + EPOCHS=1, + BATCH_SIZE=1024, ), ) @@ -119,5 +117,5 @@ def test_dkn_quickstart(notebooks, output_notebook, kernel_name): notebook_path, output_notebook, kernel_name=kernel_name, - parameters=dict(EPOCHS=1, BATCH_SIZE=500), + parameters=dict(EPOCHS=1, BATCH_SIZE=500, HISTORY_SIZE=5), ) diff --git a/tests/unit/examples/test_notebooks_pyspark.py b/tests/unit/examples/test_notebooks_pyspark.py index 991095f75..e0ca07f6e 100644 --- a/tests/unit/examples/test_notebooks_pyspark.py +++ b/tests/unit/examples/test_notebooks_pyspark.py @@ -96,7 +96,11 @@ def test_evaluation_diversity_runs(notebooks, output_notebook, kernel_name): ) -# This is a flaky test that can fail unexpectedly +# mock100 dataset throws the following error: +# TrainValidationSplit IllegalArgumentException: requirement failed: +# Nothing has been added to this summarizer. 
+# This seems to be caused by cold start problem -- https://stackoverflow.com/questions/58827795/requirement-failed-nothing-has-been-added-to-this-summarizer +# In terms of the processing speed at Spark, "100k" dataset does not take much longer than "mock100" dataset and thus use "100k" here to go around the issue. @pytest.mark.flaky(reruns=5, reruns_delay=2) @pytest.mark.notebooks @pytest.mark.spark @@ -110,7 +114,7 @@ def test_spark_tuning(notebooks, output_notebook, kernel_name): output_notebook, kernel_name=kernel_name, parameters=dict( - MOVIELENS_DATA_SIZE="mock100", + MOVIELENS_DATA_SIZE="100k", # Note: mock100 throws an error NUMBER_CORES="1", NUMBER_ITERATIONS=3, SUBSET_RATIO=0.5, diff --git a/tests/unit/examples/test_notebooks_python.py b/tests/unit/examples/test_notebooks_python.py index 4fb386f40..7457fe2de 100644 --- a/tests/unit/examples/test_notebooks_python.py +++ b/tests/unit/examples/test_notebooks_python.py @@ -83,6 +83,7 @@ def test_cornac_deep_dive_runs(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks @pytest.mark.experimental +@pytest.mark.skip(reason="rlrmc doesn't work with any officially released pymanopt package") def test_rlrmc_quickstart_runs(notebooks, output_notebook, kernel_name): notebook_path = notebooks["rlrmc_quickstart"] execute_notebook( diff --git a/tests/unit/recommenders/evaluation/test_python_evaluation.py b/tests/unit/recommenders/evaluation/test_python_evaluation.py index cab117b15..4f0d4730b 100644 --- a/tests/unit/recommenders/evaluation/test_python_evaluation.py +++ b/tests/unit/recommenders/evaluation/test_python_evaluation.py @@ -181,7 +181,7 @@ def test_python_exp_var(rating_true, rating_pred): rating_pred=rating_true, col_prediction=DEFAULT_RATING_COL, ) == pytest.approx(1.0, TOL) - assert exp_var(rating_true, rating_pred) == pytest.approx(-6.4466, 0.01) + assert exp_var(rating_true, rating_pred) == pytest.approx(-6.4466, TOL) def test_get_top_k_items(rating_true): diff --git a/tests/unit/recommenders/evaluation/test_spark_evaluation.py b/tests/unit/recommenders/evaluation/test_spark_evaluation.py index 10cacaf83..278a2e287 100644 --- a/tests/unit/recommenders/evaluation/test_spark_evaluation.py +++ b/tests/unit/recommenders/evaluation/test_spark_evaluation.py @@ -109,7 +109,7 @@ def test_spark_exp_var(spark_data): assert evaluator.exp_var() == pytest.approx(1.0, TOL) evaluator = SparkRatingEvaluation(df_true, df_pred) - assert evaluator.exp_var() == pytest.approx(-6.4466, 0.01) + assert evaluator.exp_var() == pytest.approx(-6.4466, TOL) @pytest.mark.spark diff --git a/tests/unit/recommenders/utils/test_notebook_utils.py b/tests/unit/recommenders/utils/test_notebook_utils.py index dc6a88de2..417cb0ac8 100644 --- a/tests/unit/recommenders/utils/test_notebook_utils.py +++ b/tests/unit/recommenders/utils/test_notebook_utils.py @@ -10,6 +10,7 @@ is_databricks, execute_notebook, read_notebook, + _update_parameters, ) @@ -52,6 +53,44 @@ def test_is_databricks(): pass +@pytest.mark.notebooks +def test_update_parameters(): + parameter_cell_source = ''' +# Integer +TOP_K = 10 +# Float +LEARNING_RATE = 0.001 +# String +MOVIELENS_DATA_SIZE = "100k" +# List +RANKING_METRICS = [ evaluator.ndcg_at_k.__name__, evaluator.precision_at_k.__name__ ] +# Boolean +EVALUATE_WHILE_TRAINING = True +''' + + new_parameters = { + "MOVIELENS_DATA_SIZE": "1m", + "TOP_K": 1, + "EVALUATE_WHILE_TRAINING": False, + "RANKING_METRICS": ["ndcg_at_k", "precision_at_k"], + "LEARNING_RATE": 0.1, + } + + new_cell_source = _update_parameters(parameter_cell_source, 
new_parameters) + assert new_cell_source == ''' +# Integer +TOP_K = 1 +# Float +LEARNING_RATE = 0.1 +# String +MOVIELENS_DATA_SIZE = "1m" +# List +RANKING_METRICS = ['ndcg_at_k', 'precision_at_k'] +# Boolean +EVALUATE_WHILE_TRAINING = False +''' + + @pytest.mark.notebooks def test_notebook_execution(notebook_programmatic, output_notebook, kernel_name): """Test that the notebook executes and returns the correct results without params.""" @@ -68,77 +107,34 @@ def test_notebook_execution(notebook_programmatic, output_notebook, kernel_name) @pytest.mark.notebooks -def test_notebook_execution_int(notebook_programmatic, output_notebook, kernel_name): - """Test that the notebook executes and returns the correct results with integers.""" - execute_notebook( - notebook_programmatic, - output_notebook, - kernel_name=kernel_name, - parameters=dict(a=6), - ) - - results = read_notebook(output_notebook) - assert results["response1"] == 8 - - -@pytest.mark.notebooks -def test_notebook_execution_float(notebook_programmatic, output_notebook, kernel_name): - """Test that the notebook executes and returns the correct results with floats.""" - execute_notebook( - notebook_programmatic, - output_notebook, - kernel_name=kernel_name, - parameters=dict(a=1.5), - ) - - results = read_notebook(output_notebook) - assert results["response1"] == 3.5 - - -@pytest.mark.notebooks -def test_notebook_execution_letter(notebook_programmatic, output_notebook, kernel_name): - """Test that the notebook executes and returns the correct results with a string.""" - execute_notebook( - notebook_programmatic, - output_notebook, - kernel_name=kernel_name, - parameters=dict(b="M"), - ) - - results = read_notebook(output_notebook) - assert results["response2"] is True - - -@pytest.mark.notebooks -def test_notebook_execution_other_letter( - notebook_programmatic, output_notebook, kernel_name -): - """Test that the notebook executes and returns the correct results with a different string.""" - execute_notebook( - notebook_programmatic, - output_notebook, - kernel_name=kernel_name, - parameters=dict(b="A"), - ) - - results = read_notebook(output_notebook) - assert results["response2"] == "A" - - -@pytest.mark.notebooks -def test_notebook_execution_letter_and_number( - notebook_programmatic, output_notebook, kernel_name +@pytest.mark.parametrize( + "parameters,expected_key,expected_value", [ + (dict(a=6), "response1", 8), # Test the correct results with integers + (dict(a=1.5), "response1", 3.5), # Test the correct results with floats + (dict(b="M"), "response2", True), # Test the correct results with strings + (dict(b="A"), "response2", "A"), # Test the correct results with different strings + (dict(b="100k"), "response2", "100k"), # Test the correct results with strings that have numbers + (dict(c=10), "response3", 12), # Test the correct results with integers and a comment + ] +) +def test_notebook_execution_with_parameters( + notebook_programmatic, + output_notebook, + kernel_name, + parameters, + expected_key, + expected_value, ): - """Test that the notebook executes and returns the correct results with string that has a number.""" + """Test that the notebook executes.""" execute_notebook( notebook_programmatic, output_notebook, kernel_name=kernel_name, - parameters=dict(b="100k"), + parameters=parameters, ) results = read_notebook(output_notebook) - assert results["response2"] == "100k" + assert results[expected_key] == expected_value @pytest.mark.notebooks @@ -153,19 +149,3 @@ def test_notebook_execution_value_error_fails( 
kernel_name=kernel_name, parameters=dict(b=1), ) - - -@pytest.mark.notebooks -def test_notebook_execution_int_with_comment( - notebook_programmatic, output_notebook, kernel_name -): - """Test that the notebook executes and returns the correct results with integers and a comment.""" - execute_notebook( - notebook_programmatic, - output_notebook, - kernel_name=kernel_name, - parameters=dict(c=10), - ) - - results = read_notebook(output_notebook) - assert results["response3"] == 12
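
---

For reference, below is a minimal standalone sketch of the parameter-substitution logic that this patch factors out into the private `_update_parameters` helper in `recommenders/utils/notebook_utils.py`. The regex, the quoting of bare string values, and the accumulation of substitutions across parameters follow the patched code; the top-level function name `update_parameters_sketch` and the sample source string are illustrative only and not part of the PR. Note the bug the patch fixes: the old inline loop always substituted against the original `cell_source`, so only the last parameter's replacement survived, whereas the new helper (and this sketch) keeps substituting into the already-modified source.

```python
import re


def update_parameters_sketch(cell_source, new_parameters):
    """Sketch of the regex-based parameter rewrite added in this PR.

    Rewrites `NAME = value  # comment` assignments in a notebook's
    "parameters" cell, dropping any inline comment after the assignment.
    """
    modified = cell_source
    for param, new_value in new_parameters.items():
        if (
            isinstance(new_value, str)
            and not (new_value.startswith('"') and new_value.endswith('"'))
            and not (new_value.startswith("'") and new_value.endswith("'"))
        ):
            # Bare strings are wrapped in quotes so the rewritten cell still parses
            new_value = f'"{new_value}"'

        # Match the assignment and ignore (remove) a trailing inline comment
        pattern = re.compile(rf"(\b{param})\s*=\s*([^#\n]+)(?:#.*$)?", re.MULTILINE)
        # Substitute into the accumulated result, not the original source,
        # so earlier replacements are preserved (the bug fixed by this PR)
        modified = pattern.sub(rf"\1 = {new_value}", modified)
    return modified


# Hypothetical usage, mirroring tests/unit/recommenders/utils/test_notebook_utils.py
source = 'TOP_K = 10  # number of items\nMOVIELENS_DATA_SIZE = "100k"\n'
print(update_parameters_sketch(source, {"TOP_K": 1, "MOVIELENS_DATA_SIZE": "1m"}))
# TOP_K = 1
# MOVIELENS_DATA_SIZE = "1m"
```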