From 82ee6d38cea43e54172782c8200f3bbe42d54fc6 Mon Sep 17 00:00:00 2001 From: Jun Ki Min <42475935+loomlike@users.noreply.github.com> Date: Thu, 21 Dec 2023 10:28:30 -0800 Subject: [PATCH] Fix nightly test errors (#2045) * Revert tests tolerance * Fix notebook parameter parsing * Add notebook utils tests to test groups * Fix notebooks * Fix notebook unit tests * Update evaluation metrics name map. Handle None for exp_var * Fix smoke tests * cleanup * Fix functional test errors * make notebook parameter update function to be private * Fix benchmark notebook bug * fix remaining bugs --------- Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com> --- .../00_quick_start/fastai_movielens.ipynb | 8 +- examples/00_quick_start/naml_MIND.ipynb | 30 +--- examples/00_quick_start/ncf_movielens.ipynb | 8 +- examples/00_quick_start/nrms_MIND.ipynb | 1 - examples/00_quick_start/rlrmc_movielens.ipynb | 1 - examples/00_quick_start/sar_movielens.ipynb | 4 +- examples/00_quick_start/sasrec_amazon.ipynb | 10 +- .../sequential_recsys_amazondataset.ipynb | 19 +-- .../00_quick_start/wide_deep_movielens.ipynb | 2 - .../baseline_deep_dive.ipynb | 4 +- .../cornac_bivae_deep_dive.ipynb | 5 +- .../cornac_bpr_deep_dive.ipynb | 4 +- .../lightgcn_deep_dive.ipynb | 4 +- .../ncf_deep_dive.ipynb | 21 ++- examples/06_benchmarks/benchmark_utils.py | 6 +- examples/06_benchmarks/movielens.ipynb | 6 +- recommenders/evaluation/python_evaluation.py | 1 + recommenders/evaluation/spark_evaluation.py | 12 +- recommenders/utils/notebook_utils.py | 47 +++--- tests/ci/azureml_tests/test_groups.py | 4 + .../functional/examples/test_notebooks_gpu.py | 30 ++-- .../examples/test_notebooks_python.py | 1 + tests/smoke/examples/test_notebooks_gpu.py | 38 ++--- tests/unit/examples/test_notebooks_gpu.py | 8 +- tests/unit/examples/test_notebooks_pyspark.py | 8 +- tests/unit/examples/test_notebooks_python.py | 1 + .../evaluation/test_python_evaluation.py | 2 +- .../evaluation/test_spark_evaluation.py | 2 +- .../recommenders/utils/test_notebook_utils.py | 138 ++++++++---------- 29 files changed, 176 insertions(+), 249 deletions(-) diff --git a/examples/00_quick_start/fastai_movielens.ipynb b/examples/00_quick_start/fastai_movielens.ipynb index c5de1996d..517673178 100644 --- a/examples/00_quick_start/fastai_movielens.ipynb +++ b/examples/00_quick_start/fastai_movielens.ipynb @@ -59,7 +59,7 @@ "from recommenders.datasets import movielens\n", "from recommenders.datasets.python_splitters import python_stratified_split\n", "from recommenders.models.fastai.fastai_utils import cartesian_product, score\n", - "from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n", + "from recommenders.evaluation.python_evaluation import map, ndcg_at_k, precision_at_k, recall_at_k\n", "from recommenders.evaluation.python_evaluation import rmse, mae, rsquared, exp_var\n", "from recommenders.utils.notebook_utils import store_metadata\n", "\n", @@ -599,9 +599,9 @@ "metadata": {}, "outputs": [], "source": [ - "eval_map = map_at_k(test_df, top_k_scores, col_user=USER, col_item=ITEM, \n", - " col_rating=RATING, col_prediction=PREDICTION, \n", - " relevancy_method=\"top_k\", k=TOP_K)" + "eval_map = map(test_df, top_k_scores, col_user=USER, col_item=ITEM, \n", + " col_rating=RATING, col_prediction=PREDICTION, \n", + " relevancy_method=\"top_k\", k=TOP_K)" ] }, { diff --git a/examples/00_quick_start/naml_MIND.ipynb b/examples/00_quick_start/naml_MIND.ipynb index b202a390e..c505bd91f 100644 --- 
a/examples/00_quick_start/naml_MIND.ipynb +++ b/examples/00_quick_start/naml_MIND.ipynb @@ -246,34 +246,6 @@ "model = NAMLModel(hparams, iterator, seed=seed)" ] }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "18693it [01:18, 239.62it/s]\n", - "7507it [00:30, 249.74it/s]\n", - "7538it [00:01, 6423.03it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'group_auc': 0.4807, 'mean_mrr': 0.2104, 'ndcg@5': 0.2141, 'ndcg@10': 0.2766}\n" - ] - } - ], - "source": [ - "print(model.run_eval(valid_news_file, valid_behaviors_file))" - ] - }, { "cell_type": "code", "execution_count": 8, @@ -398,7 +370,7 @@ ], "source": [ "%%time\n", - "model.fit(train_news_file, train_behaviors_file,valid_news_file, valid_behaviors_file)" + "model.fit(train_news_file, train_behaviors_file, valid_news_file, valid_behaviors_file)" ] }, { diff --git a/examples/00_quick_start/ncf_movielens.ipynb b/examples/00_quick_start/ncf_movielens.ipynb index 23142e458..2b48e3ce1 100644 --- a/examples/00_quick_start/ncf_movielens.ipynb +++ b/examples/00_quick_start/ncf_movielens.ipynb @@ -56,10 +56,10 @@ "from recommenders.models.ncf.ncf_singlenode import NCF\n", "from recommenders.models.ncf.dataset import Dataset as NCFDataset\n", "from recommenders.datasets import movielens\n", - "from recommenders.utils.notebook_utils import is_jupyter\n", "from recommenders.datasets.python_splitters import python_chrono_split\n", - "from recommenders.evaluation.python_evaluation import (rmse, mae, rsquared, exp_var, map_at_k, ndcg_at_k, precision_at_k, \n", - " recall_at_k, get_top_k_items)\n", + "from recommenders.evaluation.python_evaluation import (\n", + " map, ndcg_at_k, precision_at_k, recall_at_k\n", + ")\n", "from recommenders.utils.notebook_utils import store_metadata\n", "\n", "print(\"System version: {}\".format(sys.version))\n", @@ -334,7 +334,7 @@ } ], "source": [ - "eval_map = map_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", + "eval_map = map(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", "eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", "eval_precision = precision_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", "eval_recall = recall_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", diff --git a/examples/00_quick_start/nrms_MIND.ipynb b/examples/00_quick_start/nrms_MIND.ipynb index 391d18712..e94c48733 100644 --- a/examples/00_quick_start/nrms_MIND.ipynb +++ b/examples/00_quick_start/nrms_MIND.ipynb @@ -99,7 +99,6 @@ "import numpy as np\n", "import zipfile\n", "from tqdm import tqdm\n", - "import scrapbook as sb\n", "from tempfile import TemporaryDirectory\n", "import tensorflow as tf\n", "tf.get_logger().setLevel('ERROR') # only show error messages\n", diff --git a/examples/00_quick_start/rlrmc_movielens.ipynb b/examples/00_quick_start/rlrmc_movielens.ipynb index 6ec3e6a86..34a9deb3f 100644 --- a/examples/00_quick_start/rlrmc_movielens.ipynb +++ b/examples/00_quick_start/rlrmc_movielens.ipynb @@ -40,7 +40,6 @@ "source": [ "import sys\n", "import time\n", - "import numpy as np\n", "import pandas as pd\n", "\n", "from recommenders.datasets.python_splitters import python_random_split\n", diff --git a/examples/00_quick_start/sar_movielens.ipynb b/examples/00_quick_start/sar_movielens.ipynb index 09243e6fd..4eba126ac 100644 --- 
a/examples/00_quick_start/sar_movielens.ipynb +++ b/examples/00_quick_start/sar_movielens.ipynb @@ -68,7 +68,7 @@ "from recommenders.datasets.python_splitters import python_stratified_split\n", "from recommenders.models.sar import SAR\n", "from recommenders.evaluation.python_evaluation import (\n", - " map_at_k,\n", + " map,\n", " ndcg_at_k,\n", " precision_at_k,\n", " recall_at_k,\n", @@ -509,7 +509,7 @@ "outputs": [], "source": [ "# Ranking metrics\n", - "eval_map = map_at_k(test, top_k, col_user=\"userID\", col_item=\"itemID\", col_rating=\"rating\", k=TOP_K)\n", + "eval_map = map(test, top_k, col_user=\"userID\", col_item=\"itemID\", col_rating=\"rating\", k=TOP_K)\n", "eval_ndcg = ndcg_at_k(test, top_k, col_user=\"userID\", col_item=\"itemID\", col_rating=\"rating\", k=TOP_K)\n", "eval_precision = precision_at_k(test, top_k, col_user=\"userID\", col_item=\"itemID\", col_rating=\"rating\", k=TOP_K)\n", "eval_recall = recall_at_k(test, top_k, col_user=\"userID\", col_item=\"itemID\", col_rating=\"rating\", k=TOP_K)\n" diff --git a/examples/00_quick_start/sasrec_amazon.ipynb b/examples/00_quick_start/sasrec_amazon.ipynb index 2e44fbe25..fad74fc69 100644 --- a/examples/00_quick_start/sasrec_amazon.ipynb +++ b/examples/00_quick_start/sasrec_amazon.ipynb @@ -57,23 +57,21 @@ } ], "source": [ - "import re\n", "import sys\n", "import os\n", - "from tempfile import TemporaryDirectory\n", - "import numpy as np\n", "import pandas as pd \n", - "from collections import defaultdict\n", "import tensorflow as tf\n", "tf.get_logger().setLevel('ERROR') # only show error messages\n", "\n", - "from recommenders.utils.timer import Timer\n", "from recommenders.datasets.amazon_reviews import get_review_data\n", "from recommenders.datasets.split_utils import filter_k_core\n", "from recommenders.models.sasrec.model import SASREC\n", "from recommenders.models.sasrec.ssept import SSEPT\n", "from recommenders.models.sasrec.sampler import WarpSampler\n", "from recommenders.models.sasrec.util import SASRecDataSet\n", + "from recommenders.utils.notebook_utils import store_metadata\n", + "from recommenders.utils.timer import Timer\n", + "\n", "\n", "print(f\"System version: {sys.version}\")\n", "print(f\"Tensorflow version: {tf.__version__}\")" @@ -98,7 +96,7 @@ "source": [ "num_epochs = 5\n", "batch_size = 128\n", - "RANDOM_SEED = 100 # Set None for non-deterministic result\n", + "seed = 100 # Set None for non-deterministic result\n", "\n", "# data_dir = os.path.join(\"tests\", \"recsys_data\", \"RecSys\", \"SASRec-tf2\", \"data\")\n", "data_dir = os.path.join(\"..\", \"..\", \"tests\", \"resources\", \"deeprec\", \"sasrec\")\n", diff --git a/examples/00_quick_start/sequential_recsys_amazondataset.ipynb b/examples/00_quick_start/sequential_recsys_amazondataset.ipynb index f563e7bdb..ace5ed39f 100644 --- a/examples/00_quick_start/sequential_recsys_amazondataset.ipynb +++ b/examples/00_quick_start/sequential_recsys_amazondataset.ipynb @@ -63,9 +63,6 @@ "source": [ "import os\n", "import sys\n", - "import logging\n", - "from tempfile import TemporaryDirectory\n", - "import numpy as np\n", "import tensorflow.compat.v1 as tf\n", "tf.get_logger().setLevel('ERROR') # only show error messages\n", "\n", @@ -75,7 +72,6 @@ " prepare_hparams\n", ")\n", "from recommenders.datasets.amazon_reviews import download_and_extract, data_preprocessing\n", - "from recommenders.datasets.download_utils import maybe_download\n", "from recommenders.models.deeprec.models.sequential.sli_rec import SLI_RECModel as SeqModel\n", "#### to use the 
other model, use one of the following lines:\n", "# from recommenders.models.deeprec.models.sequential.asvd import A2SVDModel as SeqModel\n", @@ -92,16 +88,6 @@ "\n" ] }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "## ATTENTION: change to the corresponding config file, e.g., caser.yaml for CaserModel, sum.yaml for SUMModel\n", - "yaml_file = '../../recommenders/models/deeprec/config/sli_rec.yaml' " - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -123,7 +109,10 @@ "BATCH_SIZE = 400\n", "RANDOM_SEED = SEED # Set None for non-deterministic result\n", "\n", - "data_path = os.path.join(\"..\", \"..\", \"tests\", \"resources\", \"deeprec\", \"slirec\")" + "data_path = os.path.join(\"..\", \"..\", \"tests\", \"resources\", \"deeprec\", \"slirec\")\n", + "\n", + "## ATTENTION: change to the corresponding config file, e.g., caser.yaml for CaserModel, sum.yaml for SUMModel\n", + "yaml_file = '../../recommenders/models/deeprec/config/sli_rec.yaml' " ] }, { diff --git a/examples/00_quick_start/wide_deep_movielens.ipynb b/examples/00_quick_start/wide_deep_movielens.ipynb index 60cb1da42..131d245d1 100644 --- a/examples/00_quick_start/wide_deep_movielens.ipynb +++ b/examples/00_quick_start/wide_deep_movielens.ipynb @@ -59,8 +59,6 @@ "import os\n", "import sys\n", "import math\n", - "import itertools\n", - "import numpy as np\n", "import pandas as pd\n", "import sklearn.preprocessing\n", "from tempfile import TemporaryDirectory\n", diff --git a/examples/02_model_collaborative_filtering/baseline_deep_dive.ipynb b/examples/02_model_collaborative_filtering/baseline_deep_dive.ipynb index f8b627a54..6b950bc37 100644 --- a/examples/02_model_collaborative_filtering/baseline_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/baseline_deep_dive.ipynb @@ -78,7 +78,7 @@ " mae,\n", " rsquared,\n", " exp_var,\n", - " map_at_k,\n", + " map,\n", " ndcg_at_k,\n", " precision_at_k,\n", " recall_at_k,\n", @@ -689,7 +689,7 @@ "source": [ "cols[\"col_prediction\"] = \"Count\"\n", "\n", - "eval_map = map_at_k(test, baseline_recommendations, k=TOP_K, **cols)\n", + "eval_map = map(test, baseline_recommendations, k=TOP_K, **cols)\n", "eval_ndcg = ndcg_at_k(test, baseline_recommendations, k=TOP_K, **cols)\n", "eval_precision = precision_at_k(test, baseline_recommendations, k=TOP_K, **cols)\n", "eval_recall = recall_at_k(test, baseline_recommendations, k=TOP_K, **cols)\n", diff --git a/examples/02_model_collaborative_filtering/cornac_bivae_deep_dive.ipynb b/examples/02_model_collaborative_filtering/cornac_bivae_deep_dive.ipynb index c2ff20337..731ab0c12 100644 --- a/examples/02_model_collaborative_filtering/cornac_bivae_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/cornac_bivae_deep_dive.ipynb @@ -48,7 +48,6 @@ "import sys\n", "import torch\n", "import cornac\n", - "import pandas as pd\n", "\n", "from recommenders.datasets import movielens\n", "from recommenders.datasets.python_splitters import python_random_split\n", @@ -56,7 +55,7 @@ "from recommenders.utils.timer import Timer\n", "from recommenders.utils.constants import SEED\n", "from recommenders.evaluation.python_evaluation import (\n", - " map_at_k,\n", + " map,\n", " ndcg_at_k,\n", " precision_at_k,\n", " recall_at_k,\n", @@ -508,7 +507,7 @@ } ], "source": [ - "eval_map = map_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", + "eval_map = map(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", "eval_ndcg = ndcg_at_k(test, all_predictions, 
col_prediction='prediction', k=TOP_K)\n", "eval_precision = precision_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", "eval_recall = recall_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", diff --git a/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb b/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb index fb0253a2f..6a164127b 100644 --- a/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb @@ -49,7 +49,7 @@ "\n", "from recommenders.datasets import movielens\n", "from recommenders.datasets.python_splitters import python_random_split\n", - "from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n", + "from recommenders.evaluation.python_evaluation import map, ndcg_at_k, precision_at_k, recall_at_k\n", "from recommenders.models.cornac.cornac_utils import predict_ranking\n", "from recommenders.utils.timer import Timer\n", "from recommenders.utils.constants import SEED\n", @@ -557,7 +557,7 @@ ], "source": [ "k = 10\n", - "eval_map = map_at_k(test, all_predictions, col_prediction='prediction', k=k)\n", + "eval_map = map(test, all_predictions, col_prediction='prediction', k=k)\n", "eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=k)\n", "eval_precision = precision_at_k(test, all_predictions, col_prediction='prediction', k=k)\n", "eval_recall = recall_at_k(test, all_predictions, col_prediction='prediction', k=k)\n", diff --git a/examples/02_model_collaborative_filtering/lightgcn_deep_dive.ipynb b/examples/02_model_collaborative_filtering/lightgcn_deep_dive.ipynb index 2180a975d..247dd0bd8 100644 --- a/examples/02_model_collaborative_filtering/lightgcn_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/lightgcn_deep_dive.ipynb @@ -62,7 +62,7 @@ "from recommenders.models.deeprec.DataModel.ImplicitCF import ImplicitCF\n", "from recommenders.datasets import movielens\n", "from recommenders.datasets.python_splitters import python_stratified_split\n", - "from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n", + "from recommenders.evaluation.python_evaluation import map, ndcg_at_k, precision_at_k, recall_at_k\n", "from recommenders.utils.constants import SEED as DEFAULT_SEED\n", "from recommenders.models.deeprec.deeprec_utils import prepare_hparams\n", "from recommenders.utils.notebook_utils import store_metadata\n", @@ -640,7 +640,7 @@ } ], "source": [ - "eval_map = map_at_k(test, topk_scores, k=TOP_K)\n", + "eval_map = map(test, topk_scores, k=TOP_K)\n", "eval_ndcg = ndcg_at_k(test, topk_scores, k=TOP_K)\n", "eval_precision = precision_at_k(test, topk_scores, k=TOP_K)\n", "eval_recall = recall_at_k(test, topk_scores, k=TOP_K)\n", diff --git a/examples/02_model_collaborative_filtering/ncf_deep_dive.ipynb b/examples/02_model_collaborative_filtering/ncf_deep_dive.ipynb index 55790696c..76136e8e6 100644 --- a/examples/02_model_collaborative_filtering/ncf_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/ncf_deep_dive.ipynb @@ -63,8 +63,9 @@ "from recommenders.models.ncf.dataset import Dataset as NCFDataset\n", "from recommenders.datasets import movielens\n", "from recommenders.datasets.python_splitters import python_chrono_split\n", - "from recommenders.evaluation.python_evaluation import (rmse, mae, rsquared, exp_var, map_at_k, ndcg_at_k, precision_at_k, \n", - " recall_at_k, 
get_top_k_items)\n", + "from recommenders.evaluation.python_evaluation import (\n", + " map, ndcg_at_k, precision_at_k, recall_at_k\n", + ")\n", "from recommenders.utils.constants import SEED as DEFAULT_SEED\n", "from recommenders.utils.notebook_utils import store_metadata\n", "\n", @@ -428,7 +429,7 @@ "metadata": {}, "outputs": [], "source": [ - "model = NCF (\n", + "model = NCF(\n", " n_users=data.n_users, \n", " n_items=data.n_items,\n", " model_type=\"NeuMF\",\n", @@ -625,8 +626,7 @@ } ], "source": [ - "\n", - "eval_map = map_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", + "eval_map = map(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", "eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", "eval_precision = precision_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", "eval_recall = recall_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", @@ -718,7 +718,7 @@ "metadata": {}, "outputs": [], "source": [ - "model = NCF (\n", + "model = NCF(\n", " n_users=data.n_users, \n", " n_items=data.n_items,\n", " model_type=\"GMF\",\n", @@ -760,7 +760,7 @@ "metadata": {}, "outputs": [], "source": [ - "model = NCF (\n", + "model = NCF(\n", " n_users=data.n_users, \n", " n_items=data.n_items,\n", " model_type=\"MLP\",\n", @@ -771,8 +771,7 @@ " learning_rate=1e-3,\n", " verbose=10,\n", " seed=SEED\n", - ")\n", - "\n" + ")" ] }, { @@ -811,7 +810,7 @@ "metadata": {}, "outputs": [], "source": [ - "model = NCF (\n", + "model = NCF(\n", " n_users=data.n_users, \n", " n_items=data.n_items,\n", " model_type=\"NeuMF\",\n", @@ -905,7 +904,7 @@ } ], "source": [ - "eval_map2 = map_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", + "eval_map2 = map(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", "eval_ndcg2 = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", "eval_precision2 = precision_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", "eval_recall2 = recall_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", diff --git a/examples/06_benchmarks/benchmark_utils.py b/examples/06_benchmarks/benchmark_utils.py index 4b3d8c0d9..e28fa6ab7 100644 --- a/examples/06_benchmarks/benchmark_utils.py +++ b/examples/06_benchmarks/benchmark_utils.py @@ -37,7 +37,7 @@ ) from recommenders.models.cornac.cornac_utils import predict_ranking from recommenders.evaluation.python_evaluation import ( - map_at_k, + map, ndcg_at_k, precision_at_k, recall_at_k, @@ -387,7 +387,7 @@ def ranking_metrics_pyspark(test, predictions, k=DEFAULT_K): test, predictions, k=k, relevancy_method="top_k", **COL_DICT ) return { - "MAP": rank_eval.map_at_k(), + "MAP": rank_eval.map(), "nDCG@k": rank_eval.ndcg_at_k(), "Precision@k": rank_eval.precision_at_k(), "Recall@k": rank_eval.recall_at_k(), @@ -405,7 +405,7 @@ def rating_metrics_python(test, predictions): def ranking_metrics_python(test, predictions, k=DEFAULT_K): return { - "MAP": map_at_k(test, predictions, k=k, **COL_DICT), + "MAP": map(test, predictions, k=k, **COL_DICT), "nDCG@k": ndcg_at_k(test, predictions, k=k, **COL_DICT), "Precision@k": precision_at_k(test, predictions, k=k, **COL_DICT), "Recall@k": recall_at_k(test, predictions, k=k, **COL_DICT), diff --git a/examples/06_benchmarks/movielens.ipynb b/examples/06_benchmarks/movielens.ipynb index 70822b69e..b2526c388 100644 --- a/examples/06_benchmarks/movielens.ipynb +++ b/examples/06_benchmarks/movielens.ipynb @@ -109,10 +109,8 @@ 
"source": [ "import os\n", "import sys\n", - "import json\n", "import numpy as np\n", "import pandas as pd\n", - "import seaborn as sns\n", "import surprise\n", "import cornac\n", "\n", @@ -1208,8 +1206,8 @@ "outputs": [], "source": [ "# Record results for tests - ignore this cell\n", - "for algos in algorithms:\n", - " store_metadata(algos, df_results.loc[df_results[\"Algo\"] == algo, \"nDCG@k\"].values)\n" + "for algo in algorithms:\n", + " store_metadata(algo, df_results.loc[df_results[\"Algo\"] == algo, \"nDCG@k\"].values)\n" ] } ], diff --git a/recommenders/evaluation/python_evaluation.py b/recommenders/evaluation/python_evaluation.py index c0c2af5d4..7569c7246 100644 --- a/recommenders/evaluation/python_evaluation.py +++ b/recommenders/evaluation/python_evaluation.py @@ -802,6 +802,7 @@ def get_top_k_items( recall_at_k.__name__: recall_at_k, ndcg_at_k.__name__: ndcg_at_k, map_at_k.__name__: map_at_k, + map.__name__: map, } diff --git a/recommenders/evaluation/spark_evaluation.py b/recommenders/evaluation/spark_evaluation.py index a05f010b7..b4d6ea689 100644 --- a/recommenders/evaluation/spark_evaluation.py +++ b/recommenders/evaluation/spark_evaluation.py @@ -156,12 +156,14 @@ def exp_var(self): Returns: float: Explained variance (min=0, max=1). """ - var1 = self.y_pred_true.selectExpr("variance(label - prediction)").collect()[0][ - 0 - ] + var1 = self.y_pred_true.selectExpr("variance(label-prediction)").collect()[0][0] var2 = self.y_pred_true.selectExpr("variance(label)").collect()[0][0] - # numpy divide is more tolerant to var2 being zero - return 1 - np.divide(var1, var2) + + if var1 is None or var2 is None: + return -np.inf + else: + # numpy divide is more tolerant to var2 being zero + return 1 - np.divide(var1, var2) class SparkRankingEvaluation: diff --git a/recommenders/utils/notebook_utils.py b/recommenders/utils/notebook_utils.py index 2009dd5fe..148b9ce2b 100644 --- a/recommenders/utils/notebook_utils.py +++ b/recommenders/utils/notebook_utils.py @@ -44,6 +44,28 @@ def is_databricks(): return False +def _update_parameters(parameter_cell_source, new_parameters): + """Replace parameter values in the cell source code.""" + modified_cell_source = parameter_cell_source + for param, new_value in new_parameters.items(): + if ( + isinstance(new_value, str) + and not (new_value.startswith('"') and new_value.endswith('"')) + and not (new_value.startswith("'") and new_value.endswith("'")) + ): + # Check if the new value is a string and surround it with quotes if necessary + new_value = f'"{new_value}"' + + # Define a regular expression pattern to match parameter assignments and ignore comments + pattern = re.compile( + rf"(\b{param})\s*=\s*([^#\n]+)(?:#.*$)?", + re.MULTILINE + ) + modified_cell_source = pattern.sub(rf"\1 = {new_value}", modified_cell_source) + + return modified_cell_source + + def execute_notebook( input_notebook, output_notebook, parameters={}, kernel_name="python3", timeout=2200 ): @@ -74,31 +96,8 @@ def execute_notebook( and "parameters" in cell.metadata["tags"] and cell.cell_type == "code" ): - cell_source = cell.source - modified_cell_source = ( - cell_source # Initialize a variable to hold the modified source - ) - for param, new_value in parameters.items(): - if ( - isinstance(new_value, str) - and not (new_value.startswith('"') and new_value.endswith('"')) - and not (new_value.startswith("'") and new_value.endswith("'")) - ): - # Check if the new value is a string and surround it with quotes if necessary - new_value = f'"{new_value}"' - # # Check if the new 
value is a string and surround it with quotes if necessary - # if isinstance(new_value, str): - # new_value = f'"{new_value}"' - # Define a regular expression pattern to match parameter assignments and ignore comments - pattern = re.compile( - rf"(\b{param})\s*=\s*([^#\n]+)(?:#.*$)?", - re.MULTILINE - # rf"\b{param}\s*=\s*([^\n]+)\b" - ) - modified_cell_source = pattern.sub(rf"\1 = {new_value}", cell_source) - # Update the cell's source within notebook_content - cell.source = modified_cell_source + cell.source = _update_parameters(cell.source, parameters) # Create an execution preprocessor execute_preprocessor = ExecutePreprocessor(timeout=timeout, kernel_name=kernel_name) diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py index c800baa1b..6c44411fe 100644 --- a/tests/ci/azureml_tests/test_groups.py +++ b/tests/ci/azureml_tests/test_groups.py @@ -318,6 +318,10 @@ "tests/unit/examples/test_notebooks_python.py::test_baseline_deep_dive_runs", "tests/unit/examples/test_notebooks_python.py::test_template_runs", "tests/unit/recommenders/utils/test_notebook_utils.py::test_is_jupyter", + "tests/unit/recommenders/utils/test_notebook_utils.py::test_update_parameters", + "tests/unit/recommenders/utils/test_notebook_utils.py::test_notebook_execution", + "tests/unit/recommenders/utils/test_notebook_utils.py::test_notebook_execution_with_parameters", + "tests/unit/recommenders/utils/test_notebook_utils.py::test_notebook_execution_value_error_fails", "tests/unit/examples/test_notebooks_python.py::test_surprise_deep_dive_runs", "tests/unit/examples/test_notebooks_python.py::test_lightgbm", "tests/unit/examples/test_notebooks_python.py::test_cornac_deep_dive_runs", diff --git a/tests/functional/examples/test_notebooks_gpu.py b/tests/functional/examples/test_notebooks_gpu.py index 83fe9861f..2007cc1a7 100644 --- a/tests/functional/examples/test_notebooks_gpu.py +++ b/tests/functional/examples/test_notebooks_gpu.py @@ -196,7 +196,7 @@ def test_xdeepfm_functional( "rsquared": 0.262963, "exp_var": 0.268413, "ndcg_at_k": 0.118114, - "map_at_k": 0.0139213, + "map": 0.0139213, "precision_at_k": 0.107087, "recall_at_k": 0.0328638, }, @@ -225,7 +225,7 @@ def test_wide_deep_functional( "MODEL_DIR": tmp, "EXPORT_DIR_BASE": tmp, "RATING_METRICS": ["rmse", "mae", "rsquared", "exp_var"], - "RANKING_METRICS": ["ndcg_at_k", "map_at_k", "precision_at_k", "recall_at_k"], + "RANKING_METRICS": ["ndcg_at_k", "map", "precision_at_k", "recall_at_k"], "RANDOM_SEED": seed, } execute_notebook( @@ -247,7 +247,7 @@ def test_wide_deep_functional( os.path.join("tests", "resources", "deeprec", "slirec"), 10, 400, - {"auc": 0.7183, "logloss": 0.6045}, + {"auc": 0.7183}, # Don't do logloss check as SLi-Rec uses ranking loss, not a point-wise loss 42, ) ], @@ -278,11 +278,7 @@ def test_slirec_quickstart_functional( results = read_notebook(output_notebook) assert results["auc"] == pytest.approx(expected_values["auc"], rel=TOL, abs=ABS_TOL) - ## disable logloss check, because so far SLi-Rec uses ranking loss, not a point-wise loss - # assert results["logloss"] == pytest.approx( - # expected_values["logloss"], rel=TOL, abs=ABS_TOL - # ) - + @pytest.mark.gpu @pytest.mark.notebooks @@ -444,11 +440,11 @@ def test_lstur_quickstart_functional( assert results["mean_mrr"] == pytest.approx( expected_values["mean_mrr"], rel=TOL, abs=ABS_TOL ) - assert expected_values["ndcg@5"] == pytest.approx( - value["ndcg@5"], rel=TOL, abs=ABS_TOL + assert results["ndcg@5"] == pytest.approx( + expected_values["ndcg@5"], 
rel=TOL, abs=ABS_TOL ) - assert expected_values["ndcg@10"] == pytest.approx( - value["ndcg@10"], rel=TOL, abs=ABS_TOL + assert results["ndcg@10"] == pytest.approx( + expected_values["ndcg@10"], rel=TOL, abs=ABS_TOL ) @@ -609,23 +605,21 @@ def test_cornac_bivae_functional( @pytest.mark.gpu @pytest.mark.notebooks @pytest.mark.parametrize( - "data_dir, num_epochs, batch_size, model_name, expected_values, seed", + "data_dir, num_epochs, batch_size, model_name, expected_values", [ ( os.path.join("tests", "recsys_data", "RecSys", "SASRec-tf2", "data"), 1, 128, "sasrec", - {"ndcg@10": 0.2626, "Hit@10": 0.4244}, - 42, + {"ndcg@10": 0.2297, "Hit@10": 0.3789}, ), ( os.path.join("tests", "recsys_data", "RecSys", "SASRec-tf2", "data"), 1, 128, "ssept", - {"ndcg@10": 0.2626, "Hit@10": 0.4244}, - 42, + {"ndcg@10": 0.2245, "Hit@10": 0.3743}, ), ], ) @@ -638,7 +632,6 @@ def test_sasrec_quickstart_functional( batch_size, model_name, expected_values, - seed, ): notebook_path = notebooks["sasrec_quickstart"] params = { @@ -646,7 +639,6 @@ def test_sasrec_quickstart_functional( "num_epochs": num_epochs, "batch_size": batch_size, "model_name": model_name, - "seed": seed, } execute_notebook( notebook_path, diff --git a/tests/functional/examples/test_notebooks_python.py b/tests/functional/examples/test_notebooks_python.py index 6ce2c6741..014cf5996 100644 --- a/tests/functional/examples/test_notebooks_python.py +++ b/tests/functional/examples/test_notebooks_python.py @@ -237,6 +237,7 @@ def test_lightfm_functional( "expected_values", [({"rmse": 0.4969, "mae": 0.4761})], ) +@pytest.mark.skip(reason="geoimc doesn't work with any officially released pymanopt package") def test_geoimc_functional(notebooks, output_notebook, kernel_name, expected_values): notebook_path = notebooks["geoimc_quickstart"] execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name) diff --git a/tests/smoke/examples/test_notebooks_gpu.py b/tests/smoke/examples/test_notebooks_gpu.py index 6d77fe6b0..f14498ff8 100644 --- a/tests/smoke/examples/test_notebooks_gpu.py +++ b/tests/smoke/examples/test_notebooks_gpu.py @@ -43,9 +43,7 @@ def test_ncf_deep_dive_smoke(notebooks, output_notebook, kernel_name): notebook_path, output_notebook, kernel_name=kernel_name, - parameters=dict( - TOP_K=10, MOVIELENS_DATA_SIZE="100k", EPOCHS=1, BATCH_SIZE=1024 - ), + parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE="100k", EPOCHS=1, BATCH_SIZE=1024), ) results = read_notebook(output_notebook) @@ -91,19 +89,15 @@ def test_xdeepfm_smoke(notebooks, output_notebook, kernel_name): output_notebook, kernel_name=kernel_name, parameters=dict( - EPOCHS_FOR_SYNTHETIC_RUN=1, - EPOCHS_FOR_CRITEO_RUN=1, - BATCH_SIZE_SYNTHETIC=128, - BATCH_SIZE_CRITEO=512, + EPOCHS=1, + BATCH_SIZE=512, RANDOM_SEED=42, ), ) results = read_notebook(output_notebook) - assert results["res_syn"]["auc"] == pytest.approx(0.5043, rel=TOL, abs=ABS_TOL) - assert results["res_syn"]["logloss"] == pytest.approx(0.7046, rel=TOL, abs=ABS_TOL) - assert results["res_real"]["auc"] == pytest.approx(0.7251, rel=TOL, abs=ABS_TOL) - assert results["res_real"]["logloss"] == pytest.approx(0.508, rel=TOL, abs=ABS_TOL) + assert results["auc"] == pytest.approx(0.7251, rel=TOL, abs=ABS_TOL) + assert results["logloss"] == pytest.approx(0.508, rel=TOL, abs=ABS_TOL) @pytest.mark.notebooks @@ -140,14 +134,14 @@ def test_naml_smoke(notebooks, output_notebook, kernel_name): notebook_path, output_notebook, kernel_name=kernel_name, - parameters=dict(epochs=1, seed=42, MIND_type="demo"), + parameters=dict(epochs=1, 
batch_size=64, seed=42, MIND_type="demo"), ) results = read_notebook(output_notebook) - assert results["res_syn"]["group_auc"] == pytest.approx( + assert results["group_auc"] == pytest.approx( 0.5801, rel=TOL, abs=ABS_TOL ) - assert results["res_syn"]["mean_mrr"] == pytest.approx(0.2512, rel=TOL, abs=ABS_TOL) + assert results["mean_mrr"] == pytest.approx(0.2512, rel=TOL, abs=ABS_TOL) @pytest.mark.notebooks @@ -162,10 +156,10 @@ def test_nrms_smoke(notebooks, output_notebook, kernel_name): ) results = read_notebook(output_notebook) - assert results["res_syn"]["group_auc"] == pytest.approx( + assert results["group_auc"] == pytest.approx( 0.5768, rel=TOL, abs=ABS_TOL ) - assert results["res_syn"]["mean_mrr"] == pytest.approx(0.2457, rel=TOL, abs=ABS_TOL) + assert results["mean_mrr"] == pytest.approx(0.2457, rel=TOL, abs=ABS_TOL) @pytest.mark.notebooks @@ -176,14 +170,14 @@ def test_npa_smoke(notebooks, output_notebook, kernel_name): notebook_path, output_notebook, kernel_name=kernel_name, - parameters=dict(epochs=1, seed=42, MIND_type="demo"), + parameters=dict(epochs=1, batch_size=64, seed=42, MIND_type="demo"), ) results = read_notebook(output_notebook) - assert results["res_syn"]["group_auc"] == pytest.approx( + assert results["group_auc"] == pytest.approx( 0.5861, rel=TOL, abs=ABS_TOL ) - assert results["res_syn"]["mean_mrr"] == pytest.approx(0.255, rel=TOL, abs=ABS_TOL) + assert results["mean_mrr"] == pytest.approx(0.255, rel=TOL, abs=ABS_TOL) @pytest.mark.notebooks @@ -194,14 +188,14 @@ def test_lstur_smoke(notebooks, output_notebook, kernel_name): notebook_path, output_notebook, kernel_name=kernel_name, - parameters=dict(epochs=1, seed=40, MIND_type="demo"), + parameters=dict(epochs=1, batch_size=64, seed=40, MIND_type="demo"), ) results = read_notebook(output_notebook) - assert results["res_syn"]["group_auc"] == pytest.approx( + assert results["group_auc"] == pytest.approx( 0.5977, rel=TOL, abs=ABS_TOL ) - assert results["res_syn"]["mean_mrr"] == pytest.approx(0.2618, rel=TOL, abs=ABS_TOL) + assert results["mean_mrr"] == pytest.approx(0.2618, rel=TOL, abs=ABS_TOL) @pytest.mark.notebooks diff --git a/tests/unit/examples/test_notebooks_gpu.py b/tests/unit/examples/test_notebooks_gpu.py index a5e9b47ab..630538b9e 100644 --- a/tests/unit/examples/test_notebooks_gpu.py +++ b/tests/unit/examples/test_notebooks_gpu.py @@ -64,10 +64,8 @@ def test_xdeepfm(notebooks, output_notebook, kernel_name): output_notebook, kernel_name=kernel_name, parameters=dict( - EPOCHS_FOR_SYNTHETIC_RUN=1, - EPOCHS_FOR_CRITEO_RUN=1, - BATCH_SIZE_SYNTHETIC=128, - BATCH_SIZE_CRITEO=512, + EPOCHS=1, + BATCH_SIZE=1024, ), ) @@ -119,5 +117,5 @@ def test_dkn_quickstart(notebooks, output_notebook, kernel_name): notebook_path, output_notebook, kernel_name=kernel_name, - parameters=dict(EPOCHS=1, BATCH_SIZE=500), + parameters=dict(EPOCHS=1, BATCH_SIZE=500, HISTORY_SIZE=5), ) diff --git a/tests/unit/examples/test_notebooks_pyspark.py b/tests/unit/examples/test_notebooks_pyspark.py index 991095f75..e0ca07f6e 100644 --- a/tests/unit/examples/test_notebooks_pyspark.py +++ b/tests/unit/examples/test_notebooks_pyspark.py @@ -96,7 +96,11 @@ def test_evaluation_diversity_runs(notebooks, output_notebook, kernel_name): ) -# This is a flaky test that can fail unexpectedly +# mock100 dataset throws the following error: +# TrainValidationSplit IllegalArgumentException: requirement failed: +# Nothing has been added to this summarizer. 
+# This seems to be caused by cold start problem -- https://stackoverflow.com/questions/58827795/requirement-failed-nothing-has-been-added-to-this-summarizer +# In terms of the processing speed at Spark, "100k" dataset does not take much longer than "mock100" dataset and thus use "100k" here to go around the issue. @pytest.mark.flaky(reruns=5, reruns_delay=2) @pytest.mark.notebooks @pytest.mark.spark @@ -110,7 +114,7 @@ def test_spark_tuning(notebooks, output_notebook, kernel_name): output_notebook, kernel_name=kernel_name, parameters=dict( - MOVIELENS_DATA_SIZE="mock100", + MOVIELENS_DATA_SIZE="100k", # Note: mock100 throws an error NUMBER_CORES="1", NUMBER_ITERATIONS=3, SUBSET_RATIO=0.5, diff --git a/tests/unit/examples/test_notebooks_python.py b/tests/unit/examples/test_notebooks_python.py index 4fb386f40..7457fe2de 100644 --- a/tests/unit/examples/test_notebooks_python.py +++ b/tests/unit/examples/test_notebooks_python.py @@ -83,6 +83,7 @@ def test_cornac_deep_dive_runs(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks @pytest.mark.experimental +@pytest.mark.skip(reason="rlrmc doesn't work with any officially released pymanopt package") def test_rlrmc_quickstart_runs(notebooks, output_notebook, kernel_name): notebook_path = notebooks["rlrmc_quickstart"] execute_notebook( diff --git a/tests/unit/recommenders/evaluation/test_python_evaluation.py b/tests/unit/recommenders/evaluation/test_python_evaluation.py index cab117b15..4f0d4730b 100644 --- a/tests/unit/recommenders/evaluation/test_python_evaluation.py +++ b/tests/unit/recommenders/evaluation/test_python_evaluation.py @@ -181,7 +181,7 @@ def test_python_exp_var(rating_true, rating_pred): rating_pred=rating_true, col_prediction=DEFAULT_RATING_COL, ) == pytest.approx(1.0, TOL) - assert exp_var(rating_true, rating_pred) == pytest.approx(-6.4466, 0.01) + assert exp_var(rating_true, rating_pred) == pytest.approx(-6.4466, TOL) def test_get_top_k_items(rating_true): diff --git a/tests/unit/recommenders/evaluation/test_spark_evaluation.py b/tests/unit/recommenders/evaluation/test_spark_evaluation.py index 10cacaf83..278a2e287 100644 --- a/tests/unit/recommenders/evaluation/test_spark_evaluation.py +++ b/tests/unit/recommenders/evaluation/test_spark_evaluation.py @@ -109,7 +109,7 @@ def test_spark_exp_var(spark_data): assert evaluator.exp_var() == pytest.approx(1.0, TOL) evaluator = SparkRatingEvaluation(df_true, df_pred) - assert evaluator.exp_var() == pytest.approx(-6.4466, 0.01) + assert evaluator.exp_var() == pytest.approx(-6.4466, TOL) @pytest.mark.spark diff --git a/tests/unit/recommenders/utils/test_notebook_utils.py b/tests/unit/recommenders/utils/test_notebook_utils.py index dc6a88de2..417cb0ac8 100644 --- a/tests/unit/recommenders/utils/test_notebook_utils.py +++ b/tests/unit/recommenders/utils/test_notebook_utils.py @@ -10,6 +10,7 @@ is_databricks, execute_notebook, read_notebook, + _update_parameters, ) @@ -52,6 +53,44 @@ def test_is_databricks(): pass +@pytest.mark.notebooks +def test_update_parameters(): + parameter_cell_source = ''' +# Integer +TOP_K = 10 +# Float +LEARNING_RATE = 0.001 +# String +MOVIELENS_DATA_SIZE = "100k" +# List +RANKING_METRICS = [ evaluator.ndcg_at_k.__name__, evaluator.precision_at_k.__name__ ] +# Boolean +EVALUATE_WHILE_TRAINING = True +''' + + new_parameters = { + "MOVIELENS_DATA_SIZE": "1m", + "TOP_K": 1, + "EVALUATE_WHILE_TRAINING": False, + "RANKING_METRICS": ["ndcg_at_k", "precision_at_k"], + "LEARNING_RATE": 0.1, + } + + new_cell_source = _update_parameters(parameter_cell_source, 
new_parameters) + assert new_cell_source == ''' +# Integer +TOP_K = 1 +# Float +LEARNING_RATE = 0.1 +# String +MOVIELENS_DATA_SIZE = "1m" +# List +RANKING_METRICS = ['ndcg_at_k', 'precision_at_k'] +# Boolean +EVALUATE_WHILE_TRAINING = False +''' + + @pytest.mark.notebooks def test_notebook_execution(notebook_programmatic, output_notebook, kernel_name): """Test that the notebook executes and returns the correct results without params.""" @@ -68,77 +107,34 @@ def test_notebook_execution(notebook_programmatic, output_notebook, kernel_name) @pytest.mark.notebooks -def test_notebook_execution_int(notebook_programmatic, output_notebook, kernel_name): - """Test that the notebook executes and returns the correct results with integers.""" - execute_notebook( - notebook_programmatic, - output_notebook, - kernel_name=kernel_name, - parameters=dict(a=6), - ) - - results = read_notebook(output_notebook) - assert results["response1"] == 8 - - -@pytest.mark.notebooks -def test_notebook_execution_float(notebook_programmatic, output_notebook, kernel_name): - """Test that the notebook executes and returns the correct results with floats.""" - execute_notebook( - notebook_programmatic, - output_notebook, - kernel_name=kernel_name, - parameters=dict(a=1.5), - ) - - results = read_notebook(output_notebook) - assert results["response1"] == 3.5 - - -@pytest.mark.notebooks -def test_notebook_execution_letter(notebook_programmatic, output_notebook, kernel_name): - """Test that the notebook executes and returns the correct results with a string.""" - execute_notebook( - notebook_programmatic, - output_notebook, - kernel_name=kernel_name, - parameters=dict(b="M"), - ) - - results = read_notebook(output_notebook) - assert results["response2"] is True - - -@pytest.mark.notebooks -def test_notebook_execution_other_letter( - notebook_programmatic, output_notebook, kernel_name -): - """Test that the notebook executes and returns the correct results with a different string.""" - execute_notebook( - notebook_programmatic, - output_notebook, - kernel_name=kernel_name, - parameters=dict(b="A"), - ) - - results = read_notebook(output_notebook) - assert results["response2"] == "A" - - -@pytest.mark.notebooks -def test_notebook_execution_letter_and_number( - notebook_programmatic, output_notebook, kernel_name +@pytest.mark.parametrize( + "parameters,expected_key,expected_value", [ + (dict(a=6), "response1", 8), # Test the correct results with integers + (dict(a=1.5), "response1", 3.5), # Test the correct results with floats + (dict(b="M"), "response2", True), # Test the correct results with strings + (dict(b="A"), "response2", "A"), # Test the correct results with different strings + (dict(b="100k"), "response2", "100k"), # Test the correct results with strings that have numbers + (dict(c=10), "response3", 12), # Test the correct results with integers and a comment + ] +) +def test_notebook_execution_with_parameters( + notebook_programmatic, + output_notebook, + kernel_name, + parameters, + expected_key, + expected_value, ): - """Test that the notebook executes and returns the correct results with string that has a number.""" + """Test that the notebook executes.""" execute_notebook( notebook_programmatic, output_notebook, kernel_name=kernel_name, - parameters=dict(b="100k"), + parameters=parameters, ) results = read_notebook(output_notebook) - assert results["response2"] == "100k" + assert results[expected_key] == expected_value @pytest.mark.notebooks @@ -153,19 +149,3 @@ def test_notebook_execution_value_error_fails( 
kernel_name=kernel_name, parameters=dict(b=1), ) - - -@pytest.mark.notebooks -def test_notebook_execution_int_with_comment( - notebook_programmatic, output_notebook, kernel_name -): - """Test that the notebook executes and returns the correct results with integers and a comment.""" - execute_notebook( - notebook_programmatic, - output_notebook, - kernel_name=kernel_name, - parameters=dict(c=10), - ) - - results = read_notebook(output_notebook) - assert results["response3"] == 12
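
---

For reference, below is a minimal standalone sketch of the parameter-substitution logic that this patch factors out into the private `_update_parameters` helper in `recommenders/utils/notebook_utils.py`. The regex, the quoting of bare string values, and the accumulation of substitutions across parameters follow the patched code; the top-level function name `update_parameters_sketch` and the sample source string are illustrative only and not part of the PR. Note the bug the patch fixes: the old inline loop always substituted against the original `cell_source`, so only the last parameter's replacement survived, whereas the new helper (and this sketch) keeps substituting into the already-modified source.

```python
import re


def update_parameters_sketch(cell_source, new_parameters):
    """Sketch of the regex-based parameter rewrite added in this PR.

    Rewrites `NAME = value  # comment` assignments in a notebook's
    "parameters" cell, dropping any inline comment after the assignment.
    """
    modified = cell_source
    for param, new_value in new_parameters.items():
        if (
            isinstance(new_value, str)
            and not (new_value.startswith('"') and new_value.endswith('"'))
            and not (new_value.startswith("'") and new_value.endswith("'"))
        ):
            # Bare strings are wrapped in quotes so the rewritten cell still parses
            new_value = f'"{new_value}"'

        # Match the assignment and ignore (remove) a trailing inline comment
        pattern = re.compile(rf"(\b{param})\s*=\s*([^#\n]+)(?:#.*$)?", re.MULTILINE)
        # Substitute into the accumulated result, not the original source,
        # so earlier replacements are preserved (the bug fixed by this PR)
        modified = pattern.sub(rf"\1 = {new_value}", modified)
    return modified


# Hypothetical usage, mirroring tests/unit/recommenders/utils/test_notebook_utils.py
source = 'TOP_K = 10  # number of items\nMOVIELENS_DATA_SIZE = "100k"\n'
print(update_parameters_sketch(source, {"TOP_K": 1, "MOVIELENS_DATA_SIZE": "1m"}))
# TOP_K = 1
# MOVIELENS_DATA_SIZE = "1m"
```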