From d0a4af3ae6ad8af47c0ec525c7681fd20e6175ab Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Tue, 27 Aug 2024 18:45:07 +0800 Subject: [PATCH 1/7] Update dev container (#2157) * Update dev container configuration Signed-off-by: Simon Zhao * Use conda to manage environments Signed-off-by: Simon Zhao * Set Python interpreter Signed-off-by: Simon Zhao * Update Signed-off-by: Simon Zhao * Add machine specs Signed-off-by: Simon Zhao --------- Signed-off-by: Simon Zhao --- .devcontainer/Dockerfile | 28 ----------- .devcontainer/devcontainer.json | 88 ++++++++++++++++++--------------- 2 files changed, 47 insertions(+), 69 deletions(-) delete mode 100644 .devcontainer/Dockerfile diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile deleted file mode 100644 index 02ad5a0e3b..0000000000 --- a/.devcontainer/Dockerfile +++ /dev/null @@ -1,28 +0,0 @@ -ARG PYTHON_VERSION -FROM mcr.microsoft.com/vscode/devcontainers/python:${PYTHON_VERSION} - -ARG REMOTE_USER -ENV HOME="/home/${REMOTE_USER}" \ - JAVA_HOME="/usr/lib/jvm/java-8-openjdk-amd64" \ - PYSPARK_PYTHON="/usr/local/bin/python" \ - PYSPARK_DRIVER_PYTHON="/usr/local/bin/python" - -RUN apt-get update && \ - apt-get -y install --no-install-recommends software-properties-common && \ - apt-add-repository 'deb http://security.debian.org/debian-security stretch/updates main' && \ - apt-get update && \ - apt-get -y install --no-install-recommends \ - openjdk-8-jre \ - cmake - -# Switch to non-root user -USER ${REMOTE_USER} -WORKDIR ${HOME} - -# Setup Jupyter Notebook -ENV NOTEBOOK_CONFIG="${HOME}/.jupyter/jupyter_notebook_config.py" -RUN mkdir -p $(dirname ${NOTEBOOK_CONFIG}) && \ - echo "c.NotebookApp.ip='0.0.0.0'" >> ${NOTEBOOK_CONFIG} && \ - echo "c.NotebookApp.open_browser=False" >> ${NOTEBOOK_CONFIG} && \ - echo "c.NotebookApp.allow_origin='*'" >> ${NOTEBOOK_CONFIG} -EXPOSE 8888 diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 4b74a526c5..12d6ed8228 100644 --- 
a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,44 +1,50 @@ { - "name": "Recommenders", - "build": { - "dockerfile": "Dockerfile", - "context": "..", - "args": { - // Python version: 3, 3.6, 3.7 - "PYTHON_VERSION": "3.7", - "REMOTE_USER": "vscode" - } - }, + "name": "Recommenders", + // Version list: https://github.com/devcontainers/images/tree/main/src/base-ubuntu + // Includes: curl, wget, ca-certificates, git, Oh My Zsh!, + "image": "mcr.microsoft.com/devcontainers/base:ubuntu-24.04", + "hostRequirements": { + "cpus": 4, + "memory": "16gb", + "storage": "32gb" + }, + "features": { + // https://github.com/devcontainers/features/blob/main/src/anaconda/devcontainer-feature.json + "ghcr.io/devcontainers/features/anaconda:1": { + "version": "2024.06-1" + } + }, + "customizations": { + "vscode": { + // Set *default* container specific settings.json values on container create. + "settings": { + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + "source.organizeImports": "explicit" + } + }, + "isort.args": ["--profile", "black"], + "python.analysis.autoImportCompletions": true, + "python.defaultInterpreterPath": "/usr/local/conda/envs/Recommenders/bin/python", + "python.testing.pytestEnabled": true, + // set the directory where all tests are + "python.testing.pytestArgs": ["tests"] + }, + // Add the IDs of extensions you want installed when the container is created. 
+ "extensions": [ + "ms-python.black-formatter", // https://marketplace.visualstudio.com/items?itemName=ms-python.black-formatter + "ms-python.isort", // https://marketplace.visualstudio.com/items?itemName=ms-python.isort + "ms-python.mypy-type-checker", // https://marketplace.visualstudio.com/items?itemName=ms-python.mypy-type-checker + "ms-python.pylint", // https://marketplace.visualstudio.com/items?itemName=ms-python.pylint + "ms-python.python", // https://marketplace.visualstudio.com/items?itemName=ms-python.python + "ms-toolsai.datawrangler", // https://marketplace.visualstudio.com/items?itemName=ms-toolsai.datawrangler + "ms-toolsai.jupyter" // https://marketplace.visualstudio.com/items?itemName=ms-toolsai.jupyter + ] + } + }, - // Set *default* container specific settings.json values on container create. - "settings": { - "python.pythonPath": "/usr/local/bin/python", - "python.languageServer": "Pylance", - "python.linting.enabled": true, - "python.linting.pylintEnabled": true, - "python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8", - "python.formatting.blackPath": "/usr/local/py-utils/bin/black", - "python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf", - "python.linting.banditPath": "/usr/local/py-utils/bin/bandit", - "python.linting.flake8Path": "/usr/local/py-utils/bin/flake8", - "python.linting.mypyPath": "/usr/local/py-utils/bin/mypy", - "python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle", - "python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle", - "python.linting.pylintPath": "/usr/local/py-utils/bin/pylint" - }, - - // Add the IDs of extensions you want installed when the container is created. - "extensions": [ - "ms-python.python", - "ms-python.vscode-pylance" - ], - - // Use 'forwardPorts' to make a list of ports inside the container available locally. - "forwardPorts": [8888], - - // Use 'postCreateCommand' to run commands after the container is created. 
- "postCreateCommand": "pip install -U pip && pip install --user -e .[dev,examples,spark,xlearn]", - - // Comment out connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. - "remoteUser": "vscode" + // Use 'postCreateCommand' to run commands after the container is created. + "postCreateCommand": "conda create -n Recommenders -c conda-forge -y python=3.10 openjdk=21 pip && conda init bash && bash -c -i 'conda activate Recommenders && pip install -e .[dev,spark]' && conda config --set auto_activate_base false" } From ba8b24c44b481960a2229600285c4e3b82aa2f9a Mon Sep 17 00:00:00 2001 From: aaronpal Date: Tue, 27 Aug 2024 20:33:12 +0800 Subject: [PATCH 2/7] Correct variable used in pickle dump in `mind_utils.ipynb` Fixed an issue where the incorrect variable `word_dict` was being dumped to word_dict_all.pkl instead of `word_dict_all` in the `mind_utils.ipynb` notebook. --- examples/01_prepare_data/mind_utils.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/01_prepare_data/mind_utils.ipynb b/examples/01_prepare_data/mind_utils.ipynb index e03a3683d9..7a2d81e6e6 100644 --- a/examples/01_prepare_data/mind_utils.ipynb +++ b/examples/01_prepare_data/mind_utils.ipynb @@ -306,7 +306,7 @@ " pickle.dump(word_dict, f)\n", " \n", "with open(os.path.join(output_path, 'word_dict_all.pkl'), 'wb') as f:\n", - " pickle.dump(word_dict, f)" + " pickle.dump(word_dict_all, f)" ] }, { From 1eb6619e7d78a8e4b9ac1750ef7ae6c61219af66 Mon Sep 17 00:00:00 2001 From: aaronpal Date: Tue, 27 Aug 2024 20:33:12 +0800 Subject: [PATCH 3/7] Correct variable used in pickle dump in `mind_utils.ipynb` Fixed an issue where the incorrect variable `word_dict` was being dumped to word_dict_all.pkl instead of `word_dict_all` in the `mind_utils.ipynb` notebook. 
Signed-off-by: aaron --- AUTHORS.md | 2 ++ examples/01_prepare_data/mind_utils.ipynb | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/AUTHORS.md b/AUTHORS.md index 1816f73e27..b70bfa644b 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -52,6 +52,8 @@ To contributors: please add your name to the list when you submit a patch to the * **[Aaron He](https://github.com/AaronHeee)** * Reco utils of NCF * Deep dive notebook demonstrating the use of NCF +* **[Aaron Palpallatoc](https://github.com/ubergonmx)** + * Corrected variable in pickle dump in `mind_utils.ipynb` notebook * **[Abir Chakraborty](https://github.com/aeroabir)** * Self-Attentive Sequential Recommendation (SASRec) * Sequential Recommendation Via Personalized Transformer (SSEPT) diff --git a/examples/01_prepare_data/mind_utils.ipynb b/examples/01_prepare_data/mind_utils.ipynb index e03a3683d9..7a2d81e6e6 100644 --- a/examples/01_prepare_data/mind_utils.ipynb +++ b/examples/01_prepare_data/mind_utils.ipynb @@ -306,7 +306,7 @@ " pickle.dump(word_dict, f)\n", " \n", "with open(os.path.join(output_path, 'word_dict_all.pkl'), 'wb') as f:\n", - " pickle.dump(word_dict, f)" + " pickle.dump(word_dict_all, f)" ] }, { From baca6cf05cbee2ff81b7ae19ac6fceab6856c236 Mon Sep 17 00:00:00 2001 From: Miguel Fierro <3491412+miguelgfierro@users.noreply.github.com> Date: Wed, 28 Aug 2024 17:02:30 +0200 Subject: [PATCH 4/7] Update action.yml --- .github/actions/get-test-groups/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/get-test-groups/action.yml b/.github/actions/get-test-groups/action.yml index dc50e4b93c..6e87da900f 100644 --- a/.github/actions/get-test-groups/action.yml +++ b/.github/actions/get-test-groups/action.yml @@ -8,7 +8,7 @@ description: "Get test group names from tests_groups.py" inputs: TEST_KIND: required: true - description: Type of test - unit or nightly + description: Type of test - pr gate or nightly TEST_ENV: required: false description: Test 
environment - cpu, gpu or spark From 610e66346300ed87fc0f12a06cd9160a4aea5956 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Wed, 28 Aug 2024 19:57:27 +0200 Subject: [PATCH 5/7] Added extra MIND urls Signed-off-by: miguelgfierro --- recommenders/datasets/mind.py | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/recommenders/datasets/mind.py b/recommenders/datasets/mind.py index 23b7a8db22..7295786c2e 100644 --- a/recommenders/datasets/mind.py +++ b/recommenders/datasets/mind.py @@ -17,26 +17,37 @@ ) -URL_MIND_LARGE_TRAIN = ( - "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_train.zip" +URL_MIND_DEMO_TRAIN = ( + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_train.zip" ) -URL_MIND_LARGE_VALID = ( - "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_dev.zip" +URL_MIND_DEMO_VALID = ( + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_dev.zip" +) +URL_MIND_DEMO_UTILS = ( + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_utils.zip" ) + URL_MIND_SMALL_TRAIN = ( "https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_train.zip" ) URL_MIND_SMALL_VALID = ( "https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_dev.zip" ) -URL_MIND_DEMO_TRAIN = ( - "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_train.zip" +URL_MIND_SMALL_UTILS = ( + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_utils.zip" ) -URL_MIND_DEMO_VALID = ( - "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_dev.zip" + +URL_MIND_LARGE_TRAIN = ( + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_train.zip" ) -URL_MIND_DEMO_UTILS = ( - "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_utils.zip" +URL_MIND_LARGE_VALID = ( + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_dev.zip" +) +URL_MIND_LARGE_TEST = ( + 
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_test.zip" +) +URL_MIND_LARGE_UTILS = ( + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_utils.zip" ) URL_MIND = { From b049091d8dc236143018b2e0702b4988c5286f27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Dav=C3=B3?= Date: Fri, 6 Sep 2024 07:53:41 +0000 Subject: [PATCH 6/7] Added assert to avoid infinite loop in negative sampling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: David Davó --- recommenders/models/deeprec/DataModel/ImplicitCF.py | 1 + 1 file changed, 1 insertion(+) diff --git a/recommenders/models/deeprec/DataModel/ImplicitCF.py b/recommenders/models/deeprec/DataModel/ImplicitCF.py index 3cfbb2821f..5a91743124 100644 --- a/recommenders/models/deeprec/DataModel/ImplicitCF.py +++ b/recommenders/models/deeprec/DataModel/ImplicitCF.py @@ -206,6 +206,7 @@ def train_loader(self, batch_size): """ def sample_neg(x): + assert len(x) < self.n_items, "A user has voted in every item. Can't find a negative sample" while True: neg_id = random.randint(0, self.n_items - 1) if neg_id not in x: From 84497f23d588159fc5788e8b13e72bbec4ae38dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Dav=C3=B3?= Date: Mon, 9 Sep 2024 07:56:02 +0000 Subject: [PATCH 7/7] Changed assert to ValueError MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: David Davó --- recommenders/models/deeprec/DataModel/ImplicitCF.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/recommenders/models/deeprec/DataModel/ImplicitCF.py b/recommenders/models/deeprec/DataModel/ImplicitCF.py index 5a91743124..42bb319c46 100644 --- a/recommenders/models/deeprec/DataModel/ImplicitCF.py +++ b/recommenders/models/deeprec/DataModel/ImplicitCF.py @@ -206,7 +206,8 @@ def train_loader(self, batch_size): """ def sample_neg(x): - assert len(x) < self.n_items, "A user has voted in every item. 
Can't find a negative sample" + if len(x) >= self.n_items: + raise ValueError("A user has voted in every item. Can't find a negative sample.") while True: neg_id = random.randint(0, self.n_items - 1) if neg_id not in x: