diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile deleted file mode 100644 index 02ad5a0e3b..0000000000 --- a/.devcontainer/Dockerfile +++ /dev/null @@ -1,28 +0,0 @@ -ARG PYTHON_VERSION -FROM mcr.microsoft.com/vscode/devcontainers/python:${PYTHON_VERSION} - -ARG REMOTE_USER -ENV HOME="/home/${REMOTE_USER}" \ - JAVA_HOME="/usr/lib/jvm/java-8-openjdk-amd64" \ - PYSPARK_PYTHON="/usr/local/bin/python" \ - PYSPARK_DRIVER_PYTHON="/usr/local/bin/python" - -RUN apt-get update && \ - apt-get -y install --no-install-recommends software-properties-common && \ - apt-add-repository 'deb http://security.debian.org/debian-security stretch/updates main' && \ - apt-get update && \ - apt-get -y install --no-install-recommends \ - openjdk-8-jre \ - cmake - -# Switch to non-root user -USER ${REMOTE_USER} -WORKDIR ${HOME} - -# Setup Jupyter Notebook -ENV NOTEBOOK_CONFIG="${HOME}/.jupyter/jupyter_notebook_config.py" -RUN mkdir -p $(dirname ${NOTEBOOK_CONFIG}) && \ - echo "c.NotebookApp.ip='0.0.0.0'" >> ${NOTEBOOK_CONFIG} && \ - echo "c.NotebookApp.open_browser=False" >> ${NOTEBOOK_CONFIG} && \ - echo "c.NotebookApp.allow_origin='*'" >> ${NOTEBOOK_CONFIG} -EXPOSE 8888 diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 4b74a526c5..12d6ed8228 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,44 +1,50 @@ { - "name": "Recommenders", - "build": { - "dockerfile": "Dockerfile", - "context": "..", - "args": { - // Python version: 3, 3.6, 3.7 - "PYTHON_VERSION": "3.7", - "REMOTE_USER": "vscode" - } - }, + "name": "Recommenders", + // Version list: https://github.com/devcontainers/images/tree/main/src/base-ubuntu + // Includes: curl, wget, ca-certificates, git, Oh My Zsh!, + "image": "mcr.microsoft.com/devcontainers/base:ubuntu-24.04", + "hostRequirements": { + "cpus": 4, + "memory": "16gb", + "storage": "32gb" + }, + "features": { + // https://github.com/devcontainers/features/blob/main/src/anaconda/devcontainer-feature.json + "ghcr.io/devcontainers/features/anaconda:1": { + "version": "2024.06-1" + } + }, + "customizations": { + "vscode": { + // Set *default* container specific settings.json values on container create. + "settings": { + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + "source.organizeImports": "explicit" + } + }, + "isort.args": ["--profile", "black"], + "python.analysis.autoImportCompletions": true, + "python.defaultInterpreterPath": "/usr/local/conda/envs/Recommenders/bin/python", + "python.testing.pytestEnabled": true, + // set the directory where all tests are + "python.testing.pytestArgs": ["tests"] + }, + // Add the IDs of extensions you want installed when the container is created. + "extensions": [ + "ms-python.black-formatter", // https://marketplace.visualstudio.com/items?itemName=ms-python.black-formatter + "ms-python.isort", // https://marketplace.visualstudio.com/items?itemName=ms-python.isort + "ms-python.mypy-type-checker", // https://marketplace.visualstudio.com/items?itemName=ms-python.mypy-type-checker + "ms-python.pylint", // https://marketplace.visualstudio.com/items?itemName=ms-python.pylint + "ms-python.python", // https://marketplace.visualstudio.com/items?itemName=ms-python.python + "ms-toolsai.datawrangler", // https://marketplace.visualstudio.com/items?itemName=ms-toolsai.datawrangler + "ms-toolsai.jupyter" // https://marketplace.visualstudio.com/items?itemName=ms-toolsai.jupyter + ] + } + }, - // Set *default* container specific settings.json values on container create. - "settings": { - "python.pythonPath": "/usr/local/bin/python", - "python.languageServer": "Pylance", - "python.linting.enabled": true, - "python.linting.pylintEnabled": true, - "python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8", - "python.formatting.blackPath": "/usr/local/py-utils/bin/black", - "python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf", - "python.linting.banditPath": "/usr/local/py-utils/bin/bandit", - "python.linting.flake8Path": "/usr/local/py-utils/bin/flake8", - "python.linting.mypyPath": "/usr/local/py-utils/bin/mypy", - "python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle", - "python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle", - "python.linting.pylintPath": "/usr/local/py-utils/bin/pylint" - }, - - // Add the IDs of extensions you want installed when the container is created. - "extensions": [ - "ms-python.python", - "ms-python.vscode-pylance" - ], - - // Use 'forwardPorts' to make a list of ports inside the container available locally. - "forwardPorts": [8888], - - // Use 'postCreateCommand' to run commands after the container is created. - "postCreateCommand": "pip install -U pip && pip install --user -e .[dev,examples,spark,xlearn]", - - // Comment out connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. - "remoteUser": "vscode" + // Use 'postCreateCommand' to run commands after the container is created. + "postCreateCommand": "conda create -n Recommenders -c conda-forge -y python=3.10 openjdk=21 pip && conda init bash && bash -c -i 'conda activate Recommenders && pip install -e .[dev,spark]' && conda config --set auto_activate_base false" } diff --git a/.github/actions/get-test-groups/action.yml b/.github/actions/get-test-groups/action.yml index dc50e4b93c..6e87da900f 100644 --- a/.github/actions/get-test-groups/action.yml +++ b/.github/actions/get-test-groups/action.yml @@ -8,7 +8,7 @@ description: "Get test group names from tests_groups.py" inputs: TEST_KIND: required: true - description: Type of test - unit or nightly + description: Type of test - pr gate or nightly TEST_ENV: required: false description: Test environment - cpu, gpu or spark diff --git a/AUTHORS.md b/AUTHORS.md index 1816f73e27..b70bfa644b 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -52,6 +52,8 @@ To contributors: please add your name to the list when you submit a patch to the * **[Aaron He](https://github.com/AaronHeee)** * Reco utils of NCF * Deep dive notebook demonstrating the use of NCF +* **[Aaron Palpallatoc](https://github.com/ubergonmx)** + * Corrected variable in pickle dump in `mind_utils.ipynb` notebook * **[Abir Chakraborty](https://github.com/aeroabir)** * Self-Attentive Sequential Recommendation (SASRec) * Sequential Recommendation Via Personalized Transformer (SSEPT) diff --git a/examples/01_prepare_data/mind_utils.ipynb b/examples/01_prepare_data/mind_utils.ipynb index e03a3683d9..7a2d81e6e6 100644 --- a/examples/01_prepare_data/mind_utils.ipynb +++ b/examples/01_prepare_data/mind_utils.ipynb @@ -306,7 +306,7 @@ " pickle.dump(word_dict, f)\n", " \n", "with open(os.path.join(output_path, 'word_dict_all.pkl'), 'wb') as f:\n", - " pickle.dump(word_dict, f)" + " pickle.dump(word_dict_all, f)" ] }, { diff --git a/recommenders/datasets/mind.py b/recommenders/datasets/mind.py index 23b7a8db22..7295786c2e 100644 --- a/recommenders/datasets/mind.py +++ b/recommenders/datasets/mind.py @@ -17,26 +17,37 @@ ) -URL_MIND_LARGE_TRAIN = ( - "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_train.zip" +URL_MIND_DEMO_TRAIN = ( + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_train.zip" ) -URL_MIND_LARGE_VALID = ( - "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_dev.zip" +URL_MIND_DEMO_VALID = ( + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_dev.zip" +) +URL_MIND_DEMO_UTILS = ( + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_utils.zip" ) + URL_MIND_SMALL_TRAIN = ( "https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_train.zip" ) URL_MIND_SMALL_VALID = ( "https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_dev.zip" ) -URL_MIND_DEMO_TRAIN = ( - "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_train.zip" +URL_MIND_SMALL_UTILS = ( + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_utils.zip" ) -URL_MIND_DEMO_VALID = ( - "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_dev.zip" + +URL_MIND_LARGE_TRAIN = ( + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_train.zip" ) -URL_MIND_DEMO_UTILS = ( - "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_utils.zip" +URL_MIND_LARGE_VALID = ( + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_dev.zip" +) +URL_MIND_LARGE_TEST = ( + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_test.zip" +) +URL_MIND_LARGE_UTILS = ( + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_utils.zip" ) URL_MIND = { diff --git a/recommenders/models/deeprec/DataModel/ImplicitCF.py b/recommenders/models/deeprec/DataModel/ImplicitCF.py index 3cfbb2821f..42bb319c46 100644 --- a/recommenders/models/deeprec/DataModel/ImplicitCF.py +++ b/recommenders/models/deeprec/DataModel/ImplicitCF.py @@ -206,6 +206,8 @@ def train_loader(self, batch_size): """ def sample_neg(x): + if len(x) >= self.n_items: + raise ValueError("A user has voted in every item. Can't find a negative sample.") while True: neg_id = random.randint(0, self.n_items - 1) if neg_id not in x: