diff --git a/notebooks/lm_similarity_example.ipynb b/notebooks/lm_similarity_example.ipynb index 7cbece8..3fb8136 100644 --- a/notebooks/lm_similarity_example.ipynb +++ b/notebooks/lm_similarity_example.ipynb @@ -7,7 +7,22 @@ "source": [ "# Language Model Similarity Example\n", "\n", - "This notebook shows how to provide a language model to a similarity anchor, allowing the utilization of knowledge inside embedding spaces as part of the ICAT model." + "This notebook shows how to provide a language model to a similarity anchor, allowing the utilization of knowledge inside embedding spaces as part of the ICAT model.\n", + "\n", + "You will need to install the huggingface transformers and pytorch libraries for this notebook to run; please use\n", + "```\n", + "pip install transformers torch\n", + "```" ] }, { "cell_type": "code", "execution_count": null, "id": "bb6a33c6-e0f2-414f-9356-97f6fb47e2b9", "metadata": {}, "outputs": [], "source": [ "import torch" ] }, { @@ -21,7 +36,7 @@ "source": [ "# change these constants as needed based on your hardware constraints\n", "BATCH_SIZE = 16\n", - "DEVICE = \"cuda\"\n", + "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", "MODEL_NAME = \"bert-base-uncased\"" ] }, @@ -175,7 +190,7 @@ "\n", "dataset = fetch_20newsgroups(subset=\"train\")\n", "df = pd.DataFrame({\"text\": dataset[\"data\"], \"category\": [dataset[\"target_names\"][i] for i in dataset[\"target\"]]})\n", - "#df = df.iloc[0:1999]\n", + "df = df.iloc[0:1999] # NOTE: if running on CPU or weaker GPU, recommend keeping this line to avoid long processing times on first BERT anchor creation; comment it out to use the full dataset.\n", "df.head()" ] }, @@ -196,7 +211,7 @@ }, "outputs": [], "source": [ - "icat.initialize(offline=True)" + "icat.initialize(offline=False)" ] }, { @@ -279,7 +294,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.10.15" } }, "nbformat": 4,