From 4a67aa6b373ad2d88daf00c4c7c65df3523e7f64 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Tue, 21 Nov 2023 09:38:24 -0600 Subject: [PATCH] Fix openai issues --- .../notebook.ipynb | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/notebooks/semantic-search-with-openai-embedding-creation/notebook.ipynb b/notebooks/semantic-search-with-openai-embedding-creation/notebook.ipynb index 3795aff..08a6895 100644 --- a/notebooks/semantic-search-with-openai-embedding-creation/notebook.ipynb +++ b/notebooks/semantic-search-with-openai-embedding-creation/notebook.ipynb @@ -85,15 +85,13 @@ "metadata": {}, "outputs": [], "source": [ - "!pip3 install openai matplotlib plotly pandas scipy scikit-learn requests --quiet\n", + "!pip3 install openai==1.3.3 requests --quiet\n", "\n", "import json\n", "import os\n", "\n", - "import openai\n", - "import requests\n", - "from openai.embeddings_utils import get_embedding\n", - "from openai.embeddings_utils import get_embeddings" + "from openai import OpenAI\n", + "import requests" ] }, { @@ -129,7 +127,9 @@ "source": [ "import getpass\n", "\n", - "openai.api_key = getpass.getpass('OpenAI API Key: ')" + "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass('OpenAI API Key: ')\n", + "\n", + "client = OpenAI()" ] }, { @@ -174,8 +174,7 @@ "metadata": {}, "outputs": [], "source": [ - "url = 'https://raw.githubusercontent.com/singlestore-labs/singlestoredb-samples/main/' + \\\n", - " 'Tutorials/ai-powered-semantic-search/hr_sample_data.sql'" + "url = 'https://raw.githubusercontent.com/singlestore-labs/singlestoredb-samples/main/Tutorials/ai-powered-semantic-search/hr_sample_data.sql'" ] }, { @@ -217,12 +216,18 @@ "source": [ "%sql ALTER TABLE reviews ADD embeddings BLOB;\n", "\n", - "reviews = %sql SELECT review FROM reviews;\n", + "from typing import List\n", "\n", + "reviews = %sql SELECT review FROM reviews;\n", "reviews = [x.review for x in reviews]\n", - "embeddings = get_embeddings(reviews, engine='text-embedding-ada-002')\n", "\n", - "for review, embedding in zip(reviews, embeddings):\n", + "def get_embeddings(inputs: List[str], model: str = 'text-embedding-ada-002') -> List[str]:\n", + " \"\"\"Return list of embeddings.\"\"\"\n", + " return [x.embedding for x in client.embeddings.create(input=inputs, model=model).data]\n", + "\n", + "embeddings = get_embeddings(reviews)\n", + "\n", + "for embedding, review in zip(embeddings, reviews):\n", " %sql UPDATE reviews SET embeddings = JSON_ARRAY_PACK('{{json.dumps(embedding)}}') WHERE review='{{review}}';" ] }, @@ -245,7 +250,7 @@ "source": [ "searchstring = input('Please enter a search string: ')\n", "\n", - "search_embedding = json.dumps(get_embedding(searchstring, engine='text-embedding-ada-002'))\n", + "search_embedding = json.dumps(get_embeddings([searchstring])[0])\n", "\n", "results = %sql SELECT review, DOT_PRODUCT(embeddings, JSON_ARRAY_PACK('{{search_embedding}}')) AS score FROM reviews ORDER BY score DESC LIMIT 5;\n", "\n",