diff --git a/notebooks/hybrid-search/notebook.ipynb b/notebooks/hybrid-search/notebook.ipynb index 91fff25..1ebd161 100644 --- a/notebooks/hybrid-search/notebook.ipynb +++ b/notebooks/hybrid-search/notebook.ipynb @@ -1 +1 @@ -{"cells":[{"cell_type":"markdown","id":"505a207d-82ee-406d-bb92-e6a6900d6d18","metadata":{},"source":"\u003cdiv id=\"singlestore-header\" style=\"display: flex; background-color: rgba(209, 153, 255, 0.25); padding: 5px;\"\u003e\n \u003cdiv id=\"icon-image\" style=\"width: 90px; height: 90px;\"\u003e\n \u003cimg width=\"100%\" height=\"100%\" src=\"https://raw.githubusercontent.com/singlestore-labs/spaces-notebooks/master/common/images/header-icons/vector-circle.png\" /\u003e\n \u003c/div\u003e\n \u003cdiv id=\"text\" style=\"padding: 5px; margin-left: 10px;\"\u003e\n \u003cdiv id=\"badge\" style=\"display: inline-block; background-color: rgba(0, 0, 0, 0.15); border-radius: 4px; padding: 4px 8px; align-items: center; margin-top: 6px; margin-bottom: -2px; font-size: 80%\"\u003eSingleStore Notebooks\u003c/div\u003e\n \u003ch1 style=\"font-weight: 500; margin: 8px 0 0 4px;\"\u003eHybrid Search\u003c/h1\u003e\n \u003c/div\u003e\n\u003c/div\u003e"},{"cell_type":"markdown","id":"d9f9e629-6eb9-4ca5-bcf2-1b8672b86725","metadata":{"execution":{"iopub.execute_input":"2023-06-06T03:34:15.712942Z","iopub.status.busy":"2023-06-06T03:34:15.712613Z","iopub.status.idle":"2023-06-06T03:34:15.715753Z","shell.execute_reply":"2023-06-06T03:34:15.715128Z","shell.execute_reply.started":"2023-06-06T03:34:15.712919Z"},"tags":[]},"source":"*Source*: [OpenAI Cookbook](https://github.com/openai/openai-cookbook/blob/main/examples/data/AG_news_samples.csv)\n\nHybrid search integrates both keyword-based search and semantic search in order to combine the strengths of both and provide users with a more comprehensive and efficient search experience. This notebook is an example on how to perform hybrid search with SingleStore's database and notebooks."},{"cell_type":"markdown","id":"532e8d3f-007d-48a4-8d36-44b561dd1109","metadata":{},"source":"## Setup\nLet's first download the libraries necessary."},{"cell_type":"code","execution_count":4,"id":"07990b64-9447-46a8-abbc-51be1972dfda","metadata":{"execution":{"iopub.execute_input":"2023-08-03T20:10:26.985627Z","iopub.status.busy":"2023-08-03T20:10:26.985087Z","iopub.status.idle":"2023-08-03T20:10:40.655368Z","shell.execute_reply":"2023-08-03T20:10:40.654602Z","shell.execute_reply.started":"2023-08-03T20:10:26.985608Z"},"tags":[],"trusted":true},"outputs":[],"source":"!pip install matplotlib --quiet\n!pip install plotly.express --quiet\n!pip install scikit-learn --quiet\n!pip install tabulate --quiet\n!pip install tiktoken --quiet\n!pip install wget --quiet\n!pip install openai --quiet"},{"cell_type":"code","execution_count":5,"id":"a592dd5e-4114-4abf-923d-74038f5244eb","metadata":{"execution":{"iopub.execute_input":"2023-08-03T20:10:40.657067Z","iopub.status.busy":"2023-08-03T20:10:40.656816Z","iopub.status.idle":"2023-08-03T20:10:40.663127Z","shell.execute_reply":"2023-08-03T20:10:40.662413Z","shell.execute_reply.started":"2023-08-03T20:10:40.657044Z"},"tags":[],"trusted":true},"outputs":[],"source":"import pandas as pd\nimport os\nimport wget\nimport ast\nimport json"},{"cell_type":"code","execution_count":6,"id":"c2bffc74-4b6a-4c0f-acef-f72bb255ec79","metadata":{"execution":{"iopub.execute_input":"2023-08-03T20:10:40.664287Z","iopub.status.busy":"2023-08-03T20:10:40.664046Z","iopub.status.idle":"2023-08-03T20:10:45.214897Z","shell.execute_reply":"2023-08-03T20:10:45.214240Z","shell.execute_reply.started":"2023-08-03T20:10:40.664266Z"},"tags":[],"trusted":true},"outputs":[],"source":"# Import the library for vectorizing the data (Up to 2 minutes)\n!pip install sentence-transformers --quiet\nfrom sentence_transformers import SentenceTransformer\nmodel = SentenceTransformer('flax-sentence-embeddings/all_datasets_v3_mpnet-base')"},{"cell_type":"markdown","id":"0aa95a80-5683-4dc3-9e52-c3e890ab87af","metadata":{},"source":"## Import data from CSV File\nThis csv file holds the title, summary, and category of approximately 2000 news articles."},{"cell_type":"code","execution_count":7,"id":"b1b2971e-d0f6-4cfa-a9a7-954602bda460","metadata":{"execution":{"iopub.execute_input":"2023-08-03T20:10:45.216911Z","iopub.status.busy":"2023-08-03T20:10:45.216522Z","iopub.status.idle":"2023-08-03T20:10:45.221195Z","shell.execute_reply":"2023-08-03T20:10:45.220623Z","shell.execute_reply.started":"2023-08-03T20:10:45.216889Z"},"tags":[],"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":"File already exists in the local file system.\n"}],"source":"# download reviews csv file\ncvs_file_path = \"https://raw.githubusercontent.com/openai/openai-cookbook/main/examples/data/AG_news_samples.csv\"\nfile_path = \"AG_news_samples.csv\"\n\nif not os.path.exists(file_path):\n wget.download(cvs_file_path, file_path)\n print(\"File downloaded successfully.\")\nelse:\n print(\"File already exists in the local file system.\")"},{"cell_type":"code","execution_count":8,"id":"6c821edd-ce7b-46d9-aa79-0ab1766266a0","metadata":{"execution":{"iopub.execute_input":"2023-08-03T20:10:45.222435Z","iopub.status.busy":"2023-08-03T20:10:45.222055Z","iopub.status.idle":"2023-08-03T20:10:45.241632Z","shell.execute_reply":"2023-08-03T20:10:45.240965Z","shell.execute_reply.started":"2023-08-03T20:10:45.222413Z"},"tags":[],"trusted":true},"outputs":[{"data":{"text/html":"\u003cdiv\u003e\n\u003cstyle scoped\u003e\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n\u003c/style\u003e\n\u003ctable border=\"1\" class=\"dataframe\"\u003e\n \u003cthead\u003e\n \u003ctr style=\"text-align: right;\"\u003e\n \u003cth\u003e\u003c/th\u003e\n \u003cth\u003etitle\u003c/th\u003e\n \u003cth\u003edescription\u003c/th\u003e\n \u003cth\u003elabel\u003c/th\u003e\n \u003c/tr\u003e\n \u003c/thead\u003e\n \u003ctbody\u003e\n \u003ctr\u003e\n \u003cth\u003e0\u003c/th\u003e\n \u003ctd\u003eWorld Briefings\u003c/td\u003e\n \u003ctd\u003eBRITAIN: BLAIR WARNS OF CLIMATE THREAT Prime M...\u003c/td\u003e\n \u003ctd\u003eWorld\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e1\u003c/th\u003e\n \u003ctd\u003eNvidia Puts a Firewall on a Motherboard (PC Wo...\u003c/td\u003e\n \u003ctd\u003ePC World - Upcoming chip set will include buil...\u003c/td\u003e\n \u003ctd\u003eSci/Tech\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e2\u003c/th\u003e\n \u003ctd\u003eOlympic joy in Greek, Chinese press\u003c/td\u003e\n \u003ctd\u003eNewspapers in Greece reflect a mixture of exhi...\u003c/td\u003e\n \u003ctd\u003eSports\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e3\u003c/th\u003e\n \u003ctd\u003eU2 Can iPod with Pictures\u003c/td\u003e\n \u003ctd\u003eSAN JOSE, Calif. -- Apple Computer (Quote, Cha...\u003c/td\u003e\n \u003ctd\u003eSci/Tech\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e4\u003c/th\u003e\n \u003ctd\u003eThe Dream Factory\u003c/td\u003e\n \u003ctd\u003eAny product, any shape, any size -- manufactur...\u003c/td\u003e\n \u003ctd\u003eSci/Tech\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e...\u003c/th\u003e\n \u003ctd\u003e...\u003c/td\u003e\n \u003ctd\u003e...\u003c/td\u003e\n \u003ctd\u003e...\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e1995\u003c/th\u003e\n \u003ctd\u003eYou Control: iTunes puts control in OS X menu ...\u003c/td\u003e\n \u003ctd\u003eMacCentral - You Software Inc. announced on Tu...\u003c/td\u003e\n \u003ctd\u003eSci/Tech\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e1996\u003c/th\u003e\n \u003ctd\u003eArgentina beat Italy for place in football final\u003c/td\u003e\n \u003ctd\u003eFavourites Argentina beat Italy 3-0 this morni...\u003c/td\u003e\n \u003ctd\u003eSports\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e1997\u003c/th\u003e\n \u003ctd\u003eNCAA case no worry for Spurrier\u003c/td\u003e\n \u003ctd\u003eShortly after Steve Spurrier arrived at Florid...\u003c/td\u003e\n \u003ctd\u003eSports\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e1998\u003c/th\u003e\n \u003ctd\u003eSecret Service Busts Cyber Gangs\u003c/td\u003e\n \u003ctd\u003eThe US Secret Service Thursday announced arres...\u003c/td\u003e\n \u003ctd\u003eSci/Tech\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e1999\u003c/th\u003e\n \u003ctd\u003eStocks Flat; Higher Oil Limits Gains\u003c/td\u003e\n \u003ctd\u003eUS stocks were little changed on Thursday as a...\u003c/td\u003e\n \u003ctd\u003eBusiness\u003c/td\u003e\n \u003c/tr\u003e\n \u003c/tbody\u003e\n\u003c/table\u003e\n\u003cp\u003e2000 rows × 3 columns\u003c/p\u003e\n\u003c/div\u003e","text/plain":" title \\\n0 World Briefings \n1 Nvidia Puts a Firewall on a Motherboard (PC Wo... \n2 Olympic joy in Greek, Chinese press \n3 U2 Can iPod with Pictures \n4 The Dream Factory \n... ... \n1995 You Control: iTunes puts control in OS X menu ... \n1996 Argentina beat Italy for place in football final \n1997 NCAA case no worry for Spurrier \n1998 Secret Service Busts Cyber Gangs \n1999 Stocks Flat; Higher Oil Limits Gains \n\n description label \n0 BRITAIN: BLAIR WARNS OF CLIMATE THREAT Prime M... World \n1 PC World - Upcoming chip set will include buil... Sci/Tech \n2 Newspapers in Greece reflect a mixture of exhi... Sports \n3 SAN JOSE, Calif. -- Apple Computer (Quote, Cha... Sci/Tech \n4 Any product, any shape, any size -- manufactur... Sci/Tech \n... ... ... \n1995 MacCentral - You Software Inc. announced on Tu... Sci/Tech \n1996 Favourites Argentina beat Italy 3-0 this morni... Sports \n1997 Shortly after Steve Spurrier arrived at Florid... Sports \n1998 The US Secret Service Thursday announced arres... Sci/Tech \n1999 US stocks were little changed on Thursday as a... Business \n\n[2000 rows x 3 columns]"},"execution_count":8,"metadata":{},"output_type":"execute_result"}],"source":"df = pd.read_csv('AG_news_samples.csv')\ndf.pop('label_int')\ndf"},{"cell_type":"code","execution_count":9,"id":"e30c69d3-a807-4437-84e9-6972e3bc3d85","metadata":{"execution":{"iopub.execute_input":"2023-08-03T20:10:45.242886Z","iopub.status.busy":"2023-08-03T20:10:45.242651Z","iopub.status.idle":"2023-08-03T20:10:45.246343Z","shell.execute_reply":"2023-08-03T20:10:45.245714Z","shell.execute_reply.started":"2023-08-03T20:10:45.242864Z"},"tags":[],"trusted":true},"outputs":[],"source":"data = df.values.tolist()"},{"cell_type":"markdown","id":"0b6c6560-bc60-43ba-93a4-1b4aee933d5b","metadata":{},"source":"## Set up SingleStore Database"},{"cell_type":"markdown","id":"d6a1952b-7313-4007-9ec5-4c506425190f","metadata":{},"source":"Connect to your SingleStoreDB Cloud workspaces using SQLAlchemy."},{"cell_type":"code","execution_count":10,"id":"1e8b918f-d849-4bad-b5e9-1cf8be138026","metadata":{"execution":{"iopub.execute_input":"2023-08-03T20:10:45.247824Z","iopub.status.busy":"2023-08-03T20:10:45.247190Z","iopub.status.idle":"2023-08-03T20:10:45.293473Z","shell.execute_reply":"2023-08-03T20:10:45.292864Z","shell.execute_reply.started":"2023-08-03T20:10:45.247804Z"},"tags":[],"trusted":true},"outputs":[],"source":"from sqlalchemy import *\n\ndb_connection = create_engine(connection_url).connect()"},{"cell_type":"markdown","id":"e1dd6296-54b0-4f8d-886a-13cacfc28163","metadata":{},"source":"Set up the SingleStore Database which will hold your data."},{"cell_type":"code","execution_count":11,"id":"e1874b6f-706a-4638-ad2a-ca387953acaa","metadata":{"execution":{"iopub.execute_input":"2023-08-03T20:10:45.294726Z","iopub.status.busy":"2023-08-03T20:10:45.294501Z","iopub.status.idle":"2023-08-03T20:11:33.916475Z","shell.execute_reply":"2023-08-03T20:11:33.915742Z","shell.execute_reply.started":"2023-08-03T20:10:45.294706Z"},"tags":[],"trusted":true},"outputs":[{"data":{"text/plain":""},"execution_count":11,"metadata":{},"output_type":"execute_result"}],"source":"%%sql\n-- Create the database\nDROP DATABASE IF EXISTS news;\nCREATE DATABASE IF NOT EXISTS news;"},{"cell_type":"code","execution_count":12,"id":"3f1e2c3d-6fbd-46bb-9bd3-235eb51941cf","metadata":{"execution":{"iopub.execute_input":"2023-08-03T20:11:33.917770Z","iopub.status.busy":"2023-08-03T20:11:33.917541Z","iopub.status.idle":"2023-08-03T20:11:34.179324Z","shell.execute_reply":"2023-08-03T20:11:34.178632Z","shell.execute_reply.started":"2023-08-03T20:11:33.917754Z"},"tags":[],"trusted":true},"outputs":[{"data":{"text/plain":""},"execution_count":12,"metadata":{},"output_type":"execute_result"}],"source":"%%sql\nUSE news;\n-- Create the table\nDROP TABLE IF EXISTS news_articles;\nCREATE TABLE IF NOT EXISTS news_articles (\n title TEXT,\n description TEXT,\n genre TEXT,\n embedding BLOB,\n FULLTEXT (title, description)\n);"},{"cell_type":"markdown","id":"8bd97023-3d02-44d4-8bd3-59875cb22b6c","metadata":{"execution":{"iopub.execute_input":"2023-06-06T06:31:11.967693Z","iopub.status.busy":"2023-06-06T06:31:11.967312Z","iopub.status.idle":"2023-06-06T06:31:11.971035Z","shell.execute_reply":"2023-06-06T06:31:11.970370Z","shell.execute_reply.started":"2023-06-06T06:31:11.967669Z"},"tags":[]},"source":"### Get embeddings for every row based on the description column."},{"cell_type":"code","execution_count":13,"id":"496f84d0-51b6-4b66-bf5b-b1b260e4c2de","metadata":{"execution":{"iopub.execute_input":"2023-08-03T20:11:34.181980Z","iopub.status.busy":"2023-08-03T20:11:34.181788Z","iopub.status.idle":"2023-08-03T20:14:59.572758Z","shell.execute_reply":"2023-08-03T20:14:59.571998Z","shell.execute_reply.started":"2023-08-03T20:11:34.181964Z"},"tags":[],"trusted":true},"outputs":[{"data":{"text/plain":"(2000, 768)"},"execution_count":13,"metadata":{},"output_type":"execute_result"}],"source":"# Will take around 3.5 minutes to get embeddings for all 2000 columns\n\ndescriptions = [row[1] for row in data]\nall_embeddings = model.encode(descriptions)\nall_embeddings.shape"},{"cell_type":"code","execution_count":14,"id":"05b2f3fe-c35c-4252-b416-9f7b7aec60a6","metadata":{"execution":{"iopub.execute_input":"2023-08-03T20:14:59.573852Z","iopub.status.busy":"2023-08-03T20:14:59.573612Z","iopub.status.idle":"2023-08-03T20:14:59.578230Z","shell.execute_reply":"2023-08-03T20:14:59.577595Z","shell.execute_reply.started":"2023-08-03T20:14:59.573821Z"},"tags":[],"trusted":true},"outputs":[],"source":"combined_data = [tuple(row) + (embedding,) for embedding, row in zip(all_embeddings, data)]"},{"cell_type":"markdown","id":"46b1628c-0ffc-4a84-ba8b-43e8df081b01","metadata":{},"source":"### Populate the database"},{"cell_type":"code","execution_count":15,"id":"cd3e5f9b-d9e5-45fe-ba20-4fb021d7a425","metadata":{"execution":{"iopub.execute_input":"2023-08-03T20:14:59.579579Z","iopub.status.busy":"2023-08-03T20:14:59.579094Z","iopub.status.idle":"2023-08-03T20:15:12.791488Z","shell.execute_reply":"2023-08-03T20:15:12.790862Z","shell.execute_reply.started":"2023-08-03T20:14:59.579558Z"},"tags":[],"trusted":true},"outputs":[],"source":"%sql TRUNCATE TABLE news_articles;\nstatement = '''\n INSERT INTO news.news_articles (\n title,\n description,\n genre,\n embedding\n )\n VALUES (\n %s,\n %s,\n %s,\n %s\n )\n '''\n\nfor i, row in enumerate(combined_data):\n try:\n db_connection.execute(statement, row)\n except Exception as e:\n print(\"Error inserting row {}: {}\".format(i, e))\n continue"},{"cell_type":"markdown","id":"a2f3d567-eaf4-487a-a1f9-2eb7e1071991","metadata":{"tags":[]},"source":"## Semantic Search"},{"cell_type":"markdown","id":"7ad3b8f6-d3a8-4954-a737-f11c785ce9ce","metadata":{},"source":"### Connect to OpenAI"},{"cell_type":"code","execution_count":16,"id":"598d7077-d04c-46b3-b7c4-7b4362dd4507","metadata":{"execution":{"iopub.execute_input":"2023-08-03T20:15:12.792954Z","iopub.status.busy":"2023-08-03T20:15:12.792546Z","iopub.status.idle":"2023-08-03T20:15:12.845759Z","shell.execute_reply":"2023-08-03T20:15:12.845167Z","shell.execute_reply.started":"2023-08-03T20:15:12.792931Z"},"tags":[],"trusted":true},"outputs":[],"source":"import openai\n\n# models\nEMBEDDING_MODEL = \"text-embedding-ada-002\"\nGPT_MODEL = \"gpt-3.5-turbo\""},{"cell_type":"code","execution_count":17,"id":"9eea2f67-3c2e-4d1a-87c2-052c2acf4026","metadata":{"execution":{"iopub.execute_input":"2023-08-03T20:15:12.847096Z","iopub.status.busy":"2023-08-03T20:15:12.846702Z","iopub.status.idle":"2023-08-03T20:15:12.850061Z","shell.execute_reply":"2023-08-03T20:15:12.849401Z","shell.execute_reply.started":"2023-08-03T20:15:12.847074Z"},"tags":[],"trusted":true},"outputs":[],"source":"openai.api_key = 'YOUR_API_KEY_HERE'"},{"cell_type":"markdown","id":"6504f561-1ab1-4dbf-a523-0aef23b66e4b","metadata":{},"source":"### Run Semantic Search and get scores"},{"cell_type":"code","execution_count":18,"id":"a62a4c06-d77a-49b1-beaf-4c54b04d001f","metadata":{"execution":{"iopub.execute_input":"2023-08-03T20:15:12.851400Z","iopub.status.busy":"2023-08-03T20:15:12.851132Z","iopub.status.idle":"2023-08-03T20:15:13.128352Z","shell.execute_reply":"2023-08-03T20:15:13.127794Z","shell.execute_reply.started":"2023-08-03T20:15:12.851379Z"},"tags":[],"trusted":true},"outputs":[{"data":{"text/html":"\u003cdiv\u003e\n\u003cstyle scoped\u003e\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n\u003c/style\u003e\n\u003ctable border=\"1\" class=\"dataframe\"\u003e\n \u003cthead\u003e\n \u003ctr style=\"text-align: right;\"\u003e\n \u003cth\u003e\u003c/th\u003e\n \u003cth\u003etitle\u003c/th\u003e\n \u003cth\u003edescription\u003c/th\u003e\n \u003cth\u003egenre\u003c/th\u003e\n \u003cth\u003escore\u003c/th\u003e\n \u003c/tr\u003e\n \u003c/thead\u003e\n \u003ctbody\u003e\n \u003ctr\u003e\n \u003cth\u003e0\u003c/th\u003e\n \u003ctd\u003eAll Australians accounted for in Iraq: Downer ...\u003c/td\u003e\n \u003ctd\u003eAFP - Australia has accounted for all its nati...\u003c/td\u003e\n \u003ctd\u003eWorld\u003c/td\u003e\n \u003ctd\u003e0.445395\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e1\u003c/th\u003e\n \u003ctd\u003eCricket: Aussies dominate India\u003c/td\u003e\n \u003ctd\u003eAustralia tighten their grip on the third Test...\u003c/td\u003e\n \u003ctd\u003eWorld\u003c/td\u003e\n \u003ctd\u003e0.368577\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e2\u003c/th\u003e\n \u003ctd\u003eMan tried for UK student's murder\u003c/td\u003e\n \u003ctd\u003eThe trial of a man accused of murdering York b...\u003c/td\u003e\n \u003ctd\u003eWorld\u003c/td\u003e\n \u003ctd\u003e0.350485\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e3\u003c/th\u003e\n \u003ctd\u003ePonting doesn #39;t think much of Kiwis or win...\u003c/td\u003e\n \u003ctd\u003eRICKY PONTING believes the game #39;s watchers...\u003c/td\u003e\n \u003ctd\u003eSports\u003c/td\u003e\n \u003ctd\u003e0.345483\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e4\u003c/th\u003e\n \u003ctd\u003eHassan Body Found in Fallujah: Australian PM\u003c/td\u003e\n \u003ctd\u003eAustralia #39;s prime minister says a body fou...\u003c/td\u003e\n \u003ctd\u003eWorld\u003c/td\u003e\n \u003ctd\u003e0.341777\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e5\u003c/th\u003e\n \u003ctd\u003eAussie alive after capture in Iraq\u003c/td\u003e\n \u003ctd\u003eAUSTRALIAN journalist John Martinkus is lucky ...\u003c/td\u003e\n \u003ctd\u003eWorld\u003c/td\u003e\n \u003ctd\u003e0.334077\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e6\u003c/th\u003e\n \u003ctd\u003eA trio of television technologies\u003c/td\u003e\n \u003ctd\u003eAUSTRALIANS went into a television-buying fren...\u003c/td\u003e\n \u003ctd\u003eSci/Tech\u003c/td\u003e\n \u003ctd\u003e0.332006\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e7\u003c/th\u003e\n \u003ctd\u003eAustralia PM Gets Down to Work on Fourth Term ...\u003c/td\u003e\n \u003ctd\u003eReuters - Australia's conservative Prime Minis...\u003c/td\u003e\n \u003ctd\u003eWorld\u003c/td\u003e\n \u003ctd\u003e0.324335\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e8\u003c/th\u003e\n \u003ctd\u003ePolice pull body of lost autistic man, 46, fro...\u003c/td\u003e\n \u003ctd\u003eCanadian Press - OAKVILLE, Ont. (CP) - The bod...\u003c/td\u003e\n \u003ctd\u003eWorld\u003c/td\u003e\n \u003ctd\u003e0.322738\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e9\u003c/th\u003e\n \u003ctd\u003eAustralia targeted for first time in Iraq car ...\u003c/td\u003e\n \u003ctd\u003eAustralian troops in Baghdad came under attack...\u003c/td\u003e\n \u003ctd\u003eWorld\u003c/td\u003e\n \u003ctd\u003e0.321895\u003c/td\u003e\n \u003c/tr\u003e\n \u003c/tbody\u003e\n\u003c/table\u003e\n\u003c/div\u003e","text/plain":" title \\\n0 All Australians accounted for in Iraq: Downer ... \n1 Cricket: Aussies dominate India \n2 Man tried for UK student's murder \n3 Ponting doesn #39;t think much of Kiwis or win... \n4 Hassan Body Found in Fallujah: Australian PM \n5 Aussie alive after capture in Iraq \n6 A trio of television technologies \n7 Australia PM Gets Down to Work on Fourth Term ... \n8 Police pull body of lost autistic man, 46, fro... \n9 Australia targeted for first time in Iraq car ... \n\n description genre score \n0 AFP - Australia has accounted for all its nati... World 0.445395 \n1 Australia tighten their grip on the third Test... World 0.368577 \n2 The trial of a man accused of murdering York b... World 0.350485 \n3 RICKY PONTING believes the game #39;s watchers... Sports 0.345483 \n4 Australia #39;s prime minister says a body fou... World 0.341777 \n5 AUSTRALIAN journalist John Martinkus is lucky ... World 0.334077 \n6 AUSTRALIANS went into a television-buying fren... Sci/Tech 0.332006 \n7 Reuters - Australia's conservative Prime Minis... World 0.324335 \n8 Canadian Press - OAKVILLE, Ont. (CP) - The bod... World 0.322738 \n9 Australian troops in Baghdad came under attack... World 0.321895 "},"execution_count":18,"metadata":{},"output_type":"execute_result"}],"source":"from openai.embeddings_utils import get_embedding\nsearch_query = \"Articles about Aussie captures\"\nsearch_embedding = model.encode(search_query)\n\n# Create the SQL statement.\nquery_statement = \"\"\"\n SELECT\n title,\n description,\n genre,\n DOT_PRODUCT(embedding, %(embedding)s) AS score\n FROM news.news_articles\n ORDER BY score DESC\n LIMIT 10\n \"\"\"\n\n# Execute the SQL statement.\nresults = pd.DataFrame(db_connection.execute(query_statement, dict(embedding=search_embedding)))\nresults"},{"cell_type":"markdown","id":"2c8ff862-ea5b-4960-be5b-bcd530d6e918","metadata":{},"source":"## Hybrid Search"},{"cell_type":"markdown","id":"d0b2cff3-76f8-4a35-a596-4f001a9b4c8c","metadata":{},"source":"This search finds the average of the score gotten from the semantic search and the score gotten from the key-word search and sorts the news articles by this combined score to perform an effective hybrid search."},{"cell_type":"code","execution_count":19,"id":"9df7073f-6a89-4528-968c-7d5c21876a83","metadata":{"execution":{"iopub.execute_input":"2023-08-03T20:15:13.129535Z","iopub.status.busy":"2023-08-03T20:15:13.129108Z","iopub.status.idle":"2023-08-03T20:15:13.334690Z","shell.execute_reply":"2023-08-03T20:15:13.334045Z","shell.execute_reply.started":"2023-08-03T20:15:13.129512Z"},"tags":[],"trusted":true},"outputs":[{"data":{"text/html":"\u003cdiv\u003e\n\u003cstyle scoped\u003e\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n\u003c/style\u003e\n\u003ctable border=\"1\" class=\"dataframe\"\u003e\n \u003cthead\u003e\n \u003ctr style=\"text-align: right;\"\u003e\n \u003cth\u003e\u003c/th\u003e\n \u003cth\u003etitle\u003c/th\u003e\n \u003cth\u003edescription\u003c/th\u003e\n \u003cth\u003egenre\u003c/th\u003e\n \u003cth\u003esemantic_score\u003c/th\u003e\n \u003cth\u003ekeyword_score\u003c/th\u003e\n \u003cth\u003ecombined_score\u003c/th\u003e\n \u003c/tr\u003e\n \u003c/thead\u003e\n \u003ctbody\u003e\n \u003ctr\u003e\n \u003cth\u003e0\u003c/th\u003e\n \u003ctd\u003eAll Australians accounted for in Iraq: Downer ...\u003c/td\u003e\n \u003ctd\u003eAFP - Australia has accounted for all its nati...\u003c/td\u003e\n \u003ctd\u003eWorld\u003c/td\u003e\n \u003ctd\u003e0.445395\u003c/td\u003e\n \u003ctd\u003e0.0\u003c/td\u003e\n \u003ctd\u003e0.222698\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e1\u003c/th\u003e\n \u003ctd\u003eCricket: Aussies dominate India\u003c/td\u003e\n \u003ctd\u003eAustralia tighten their grip on the third Test...\u003c/td\u003e\n \u003ctd\u003eWorld\u003c/td\u003e\n \u003ctd\u003e0.368577\u003c/td\u003e\n \u003ctd\u003e0.0\u003c/td\u003e\n \u003ctd\u003e0.184289\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e2\u003c/th\u003e\n \u003ctd\u003eMan tried for UK student's murder\u003c/td\u003e\n \u003ctd\u003eThe trial of a man accused of murdering York b...\u003c/td\u003e\n \u003ctd\u003eWorld\u003c/td\u003e\n \u003ctd\u003e0.350485\u003c/td\u003e\n \u003ctd\u003e0.0\u003c/td\u003e\n \u003ctd\u003e0.175242\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e3\u003c/th\u003e\n \u003ctd\u003ePonting doesn #39;t think much of Kiwis or win...\u003c/td\u003e\n \u003ctd\u003eRICKY PONTING believes the game #39;s watchers...\u003c/td\u003e\n \u003ctd\u003eSports\u003c/td\u003e\n \u003ctd\u003e0.345483\u003c/td\u003e\n \u003ctd\u003e0.0\u003c/td\u003e\n \u003ctd\u003e0.172742\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e4\u003c/th\u003e\n \u003ctd\u003eHassan Body Found in Fallujah: Australian PM\u003c/td\u003e\n \u003ctd\u003eAustralia #39;s prime minister says a body fou...\u003c/td\u003e\n \u003ctd\u003eWorld\u003c/td\u003e\n \u003ctd\u003e0.341777\u003c/td\u003e\n \u003ctd\u003e0.0\u003c/td\u003e\n \u003ctd\u003e0.170889\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e5\u003c/th\u003e\n \u003ctd\u003eAussie alive after capture in Iraq\u003c/td\u003e\n \u003ctd\u003eAUSTRALIAN journalist John Martinkus is lucky ...\u003c/td\u003e\n \u003ctd\u003eWorld\u003c/td\u003e\n \u003ctd\u003e0.334077\u003c/td\u003e\n \u003ctd\u003e0.0\u003c/td\u003e\n \u003ctd\u003e0.167039\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e6\u003c/th\u003e\n \u003ctd\u003eA trio of television technologies\u003c/td\u003e\n \u003ctd\u003eAUSTRALIANS went into a television-buying fren...\u003c/td\u003e\n \u003ctd\u003eSci/Tech\u003c/td\u003e\n \u003ctd\u003e0.332006\u003c/td\u003e\n \u003ctd\u003e0.0\u003c/td\u003e\n \u003ctd\u003e0.166003\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e7\u003c/th\u003e\n \u003ctd\u003eAustralia PM Gets Down to Work on Fourth Term ...\u003c/td\u003e\n \u003ctd\u003eReuters - Australia's conservative Prime Minis...\u003c/td\u003e\n \u003ctd\u003eWorld\u003c/td\u003e\n \u003ctd\u003e0.324335\u003c/td\u003e\n \u003ctd\u003e0.0\u003c/td\u003e\n \u003ctd\u003e0.162168\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e8\u003c/th\u003e\n \u003ctd\u003ePolice pull body of lost autistic man, 46, fro...\u003c/td\u003e\n \u003ctd\u003eCanadian Press - OAKVILLE, Ont. (CP) - The bod...\u003c/td\u003e\n \u003ctd\u003eWorld\u003c/td\u003e\n \u003ctd\u003e0.322738\u003c/td\u003e\n \u003ctd\u003e0.0\u003c/td\u003e\n \u003ctd\u003e0.161369\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e9\u003c/th\u003e\n \u003ctd\u003eAustralia targeted for first time in Iraq car ...\u003c/td\u003e\n \u003ctd\u003eAustralian troops in Baghdad came under attack...\u003c/td\u003e\n \u003ctd\u003eWorld\u003c/td\u003e\n \u003ctd\u003e0.321895\u003c/td\u003e\n \u003ctd\u003e0.0\u003c/td\u003e\n \u003ctd\u003e0.160948\u003c/td\u003e\n \u003c/tr\u003e\n \u003c/tbody\u003e\n\u003c/table\u003e\n\u003c/div\u003e","text/plain":" title \\\n0 All Australians accounted for in Iraq: Downer ... \n1 Cricket: Aussies dominate India \n2 Man tried for UK student's murder \n3 Ponting doesn #39;t think much of Kiwis or win... \n4 Hassan Body Found in Fallujah: Australian PM \n5 Aussie alive after capture in Iraq \n6 A trio of television technologies \n7 Australia PM Gets Down to Work on Fourth Term ... \n8 Police pull body of lost autistic man, 46, fro... \n9 Australia targeted for first time in Iraq car ... \n\n description genre \\\n0 AFP - Australia has accounted for all its nati... World \n1 Australia tighten their grip on the third Test... World \n2 The trial of a man accused of murdering York b... World \n3 RICKY PONTING believes the game #39;s watchers... Sports \n4 Australia #39;s prime minister says a body fou... World \n5 AUSTRALIAN journalist John Martinkus is lucky ... World \n6 AUSTRALIANS went into a television-buying fren... Sci/Tech \n7 Reuters - Australia's conservative Prime Minis... World \n8 Canadian Press - OAKVILLE, Ont. (CP) - The bod... World \n9 Australian troops in Baghdad came under attack... World \n\n semantic_score keyword_score combined_score \n0 0.445395 0.0 0.222698 \n1 0.368577 0.0 0.184289 \n2 0.350485 0.0 0.175242 \n3 0.345483 0.0 0.172742 \n4 0.341777 0.0 0.170889 \n5 0.334077 0.0 0.167039 \n6 0.332006 0.0 0.166003 \n7 0.324335 0.0 0.162168 \n8 0.322738 0.0 0.161369 \n9 0.321895 0.0 0.160948 "},"execution_count":19,"metadata":{},"output_type":"execute_result"}],"source":"hyb_query = \"Articles about Aussie captures\"\nhyb_embedding = model.encode(hyb_query)\n\n# Create the SQL statement.\nhyb_statement = \"\"\"\n SELECT\n title,\n description,\n genre,\n DOT_PRODUCT(embedding, %(embedding)s) AS semantic_score,\n MATCH(title, description) AGAINST (%(query)s) AS keyword_score,\n (semantic_score + keyword_score) / 2 AS combined_score\n FROM news.news_articles\n ORDER BY combined_score DESC\n LIMIT 10\n \"\"\"\n\n# Execute the SQL statement.\nhyb_results = pd.DataFrame(db_connection.execute(hyb_statement, dict(embedding=hyb_embedding, query=hyb_query)))\nhyb_results"},{"cell_type":"markdown","id":"f9f6e53b-fb02-4d1a-908f-b96d1c2cdfd0","metadata":{},"source":"\u003cdiv id=\"singlestore-footer\" style=\"background-color: rgba(194, 193, 199, 0.25); height:2px; margin-bottom:10px\"\u003e\u003c/div\u003e\n\u003cdiv\u003e\u003cimg src=\"https://raw.githubusercontent.com/singlestore-labs/spaces-notebooks/master/common/images/singlestore-logo-grey.png\" style=\"padding: 0px; margin: 0px; height: 24px\"/\u003e\u003c/div\u003e\n\u003c/div\u003e"}],"metadata":{"jupyterlab":{"notebooks":{"version_major":6,"version_minor":4}},"kernelspec":{"display_name":"Python 3 (ipykernel)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.9"},"singlestore_connection":{"connectionID":"1efa4dba-bf60-42f3-8d19-19dc6b6ffb35","defaultDatabase":"news"},"singlestore_row_limit":300},"nbformat":4,"nbformat_minor":5} +{"cells":[{"cell_type":"markdown","id":"505a207d-82ee-406d-bb92-e6a6900d6d18","metadata":{},"source":["
\n"," | title | \n","description | \n","label | \n","
---|---|---|---|
0 | \n","World Briefings | \n","BRITAIN: BLAIR WARNS OF CLIMATE THREAT Prime M... | \n","World | \n","
1 | \n","Nvidia Puts a Firewall on a Motherboard (PC Wo... | \n","PC World - Upcoming chip set will include buil... | \n","Sci/Tech | \n","
2 | \n","Olympic joy in Greek, Chinese press | \n","Newspapers in Greece reflect a mixture of exhi... | \n","Sports | \n","
3 | \n","U2 Can iPod with Pictures | \n","SAN JOSE, Calif. -- Apple Computer (Quote, Cha... | \n","Sci/Tech | \n","
4 | \n","The Dream Factory | \n","Any product, any shape, any size -- manufactur... | \n","Sci/Tech | \n","
... | \n","... | \n","... | \n","... | \n","
1995 | \n","You Control: iTunes puts control in OS X menu ... | \n","MacCentral - You Software Inc. announced on Tu... | \n","Sci/Tech | \n","
1996 | \n","Argentina beat Italy for place in football final | \n","Favourites Argentina beat Italy 3-0 this morni... | \n","Sports | \n","
1997 | \n","NCAA case no worry for Spurrier | \n","Shortly after Steve Spurrier arrived at Florid... | \n","Sports | \n","
1998 | \n","Secret Service Busts Cyber Gangs | \n","The US Secret Service Thursday announced arres... | \n","Sci/Tech | \n","
1999 | \n","Stocks Flat; Higher Oil Limits Gains | \n","US stocks were little changed on Thursday as a... | \n","Business | \n","
2000 rows × 3 columns
\n","Action Required
\n","Make sure to select the news database from the drop-down menu at the top of this notebook. It updates the connection_url which is used by the %%sql magic command and SQLAlchemy to make connections to the selected database.\n","
\n","\n"," | title | \n","description | \n","genre | \n","score | \n","
---|---|---|---|---|
0 | \n","All Australians accounted for in Iraq: Downer ... | \n","AFP - Australia has accounted for all its nati... | \n","World | \n","0.445395 | \n","
1 | \n","Cricket: Aussies dominate India | \n","Australia tighten their grip on the third Test... | \n","World | \n","0.368577 | \n","
2 | \n","Man tried for UK student's murder | \n","The trial of a man accused of murdering York b... | \n","World | \n","0.350485 | \n","
3 | \n","Ponting doesn #39;t think much of Kiwis or win... | \n","RICKY PONTING believes the game #39;s watchers... | \n","Sports | \n","0.345483 | \n","
4 | \n","Hassan Body Found in Fallujah: Australian PM | \n","Australia #39;s prime minister says a body fou... | \n","World | \n","0.341777 | \n","
5 | \n","Aussie alive after capture in Iraq | \n","AUSTRALIAN journalist John Martinkus is lucky ... | \n","World | \n","0.334077 | \n","
6 | \n","A trio of television technologies | \n","AUSTRALIANS went into a television-buying fren... | \n","Sci/Tech | \n","0.332006 | \n","
7 | \n","Australia PM Gets Down to Work on Fourth Term ... | \n","Reuters - Australia's conservative Prime Minis... | \n","World | \n","0.324335 | \n","
8 | \n","Police pull body of lost autistic man, 46, fro... | \n","Canadian Press - OAKVILLE, Ont. (CP) - The bod... | \n","World | \n","0.322738 | \n","
9 | \n","Australia targeted for first time in Iraq car ... | \n","Australian troops in Baghdad came under attack... | \n","World | \n","0.321895 | \n","
\n"," | title | \n","description | \n","genre | \n","semantic_score | \n","keyword_score | \n","combined_score | \n","
---|---|---|---|---|---|---|
0 | \n","All Australians accounted for in Iraq: Downer ... | \n","AFP - Australia has accounted for all its nati... | \n","World | \n","0.445395 | \n","0.0 | \n","0.222698 | \n","
1 | \n","Cricket: Aussies dominate India | \n","Australia tighten their grip on the third Test... | \n","World | \n","0.368577 | \n","0.0 | \n","0.184289 | \n","
2 | \n","Man tried for UK student's murder | \n","The trial of a man accused of murdering York b... | \n","World | \n","0.350485 | \n","0.0 | \n","0.175242 | \n","
3 | \n","Ponting doesn #39;t think much of Kiwis or win... | \n","RICKY PONTING believes the game #39;s watchers... | \n","Sports | \n","0.345483 | \n","0.0 | \n","0.172742 | \n","
4 | \n","Hassan Body Found in Fallujah: Australian PM | \n","Australia #39;s prime minister says a body fou... | \n","World | \n","0.341777 | \n","0.0 | \n","0.170889 | \n","
5 | \n","Aussie alive after capture in Iraq | \n","AUSTRALIAN journalist John Martinkus is lucky ... | \n","World | \n","0.334077 | \n","0.0 | \n","0.167039 | \n","
6 | \n","A trio of television technologies | \n","AUSTRALIANS went into a television-buying fren... | \n","Sci/Tech | \n","0.332006 | \n","0.0 | \n","0.166003 | \n","
7 | \n","Australia PM Gets Down to Work on Fourth Term ... | \n","Reuters - Australia's conservative Prime Minis... | \n","World | \n","0.324335 | \n","0.0 | \n","0.162168 | \n","
8 | \n","Police pull body of lost autistic man, 46, fro... | \n","Canadian Press - OAKVILLE, Ont. (CP) - The bod... | \n","World | \n","0.322738 | \n","0.0 | \n","0.161369 | \n","
9 | \n","Australia targeted for first time in Iraq car ... | \n","Australian troops in Baghdad came under attack... | \n","World | \n","0.321895 | \n","0.0 | \n","0.160948 | \n","