diff --git a/notebooks/hybrid-search/meta.toml b/notebooks/hybrid-search/meta.toml new file mode 100644 index 0000000..a4da391 --- /dev/null +++ b/notebooks/hybrid-search/meta.toml @@ -0,0 +1,4 @@ +[meta] +title="Hybrid Search" +description="Hybrid search combines keyword search with semantic search, aiming to provide more accurate results." +tags=[] diff --git a/notebooks/hybrid-search/notebook.ipynb b/notebooks/hybrid-search/notebook.ipynb new file mode 100644 index 0000000..1ebd161 --- /dev/null +++ b/notebooks/hybrid-search/notebook.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","id":"505a207d-82ee-406d-bb92-e6a6900d6d18","metadata":{},"source":["
\n","
\n"," \n","
\n","
\n","
SingleStore Notebooks
\n","

Hybrid Search

\n","
\n","
"]},{"cell_type":"markdown","id":"f3a978dd","metadata":{},"source":[]},{"cell_type":"markdown","id":"d9f9e629-6eb9-4ca5-bcf2-1b8672b86725","metadata":{"execution":{"iopub.execute_input":"2023-06-06T03:34:15.712942Z","iopub.status.busy":"2023-06-06T03:34:15.712613Z","iopub.status.idle":"2023-06-06T03:34:15.715753Z","shell.execute_reply":"2023-06-06T03:34:15.715128Z","shell.execute_reply.started":"2023-06-06T03:34:15.712919Z"},"tags":[]},"source":["*Source*: [OpenAI Cookbook](https://github.com/openai/openai-cookbook/blob/main/examples/data/AG_news_samples.csv)\n","\n","Hybrid search integrates both keyword-based search and semantic search in order to combine the strengths of both and provide users with a more comprehensive and efficient search experience. This notebook is an example of how to perform hybrid search with SingleStore's database and notebooks."]},{"cell_type":"markdown","id":"532e8d3f-007d-48a4-8d36-44b561dd1109","metadata":{},"source":["## Setup\n","Let's first install the necessary libraries."]},{"cell_type":"code","execution_count":5,"id":"07990b64-9447-46a8-abbc-51be1972dfda","metadata":{"execution":{"iopub.execute_input":"2023-08-04T17:43:48.417768Z","iopub.status.busy":"2023-08-04T17:43:48.417505Z","iopub.status.idle":"2023-08-04T17:44:11.220790Z","shell.execute_reply":"2023-08-04T17:44:11.219877Z","shell.execute_reply.started":"2023-08-04T17:43:48.417751Z"},"tags":[],"trusted":true},"outputs":[],"source":["!pip install matplotlib --quiet\n","!pip install plotly.express --quiet\n","!pip install scikit-learn --quiet\n","!pip install tabulate --quiet\n","!pip install tiktoken --quiet\n","!pip install wget --quiet\n","!pip install openai 
--quiet"]},{"cell_type":"code","execution_count":6,"id":"a592dd5e-4114-4abf-923d-74038f5244eb","metadata":{"execution":{"iopub.execute_input":"2023-08-04T17:44:11.226906Z","iopub.status.busy":"2023-08-04T17:44:11.224865Z","iopub.status.idle":"2023-08-04T17:44:11.232457Z","shell.execute_reply":"2023-08-04T17:44:11.231887Z","shell.execute_reply.started":"2023-08-04T17:44:11.226882Z"},"tags":[],"trusted":true},"outputs":[],"source":["import pandas as pd\n","import os\n","import wget\n","import json"]},{"cell_type":"code","execution_count":7,"id":"c2bffc74-4b6a-4c0f-acef-f72bb255ec79","metadata":{"execution":{"iopub.execute_input":"2023-08-04T17:44:11.235361Z","iopub.status.busy":"2023-08-04T17:44:11.235113Z","iopub.status.idle":"2023-08-04T17:45:38.257796Z","shell.execute_reply":"2023-08-04T17:45:38.257117Z","shell.execute_reply.started":"2023-08-04T17:44:11.235346Z"},"tags":[],"trusted":true},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"7746987e293444e382f2b9efa253c86d","version_major":2,"version_minor":0},"text/plain":["Downloading (…)e933c/.gitattributes: 0%| | 0.00/737 [00:00\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
titledescriptionlabel
0World BriefingsBRITAIN: BLAIR WARNS OF CLIMATE THREAT Prime M...World
1Nvidia Puts a Firewall on a Motherboard (PC Wo...PC World - Upcoming chip set will include buil...Sci/Tech
2Olympic joy in Greek, Chinese pressNewspapers in Greece reflect a mixture of exhi...Sports
3U2 Can iPod with PicturesSAN JOSE, Calif. -- Apple Computer (Quote, Cha...Sci/Tech
4The Dream FactoryAny product, any shape, any size -- manufactur...Sci/Tech
............
1995You Control: iTunes puts control in OS X menu ...MacCentral - You Software Inc. announced on Tu...Sci/Tech
1996Argentina beat Italy for place in football finalFavourites Argentina beat Italy 3-0 this morni...Sports
1997NCAA case no worry for SpurrierShortly after Steve Spurrier arrived at Florid...Sports
1998Secret Service Busts Cyber GangsThe US Secret Service Thursday announced arres...Sci/Tech
1999Stocks Flat; Higher Oil Limits GainsUS stocks were little changed on Thursday as a...Business
\n","

2000 rows × 3 columns

\n",""],"text/plain":[" title \\\n","0 World Briefings \n","1 Nvidia Puts a Firewall on a Motherboard (PC Wo... \n","2 Olympic joy in Greek, Chinese press \n","3 U2 Can iPod with Pictures \n","4 The Dream Factory \n","... ... \n","1995 You Control: iTunes puts control in OS X menu ... \n","1996 Argentina beat Italy for place in football final \n","1997 NCAA case no worry for Spurrier \n","1998 Secret Service Busts Cyber Gangs \n","1999 Stocks Flat; Higher Oil Limits Gains \n","\n"," description label \n","0 BRITAIN: BLAIR WARNS OF CLIMATE THREAT Prime M... World \n","1 PC World - Upcoming chip set will include buil... Sci/Tech \n","2 Newspapers in Greece reflect a mixture of exhi... Sports \n","3 SAN JOSE, Calif. -- Apple Computer (Quote, Cha... Sci/Tech \n","4 Any product, any shape, any size -- manufactur... Sci/Tech \n","... ... ... \n","1995 MacCentral - You Software Inc. announced on Tu... Sci/Tech \n","1996 Favourites Argentina beat Italy 3-0 this morni... Sports \n","1997 Shortly after Steve Spurrier arrived at Florid... Sports \n","1998 The US Secret Service Thursday announced arres... Sci/Tech \n","1999 US stocks were little changed on Thursday as a... 
Business \n","\n","[2000 rows x 3 columns]"]},"execution_count":15,"metadata":{},"output_type":"execute_result"}],"source":["df = pd.read_csv('AG_news_samples.csv')\n","df.pop('label_int')\n","df"]},{"cell_type":"code","execution_count":16,"id":"e30c69d3-a807-4437-84e9-6972e3bc3d85","metadata":{"execution":{"iopub.execute_input":"2023-08-04T17:46:28.813086Z","iopub.status.busy":"2023-08-04T17:46:28.812535Z","iopub.status.idle":"2023-08-04T17:46:28.817391Z","shell.execute_reply":"2023-08-04T17:46:28.816738Z","shell.execute_reply.started":"2023-08-04T17:46:28.813067Z"},"tags":[],"trusted":true},"outputs":[],"source":["data = df.values.tolist()"]},{"cell_type":"markdown","id":"0b6c6560-bc60-43ba-93a4-1b4aee933d5b","metadata":{},"source":["## Set up SingleStore Database"]},{"cell_type":"markdown","id":"d6a1952b-7313-4007-9ec5-4c506425190f","metadata":{},"source":["Connect to your SingleStoreDB Cloud workspaces using SQLAlchemy."]},{"cell_type":"code","execution_count":17,"id":"1e8b918f-d849-4bad-b5e9-1cf8be138026","metadata":{"execution":{"iopub.execute_input":"2023-08-04T17:46:30.588682Z","iopub.status.busy":"2023-08-04T17:46:30.588364Z","iopub.status.idle":"2023-08-04T17:46:30.647333Z","shell.execute_reply":"2023-08-04T17:46:30.646647Z","shell.execute_reply.started":"2023-08-04T17:46:30.588665Z"},"tags":[],"trusted":true},"outputs":[],"source":["from singlestoredb import create_engine\n","\n","db_connection = create_engine().connect()"]},{"cell_type":"markdown","id":"e1dd6296-54b0-4f8d-886a-13cacfc28163","metadata":{},"source":["Set up the SingleStore Database which will hold your 
data."]},{"cell_type":"code","execution_count":11,"id":"e1874b6f-706a-4638-ad2a-ca387953acaa","metadata":{"execution":{"iopub.execute_input":"2023-08-03T20:10:45.294726Z","iopub.status.busy":"2023-08-03T20:10:45.294501Z","iopub.status.idle":"2023-08-03T20:11:33.916475Z","shell.execute_reply":"2023-08-03T20:11:33.915742Z","shell.execute_reply.started":"2023-08-03T20:10:45.294706Z"},"tags":[],"trusted":true},"outputs":[{"data":{"text/plain":[]},"execution_count":11,"metadata":{},"output_type":"execute_result"}],"source":["%%sql\n","-- Create the database\n","DROP DATABASE IF EXISTS news;\n","CREATE DATABASE IF NOT EXISTS news;"]},{"cell_type":"markdown","id":"553f42af-0b29-4e11-a54b-9879447b2a27","metadata":{},"source":["
\n"," \n","
\n","

Action Required

\n","

Make sure to select the news database from the drop-down menu at the top of this notebook. It updates the connection_url which is used by the %%sql magic command and SQLAlchemy to make connections to the selected database.\n","

\n","
\n","
"]},{"cell_type":"code","execution_count":12,"id":"3f1e2c3d-6fbd-46bb-9bd3-235eb51941cf","metadata":{"execution":{"iopub.execute_input":"2023-08-03T20:11:33.917770Z","iopub.status.busy":"2023-08-03T20:11:33.917541Z","iopub.status.idle":"2023-08-03T20:11:34.179324Z","shell.execute_reply":"2023-08-03T20:11:34.178632Z","shell.execute_reply.started":"2023-08-03T20:11:33.917754Z"},"tags":[],"trusted":true},"outputs":[{"data":{"text/plain":[]},"execution_count":12,"metadata":{},"output_type":"execute_result"}],"source":["%%sql\n","-- Create the table\n","DROP TABLE IF EXISTS news_articles;\n","CREATE TABLE IF NOT EXISTS news_articles (\n"," title TEXT,\n"," description TEXT,\n"," genre TEXT,\n"," embedding BLOB,\n"," FULLTEXT (title, description)\n",");"]},{"cell_type":"markdown","id":"8bd97023-3d02-44d4-8bd3-59875cb22b6c","metadata":{"execution":{"iopub.execute_input":"2023-06-06T06:31:11.967693Z","iopub.status.busy":"2023-06-06T06:31:11.967312Z","iopub.status.idle":"2023-06-06T06:31:11.971035Z","shell.execute_reply":"2023-06-06T06:31:11.970370Z","shell.execute_reply.started":"2023-06-06T06:31:11.967669Z"},"tags":[]},"source":["### Get embeddings for every row based on the description column."]},{"cell_type":"code","execution_count":13,"id":"496f84d0-51b6-4b66-bf5b-b1b260e4c2de","metadata":{"execution":{"iopub.execute_input":"2023-08-03T20:11:34.181980Z","iopub.status.busy":"2023-08-03T20:11:34.181788Z","iopub.status.idle":"2023-08-03T20:14:59.572758Z","shell.execute_reply":"2023-08-03T20:14:59.571998Z","shell.execute_reply.started":"2023-08-03T20:11:34.181964Z"},"tags":[],"trusted":true},"outputs":[{"data":{"text/plain":["(2000, 768)"]},"execution_count":13,"metadata":{},"output_type":"execute_result"}],"source":["# Will take around 3.5 minutes to get embeddings for all 2000 rows\n","\n","descriptions = [row[1] for row in data]\n","all_embeddings = 
model.encode(descriptions)\n","all_embeddings.shape"]},{"cell_type":"code","execution_count":14,"id":"05b2f3fe-c35c-4252-b416-9f7b7aec60a6","metadata":{"execution":{"iopub.execute_input":"2023-08-03T20:14:59.573852Z","iopub.status.busy":"2023-08-03T20:14:59.573612Z","iopub.status.idle":"2023-08-03T20:14:59.578230Z","shell.execute_reply":"2023-08-03T20:14:59.577595Z","shell.execute_reply.started":"2023-08-03T20:14:59.573821Z"},"tags":[],"trusted":true},"outputs":[],"source":["combined_data = [tuple(row) + (embedding,) for embedding, row in zip(all_embeddings, data)]"]},{"cell_type":"markdown","id":"46b1628c-0ffc-4a84-ba8b-43e8df081b01","metadata":{},"source":["### Populate the database"]},{"cell_type":"code","execution_count":15,"id":"cd3e5f9b-d9e5-45fe-ba20-4fb021d7a425","metadata":{"execution":{"iopub.execute_input":"2023-08-03T20:14:59.579579Z","iopub.status.busy":"2023-08-03T20:14:59.579094Z","iopub.status.idle":"2023-08-03T20:15:12.791488Z","shell.execute_reply":"2023-08-03T20:15:12.790862Z","shell.execute_reply.started":"2023-08-03T20:14:59.579558Z"},"tags":[],"trusted":true},"outputs":[],"source":["# Start from an empty table so re-running this cell does not duplicate rows.\n","%sql TRUNCATE TABLE news_articles;\n","statement = '''\n"," INSERT INTO news.news_articles (\n"," title,\n"," description,\n"," genre,\n"," embedding\n"," )\n"," VALUES (\n"," %s,\n"," %s,\n"," %s,\n"," %s\n"," )\n"," '''\n","\n","# Insert row by row so that a failing row is reported without aborting the load.\n","for i, row in enumerate(combined_data):\n","    try:\n","        db_connection.execute(statement, row)\n","    except Exception as e:\n","        print(\"Error inserting row {}: {}\".format(i, e))\n","\n","# Flush the freshly inserted rows to disk. The FULLTEXT index is only guaranteed to\n","# cover flushed data, so without this the MATCH ... AGAINST keyword scores used by\n","# the hybrid search below can come back as 0 for these rows.\n","db_connection.execute('OPTIMIZE TABLE news.news_articles FLUSH');"]},{"cell_type":"markdown","id":"a2f3d567-eaf4-487a-a1f9-2eb7e1071991","metadata":{"tags":[]},"source":["## Semantic Search"]},{"cell_type":"markdown","id":"7ad3b8f6-d3a8-4954-a737-f11c785ce9ce","metadata":{},"source":["### Connect to 
OpenAI"]},{"cell_type":"code","execution_count":16,"id":"598d7077-d04c-46b3-b7c4-7b4362dd4507","metadata":{"execution":{"iopub.execute_input":"2023-08-03T20:15:12.792954Z","iopub.status.busy":"2023-08-03T20:15:12.792546Z","iopub.status.idle":"2023-08-03T20:15:12.845759Z","shell.execute_reply":"2023-08-03T20:15:12.845167Z","shell.execute_reply.started":"2023-08-03T20:15:12.792931Z"},"tags":[],"trusted":true},"outputs":[],"source":["import openai\n","\n","# models\n","EMBEDDING_MODEL = \"text-embedding-ada-002\"\n","GPT_MODEL = \"gpt-3.5-turbo\""]},{"cell_type":"code","execution_count":17,"id":"9eea2f67-3c2e-4d1a-87c2-052c2acf4026","metadata":{"execution":{"iopub.execute_input":"2023-08-03T20:15:12.847096Z","iopub.status.busy":"2023-08-03T20:15:12.846702Z","iopub.status.idle":"2023-08-03T20:15:12.850061Z","shell.execute_reply":"2023-08-03T20:15:12.849401Z","shell.execute_reply.started":"2023-08-03T20:15:12.847074Z"},"tags":[],"trusted":true},"outputs":[],"source":["# Prefer the OPENAI_API_KEY environment variable so the key never has to be pasted\n","# into (and saved with) the notebook; the placeholder is only a fallback.\n","openai.api_key = os.environ.get('OPENAI_API_KEY', 'YOUR_API_KEY_HERE')"]},{"cell_type":"markdown","id":"6504f561-1ab1-4dbf-a523-0aef23b66e4b","metadata":{},"source":["### Run Semantic Search and get scores"]},{"cell_type":"code","execution_count":18,"id":"a62a4c06-d77a-49b1-beaf-4c54b04d001f","metadata":{"execution":{"iopub.execute_input":"2023-08-03T20:15:12.851400Z","iopub.status.busy":"2023-08-03T20:15:12.851132Z","iopub.status.idle":"2023-08-03T20:15:13.128352Z","shell.execute_reply":"2023-08-03T20:15:13.127794Z","shell.execute_reply.started":"2023-08-03T20:15:12.851379Z"},"tags":[],"trusted":true},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
titledescriptiongenrescore
0All Australians accounted for in Iraq: Downer ...AFP - Australia has accounted for all its nati...World0.445395
1Cricket: Aussies dominate IndiaAustralia tighten their grip on the third Test...World0.368577
2Man tried for UK student's murderThe trial of a man accused of murdering York b...World0.350485
3Ponting doesn #39;t think much of Kiwis or win...RICKY PONTING believes the game #39;s watchers...Sports0.345483
4Hassan Body Found in Fallujah: Australian PMAustralia #39;s prime minister says a body fou...World0.341777
5Aussie alive after capture in IraqAUSTRALIAN journalist John Martinkus is lucky ...World0.334077
6A trio of television technologiesAUSTRALIANS went into a television-buying fren...Sci/Tech0.332006
7Australia PM Gets Down to Work on Fourth Term ...Reuters - Australia's conservative Prime Minis...World0.324335
8Police pull body of lost autistic man, 46, fro...Canadian Press - OAKVILLE, Ont. (CP) - The bod...World0.322738
9Australia targeted for first time in Iraq car ...Australian troops in Baghdad came under attack...World0.321895
\n","
"],"text/plain":[" title \\\n","0 All Australians accounted for in Iraq: Downer ... \n","1 Cricket: Aussies dominate India \n","2 Man tried for UK student's murder \n","3 Ponting doesn #39;t think much of Kiwis or win... \n","4 Hassan Body Found in Fallujah: Australian PM \n","5 Aussie alive after capture in Iraq \n","6 A trio of television technologies \n","7 Australia PM Gets Down to Work on Fourth Term ... \n","8 Police pull body of lost autistic man, 46, fro... \n","9 Australia targeted for first time in Iraq car ... \n","\n"," description genre score \n","0 AFP - Australia has accounted for all its nati... World 0.445395 \n","1 Australia tighten their grip on the third Test... World 0.368577 \n","2 The trial of a man accused of murdering York b... World 0.350485 \n","3 RICKY PONTING believes the game #39;s watchers... Sports 0.345483 \n","4 Australia #39;s prime minister says a body fou... World 0.341777 \n","5 AUSTRALIAN journalist John Martinkus is lucky ... World 0.334077 \n","6 AUSTRALIANS went into a television-buying fren... Sci/Tech 0.332006 \n","7 Reuters - Australia's conservative Prime Minis... World 0.324335 \n","8 Canadian Press - OAKVILLE, Ont. (CP) - The bod... World 0.322738 \n","9 Australian troops in Baghdad came under attack... 
World 0.321895 "]},"execution_count":18,"metadata":{},"output_type":"execute_result"}],"source":["# Embed the query with the same `model` that encoded the article descriptions,\n","# so the query vector and the stored vectors are comparable.\n","search_query = \"Articles about Aussie captures\"\n","search_embedding = model.encode(search_query)\n","\n","# Create the SQL statement: rank every article by its dot product with the query embedding.\n","query_statement = \"\"\"\n"," SELECT\n"," title,\n"," description,\n"," genre,\n"," DOT_PRODUCT(embedding, %(embedding)s) AS score\n"," FROM news.news_articles\n"," ORDER BY score DESC\n"," LIMIT 10\n"," \"\"\"\n","\n","# Execute the SQL statement.\n","results = pd.DataFrame(db_connection.execute(query_statement, dict(embedding=search_embedding)))\n","results"]},{"cell_type":"markdown","id":"2c8ff862-ea5b-4960-be5b-bcd530d6e918","metadata":{},"source":["## Hybrid Search"]},{"cell_type":"markdown","id":"d0b2cff3-76f8-4a35-a596-4f001a9b4c8c","metadata":{},"source":["This search takes the average of the score from the semantic search and the score from the keyword search, and sorts the news articles by this combined score to perform an effective hybrid search."]},{"cell_type":"code","execution_count":19,"id":"9df7073f-6a89-4528-968c-7d5c21876a83","metadata":{"execution":{"iopub.execute_input":"2023-08-03T20:15:13.129535Z","iopub.status.busy":"2023-08-03T20:15:13.129108Z","iopub.status.idle":"2023-08-03T20:15:13.334690Z","shell.execute_reply":"2023-08-03T20:15:13.334045Z","shell.execute_reply.started":"2023-08-03T20:15:13.129512Z"},"tags":[],"trusted":true},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
titledescriptiongenresemantic_scorekeyword_scorecombined_score
0All Australians accounted for in Iraq: Downer ...AFP - Australia has accounted for all its nati...World0.4453950.00.222698
1Cricket: Aussies dominate IndiaAustralia tighten their grip on the third Test...World0.3685770.00.184289
2Man tried for UK student's murderThe trial of a man accused of murdering York b...World0.3504850.00.175242
3Ponting doesn #39;t think much of Kiwis or win...RICKY PONTING believes the game #39;s watchers...Sports0.3454830.00.172742
4Hassan Body Found in Fallujah: Australian PMAustralia #39;s prime minister says a body fou...World0.3417770.00.170889
5Aussie alive after capture in IraqAUSTRALIAN journalist John Martinkus is lucky ...World0.3340770.00.167039
6A trio of television technologiesAUSTRALIANS went into a television-buying fren...Sci/Tech0.3320060.00.166003
7Australia PM Gets Down to Work on Fourth Term ...Reuters - Australia's conservative Prime Minis...World0.3243350.00.162168
8Police pull body of lost autistic man, 46, fro...Canadian Press - OAKVILLE, Ont. (CP) - The bod...World0.3227380.00.161369
9Australia targeted for first time in Iraq car ...Australian troops in Baghdad came under attack...World0.3218950.00.160948
\n","
"],"text/plain":[" title \\\n","0 All Australians accounted for in Iraq: Downer ... \n","1 Cricket: Aussies dominate India \n","2 Man tried for UK student's murder \n","3 Ponting doesn #39;t think much of Kiwis or win... \n","4 Hassan Body Found in Fallujah: Australian PM \n","5 Aussie alive after capture in Iraq \n","6 A trio of television technologies \n","7 Australia PM Gets Down to Work on Fourth Term ... \n","8 Police pull body of lost autistic man, 46, fro... \n","9 Australia targeted for first time in Iraq car ... \n","\n"," description genre \\\n","0 AFP - Australia has accounted for all its nati... World \n","1 Australia tighten their grip on the third Test... World \n","2 The trial of a man accused of murdering York b... World \n","3 RICKY PONTING believes the game #39;s watchers... Sports \n","4 Australia #39;s prime minister says a body fou... World \n","5 AUSTRALIAN journalist John Martinkus is lucky ... World \n","6 AUSTRALIANS went into a television-buying fren... Sci/Tech \n","7 Reuters - Australia's conservative Prime Minis... World \n","8 Canadian Press - OAKVILLE, Ont. (CP) - The bod... World \n","9 Australian troops in Baghdad came under attack... 
World \n","\n"," semantic_score keyword_score combined_score \n","0 0.445395 0.0 0.222698 \n","1 0.368577 0.0 0.184289 \n","2 0.350485 0.0 0.175242 \n","3 0.345483 0.0 0.172742 \n","4 0.341777 0.0 0.170889 \n","5 0.334077 0.0 0.167039 \n","6 0.332006 0.0 0.166003 \n","7 0.324335 0.0 0.162168 \n","8 0.322738 0.0 0.161369 \n","9 0.321895 0.0 0.160948 "]},"execution_count":19,"metadata":{},"output_type":"execute_result"}],"source":["hyb_query = \"Articles about Aussie captures\"\n","hyb_embedding = model.encode(hyb_query)\n","\n","# Create the SQL statement.\n","hyb_statement = \"\"\"\n"," SELECT\n"," title,\n"," description,\n"," genre,\n"," DOT_PRODUCT(embedding, %(embedding)s) AS semantic_score,\n"," MATCH(title, description) AGAINST (%(query)s) AS keyword_score,\n"," (semantic_score + keyword_score) / 2 AS combined_score\n"," FROM news.news_articles\n"," ORDER BY combined_score DESC\n"," LIMIT 10\n"," \"\"\"\n","\n","# Execute the SQL statement.\n","hyb_results = pd.DataFrame(db_connection.execute(hyb_statement, dict(embedding=hyb_embedding, query=hyb_query)))\n","hyb_results"]},{"cell_type":"markdown","id":"f9f6e53b-fb02-4d1a-908f-b96d1c2cdfd0","metadata":{},"source":["
\n","
\n",""]}],"metadata":{"jupyterlab":{"notebooks":{"version_major":6,"version_minor":4}},"kernelspec":{"display_name":"Python 3 (ipykernel)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.9"},"singlestore_connection":{"connectionID":"1efa4dba-bf60-42f3-8d19-19dc6b6ffb35","defaultDatabase":""},"singlestore_row_limit":300},"nbformat":4,"nbformat_minor":5}