diff --git a/notebooks/atlas-and-kai/notebook.ipynb b/notebooks/atlas-and-kai/notebook.ipynb index 125a7e5..6114c54 100644 --- a/notebooks/atlas-and-kai/notebook.ipynb +++ b/notebooks/atlas-and-kai/notebook.ipynb @@ -11,7 +11,7 @@ " \n", "
\n", "
SingleStore Notebooks
\n", - "

Mongo Atlas and SingleStore Kai

\n", + "

Mongo Atlas & SingleStore Kai

\n", "
\n", "" ] @@ -680,8 +680,7 @@ "metadata": {}, "source": [ "
\n", - "
\n", - "" + "
" ] } ], diff --git a/notebooks/getting-started-with-dataframes/notebook.ipynb b/notebooks/getting-started-with-dataframes/notebook.ipynb index 82ee6cd..20a416a 100644 --- a/notebooks/getting-started-with-dataframes/notebook.ipynb +++ b/notebooks/getting-started-with-dataframes/notebook.ipynb @@ -4,7 +4,17 @@ "cell_type": "markdown", "id": "caa4ce39-2f84-48b7-92b5-dccf6bede32b", "metadata": {}, - "source": "
\n
\n \n
\n
\n
SingleStore Notebooks
\n

Getting Started with DataFrames in SingleStoreDB

\n
\n
" + "source": [ + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
SingleStore Notebooks
\n", + "

Getting Started with DataFrames in SingleStoreDB

\n", + "
\n", + "
" + ] }, { "cell_type": "markdown", @@ -444,7 +454,10 @@ "cell_type": "markdown", "id": "b40a7c86-2b36-4dad-a92f-d88f44410ec6", "metadata": {}, - "source": "
\n
\n" + "source": [ + "
\n", + "
" + ] } ], "metadata": { diff --git a/notebooks/getting-started-with-notebooks/notebook.ipynb b/notebooks/getting-started-with-notebooks/notebook.ipynb index bdf98b5..b0b112d 100644 --- a/notebooks/getting-started-with-notebooks/notebook.ipynb +++ b/notebooks/getting-started-with-notebooks/notebook.ipynb @@ -11,7 +11,7 @@ " \n", "
\n", "
SingleStore Notebooks
\n", - "

Getting Started with SingleStoreDB Notebooks

\n", + "

Getting Started with Notebooks

\n", "
\n", "" ] @@ -403,8 +403,7 @@ "metadata": {}, "source": [ "
\n", - "
\n", - "" + "
" ] } ], diff --git a/notebooks/image-matching-with-sql/notebook.ipynb b/notebooks/image-matching-with-sql/notebook.ipynb index 444dc61..16e1b4d 100644 --- a/notebooks/image-matching-with-sql/notebook.ipynb +++ b/notebooks/image-matching-with-sql/notebook.ipynb @@ -11,7 +11,7 @@ " \n", "
\n", "
SingleStore Notebooks
\n", - "

Image Matching in SingleStoreDB with SQL

\n", + "

Image Matching with SQL

\n", "
\n", "" ] @@ -516,8 +516,7 @@ "metadata": {}, "source": [ "
\n", - "
\n", - "" + "
" ] } ], diff --git a/notebooks/integrating-with-pandas/notebook.ipynb b/notebooks/integrating-with-pandas/notebook.ipynb index 5f05683..b0af7a9 100644 --- a/notebooks/integrating-with-pandas/notebook.ipynb +++ b/notebooks/integrating-with-pandas/notebook.ipynb @@ -957,48 +957,13 @@ "DROP DATABASE IF EXISTS pandas_integration;" ] }, - { - "cell_type": "markdown", - "id": "92354f6d-4af9-49fd-9bbc-8725bec8dca1", - "metadata": {}, - "source": [ - "## Code Snippets\n", - "\n", - "This section is here to make copying and pasting of the headers, footers, notes, etc. easier.\n", - "\n", - "### Header code\n", - "\n", - "```html\n", - "
\n", - "
\n", - " \n", - "
\n", - "
\n", - "
SingleStore Notebooks
\n", - "

INSERT_NOTEBOOK_TITLE

\n", - "
\n", - "
\n", - "```\n", - "\n", - "### Footer code\n", - "\n", - "```html\n", - "
\n", - "
\n", - "\n", - "```\n", - "\n", - "### Info code" - ] - }, { "cell_type": "markdown", "id": "dca02e68-11a8-46b9-b2eb-35f466d0c96e", "metadata": {}, "source": [ "
\n", - "
\n", - "" + "
" ] } ], diff --git a/notebooks/launch-open-source-apps-with-langchain/meta.toml b/notebooks/launch-open-source-apps-with-langchain/meta.toml index cee0e3a..d470014 100644 --- a/notebooks/launch-open-source-apps-with-langchain/meta.toml +++ b/notebooks/launch-open-source-apps-with-langchain/meta.toml @@ -3,6 +3,6 @@ title="Launch Open-Source Apps with LangChain" description="""\ LangChain connector to use SingleStoreDB as your vector database for your apps. \ """ -icon="" +icon="vector-circle" tags=["vectordb", "genai", "langchain"] destinations=[] diff --git a/notebooks/launch-open-source-apps-with-langchain/notebook.ipynb b/notebooks/launch-open-source-apps-with-langchain/notebook.ipynb index 8cf1317..d2081c4 100644 --- a/notebooks/launch-open-source-apps-with-langchain/notebook.ipynb +++ b/notebooks/launch-open-source-apps-with-langchain/notebook.ipynb @@ -2,12 +2,17 @@ "cells": [ { "cell_type": "markdown", - "id": "e4bb409e", + "id": "7d9894c5-8938-4790-8acf-44882f2d3391", "metadata": {}, "source": [ - "
\n", - " \n", - " \n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
SingleStore Notebooks
\n", + "

Launch Open-Source Apps with LangChain

\n", + "
\n", "
" ] }, @@ -224,6 +229,15 @@ "\n", "print(response['choices'][0]['message']['content'])" ] + }, + { + "cell_type": "markdown", + "id": "f1ce8da7-0868-47fd-8585-4777d26f3adc", + "metadata": {}, + "source": [ + "
\n", + "
" + ] } ], "metadata": { diff --git a/notebooks/launch-open-source-apps-with-langchain/notebook2.ipynb b/notebooks/launch-open-source-apps-with-langchain/notebook2.ipynb deleted file mode 100644 index a6353c3..0000000 --- a/notebooks/launch-open-source-apps-with-langchain/notebook2.ipynb +++ /dev/null @@ -1,191 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "80fa344f", - "metadata": {}, - "source": [ - "
\n", - " \n", - " \n", - "
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7d222bd8-c86f-4887-ba76-82f0c8a8c110", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install langchain --quiet\n", - "!pip install openai --quiet\n", - "!pip install singlestoredb --quiet\n", - "!pip install tiktoken --quiet\n", - "!pip install unstructured --quiet" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fafaecac-a478-48bf-845f-fc0d0a4f93fb", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.document_loaders import OnlinePDFLoader\n", - "\n", - "loader = OnlinePDFLoader(\"http://leavcom.com/pdf/DBpdf.pdf\")\n", - "\n", - "data = loader.load()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9b1d3e25-129f-4e4e-ad64-db3c4ba2d507", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", - "\n", - "print (f\"You have {len(data)} document(s) in your data\")\n", - "print (f\"There are {len(data[0].page_content)} characters in your document\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "445c4227-8f82-4a8e-b1d7-ed040a0a4ed2", - "metadata": {}, - "outputs": [], - "source": [ - "text_splitter = RecursiveCharacterTextSplitter(chunk_size = 2000, chunk_overlap = 0)\n", - "texts = text_splitter.split_documents(data)\n", - "\n", - "print (f\"You have {len(texts)} pages\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4f4d66a0-ed8c-4c3f-8116-9d7ea8526f24", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import getpass\n", - "\n", - "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "24649cac-6455-45fb-8021-049da13918e4", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.embeddings import OpenAIEmbeddings\n", - "\n", - "embedder = OpenAIEmbeddings()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9b0af2c5-37c9-4822-856b-f057d2acbad8", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.vectorstores import SingleStoreDB\n", - "\n", - "os.environ[\"SINGLESTOREDB_URL\"] = \"admin:@:3306/pdf_db\"\n", - "\n", - "docsearch = SingleStoreDB.from_documents(\n", - " texts,\n", - " embedder,\n", - " table_name = \"pdf_docs2\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d4e4b856-69a2-4210-9f89-95ae6e25540c", - "metadata": {}, - "outputs": [], - "source": [ - "%%sql\n", - "\n", - "USE pdf_db;\n", - "SELECT JSON_ARRAY_UNPACK_F32(vector)\n", - "FROM pdf_docs2\n", - "LIMIT 1;" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4041b9c9-2540-4324-9840-d19ff15d40fd", - "metadata": {}, - "outputs": [], - "source": [ - "query_text = \"Will object-oriented databases be commercially successful?\"\n", - "\n", - "docs = docsearch.similarity_search(query_text)\n", - "\n", - "print(docs[0].page_content)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "50d01b42-650e-4e1f-81fe-a151d6688013", - "metadata": {}, - "outputs": [], - "source": [ - "import openai\n", - "\n", - "prompt = f\"The user asked: {query_text}. The most similar text from the document is: {docs[0].page_content}\"\n", - "\n", - "response = openai.ChatCompletion.create(\n", - " model=\"gpt-3.5-turbo\",\n", - " messages=[\n", - " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n", - " {\"role\": \"user\", \"content\": prompt}\n", - " ]\n", - ")\n", - "\n", - "print(response['choices'][0]['message']['content'])" - ] - } - ], - "metadata": { - "jupyterlab": { - "notebooks": { - "version_major": 6, - "version_minor": 4 - } - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/movie-recommendation/notebook.ipynb b/notebooks/movie-recommendation/notebook.ipynb index df4aab1..e53e814 100644 --- a/notebooks/movie-recommendation/notebook.ipynb +++ b/notebooks/movie-recommendation/notebook.ipynb @@ -726,8 +726,7 @@ "metadata": {}, "source": [ "
\n", - "
\n", - "
" + "
" ] } ], diff --git a/notebooks/notebook-basics/notebook.ipynb b/notebooks/notebook-basics/notebook.ipynb index e2e8c27..a6e1d16 100644 --- a/notebooks/notebook-basics/notebook.ipynb +++ b/notebooks/notebook-basics/notebook.ipynb @@ -444,8 +444,7 @@ "metadata": {}, "source": [ "
\n", - "
\n", - "" + "
" ] } ], diff --git a/notebooks/semantic-search-with-openai-embedding-creation/notebook.ipynb b/notebooks/semantic-search-with-openai-embedding-creation/notebook.ipynb index 16d5d09..6ac20f7 100644 --- a/notebooks/semantic-search-with-openai-embedding-creation/notebook.ipynb +++ b/notebooks/semantic-search-with-openai-embedding-creation/notebook.ipynb @@ -273,8 +273,7 @@ "metadata": {}, "source": [ "
\n", - "
\n", - "" + "
" ] } ], diff --git a/notebooks/semantic-search-with-openai-qa/notebook.ipynb b/notebooks/semantic-search-with-openai-qa/notebook.ipynb index b365069..cbb38c1 100644 --- a/notebooks/semantic-search-with-openai-qa/notebook.ipynb +++ b/notebooks/semantic-search-with-openai-qa/notebook.ipynb @@ -481,8 +481,7 @@ "metadata": {}, "source": [ "
\n", - "
\n", - "" + "
" ] } ], diff --git a/resources/nb-check.py b/resources/nb-check.py index 20e151d..c0f2d38 100755 --- a/resources/nb-check.py +++ b/resources/nb-check.py @@ -1,13 +1,129 @@ #!/usr/bin/env python3 +"""Program for validating notebook content.""" +import html import json +import os import sys +import tomllib +import uuid +from typing import Any + + +NOTEBOOK_HEADER = [ + '
\n', + '
\n', + ' \n', + '
\n', + '
\n', + '
SingleStore Notebooks
\n', + '

{title}

\n', + '
\n', + '
', +] + +NOTEBOOK_FOOTER = [ + '\n', + '
', +] + +ICON_COLORS = { + 'arrow-up-right-dots': 'rgba(255, 167, 103, 0.25)', + 'arrows-spin': 'rgba(124, 195, 235, 0.25)', + 'binary': 'rgba(210, 255, 153, 0.25)', + 'block-question': 'rgba(255, 224, 129, 0.25)', + 'bolt': 'rgba(235, 249, 245, 0.25)', + 'book-open-cover': 'rgba(124, 195, 235, 0.25)', + 'browser': 'rgba(235, 249, 245, 0.25)', + 'calendar-check': 'rgba(235, 249, 245, 0.25)', + 'camera-movie': 'rgba(255, 182, 176, 0.25)', + 'chart-network': 'rgba(210, 255, 153, 0.25)', + 'chart-scatter': 'rgba(124, 195, 235, 0.25)', + 'clouds': 'rgba(124, 195, 235, 0.25)', + 'crystal-ball': 'rgba(255, 167, 103, 0.25)', + 'database': 'rgba(235, 249, 245, 0.25)', + 'dollar-circle': 'rgba(255, 167, 103, 0.25)', + 'face-viewfinder': 'rgba(209, 153, 255, 0.25)', + 'file-export': 'rgba(255, 182, 176, 0.25)', + 'files': 'rgba(255, 224, 129, 0.25)', + 'filter': 'rgba(255, 167, 103, 0.25)', + 'gears': 'rgba(235, 249, 245, 0.25)', + 'globe': 'rgba(209, 153, 255, 0.25)', + 'handshake': 'rgba(255, 224, 129, 0.25)', + 'id-card': 'rgba(255, 182, 176, 0.25)', + 'image': 'rgba(255, 224, 129, 0.25)', + 'laptop': 'rgba(209, 153, 255, 0.25)', + 'lightbulb-on': 'rgba(255, 167, 103, 0.25)', + 'link': 'rgba(124, 195, 235, 0.25)', + 'location-dots': 'rgba(210, 255, 153, 0.25)', + 'lock': 'rgba(235, 249, 245, 0.25)', + 'map': 'rgba(255, 224, 129, 0.25)', + 'megaphone': 'rgba(124, 195, 235, 0.25)', + 'memo-circle-check': 'rgba(210, 255, 153, 0.25)', + 'message-dots': 'rgba(210, 255, 153, 0.25)', + 'nodes-circle': 'rgba(255, 224, 129, 0.25)', + 'notes': 'rgba(209, 153, 255, 0.25)', + 'pipeline': 'rgba(255, 167, 103, 0.25)', + 'radar': 'rgba(255, 182, 176, 0.25)', + 'rocket': 'rgba(210, 255, 153, 0.25)', + 'screwdriver-wrench': 'rgba(255, 182, 176, 0.25)', + 'server': 'rgba(255, 182, 176, 0.25)', + 'shield': 'rgba(124, 195, 235, 0.25)', + 'shop': 'rgba(235, 249, 245, 0.25)', + 'shopping-bag': 'rgba(255, 224, 129, 0.25)', + 'shopping-cart': 'rgba(255, 167, 103, 0.25)', + 'star': 'rgba(255, 182, 176, 0.25)', + 'user-plus': 'rgba(209, 153, 255, 0.25)', + 'users': 'rgba(210, 255, 153, 0.25)', + 'vector-circle': 'rgba(209, 153, 255, 0.25)', + 'waveform': 'rgba(209, 153, 255, 0.25)', +} + + +def error(msg: str) -> None: + """Print an error message and end the program.""" + print('ERROR:', msg, file=sys.stderr) + sys.exit(1) + + +def new_markdown_cell(cell_id: str, content: list[str]) -> dict[str, Any]: + """ + Construct a markdown cell for a notebook. + + Parameters + ---------- + cell_id : str + The UUID to use for the cell ID + content : list[str] + The list of strings that make up the cell contents + + Returns + ------- + dict[str, Any] + + """ + return dict( + cell_type='markdown', + id=cell_id, + metadata={}, + source=content, + ) for f in sys.argv[1:]: + try: + toml_path = os.path.join(os.path.dirname(f), 'meta.toml') + with open(toml_path, 'rb') as toml_f: + toml_info = tomllib.load(toml_f) + except Exception: + error(f'could not load `meta.toml` file: {toml_path}') + with open(f, 'r') as infile: nb = json.loads(infile.read()) + if os.path.basename(f) != 'notebook.ipynb': + error(f'notebook must be named `notebook.ipynb`: {f}') + # Clear out SingleStore metadata metadata = nb.get('metadata', {}) @@ -32,6 +148,53 @@ cells.pop(end) end -= 1 + header_id = str(uuid.uuid4()) + footer_id = str(uuid.uuid4()) + + # Remove header cell, it will be regenerated later + if cells: + source = cells[0].get('source', []) + if not isinstance(source, str): + source = ''.join(source) + if 'id="singlestore-header"' in source: + header_cell = cells.pop(0) + header_id = header_cell.get('id', header_id) + + # Remove footer cell, it will be regenerated later + if cells: + source = cells[-1].get('source', []) + if not isinstance(source, str): + source = ''.join(source) + if 'id="singlestore-footer"' in source: + footer_cell = cells.pop(-1) + footer_id = footer_cell.get('id', footer_id) + + # Prepare parameter substitutions for header + try: + icon_name = toml_info['meta']['icon'] + background_color = ICON_COLORS[icon_name] + except KeyError as exc: + print(str(exc)) + error(f'missing or incorrect icon in {toml_path}') + + try: + title = html.escape(toml_info['meta']['title']) + except KeyError as exc: + error(f'missing title in {toml_path}') + + # Add header cell + header = [ + x.format( + background_color=background_color, + icon_name=icon_name, + title=title, + ) for x in NOTEBOOK_HEADER + ] + cells.insert(0, new_markdown_cell(header_id, header)) + + # Add footer cell + cells.append(new_markdown_cell(footer_id, NOTEBOOK_FOOTER)) + with open(f, 'w') as outfile: outfile.write(json.dumps(nb, indent=2)) outfile.write('\n')