Skip to content

Commit

Permalink
adding warm up disk notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
Ben Paul authored and Ben Paul committed Nov 30, 2023
1 parent 906f149 commit 18907ce
Show file tree
Hide file tree
Showing 2 changed files with 188 additions and 0 deletions.
8 changes: 8 additions & 0 deletions notebooks/kebab-case/meta.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[meta]
title="Automatically warm up your disk after resuming workspace"
description="""\
Runs through the queries to bring data residing in object storage onto disk for a specified database. \
"""
icon="database"
tags=["automation"]
destinations=["spaces"]
180 changes: 180 additions & 0 deletions notebooks/kebab-case/notebook.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "40e5ffee-7dbd-4148-a58c-fa3b014590b7",
"metadata": {},
"source": [
"<div id=\"singlestore-header\" style=\"display: flex; background-color: rgba(235, 249, 245, 0.25); padding: 5px;\">\n",
" <div id=\"icon-image\" style=\"width: 90px; height: 90px;\">\n",
" <img width=\"100%\" height=\"100%\" src=\"https://raw.githubusercontent.com/singlestore-labs/spaces-notebooks/master/common/images/header-icons/database.png\" />\n",
" </div>\n",
" <div id=\"text\" style=\"padding: 5px; margin-left: 10px;\">\n",
" <div id=\"badge\" style=\"display: inline-block; background-color: rgba(0, 0, 0, 0.15); border-radius: 4px; padding: 4px 8px; align-items: center; margin-top: 6px; margin-bottom: -2px; font-size: 80%\">SingleStore Notebooks</div>\n",
" <h1 style=\"font-weight: 500; margin: 8px 0 0 4px;\">Automatically warm up your disk after resuming workspace</h1>\n",
" </div>\n",
"</div>"
]
},
{
"cell_type": "markdown",
"id": "68a6ae45-1ff5-475b-ad42-d724adf998c6",
"metadata": {},
"source": [
"## Step 0. Import libraries & create connection to database"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "d5b77e41-852a-4610-a3c9-052e684dd3f1",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sqlalchemy import *\n",
"import singlestoredb as s2\n",
"s2_conn = s2.connect()\n",
"s2_cur = s2_conn.cursor()"
]
},
{
"cell_type": "markdown",
"id": "882e7736-aa5a-4f89-aa11-9ad907376274",
"metadata": {},
"source": [
"## Step 1. Specify which database you want to cache"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "2db88756-4303-4019-ba69-e065d9c1aa72",
"metadata": {},
"outputs": [],
"source": [
"database_name = input('Enter database name:')"
]
},
{
"cell_type": "markdown",
"id": "9f31256d-495c-4743-95b8-1a7a3fb8b29a",
"metadata": {},
"source": [
"## Step 2. Get a list of the columnstore table names in your database"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e08f3465-d454-4dd1-b93f-e3337e030f4f",
"metadata": {},
"outputs": [],
"source": [
"query = \"\"\"SELECT table_name FROM information_schema.tables WHERE table_schema = '{}' AND table_type = 'BASE TABLE' AND storage_type = 'COLUMNSTORE';\"\"\".format(database_name)\n",
"result = s2_cur.execute(query)\n",
"result_df = pd.DataFrame(list(s2_cur))\n",
"list_of_tables = result_df[[0]].values"
]
},
{
"cell_type": "markdown",
"id": "f6e3091f-4e77-4e2c-b397-95695adf3e2b",
"metadata": {},
"source": [
"## Step 3. Cache columnar files"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "fc670353-5d5a-4d47-8d4f-a6637b741a37",
"metadata": {},
"outputs": [],
"source": [
"# get queries to warm up columm files\n",
"column_queries_df = pd.DataFrame()\n",
"\n",
"for table_name in list_of_tables:\n",
" query = \"\"\" WITH t1 AS (SELECT c.column_name, c.ordinal_position, MAX(c.ordinal_position) OVER (ORDER BY c.ordinal_position DESC) AS last_row FROM information_schema.columns c JOIN information_schema.tables t ON c.table_catalog = t.table_catalog AND c.table_schema = t.table_schema AND c.table_name = t.table_name WHERE c.table_schema = '{0}' AND c.table_name = '{1}') SELECT CASE WHEN ordinal_position = 1 AND ordinal_position = last_row THEN CONCAT('SELECT ', 'AVG(LENGTH(`',column_name,'`)) FROM ', '{1}') WHEN ordinal_position = 1 and ordinal_position != last_row THEN CONCAT('SELECT ', 'AVG(LENGTH(`',column_name,'`)),') WHEN ordinal_position != last_row THEN CONCAT('AVG(LENGTH(`',column_name,'`)),') ELSE CONCAT('AVG(LENGTH(`',column_name,'`)) FROM ', '{1}') END AS query_text FROM t1 ORDER BY ordinal_position; \"\"\".format(database_name, table_name[0])\n",
" result = s2_cur.execute(query)\n",
" result_df = pd.DataFrame(list(s2_cur))\n",
" result_df['table_name'] = table_name[0]\n",
" column_queries_df = pd.concat([column_queries_df, result_df], axis=0)\n",
"\n",
"column_queries_df.rename(columns = {0:'query_text'}, inplace = True)\n",
"final_column_df = column_queries_df.groupby('table_name')['query_text'].apply(' '.join).reset_index()\n",
"final_column_df['query_text'] = final_column_df['query_text'].astype(str) + ';'\n",
"\n",
"# run column file warm up queries\n",
"for query in final_column_df[['query_text']].values:\n",
" s2_cur.execute(\"\"\" {} \"\"\".format(query[0]))"
]
},
{
"cell_type": "markdown",
"id": "2eea9509-b8ef-4004-84b2-a92cb948332d",
"metadata": {},
"source": [
"## Step 4. Cache index files"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "0e087126-c17d-43b4-bb50-59a1eee61357",
"metadata": {},
"outputs": [],
"source": [
"# get queries to warm up index files\n",
"index_queries_df = pd.DataFrame()\n",
"\n",
"for table_name in list_of_tables:\n",
" query = \"\"\" SELECT DISTINCT CONCAT(\"OPTIMIZE TABLE \", table_name, \" WARM BLOB CACHE FOR INDEX \", \"`\", index_name, \"`\", \";\") FROM information_schema.statistics WHERE TABLE_SCHEMA = '{}' AND index_type = 'COLUMNSTORE HASH' AND table_name = '{}'; \"\"\".format(database_name, table_name[0])\n",
" result = s2_cur.execute(query)\n",
" result_df = pd.DataFrame(list(s2_cur))\n",
" index_queries_df = pd.concat([index_queries_df, result_df], axis=0)\n",
"\n",
"# run index file warm up queries\n",
"for query in index_queries_df.values:\n",
" s2_cur.execute(\"\"\" {} \"\"\".format(query[0]))"
]
},
{
"cell_type": "markdown",
"id": "90aea1ad-d9e8-49a8-bbb9-263803bab91f",
"metadata": {},
"source": [
"<div id=\"singlestore-footer\" style=\"background-color: rgba(194, 193, 199, 0.25); height:2px; margin-bottom:10px\"></div>\n",
"<div><img src=\"https://raw.githubusercontent.com/singlestore-labs/spaces-notebooks/master/common/images/singlestore-logo-grey.png\" style=\"padding: 0px; margin: 0px; height: 24px\"/></div>"
]
}
],
"metadata": {
"jupyterlab": {
"notebooks": {
"version_major": 6,
"version_minor": 4
}
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit 18907ce

Please sign in to comment.