-
Notifications
You must be signed in to change notification settings - Fork 51
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #31 from bpaul-singlestore/master
adding warm up disk notebook
- Loading branch information
Showing
2 changed files
with
187 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
[meta] | ||
title="Automatically warm up your disk after resuming workspace" | ||
description="""\ | ||
Runs through the queries to bring data residing in object storage onto disk for a specified database. \ | ||
""" | ||
icon="database" | ||
tags=["automation"] | ||
destinations=["spaces"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,179 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"id": "40e5ffee-7dbd-4148-a58c-fa3b014590b7", | ||
"metadata": {}, | ||
"source": [ | ||
"<div id=\"singlestore-header\" style=\"display: flex; background-color: rgba(235, 249, 245, 0.25); padding: 5px;\">\n", | ||
" <div id=\"icon-image\" style=\"width: 90px; height: 90px;\">\n", | ||
" <img width=\"100%\" height=\"100%\" src=\"https://raw.githubusercontent.com/singlestore-labs/spaces-notebooks/master/common/images/header-icons/database.png\" />\n", | ||
" </div>\n", | ||
" <div id=\"text\" style=\"padding: 5px; margin-left: 10px;\">\n", | ||
" <div id=\"badge\" style=\"display: inline-block; background-color: rgba(0, 0, 0, 0.15); border-radius: 4px; padding: 4px 8px; align-items: center; margin-top: 6px; margin-bottom: -2px; font-size: 80%\">SingleStore Notebooks</div>\n", | ||
" <h1 style=\"font-weight: 500; margin: 8px 0 0 4px;\">Automatically warm up your disk after resuming workspace</h1>\n", | ||
" </div>\n", | ||
"</div>" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "68a6ae45-1ff5-475b-ad42-d724adf998c6", | ||
"metadata": {}, | ||
"source": [ | ||
"## Step 0. Import libraries & create connection to database" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"id": "d5b77e41-852a-4610-a3c9-052e684dd3f1", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"import singlestoredb as s2\n", | ||
"s2_conn = s2.connect()\n", | ||
"s2_cur = s2_conn.cursor()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "882e7736-aa5a-4f89-aa11-9ad907376274", | ||
"metadata": {}, | ||
"source": [ | ||
"## Step 1. Specify which database you want to cache" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"id": "2db88756-4303-4019-ba69-e065d9c1aa72", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"database_name = input('Enter database name:')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "9f31256d-495c-4743-95b8-1a7a3fb8b29a", | ||
"metadata": {}, | ||
"source": [ | ||
"## Step 2. Get a list of the columnstore table names in your database" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"id": "e08f3465-d454-4dd1-b93f-e3337e030f4f", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"query = \"\"\"SELECT table_name FROM information_schema.tables WHERE table_schema = '{}' AND table_type = 'BASE TABLE' AND storage_type = 'COLUMNSTORE';\"\"\".format(database_name)\n", | ||
"result = s2_cur.execute(query)\n", | ||
"result_df = pd.DataFrame(list(s2_cur))\n", | ||
"list_of_tables = result_df[[0]].values" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "f6e3091f-4e77-4e2c-b397-95695adf3e2b", | ||
"metadata": {}, | ||
"source": [ | ||
"## Step 3. Cache columnar files" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"id": "fc670353-5d5a-4d47-8d4f-a6637b741a37", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# get queries to warm up columm files\n", | ||
"column_queries_df = pd.DataFrame()\n", | ||
"\n", | ||
"for table_name in list_of_tables:\n", | ||
" query = \"\"\" WITH t1 AS (SELECT c.column_name, c.ordinal_position, MAX(c.ordinal_position) OVER (ORDER BY c.ordinal_position DESC) AS last_row FROM information_schema.columns c JOIN information_schema.tables t ON c.table_catalog = t.table_catalog AND c.table_schema = t.table_schema AND c.table_name = t.table_name WHERE c.table_schema = '{0}' AND c.table_name = '{1}') SELECT CASE WHEN ordinal_position = 1 AND ordinal_position = last_row THEN CONCAT('SELECT ', 'AVG(LENGTH(`',column_name,'`)) FROM ', '{1}') WHEN ordinal_position = 1 and ordinal_position != last_row THEN CONCAT('SELECT ', 'AVG(LENGTH(`',column_name,'`)),') WHEN ordinal_position != last_row THEN CONCAT('AVG(LENGTH(`',column_name,'`)),') ELSE CONCAT('AVG(LENGTH(`',column_name,'`)) FROM ', '{1}') END AS query_text FROM t1 ORDER BY ordinal_position; \"\"\".format(database_name, table_name[0])\n", | ||
" result = s2_cur.execute(query)\n", | ||
" result_df = pd.DataFrame(list(s2_cur))\n", | ||
" result_df['table_name'] = table_name[0]\n", | ||
" column_queries_df = pd.concat([column_queries_df, result_df], axis=0)\n", | ||
"\n", | ||
"column_queries_df.rename(columns = {0:'query_text'}, inplace = True)\n", | ||
"final_column_df = column_queries_df.groupby('table_name')['query_text'].apply(' '.join).reset_index()\n", | ||
"final_column_df['query_text'] = final_column_df['query_text'].astype(str) + ';'\n", | ||
"\n", | ||
"# run column file warm up queries\n", | ||
"for query in final_column_df[['query_text']].values:\n", | ||
" s2_cur.execute(\"\"\" {} \"\"\".format(query[0]))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "2eea9509-b8ef-4004-84b2-a92cb948332d", | ||
"metadata": {}, | ||
"source": [ | ||
"## Step 4. Cache index files" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"id": "0e087126-c17d-43b4-bb50-59a1eee61357", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# get queries to warm up index files\n", | ||
"index_queries_df = pd.DataFrame()\n", | ||
"\n", | ||
"for table_name in list_of_tables:\n", | ||
" query = \"\"\" SELECT DISTINCT CONCAT(\"OPTIMIZE TABLE \", table_name, \" WARM BLOB CACHE FOR INDEX \", \"`\", index_name, \"`\", \";\") FROM information_schema.statistics WHERE TABLE_SCHEMA = '{}' AND index_type = 'COLUMNSTORE HASH' AND table_name = '{}'; \"\"\".format(database_name, table_name[0])\n", | ||
" result = s2_cur.execute(query)\n", | ||
" result_df = pd.DataFrame(list(s2_cur))\n", | ||
" index_queries_df = pd.concat([index_queries_df, result_df], axis=0)\n", | ||
"\n", | ||
"# run index file warm up queries\n", | ||
"for query in index_queries_df.values:\n", | ||
" s2_cur.execute(\"\"\" {} \"\"\".format(query[0]))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "90aea1ad-d9e8-49a8-bbb9-263803bab91f", | ||
"metadata": {}, | ||
"source": [ | ||
"<div id=\"singlestore-footer\" style=\"background-color: rgba(194, 193, 199, 0.25); height:2px; margin-bottom:10px\"></div>\n", | ||
"<div><img src=\"https://raw.githubusercontent.com/singlestore-labs/spaces-notebooks/master/common/images/singlestore-logo-grey.png\" style=\"padding: 0px; margin: 0px; height: 24px\"/></div>" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"jupyterlab": { | ||
"notebooks": { | ||
"version_major": 6, | ||
"version_minor": 4 | ||
} | ||
}, | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.11.4" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |