Skip to content

Commit

Permalink
Clean up formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
kesmit13 committed Oct 5, 2023
1 parent a50d503 commit 0b8cbb0
Showing 1 changed file with 102 additions and 83 deletions.
185 changes: 102 additions & 83 deletions notebooks/load-json-files-s3/notebook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -112,20 +112,43 @@
"outputs": [],
"source": [
"%%sql\n",
"Create database if not exists demo_database;\n",
"Use demo_database;\n",
"CREATE TABLE if not exists demo_database.actors (\n",
"name text CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,\n",
"age int NOT NULL,\n",
"born_at text CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,\n",
"Birthdate text CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,\n",
"photo text CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,\n",
"wife text CHARACTER SET utf8 COLLATE utf8_general_ci,\n",
"weight float NOT NULL,\n",
"haschildren boolean,\n",
"hasGreyHair boolean,\n",
"children JSON COLLATE utf8_bin NOT NULL,\n",
"SHARD KEY ()\n",
"CREATE DATABASE IF NOT EXISTS demo_database;"
]
},
{
"cell_type": "markdown",
"id": "6dfc5b0b-9308-46c9-8cc8-be08fb07c1b6",
"metadata": {},
"source": [
"<div class=\"alert alert-block alert-warning\">\n",
" <b class=\"fa fa-solid fa-exclamation-circle\"></b>\n",
" <div>\n",
" <p><b>Action Required</b></p>\n",
" <p>Make sure to select the <tt>demo_database</tt> database from the drop-down menu at the top of this notebook. Selecting it updates the <tt>connection_url</tt> to connect to that database.</p>\n",
" </div>\n",
"</div>"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "b09528cf-0beb-4fe0-9e60-6edefb72f8b1",
"metadata": {},
"outputs": [],
"source": [
"%%sql\n",
"CREATE TABLE IF NOT EXISTS demo_database.actors (\n",
" name text CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,\n",
" age int NOT NULL,\n",
" born_at text CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,\n",
" Birthdate text CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,\n",
" photo text CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,\n",
" wife text CHARACTER SET utf8 COLLATE utf8_general_ci,\n",
" weight float NOT NULL,\n",
" haschildren boolean,\n",
" hasGreyHair boolean,\n",
" children JSON COLLATE utf8_bin NOT NULL,\n",
" SHARD KEY ()\n",
");"
]
},
Expand All @@ -148,38 +171,38 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"id": "92df7943-e68d-4509-b7f5-4a93697f6578",
"metadata": {},
"outputs": [],
"source": [
"%%sql\n",
"CREATE PIPELINE if not exists demo_database.actors\n",
"AS LOAD DATA S3 'studiotutorials/sample_dataset/json_files/wildcard_demo/*.json'\n",
"CONFIG '{ \\\"region\\\": \\\"us-east-1\\\" }'\n",
"/*\n",
"CREDENTIALS '{\"aws_access_key_id\": \"<Key to Enter>\",\n",
" \"aws_secret_access_key\": \"<Key to Enter>\"}'\n",
"*/\n",
"BATCH_INTERVAL 2500\n",
"MAX_PARTITIONS_PER_BATCH 1\n",
"DISABLE OUT_OF_ORDER OPTIMIZATION\n",
"DISABLE OFFSETS METADATA GC\n",
"SKIP DUPLICATE KEY ERRORS\n",
"INTO TABLE `actors`\n",
"FORMAT JSON\n",
"(\n",
" actors.name <- name,\n",
" actors.age <- age,\n",
" actors.born_at <- `Born At`,\n",
" actors.Birthdate <- Birthdate,\n",
" actors.photo <- photo,\n",
" actors.wife <- wife,\n",
" actors.weight <- weight,\n",
" actors.haschildren <- hasChildren,\n",
" actors.hasGreyHair <- hasGreyHair,\n",
" actors.children <- children\n",
");"
" AS LOAD DATA S3 'studiotutorials/sample_dataset/json_files/wildcard_demo/*.json'\n",
" CONFIG '{ \\\"region\\\": \\\"us-east-1\\\" }'\n",
" /*\n",
" CREDENTIALS '{\"aws_access_key_id\": \"<Key to Enter>\",\n",
" \"aws_secret_access_key\": \"<Key to Enter>\"}'\n",
" */\n",
" BATCH_INTERVAL 2500\n",
" MAX_PARTITIONS_PER_BATCH 1\n",
" DISABLE OUT_OF_ORDER OPTIMIZATION\n",
" DISABLE OFFSETS METADATA GC\n",
" SKIP DUPLICATE KEY ERRORS\n",
" INTO TABLE `actors`\n",
" FORMAT JSON\n",
" (\n",
" actors.name <- name,\n",
" actors.age <- age,\n",
" actors.born_at <- `Born At`,\n",
" actors.Birthdate <- Birthdate,\n",
" actors.photo <- photo,\n",
" actors.wife <- wife,\n",
" actors.weight <- weight,\n",
" actors.haschildren <- hasChildren,\n",
" actors.hasGreyHair <- hasGreyHair,\n",
" actors.children <- children\n",
" );"
]
},
{
Expand All @@ -192,13 +215,13 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"id": "eeddd12e-e28c-4000-859b-6d1291c4a137",
"metadata": {},
"outputs": [],
"source": [
"%%sql\n",
"Start pipeline demo_database.actors;"
"START PIPELINE demo_database.actors;"
]
},
{
Expand All @@ -211,14 +234,14 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"id": "f48de155-af85-4c40-ad56-955573a434f8",
"metadata": {},
"outputs": [],
"source": [
"%%sql\n",
"select * from information_schema.pipelines_errors\n",
"where pipeline_name = 'actors' ;"
"SELECT * FROM information_schema.pipelines_errors\n",
" WHERE pipeline_name = 'actors' ;"
]
},
{
Expand All @@ -231,13 +254,13 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"id": "09a739cb-4925-4699-ab61-71016a04bfb6",
"metadata": {},
"outputs": [],
"source": [
"%%sql\n",
"select * from demo_database.actors;"
"SELECT * FROM demo_database.actors;"
]
},
{
Expand All @@ -250,14 +273,14 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"id": "6a6dfc1d-c758-4287-a797-6cc3e4fff934",
"metadata": {},
"outputs": [],
"source": [
"%%sql\n",
"Drop pipeline if exists demo_database.actors;\n",
"Drop table if exists demo_database.actors;"
"DROP PIPELINE IF EXISTS demo_database.actors;\n",
"DROP TABLE IF EXISTS demo_database.actors;"
]
},
{
Expand Down Expand Up @@ -286,17 +309,15 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"id": "bcb14814-7b79-4df2-ab47-7def7ae03ce3",
"metadata": {},
"outputs": [],
"source": [
"%%sql\n",
"Create database if not exists demo_database;\n",
"Use demo_database;\n",
"CREATE TABLE if not exists demo_database.actors_json (\n",
"json_data JSON NOT NULL ,\n",
"SHARD KEY ()\n",
"CREATE TABLE IF NOT EXISTS demo_database.actors_json (\n",
" json_data JSON NOT NULL,\n",
" SHARD KEY ()\n",
");"
]
},
Expand All @@ -310,27 +331,27 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"id": "a1d60130-095e-45da-b55d-b427a0af3d26",
"metadata": {},
"outputs": [],
"source": [
"%%sql\n",
"CREATE PIPELINE if not exists demo_database.actors_json\n",
"AS LOAD DATA S3 'studiotutorials/sample_dataset/json_files/wildcard_demo/*.json'\n",
"CONFIG '{ \\\"region\\\": \\\"us-east-1\\\" }'\n",
"/*\n",
"CREDENTIALS '{\"aws_access_key_id\": \"<Key to Enter>\",\n",
" \"aws_secret_access_key\": \"<Key to Enter>\"}'\n",
"*/\n",
"BATCH_INTERVAL 2500\n",
"MAX_PARTITIONS_PER_BATCH 1\n",
"DISABLE OUT_OF_ORDER OPTIMIZATION\n",
"DISABLE OFFSETS METADATA GC\n",
"SKIP DUPLICATE KEY ERRORS\n",
"INTO TABLE `actors_json`\n",
"FORMAT JSON\n",
"(json_data <- %);"
"CREATE PIPELINE IF NOT EXISTS demo_database.actors_json\n",
" AS LOAD DATA S3 'studiotutorials/sample_dataset/json_files/wildcard_demo/*.json'\n",
" CONFIG '{ \\\"region\\\": \\\"us-east-1\\\" }'\n",
" /*\n",
" CREDENTIALS '{\"aws_access_key_id\": \"<Key to Enter>\",\n",
" \"aws_secret_access_key\": \"<Key to Enter>\"}'\n",
" */\n",
" BATCH_INTERVAL 2500\n",
" MAX_PARTITIONS_PER_BATCH 1\n",
" DISABLE OUT_OF_ORDER OPTIMIZATION\n",
" DISABLE OFFSETS METADATA GC\n",
" SKIP DUPLICATE KEY ERRORS\n",
" INTO TABLE `actors_json`\n",
" FORMAT JSON\n",
" (json_data <- %);"
]
},
{
Expand All @@ -343,26 +364,26 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"id": "b374598a-f9cb-43c4-a2a4-ebcd298108c4",
"metadata": {},
"outputs": [],
"source": [
"%%sql\n",
"Start pipeline demo_database.actors_json;"
"START PIPELINE demo_database.actors_json;"
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"id": "ca06781b-61fa-4fea-97de-cd0dbacd86e8",
"metadata": {},
"outputs": [],
"source": [
"%%sql\n",
"# Check whether the pipeline has logged any errors or warnings\n",
"select * from information_schema.pipelines_errors\n",
"where pipeline_name = 'actors_json' ;"
"SELECT * FROM information_schema.pipelines_errors\n",
" WHERE pipeline_name = 'actors_json' ;"
]
},
{
Expand All @@ -375,13 +396,13 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 12,
"id": "e34c5b49-0e97-4b07-9026-38bb6c370f73",
"metadata": {},
"outputs": [],
"source": [
"%%sql\n",
"select * from demo_database.actors_json"
"SELECT * FROM demo_database.actors_json"
]
},
{
Expand All @@ -394,15 +415,13 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 13,
"id": "6f0bd356-8a11-4cd9-b774-569d8f5e2520",
"metadata": {},
"outputs": [],
"source": [
"%%sql\n",
"\n",
"Drop pipeline if exists demo_database.actors_json;\n",
"Drop table if exists demo_database.actors_json;"
"DROP DATABASE IF EXISTS demo_database;"
]
},
{
Expand Down

0 comments on commit 0b8cbb0

Please sign in to comment.