diff --git a/notebooks/load-json-files-s3/notebook.ipynb b/notebooks/load-json-files-s3/notebook.ipynb index 9f0ad7f..4a548ac 100644 --- a/notebooks/load-json-files-s3/notebook.ipynb +++ b/notebooks/load-json-files-s3/notebook.ipynb @@ -112,20 +112,43 @@ "outputs": [], "source": [ "%%sql\n", - "Create database if not exists demo_database;\n", - "Use demo_database;\n", - "CREATE TABLE if not exists demo_database.actors (\n", - "name text CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,\n", - "age int NOT NULL,\n", - "born_at text CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,\n", - "Birthdate text CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,\n", - "photo text CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,\n", - "wife text CHARACTER SET utf8 COLLATE utf8_general_ci,\n", - "weight float NOT NULL,\n", - "haschildren boolean,\n", - "hasGreyHair boolean,\n", - "children JSON COLLATE utf8_bin NOT NULL,\n", - "SHARD KEY ()\n", + "CREATE DATABASE IF NOT EXISTS demo_database;" + ] + }, + { + "cell_type": "markdown", + "id": "6dfc5b0b-9308-46c9-8cc8-be08fb07c1b6", + "metadata": {}, + "source": [ + "
\n", + " \n", + "
\n", + "

Action Required

\n", + "

Make sure to select the demo_database database from the drop-down menu at the top of this notebook. Selecting it updates the connection_url so that the notebook connects to that database.

\n", + "
\n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "b09528cf-0beb-4fe0-9e60-6edefb72f8b1", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "CREATE TABLE IF NOT EXISTS demo_database.actors (\n", + " name text CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,\n", + " age int NOT NULL,\n", + " born_at text CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,\n", + " Birthdate text CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,\n", + " photo text CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,\n", + " wife text CHARACTER SET utf8 COLLATE utf8_general_ci,\n", + " weight float NOT NULL,\n", + " haschildren boolean,\n", + " hasGreyHair boolean,\n", + " children JSON COLLATE utf8_bin NOT NULL,\n", + " SHARD KEY ()\n", ");" ] }, @@ -148,38 +171,38 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "92df7943-e68d-4509-b7f5-4a93697f6578", "metadata": {}, "outputs": [], "source": [ "%%sql\n", "CREATE PIPELINE if not exists demo_database.actors\n", - "AS LOAD DATA S3 'studiotutorials/sample_dataset/json_files/wildcard_demo/*.json'\n", - "CONFIG '{ \\\"region\\\": \\\"us-east-1\\\" }'\n", - "/*\n", - "CREDENTIALS '{\"aws_access_key_id\": \"\",\n", - " \"aws_secret_access_key\": \"\"}'\n", - "*/\n", - "BATCH_INTERVAL 2500\n", - "MAX_PARTITIONS_PER_BATCH 1\n", - "DISABLE OUT_OF_ORDER OPTIMIZATION\n", - "DISABLE OFFSETS METADATA GC\n", - "SKIP DUPLICATE KEY ERRORS\n", - "INTO TABLE `actors`\n", - "FORMAT JSON\n", - "(\n", - " actors.name <- name,\n", - " actors.age <- age,\n", - " actors.born_at <- `Born At`,\n", - " actors.Birthdate <- Birthdate,\n", - " actors.photo <- photo,\n", - " actors.wife <- wife,\n", - " actors.weight <- weight,\n", - " actors.haschildren <- hasChildren,\n", - " actors.hasGreyHair <- hasGreyHair,\n", - " actors.children <- children\n", - ");" + " AS LOAD DATA S3 'studiotutorials/sample_dataset/json_files/wildcard_demo/*.json'\n", + " CONFIG '{ \\\"region\\\": 
\\\"us-east-1\\\" }'\n", + " /*\n", + " CREDENTIALS '{\"aws_access_key_id\": \"\",\n", + " \"aws_secret_access_key\": \"\"}'\n", + " */\n", + " BATCH_INTERVAL 2500\n", + " MAX_PARTITIONS_PER_BATCH 1\n", + " DISABLE OUT_OF_ORDER OPTIMIZATION\n", + " DISABLE OFFSETS METADATA GC\n", + " SKIP DUPLICATE KEY ERRORS\n", + " INTO TABLE `actors`\n", + " FORMAT JSON\n", + " (\n", + " actors.name <- name,\n", + " actors.age <- age,\n", + " actors.born_at <- `Born At`,\n", + " actors.Birthdate <- Birthdate,\n", + " actors.photo <- photo,\n", + " actors.wife <- wife,\n", + " actors.weight <- weight,\n", + " actors.haschildren <- hasChildren,\n", + " actors.hasGreyHair <- hasGreyHair,\n", + " actors.children <- children\n", + " );" ] }, { @@ -192,13 +215,13 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "eeddd12e-e28c-4000-859b-6d1291c4a137", "metadata": {}, "outputs": [], "source": [ "%%sql\n", - "Start pipeline demo_database.actors;" + "START PIPELINE demo_database.actors;" ] }, { @@ -211,14 +234,14 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "f48de155-af85-4c40-ad56-955573a434f8", "metadata": {}, "outputs": [], "source": [ "%%sql\n", - "select * from information_schema.pipelines_errors\n", - "where pipeline_name = 'actors' ;" + "SELECT * FROM information_schema.pipelines_errors\n", + " WHERE pipeline_name = 'actors' ;" ] }, { @@ -231,13 +254,13 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "09a739cb-4925-4699-ab61-71016a04bfb6", "metadata": {}, "outputs": [], "source": [ "%%sql\n", - "select * from demo_database.actors;" + "SELECT * FROM demo_database.actors;" ] }, { @@ -250,14 +273,14 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "6a6dfc1d-c758-4287-a797-6cc3e4fff934", "metadata": {}, "outputs": [], "source": [ "%%sql\n", - "Drop pipeline if exists demo_database.actors;\n", - "Drop table if exists demo_database.actors;" + "DROP 
PIPELINE IF EXISTS demo_database.actors;\n", + "DROP TABLE IF EXISTS demo_database.actors;" ] }, { @@ -286,17 +309,15 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "bcb14814-7b79-4df2-ab47-7def7ae03ce3", "metadata": {}, "outputs": [], "source": [ "%%sql\n", - "Create database if not exists demo_database;\n", - "Use demo_database;\n", - "CREATE TABLE if not exists demo_database.actors_json (\n", - "json_data JSON NOT NULL ,\n", - "SHARD KEY ()\n", + "CREATE TABLE IF NOT EXISTS demo_database.actors_json (\n", + " json_data JSON NOT NULL,\n", + " SHARD KEY ()\n", ");" ] }, @@ -310,27 +331,27 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "id": "a1d60130-095e-45da-b55d-b427a0af3d26", "metadata": {}, "outputs": [], "source": [ "%%sql\n", - "CREATE PIPELINE if not exists demo_database.actors_json\n", - "AS LOAD DATA S3 'studiotutorials/sample_dataset/json_files/wildcard_demo/*.json'\n", - "CONFIG '{ \\\"region\\\": \\\"us-east-1\\\" }'\n", - "/*\n", - "CREDENTIALS '{\"aws_access_key_id\": \"\",\n", - " \"aws_secret_access_key\": \"\"}'\n", - "*/\n", - "BATCH_INTERVAL 2500\n", - "MAX_PARTITIONS_PER_BATCH 1\n", - "DISABLE OUT_OF_ORDER OPTIMIZATION\n", - "DISABLE OFFSETS METADATA GC\n", - "SKIP DUPLICATE KEY ERRORS\n", - "INTO TABLE `actors_json`\n", - "FORMAT JSON\n", - "(json_data <- %);" + "CREATE PIPELINE IF NOT EXISTS demo_database.actors_json\n", + " AS LOAD DATA S3 'studiotutorials/sample_dataset/json_files/wildcard_demo/*.json'\n", + " CONFIG '{ \\\"region\\\": \\\"us-east-1\\\" }'\n", + " /*\n", + " CREDENTIALS '{\"aws_access_key_id\": \"\",\n", + " \"aws_secret_access_key\": \"\"}'\n", + " */\n", + " BATCH_INTERVAL 2500\n", + " MAX_PARTITIONS_PER_BATCH 1\n", + " DISABLE OUT_OF_ORDER OPTIMIZATION\n", + " DISABLE OFFSETS METADATA GC\n", + " SKIP DUPLICATE KEY ERRORS\n", + " INTO TABLE `actors_json`\n", + " FORMAT JSON\n", + " (json_data <- %);" ] }, { @@ -343,26 +364,26 @@ }, { "cell_type": "code", - 
"execution_count": 9, + "execution_count": 10, "id": "b374598a-f9cb-43c4-a2a4-ebcd298108c4", "metadata": {}, "outputs": [], "source": [ "%%sql\n", - "Start pipeline demo_database.actors_json;" + "START PIPELINE demo_database.actors_json;" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "ca06781b-61fa-4fea-97de-cd0dbacd86e8", "metadata": {}, "outputs": [], "source": [ "%%sql\n", "# Monitor and see if there is any error or warning\n", - "select * from information_schema.pipelines_errors\n", - "where pipeline_name = 'actors_json' ;" + "SELECT * FROM information_schema.pipelines_errors\n", + " WHERE pipeline_name = 'actors_json' ;" ] }, { @@ -375,13 +396,13 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "e34c5b49-0e97-4b07-9026-38bb6c370f73", "metadata": {}, "outputs": [], "source": [ "%%sql\n", - "select * from demo_database.actors_json" + "SELECT * FROM demo_database.actors_json" ] }, { @@ -394,15 +415,13 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "id": "6f0bd356-8a11-4cd9-b774-569d8f5e2520", "metadata": {}, "outputs": [], "source": [ "%%sql\n", - "\n", - "Drop pipeline if exists demo_database.actors_json;\n", - "Drop table if exists demo_database.actors_json;" + "DROP DATABASE IF EXISTS demo_database;" ] }, {