Skip to content

Commit

Permalink
feat: Custom schema for SQL query visualization (#51)
Browse files Browse the repository at this point in the history
* feat: duckdb to use connection pool

* feat: fastapi to use middleware to only allow specific hosts

* fix: upload view will no longer support SQL

* feat: schema modal to have accordian components for adding new schemas

* feat: schema to use a JSON format and query should be generated in backend

* feat: add JWT, Custom TTLCache and table manipulation for custom schema

* fix: cachetools in requirements file for FastAPI application

* feat: sort imports using isort

* fix: api code formatted using black

* fix: function names as per pep8

* fix: minor changes and fixes in the api on names

* feat: use a variable for specifying pool size

* fix: remove trailing comma from Error throw in planToNode function

* feat: use a separate function for setting token

* feat: use append_pool method to add new connection instances to duckdb connection pool

* fix: Merge conflicts and add docstrings

* fix: npm package files

* fix: api routing

* fix: api testing functions

* feat: add binary file to test validation

* fix: vue unit testing

* fix: cypress e2e testing

* fix: cypress testing port

* feat: cypress tests for custom schema

* feat: add python tests for custom schema

* fix: indentation in python docstrings

* feat: use env variables and secrets

* fix: python api test github ci

* fix: github ci workflows

* fix: explicit cast duckdb pool size

* fix: use dotenv to access env variables in JS files

* fix: dotenv in package.json

* fix: vue no longer supports process

* fix: correct secret name in workflows

* fix: python lint
  • Loading branch information
sanjibansg authored Nov 8, 2023
1 parent e29b16c commit bfdda28
Show file tree
Hide file tree
Showing 36 changed files with 2,862 additions and 8,233 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/deploy-api.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ on:
paths:
- 'api/**'

env:
VITE_SESSION_SECRET: ${{ secrets.VITE_SESSION_SECRET }}
DUCKDB_POOL_SIZE: 5

jobs:
deploy:
runs-on: ubuntu-latest
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/deploy-client.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ on:
paths:
- 'client/**'

env:
VITE_SESSION_SECRET: ${{ secrets.VITE_SESSION_SECRET }}
DUCKDB_POOL_SIZE: 5

jobs:
build:
runs-on: ubuntu-latest
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/test_api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ name: Test

on: [push]

env:
VITE_SESSION_SECRET: ${{ secrets.VITE_SESSION_SECRET }}
DUCKDB_POOL_SIZE: 5

jobs:
Pytest:
runs-on: ubuntu-latest
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/test_client_e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ name: Cypress Test

on: [push]

env:
VITE_SESSION_SECRET: ${{ secrets.VITE_SESSION_SECRET }}
DUCKDB_POOL_SIZE: 5

jobs:
electron-run:
runs-on: ubuntu-latest
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/test_client_unit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ name: Vitest

on: [push]

env:
VITE_SESSION_SECRET: ${{ secrets.VITE_SESSION_SECRET }}
DUCKDB_POOL_SIZE: 5

jobs:
unit-test:
runs-on: ubuntu-latest
Expand Down
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,7 @@ dist-ssr
# substrait-js
substrait-js
*~

# databases
*.db
*.wal
3 changes: 3 additions & 0 deletions api/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# db files
*.db
*.wal
257 changes: 210 additions & 47 deletions api/app.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,87 @@
from fastapi import FastAPI, HTTPException, status, File, UploadFile, Form, Depends, Request
import json

import substrait_validator as sv
from duckdb import DuckDBPyConnection
from fastapi import (Depends, FastAPI, File, Form, HTTPException, UploadFile,
status)
from fastapi.middleware.trustedhost import TrustedHostMiddleware
from fastapi.openapi.utils import get_openapi
from fastapi.routing import APIRouter
from fastapi_health import health

from loguru import logger
from motor.motor_asyncio import AsyncIOMotorCollection

import substrait_validator as sv

from backend.duckdb import (
ConnectDuckDB,
CheckDuckDBConnection,
ExecuteDuckDb,
ParseFromDuckDB,
)

from shareable import MongoDBConnection, PlanData

from loguru import logger
from components.auth import verify_token
from components.duckdb import (DuckDBConnection, check_duckdb_connection,
delete_table_from_duckDB, execute_duckdb,
parse_from_duckDB)
from components.shareable import MongoDBConnection, PlanData
from components.ttl_cache import TTL_Cache

router = APIRouter()
duckConn = None


############################################################
# Functions to request db objects from connection pools
############################################################
async def get_mongo_conn():
return app.state.mongo_pool.initialize()

async def get_duck_conn():
conn = app.state.duck_pool.get_connection()
try:
yield conn
finally:
conn.close()



############################################################
# Initialization function to be called during startup
############################################################
@router.on_event("startup")
def Initialize():
global duckConn
duckConn = ConnectDuckDB()
async def initialize():
app.state.mongo_pool = MongoDBConnection()
app.state.duck_pool = DuckDBConnection()
app.state.schema_cache = TTL_Cache(
maxsize=100,
ttl=3600,
on_expire=lambda key, _: delete_table_from_duckDB(key,
get_duck_conn()),
)



@router.get("/health/duckcb/")
def CheckBackendConn(conn):
CheckDuckDBConnection(conn)
############################################################
# API Endpoint to check health of backend components
############################################################
def check_backend_conn(conn):
check_duckdb_connection(conn)
app.state.mongo_pool.check()

router.add_api_route("/health", health([CheckBackendConn]))
router.add_api_route("/health", health([check_backend_conn]))


@router.post("/validate/")
async def Validate(plan: dict, override_levels: list[int]):

############################################################
# API Endpoint to validate a substrait plan
############################################################
@router.post("/validate/", status_code=status.HTTP_200_OK)
async def validate(plan: dict, override_levels: list[int]):
'''
Validates a plan using the substrait-validator
with overriding rules as specified and returns
errors accordingly.
Parameters:
plan (dict): Substrait JSON plan
override_levels (list[int]): List of validation
override levels
Returns:
Success message upon validation otherwise Exception
with failure message.
'''
try:
logger.info("Validating plan using substrait-validator!")
config = sv.Config()
Expand All @@ -50,14 +91,35 @@ async def Validate(plan: dict, override_levels: list[int]):
logger.info("Plan validated successfully!")
except Exception as e:
raise HTTPException(
status_code=500, detail="Substrait Validator Internal Error: " + str(e)
status_code=500, detail="Substrait Validator Internal Error: "
+ str(e)
)



############################################################
# API Endpoint to validate file containing a substrait plan
############################################################
@router.post("/validate/file/", status_code=status.HTTP_200_OK)
async def validate_file(file: UploadFile = File(),
override_levels: list = Form()):
'''
Validates a file using the substrait-validator
with overriding rules as specified and returns
errors accordingly.
@router.post("/validate/file/")
async def ValidateFile(file: UploadFile = File(), override_levels: list[int] = Form()):
Parameters:
file (File): File object to read and validate
override_levels (list[int]): List of validation
override levels
Returns:
Success message upon validation otherwise Exception
with failure message.
'''
try:
logger.info("Validating file using substrait-validator!")
override_levels = [int(level) for level in override_levels[0].split(',')]
config = sv.Config()
for level in override_levels:
config.override_diagnostic_level(level, "warning", "info")
Expand All @@ -66,32 +128,52 @@ async def ValidateFile(file: UploadFile = File(), override_levels: list[int] = F
logger.info("File validated successfully!")
except Exception as e:
raise HTTPException(
status_code=500, detail="Substrait Validator Internal Error: " + str(e)
status_code=500, detail="Substrait Validator Internal Error: "
+ str(e)
)


@router.post("/execute/duckdb/")
async def ExecuteBackend(data: list[str]):
global duckConn
return ExecuteDuckDb(data, duckConn)


@router.post("/parse/")
async def ParseToSubstrait(data: dict):
global duckConn
return ParseFromDuckDB(data, duckConn)


@app.post("/save/")
async def SavePlan(
############################################################
# API Endpoint to save a substrait plan
############################################################
@router.post("/save/")
async def save_plan(
data: PlanData, db_conn: AsyncIOMotorCollection = Depends(get_mongo_conn)
):
'''
Saves a Substrait plan to MongoDB and returns a unique ID
that can be used to fetch it later.
Parameters:
data (PlanData): Substrait Plan
db_conn: MongoDB connection object
Returns:
Unique ID of the inserted plan
'''
response = await app.state.mongo_pool.add_record(db_conn, data)
return response


@app.post("/fetch/")
async def FetchPlan(id: str, db_conn: AsyncIOMotorCollection = Depends(get_mongo_conn)):

############################################################
# API Endpoint to fetch a saved substrait plan
############################################################
@router.post("/fetch/")
async def fetch_plan(id: str,
db_conn: AsyncIOMotorCollection = Depends(get_mongo_conn)):
'''
Fetches a saved Substrait plan from MongoDB
Parameters:
id (str): Plan ID
db_conn: MongoDB connection object
Returns:
Substrait JSON plans if present otherwise
HTTPException is raised
'''
response = await app.state.mongo_pool.get_record(db_conn, id)
if response is None:
raise HTTPException(status_code=404, detail="Plan not found")
Expand All @@ -101,8 +183,84 @@ async def FetchPlan(id: str, db_conn: AsyncIOMotorCollection = Depends(get_mongo
}


# For defining custom documentation for the server
def SubstraitFiddleOpenAPI():

############################################################
# API Endpoint to add schema in DuckDB instance
############################################################
@router.post("/add_schema/")
def add_schema(
data: dict,
headers: dict = Depends(verify_token),
db_conn: DuckDBPyConnection = Depends(get_duck_conn),
):
'''
Creates a table in DuckDB to allow SQL queries
on them for further parsing. API requires
authentication to internally store tables
with specified user_id. API accepts the schema
in a format specified in the front-end and
builds the SQL CREATE statement before executing it.
Parameters:
data (dict): API request containing the schema
headers (dict): API headers for authentication
db_conn: DuckDB connection object
Returns:
Success/Error response
'''
user_id = headers["user_id"]
schema = data["schema"]
json_data = json.loads(schema)
table_name = json_data["table"] + "_" + user_id

query = "CREATE TABLE "
query += table_name + "("
for field in json_data["fields"]:
query += field["name"] + " "
query += field["type"] + " "
for props in field["properties"]:
query += props + " "
query += ", "
query = query[:-2]
query += ");"
response = execute_duckdb(query, db_conn)
app.state.schema_cache[table_name] = None
return response



############################################################
# API Endpoint to parse SQL queries to Substrait JSON plans
############################################################
@router.post("/parse/", status_code=status.HTTP_200_OK)
def parse_to_substrait(
data: dict,
headers: dict = Depends(verify_token),
db_conn: DuckDBPyConnection = Depends(get_duck_conn),
):
'''
Parses a SQL query to Substrait JSON plans via
DuckDB.
Parameters:
data (dict): API request containing the
SQL query
db_conn: DuckDB connection object
Returns:
response (dict): Response JSON for translated
Substrait plan
'''
response = parse_from_duckDB(data.get("query"), db_conn)
return response



############################################################
# API Declaration
############################################################
def substrait_fiddle_openapi():
if app.openapi_schema:
return app.openapi_schema
openapi_schema = get_openapi(
Expand All @@ -114,9 +272,14 @@ def SubstraitFiddleOpenAPI():
app.openapi_schema = openapi_schema
return app.openapi_schema


app = FastAPI()
app.include_router(router, prefix="/api")
app.openapi = SubstraitFiddleOpenAPI
app.include_router(router, prefix="/route")
app.openapi = substrait_fiddle_openapi
app.add_middleware(
TrustedHostMiddleware,
allowed_hosts=["substrait-fiddle.com", "*.substrait-fiddle.com", "127.0.0.1"],
)

@app.get("/")
def global_ping():
Expand Down
Loading

0 comments on commit bfdda28

Please sign in to comment.