Skip to content

Commit

Permalink
Add tags to ingest process
Browse files Browse the repository at this point in the history
  • Loading branch information
CannonLock committed Apr 10, 2024
1 parent 7b9b9af commit 591470a
Show file tree
Hide file tree
Showing 4 changed files with 176 additions and 111 deletions.
197 changes: 113 additions & 84 deletions api/query_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
import urllib.parse
from dataclasses import dataclass
from functools import lru_cache
from typing import Union

from multidict import MultiDict
import starlette.requests
import logging
from fastapi import FastAPI, HTTPException, Request
Expand Down Expand Up @@ -52,10 +54,88 @@ def cast_to_column_type(column: Column, value):

@dataclass
class QueryParameter:
column: Column
column: Union[Column, str]
operators: list[str]
value: str

def __str__(self):
return f"{self.column} {self.operators} {self.value}"

def is_mapped_to_column(self) -> bool:
return isinstance(self.column, Column)

def get_operator_expression(self):
return self._get_operator_expression(self.column, self.operators, self.value)

@staticmethod
def _get_operator_expression(column: Column, operators, value: str):

if len(operators) == 0:
raise ParserException(f"Query parameters invalid")

match operators[0]:
case "not":
return not_(
QueryParameter._get_operator_expression(column, operators[1:], value)
)

case "eq":
value = cast_to_column_type(column, value)
return column.__eq__(value)

case "lt":
value = cast_to_column_type(column, value)
return column.__lt__(value)

case "le":
value = cast_to_column_type(column, value)
return column.__le__(value)

case "gt":
value = cast_to_column_type(column, value)
return column.__gt__(value)

case "ge":
value = cast_to_column_type(column, value)
return column.__ge__(value)

case "ne":
value = cast_to_column_type(column, value)
return column.__ne__(value)

case "like":
if value[0] != "%" or value[-1] != "%":
value = f"%{value}%"

value = cast_to_column_type(column, value)
return column.like(value)

case "in":
if value[0] != "(" or value[-1] != ")":
raise ParserException(
f"Query param value for in must be in form (x,y,z)"
)

values = value[1:-1].split(",")
clean_values = map(lambda x: cast_to_column_type(column, x), values)

return column.in_(clean_values)

case "is":
if value.lower() == "false":
return column.is_(False)
elif value.lower() == "true":
return column.is_(True)
elif value.lower() == "null":
return column.is_(None)
else:
raise ParserException(
f"Query params outside valid set: {operators}"
)

case "_":
raise ParserException(f"Query params outside valid set: {operators}")


class QueryParser:
"""Used to parse the query parameters from the request"""
Expand All @@ -70,19 +150,21 @@ def __init__(self, columns: list[Column], query_params: list[dict] | None):

self.columns = {c.name: c for c in columns}
self.query_params = query_params
self.decomposed_query_params = self._decompose_query_params()

def where_expressions(self):
"""Returns the where expressions for the query"""

where_expressions = []

for query_param in self.decomposed_query_params:
for query_param in self.decomposed_query_params.values():

# If the column is not mapped to a column, then skip
if not query_param.is_mapped_to_column():
continue

if query_param.operators[0] not in ["group_by", "order_by"]:
where_expressions.append(
self._get_operator_expression(
query_param.column, query_param.operators, query_param.value
)
query_param.get_operator_expression()
)

if len(where_expressions) == 1:
Expand All @@ -96,7 +178,12 @@ def get_group_by_column(self):
"""Returns the group by expressions for the query"""

group_by_columns = []
for query_param in self.decomposed_query_params:
for query_param in self.decomposed_query_params.values():

# If the column is not mapped to a column, then skip
if not query_param.is_mapped_to_column():
continue

if query_param.operators[0] == "group_by":
group_by_columns.append(query_param.column)

Expand Down Expand Up @@ -132,26 +219,36 @@ def get_order_by_columns(self):

order_by_columns = []

for query_param in self.decomposed_query_params:
for query_param in self.decomposed_query_params.values():

# If the column is not mapped to a column, then skip
if not query_param.is_mapped_to_column():
continue

if query_param.operators[0] == "order_by":
order_by_columns.append(query_param.column)

return order_by_columns

def _decompose_query_params(self) -> list[QueryParameter]:
decomposed_query_params = []
@property
def decomposed_query_params(self):
    """Return the decomposed query parameters, computing them on first access.

    The result of ``_decompose_query_params()`` is stored on the instance and
    reused on later accesses.
    """
    # Cache on the instance instead of using lru_cache on the method:
    # lru_cache keys on ``self`` and holds a reference to every parser it has
    # seen in a module-level cache.
    cached = getattr(self, "_decomposed_query_params_cache", None)
    if cached is None:
        cached = self._decompose_query_params()
        self._decomposed_query_params_cache = cached
    return cached

def _decompose_query_params(self) -> MultiDict[QueryParameter]:
decomposed_query_params = MultiDict()

for column_name, encoded_expression in self.query_params:
operators, value = self._decompose_encoded_expression(encoded_expression)
value = urllib.parse.unquote(value)

col = self.columns.get(column_name, None)
if col is None:
# We should eventually make this an error
log.warning(f"Column ({column_name}) not found in table")
continue
col = self.columns.get(column_name, column_name)

decomposed_query_params.append(
if col == column_name:
log.warning(f"Column ({column_name}) not found in table, potential error")

decomposed_query_params.add(
column_name,
QueryParameter(column=col, operators=operators, value=value)
)

Expand Down Expand Up @@ -186,71 +283,3 @@ def _decompose_encoded_expression(self, encoded_expression) -> tuple:
return encoded_expression_split[:1], ".".join(
encoded_expression_split[1:]
)

@staticmethod
def _get_operator_expression(column: Column, operators, value: str):
if len(operators) == 0:
raise ParserException(f"Query parameters invalid")

match operators[0]:
case "not":
return not_(
QueryParser._get_operator_expression(column, operators[1:], value)
)

case "eq":
value = cast_to_column_type(column, value)
return column.__eq__(value)

case "lt":
value = cast_to_column_type(column, value)
return column.__lt__(value)

case "le":
value = cast_to_column_type(column, value)
return column.__le__(value)

case "gt":
value = cast_to_column_type(column, value)
return column.__gt__(value)

case "ge":
value = cast_to_column_type(column, value)
return column.__ge__(value)

case "ne":
value = cast_to_column_type(column, value)
return column.__ne__(value)

case "like":
if value[0] != "%" or value[-1] != "%":
value = f"%{value}%"

value = cast_to_column_type(column, value)
return column.like(value)

case "in":
if value[0] != "(" or value[-1] != ")":
raise ParserException(
f"Query param value for in must be in form (x,y,z)"
)

values = value[1:-1].split(",")
clean_values = map(lambda x: cast_to_column_type(column, x), values)

return column.in_(clean_values)

case "is":
if value.lower() == "false":
return column.is_(False)
elif value.lower() == "true":
return column.is_(True)
elif value.lower() == "null":
return column.is_(None)
else:
raise ParserException(
f"Query params outside valid set: {operators}"
)

case "_":
raise ParserException(f"Query params outside valid set: {operators}")
Loading

0 comments on commit 591470a

Please sign in to comment.