Skip to content

Commit

Permalink
Merge pull request #250 from DocShow-AI/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
liberty-rising authored Jan 29, 2024
2 parents 53cd457 + ceb04a1 commit 5fe5cf0
Show file tree
Hide file tree
Showing 18 changed files with 365 additions and 25 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-quality.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
python-version: '3.8'

- name: Install dependencies
run: pip install black flake8 mypy isort
run: pip install black==23.11.0 flake8 mypy isort

- name: Check Python code formatting with Black
run: black --check --exclude backend/alembic/versions backend/
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""replace table_id with table_name in data profile model
Revision ID: f8ca6f4bf570
Revises: 94af7f09e896
Create Date: 2024-01-29 20:26:47.028083
"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "f8ca6f4bf570"
down_revision = "94af7f09e896"
branch_labels = None
depends_on = None


def upgrade():
    """Rename ``data_profiles.table_id`` to ``table_name`` and make it a string."""
    # One alter_column call carries both the rename and the type change.
    op.alter_column(
        table_name="data_profiles",
        column_name="table_id",
        existing_nullable=True,
        type_=sa.String(),
        new_column_name="table_name",
    )


def downgrade():
    """Reverse ``upgrade``: rename ``table_name`` back to an integer ``table_id``.

    PostgreSQL has no implicit or assignment cast from a string column to
    integer, so the type change needs an explicit ``USING`` expression;
    without it the ALTER fails. Values that are not plain digit strings
    (for example real table names written after the upgrade) cannot be
    represented as integers and are downgraded to NULL rather than aborting
    the migration.
    """
    op.alter_column(
        "data_profiles",
        "table_name",
        new_column_name="table_id",
        type_=sa.Integer(),
        existing_type=sa.String(),
        existing_nullable=True,
        # The USING expression names the column as ``table_name`` because the
        # type change is expected to run before the rename.
        # NOTE(review): assumes a PostgreSQL backend; confirm the emitted
        # statement order with ``alembic downgrade --sql``.
        # The {1,9} bound keeps every matched value within 32-bit integer range.
        postgresql_using=(
            "CASE WHEN table_name ~ '^[0-9]{1,9}$' "
            "THEN table_name::integer ELSE NULL END"
        ),
    )
19 changes: 19 additions & 0 deletions backend/llms/gpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,21 @@ async def generate_chart_config(

return parsed_config

async def generate_suggested_column_types(self, column_names: list, data: dict):
    """Ask the model for a suggested data type for each column.

    Selects the "column_type_suggestion" system message, requests a JSON
    response, sends the prompt built from ``column_names`` and ``data``, and
    returns the reply decoded with ``json.loads``.
    """
    # NOTE(review): ``data`` is annotated as dict, but the route shown in this
    # change passes the request's list of rows -- confirm the intended type.
    self._add_system_message(assistant_type="column_type_suggestion")
    self._set_response_format(is_json=True)

    message = self.prompt_manager.create_column_type_suggestion_prompt(
        column_names, data
    )
    reply = await self._send_and_receive_message(message)
    return json.loads(reply)

def fetch_table_name_from_sample(
self, sample_content: str, extra_desc: str, table_metadata: str
):
Expand Down Expand Up @@ -418,5 +433,9 @@ async def extract_data_from_jpgs(
"\n```", ""
)
data = json.loads(json_string)

# If data is a dictionary, wrap it in a list
if isinstance(data, dict):
data = [data]
print(data)
return data
15 changes: 15 additions & 0 deletions backend/llms/prompt_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,3 +122,18 @@ def jpg_data_extraction_prompt(self, instructions: str):
Return only the requested information, no additional text or formatting.
"""
return prompt

def create_column_type_suggestion_prompt(self, column_names, data):
    """Build the prompt asking for a data type suggestion per column.

    The prompt lists the allowed types, then the column names and the data
    (both rendered with their default string formatting), and asks for a
    JSON object mapping column names to suggested types.
    """
    # The leading and trailing empty entries reproduce the newline that
    # opens and closes the prompt text.
    prompt_lines = [
        "",
        "Based on the following data, suggest the data types for each column in the table.",
        "The available column types are: text, integer, money, date, boolean",
        "Column names:",
        f"{column_names}",
        "Data:",
        f"{data}",
        "Return a JSON with the column names as keys and the suggested data types as values.",
        "",
    ]
    return "\n".join(prompt_lines)
4 changes: 4 additions & 0 deletions backend/llms/system_message_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ def __init__(self):
"analytics_chat": """
You are an analytics assistant.
You will be generating SQL queries, and providing useful information for reports and analytics based on the given prompt.""",
"column_type_suggestion": """
You are a column type suggestion assistant.
You will be suggesting column data types based on the given prompt.
""",
"sql_code": """
You are a PostgreSQL SQL statement assistant.
Generate PostgreSQL SQL statements based on the given prompt.
Expand Down
2 changes: 2 additions & 0 deletions backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from routes.data_profile_routes import data_profile_router
from routes.file_routes import file_router
from routes.organization_routes import organization_router
from routes.powerbi_routes import powerbi_router
from routes.table_routes import table_router
from routes.user_routes import user_router
from settings import APP_ENV
Expand Down Expand Up @@ -54,6 +55,7 @@ async def shutdown_event():
app.include_router(data_profile_router)
app.include_router(file_router)
app.include_router(organization_router)
app.include_router(powerbi_router)
app.include_router(table_router)
app.include_router(user_router)

Expand Down
8 changes: 6 additions & 2 deletions backend/models/data_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class DataProfile(Base):
file_type = Column(String)
organization_id = Column(Integer, ForeignKey("organizations.id"))
extract_instructions = Column(String)
table_id = Column(Integer)
table_name = Column(String)

__table_args__ = (
UniqueConstraint("name", "organization_id", name="uq_name_organization_id"),
Expand All @@ -40,7 +40,7 @@ def to_dict(self):
"file_type": self.file_type,
"organization_id": self.organization_id,
"extract_instructions": self.extract_instructions,
"table_id": self.table_id,
"table_name": self.table_name,
}


Expand All @@ -52,3 +52,7 @@ class DataProfileCreateRequest(BaseModel):
class DataProfileCreateResponse(BaseModel):
    # Response schema declared as the return type of the save_data_profile route.
    # Carries the profile's name and its extraction instructions.
    name: str
    extract_instructions: str


class SuggestedColumnTypesRequest(BaseModel):
    # Request body for POST /data-profiles/preview/column-types/.
    # The route reads data[0].keys() as the column names, so each entry is
    # presumably a dict of column name -> value; the bare ``list`` annotation
    # does not enforce that -- TODO confirm against the frontend payload.
    data: list
3 changes: 2 additions & 1 deletion backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,5 @@ sendgrid==6.11.0
boto3==1.34.10
pillow==10.1.0
pdf2image==1.16.3
isort==5.13.2
isort==5.13.2
azure-identity==1.15.0
25 changes: 24 additions & 1 deletion backend/routes/data_profile_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@
DataProfile,
DataProfileCreateRequest,
DataProfileCreateResponse,
SuggestedColumnTypesRequest,
)
from models.user import User
from object_storage.digitalocean_space_manager import DigitalOceanSpaceManager
from security import get_current_user
from utils.image_conversion_manager import ImageConversionManager
from utils.object_storage.digitalocean_space_manager import DigitalOceanSpaceManager

data_profile_router = APIRouter()

Expand All @@ -43,6 +44,7 @@ async def get_data_profiles_by_org_id(current_user: User = Depends(get_current_u
async def save_data_profile(
request: DataProfileCreateRequest, current_user: User = Depends(get_current_user)
) -> DataProfileCreateResponse:
"""Save a new data profile to the database"""
with DatabaseManager() as session:
data_profile_manager = DataProfileManager(session)
if data_profile_manager.get_dataprofile_by_name_and_org(
Expand Down Expand Up @@ -77,6 +79,11 @@ async def get_data_profile(
return data_profile


@data_profile_router.get("/data-profiles/column-types/")
async def get_column_types(current_user: User = Depends(get_current_user)):
    """List the available column type names."""
    # Same set of types that the column type suggestion prompt offers.
    available_types = ["text", "integer", "money", "date", "boolean"]
    return available_types


@data_profile_router.post("/data-profiles/preview/")
async def preview_data_profile(
files: List[UploadFile] = File(...),
Expand Down Expand Up @@ -127,6 +134,22 @@ async def preview_data_profile(
return extracted_data


@data_profile_router.post("/data-profiles/preview/column-types/")
async def generate_suggested_column_types(
    request: SuggestedColumnTypesRequest, current_user: User = Depends(get_current_user)
):
    """Suggest a column type for every column of the submitted preview data.

    Column names are taken from the keys of the first row in ``request.data``
    and passed, together with all rows, to the GPT helper.

    Returns the helper's parsed JSON result, or an empty dict when no rows
    were submitted.
    """
    # Without this guard the result variable was never assigned for empty
    # input, and the handler failed with UnboundLocalError.
    if not request.data:
        return {}

    column_names = list(request.data[0].keys())
    gpt = GPTLLM(chat_id=1, user=current_user)
    return await gpt.generate_suggested_column_types(column_names, request.data)


@data_profile_router.post("/data-profiles/{data_profile_name}/preview/")
async def preview_data_profile_upload(
data_profile_name: str,
Expand Down
11 changes: 11 additions & 0 deletions backend/routes/powerbi_routes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from fastapi import APIRouter
from utils.azure.azure_manager import AzureManager

powerbi_router = APIRouter()


@powerbi_router.get("/powerbi/token/")
async def get_powerbi_token():
    """Return a Power BI access token as ``{"token": <token string>}``."""
    # NOTE(review): this handler declares no authentication dependency, unlike
    # the data-profile routes that use Depends(get_current_user) -- confirm
    # that handing out the token to unauthenticated callers is intended.
    return {"token": AzureManager().get_powerbi_token()}
5 changes: 5 additions & 0 deletions backend/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@
APP_ENV = config("APP_ENV")
APP_HOST = config("APP_HOST")

AZURE_CLIENT_ID = config("AZURE_CLIENT_ID")
AZURE_TENANT_ID = config("AZURE_TENANT_ID")
AZURE_APP_VALUE = config("AZURE_APP_VALUE")
AZURE_APP_SECRET = config("AZURE_APP_SECRET")

ACCESS_TOKEN_EXPIRE_MINUTES = config("ACCESS_TOKEN_EXPIRE_MINUTES", default=30)
REFRESH_TOKEN_EXPIRE_DAYS = config("REFRESH_TOKEN_EXPIRE_DAYS", default=1)
REMEMBER_ME_ACCESS_TOKEN_EXPIRE_MINUTES = config(
Expand Down
15 changes: 15 additions & 0 deletions backend/utils/azure/azure_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from azure.identity import ClientSecretCredential
from settings import AZURE_APP_SECRET, AZURE_CLIENT_ID, AZURE_TENANT_ID


class AzureManager:
    """Holds an Azure client-secret credential and a Power BI token obtained from it."""

    def __init__(self):
        # The token is requested once, when the manager is constructed.
        powerbi_scope = "https://analysis.windows.net/powerbi/api/.default"
        self.credential = ClientSecretCredential(
            tenant_id=AZURE_TENANT_ID,
            client_id=AZURE_CLIENT_ID,
            client_secret=AZURE_APP_SECRET,
        )
        self.powerbi_token = self.credential.get_token(powerbi_scope)

    def get_powerbi_token(self):
        """Return the token string of the access token fetched in ``__init__``."""
        return self.powerbi_token.token
File renamed without changes.
26 changes: 26 additions & 0 deletions frontend/src/api/dataProfilesRequests.jsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import axios from "axios";
import { API_URL } from "../utils/constants";

/**
 * POST the sample files and extraction instructions to the preview endpoint.
 *
 * Each file is appended under the "files" field and the instructions under
 * "extract_instructions" of a multipart form body.
 *
 * @param sampleFiles - collection of files; must provide forEach
 * @param extractInstructions - value sent as "extract_instructions"
 * @returns the promise returned by axios.post
 */
export const getPreviewData = (sampleFiles, extractInstructions) => {
  const payload = new FormData();
  sampleFiles.forEach((sampleFile) => payload.append("files", sampleFile));
  payload.append("extract_instructions", extractInstructions);

  const requestConfig = {
    headers: { "Content-Type": "multipart/form-data" },
  };
  return axios.post(`${API_URL}data-profiles/preview/`, payload, requestConfig);
};

/** GET the column types endpoint; returns the axios promise. */
export const getAvailableColumnTypes = () =>
  axios.get(`${API_URL}data-profiles/column-types/`);

/**
 * POST preview data to the column type suggestion endpoint.
 *
 * @param previewData - value sent as the "data" field of the JSON body
 * @returns the promise returned by axios.post
 */
export const getSuggestedColumnTypes = (previewData) => {
  const body = { data: previewData };
  return axios.post(`${API_URL}data-profiles/preview/column-types/`, body);
};
Loading

0 comments on commit 5fe5cf0

Please sign in to comment.