Skip to content

Commit

Permalink
chore(backend): add datasource overview endpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
wd0517 committed Jul 26, 2024
1 parent 67adcc1 commit 66e67f7
Showing 1 changed file with 61 additions and 2 deletions.
63 changes: 61 additions & 2 deletions backend/app/api/admin_routes/data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,15 @@
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi_pagination import Params, Page
from fastapi_pagination.ext.sqlmodel import paginate
from sqlmodel import select
from sqlmodel import select, func

from app.api.deps import SessionDep, CurrentSuperuserDep
from app.models import DataSource, DataSourceType
from app.models import (
DataSource,
DataSourceType,
Document,
Chunk,
)
from app.tasks import import_documents_from_datasource

router = APIRouter()
Expand Down Expand Up @@ -64,3 +69,57 @@ def get_datasource(
detail="Data source not found",
)
return data_source


@router.get("/admin/datasources/{data_source_id}/overview")
def get_datasource_overview(
    session: SessionDep,
    user: CurrentSuperuserDep,
    data_source_id: int,
) -> dict:
    """Return document/chunk totals and index-status breakdowns for a data source.

    Args:
        session: Database session used for all queries.
        user: Injected current-superuser dependency; not read in the body
            (presumably present only to restrict access -- confirm in
            ``app.api.deps``).
        data_source_id: Primary key of the data source to summarise.

    Returns:
        A dict with the keys ``documents`` and ``chunks`` (each holding a
        ``total`` count), ``vector_index`` (document count per
        ``Document.index_status``) and ``kg_index`` (chunk count per
        ``Chunk.index_status``, or an empty dict when the data source does
        not have ``build_kg_index`` set).

    Raises:
        HTTPException: 404 when no data source with ``data_source_id`` exists.
    """
    data_source = session.get(DataSource, data_source_id)
    if data_source is None:
        # NOTE: no local in this function may be named ``status``. Assigning
        # to that name would make it function-local and turn this lookup on
        # the imported ``fastapi.status`` module into an UnboundLocalError
        # (i.e. a 500 response instead of the intended 404).
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Data source not found",
        )

    # Filters shared by the "total" queries and the per-status breakdowns.
    belongs_to_source = Document.data_source_id == data_source_id
    chunk_belongs_to_source = Chunk.document.has(belongs_to_source)

    documents_count = session.scalar(
        select(func.count(Document.id)).where(belongs_to_source)
    )
    chunks_count = session.scalar(
        select(func.count(Chunk.id)).where(chunk_belongs_to_source)
    )

    # Documents grouped by their index status.
    document_status_stmt = (
        select(Document.index_status, func.count(Document.id))
        .where(belongs_to_source)
        .group_by(Document.index_status)
        .order_by(Document.index_status)
    )
    document_status_rows = session.exec(document_status_stmt).all()
    vector_index_status = {
        index_status: count for index_status, count in document_status_rows
    }

    # The chunk-level breakdown is only queried when the data source has
    # ``build_kg_index`` enabled; otherwise an empty mapping is reported.
    if data_source.build_kg_index:
        chunk_status_stmt = (
            select(Chunk.index_status, func.count(Chunk.id))
            .where(chunk_belongs_to_source)
            .group_by(Chunk.index_status)
            .order_by(Chunk.index_status)
        )
        chunk_status_rows = session.exec(chunk_status_stmt).all()
        kg_index_status = {
            index_status: count for index_status, count in chunk_status_rows
        }
    else:
        kg_index_status = {}

    return {
        "documents": {
            "total": documents_count,
        },
        "chunks": {
            "total": chunks_count,
        },
        "kg_index": kg_index_status,
        "vector_index": vector_index_status,
    }

0 comments on commit 66e67f7

Please sign in to comment.