Skip to content

Commit

Permalink
feat: add more ways to filter the docs during searching
Browse files: browse the repository at this point in the history
  • Loading branch information
Icemap committed Oct 10, 2024
1 parent fb0861e commit 1370b6a
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 12 deletions.
53 changes: 48 additions & 5 deletions backend/app/api/admin_routes/document.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,62 @@
from fastapi import APIRouter, Depends
from fastapi import APIRouter, Depends, Query
from fastapi_pagination import Params, Page

from app.api.deps import SessionDep, CurrentSuperuserDep
from app.repositories import document_repo
from app.models import Document
from app.models import Document, DocIndexTaskStatus

from datetime import datetime

from app.types import MimeTypes

router = APIRouter()


@router.get("/admin/documents")
def list_documents(
    session: SessionDep,
    # Declared for its dependency side effect only; the value is not used in
    # the body. NOTE(review): presumably this restricts the route to
    # superusers -- confirm against app.api.deps.
    user: CurrentSuperuserDep,
    params: Params = Depends(),
    query: str | None = None,
    source_uri: str | None = Query(
        None,
        description="[Fuzzy Match] source URI field, will search for the source URI that contains the given string."
    ),
    data_source_id: int | None = None,
    created_at_start: datetime | None = None,
    created_at_end: datetime | None = None,
    updated_at_start: datetime | None = None,
    updated_at_end: datetime | None = None,
    last_modified_at_start: datetime | None = None,
    last_modified_at_end: datetime | None = None,
    name: str | None = Query(
        None,
        description="[Fuzzy Match] name field, will search for the name that contains the given string."
    ),
    mime_type: MimeTypes | None = None,
    index_status: DocIndexTaskStatus | None = None,
    language: str | None = Query(
        None,
        description="[Exact Match] meta.language field, for example: 'en', 'zh', etc."
    ),
    product: str | None = Query(
        None,
        description="[Exact Match] meta.product field, for example: 'tidb_operator', 'tidb', etc."
    ),
) -> Page[Document]:
    """Return one page of documents matching the given filters.

    Every filter is optional and is forwarded unchanged to
    ``document_repo.paginate``; the filtering itself happens there.

    Args:
        session: Database session dependency.
        user: Current-superuser dependency (unused in the body).
        params: Pagination parameters resolved by FastAPI.
        query: Free-text filter forwarded as the repository's ``query``
            argument.
        source_uri: Substring to look for in the document source URI.
        data_source_id: Only documents of this data source.
        created_at_start / created_at_end: Bounds on ``created_at``.
        updated_at_start / updated_at_end: Bounds on ``updated_at``.
        last_modified_at_start / last_modified_at_end: Bounds on
            ``last_modified_at``.
        name: Substring to look for in the document name.
        mime_type: Only documents with this MIME type.
        index_status: Only documents with this index task status.
        language: Exact value of ``meta.language``.
        product: Exact value of ``meta.product``.

    Returns:
        A ``Page`` of ``Document`` records.
    """
    # Everything is passed by keyword so that each value can only land on the
    # repository parameter of the same name, whatever the parameter order is.
    return document_repo.paginate(
        session=session,
        params=params,
        query=query,
        source_uri=source_uri,
        data_source_id=data_source_id,
        created_at_start=created_at_start,
        created_at_end=created_at_end,
        updated_at_start=updated_at_start,
        updated_at_end=updated_at_end,
        last_modified_at_start=last_modified_at_start,
        last_modified_at_end=last_modified_at_end,
        name=name,
        mime_type=mime_type,
        index_status=index_status,
        language=language,
        product=product,
    )
55 changes: 48 additions & 7 deletions backend/app/repositories/document.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
from typing import Optional
from sqlmodel import select, Session, col
from typing import Optional, cast

from sqlalchemy import String
from sqlmodel import select, Session, col, func
from fastapi_pagination import Params, Page
from fastapi_pagination.ext.sqlmodel import paginate

from app.models import Document
from app.models import Document, DocIndexTaskStatus
from app.repositories.base_repo import BaseRepo

from datetime import datetime

from app.types import MimeTypes


class DocumentRepo(BaseRepo):
    """Repository for querying ``Document`` rows."""

    model_cls = Document

    def paginate(
        self,
        session: Session,
        # NOTE(review): this default instance is created once, when the
        # function is defined, and is shared by every call that omits it.
        params: Params | None = Params(),
        query: Optional[str] = None,
        source_uri: Optional[str] = None,
        data_source_id: Optional[int] = None,
        created_at_start: Optional[datetime] = None,
        created_at_end: Optional[datetime] = None,
        updated_at_start: Optional[datetime] = None,
        updated_at_end: Optional[datetime] = None,
        last_modified_at_start: Optional[datetime] = None,
        last_modified_at_end: Optional[datetime] = None,
        name: Optional[str] = None,
        mime_type: Optional[MimeTypes] = None,
        index_status: Optional[DocIndexTaskStatus] = None,
        language: Optional[str] = None,  # meta field
        product: Optional[str] = None,  # meta field
    ) -> Page[Document]:
        """Return one page of documents matching every supplied filter.

        All filters are optional and are combined with AND. Results are
        ordered by ``updated_at`` descending.

        Args:
            session: Database session used to run the query.
            params: Pagination parameters handed to ``paginate``.
            query: Substring that ``source_uri`` must contain.
            source_uri: Substring that ``source_uri`` must contain.
            data_source_id: Exact ``data_source_id`` to match.
            created_at_start / created_at_end: Inclusive bounds on
                ``created_at``.
            updated_at_start / updated_at_end: Inclusive bounds on
                ``updated_at``.
            last_modified_at_start / last_modified_at_end: Inclusive bounds
                on ``last_modified_at``.
            name: Substring that ``name`` must contain.
            mime_type: Exact ``mime_type`` to match.
            index_status: Exact ``index_status`` to match.
            language: Exact value of the ``$.language`` key in ``meta``.
            product: Exact value of the ``$.product`` key in ``meta``.

        Returns:
            A ``Page`` of ``Document`` records.
        """
        stmt = select(Document)

        # Substring filters. An empty string is treated as "not provided".
        if query:
            stmt = stmt.where(col(Document.source_uri).contains(query))
        if source_uri:
            stmt = stmt.where(col(Document.source_uri).contains(source_uri))

        # Compare against None so that an id of 0 is still applied as a filter.
        if data_source_id is not None:
            stmt = stmt.where(Document.data_source_id == data_source_id)

        # Inclusive timestamp ranges; each bound is independent of the other.
        if created_at_start is not None:
            stmt = stmt.where(Document.created_at >= created_at_start)
        if created_at_end is not None:
            stmt = stmt.where(Document.created_at <= created_at_end)
        if updated_at_start is not None:
            stmt = stmt.where(Document.updated_at >= updated_at_start)
        if updated_at_end is not None:
            stmt = stmt.where(Document.updated_at <= updated_at_end)
        if last_modified_at_start is not None:
            stmt = stmt.where(Document.last_modified_at >= last_modified_at_start)
        if last_modified_at_end is not None:
            stmt = stmt.where(Document.last_modified_at <= last_modified_at_end)

        if name:
            stmt = stmt.where(col(Document.name).contains(name))
        if mime_type:
            stmt = stmt.where(Document.mime_type == mime_type)
        if index_status:
            stmt = stmt.where(Document.index_status == index_status)

        # Exact match on keys inside the JSON ``meta`` column.
        # NOTE(review): assumes the database provides JSON_EXTRACT and
        # JSON_UNQUOTE (MySQL-compatible SQL) -- confirm.
        if language:
            stmt = stmt.where(func.json_unquote(func.json_extract(Document.meta, "$.language")) == language)
        if product:
            stmt = stmt.where(func.json_unquote(func.json_extract(Document.meta, "$.product")) == product)

        # Make sure the newer edited record is always on top
        stmt = stmt.order_by(Document.updated_at.desc())
        return paginate(session, stmt, params)

Expand Down

0 comments on commit 1370b6a

Please sign in to comment.