Skip to content

Commit

Permalink
separate parse config
Browse files: browse the repository at this point in the history
  • Loading branch information
leehuwuj committed Mar 29, 2024
1 parent 632b214 commit 52bf693
Show file tree
Hide file tree
Showing 6 changed files with 13 additions and 21 deletions.
2 changes: 1 addition & 1 deletion .changeset/seven-zebras-allow.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
"create-llama": patch
---

Add database data source
Use databases as data source
1 change: 0 additions & 1 deletion helpers/python.ts
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,6 @@ export const installPythonTemplate = async ({
queries: [dsConfig.queries],
};
});
console.log("configEntries", configEntries);

const node = dbLoaderConfig.createNode(configEntries);
node.commentBefore = ` The configuration for the database loader, only supports MySQL database for now.
Expand Down
14 changes: 8 additions & 6 deletions templates/components/loaders/python/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
import importlib
import logging
from typing import Dict
from app.engine.loaders.file import get_file_documents
from app.engine.loaders.web import get_web_documents
from app.engine.loaders.db import get_db_documents
from app.engine.loaders.file import FileLoaderConfig, get_file_documents
from app.engine.loaders.web import WebLoaderConfig, get_web_documents
from app.engine.loaders.db import DBLoaderConfig, get_db_documents

logger = logging.getLogger(__name__)

Expand All @@ -25,11 +25,13 @@ def get_documents():
)
match loader_type:
case "file":
document = get_file_documents(loader_config)
document = get_file_documents(FileLoaderConfig(**loader_config))
case "web":
document = get_web_documents(loader_config)
document = get_web_documents(WebLoaderConfig(**loader_config))
case "db":
document = get_db_documents(loader_config)
document = get_db_documents(
configs=[DBLoaderConfig(**cfg) for cfg in loader_config]
)
case _:
raise ValueError(f"Invalid loader type: {loader_type}")
documents.extend(document)
Expand Down
7 changes: 2 additions & 5 deletions templates/components/loaders/python/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,11 @@ class DBLoaderConfig(BaseModel):
queries: List[str]


def get_db_documents(raw_configs: list[dict]):
def get_db_documents(configs: list[DBLoaderConfig]):
from llama_index.readers.database import DatabaseReader

# Parse and validate the config
db_configs = [DBLoaderConfig(**cfg) for cfg in raw_configs]

docs = []
for entry in db_configs:
for entry in configs:
loader = DatabaseReader(uri=entry.uri)
for query in entry.queries:
logger.info(f"Loading data from database with query: {query}")
Expand Down
5 changes: 1 addition & 4 deletions templates/components/loaders/python/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,9 @@ def llama_parse_parser():
return parser


def get_file_documents(raw_config: dict):
def get_file_documents(config: FileLoaderConfig):
from llama_index.core.readers import SimpleDirectoryReader

# Parse and validate the config
config = FileLoaderConfig(**raw_config)

reader = SimpleDirectoryReader(
config.data_dir,
recursive=True,
Expand Down
5 changes: 1 addition & 4 deletions templates/components/loaders/python/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,11 @@ class WebLoaderConfig(BaseModel):
urls: list[CrawlUrl]


def get_web_documents(raw_config: dict):
def get_web_documents(config: WebLoaderConfig):
from llama_index.readers.web import WholeSiteReader
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Parse and validate the config
config = WebLoaderConfig(**raw_config)

options = Options()
driver_arguments = config.driver_arguments or []
for arg in driver_arguments:
Expand Down

0 comments on commit 52bf693

Please sign in to comment.