From 52bf693f4541ebaba28a6ca937da593dee9532fc Mon Sep 17 00:00:00 2001 From: leehuwuj Date: Fri, 29 Mar 2024 14:39:44 +0700 Subject: [PATCH] separate parse config --- .changeset/seven-zebras-allow.md | 2 +- helpers/python.ts | 1 - templates/components/loaders/python/__init__.py | 14 ++++++++------ templates/components/loaders/python/db.py | 7 ++----- templates/components/loaders/python/file.py | 5 +---- templates/components/loaders/python/web.py | 5 +---- 6 files changed, 13 insertions(+), 21 deletions(-) diff --git a/.changeset/seven-zebras-allow.md b/.changeset/seven-zebras-allow.md index 9187e4ea..579a93fe 100644 --- a/.changeset/seven-zebras-allow.md +++ b/.changeset/seven-zebras-allow.md @@ -2,4 +2,4 @@ "create-llama": patch --- -Add database data source +Use databases as data source diff --git a/helpers/python.ts b/helpers/python.ts index 39a8cf5c..d27df466 100644 --- a/helpers/python.ts +++ b/helpers/python.ts @@ -333,7 +333,6 @@ export const installPythonTemplate = async ({ queries: [dsConfig.queries], }; }); - console.log("configEntries", configEntries); const node = dbLoaderConfig.createNode(configEntries); node.commentBefore = ` The configuration for the database loader, only supports MySQL database for now. diff --git a/templates/components/loaders/python/__init__.py b/templates/components/loaders/python/__init__.py index 0a5d7a0c..d17df8e0 100644 --- a/templates/components/loaders/python/__init__.py +++ b/templates/components/loaders/python/__init__.py @@ -3,9 +3,9 @@ import importlib import logging from typing import Dict -from app.engine.loaders.file import get_file_documents -from app.engine.loaders.web import get_web_documents -from app.engine.loaders.db import get_db_documents +from app.engine.loaders.file import FileLoaderConfig, get_file_documents +from app.engine.loaders.web import WebLoaderConfig, get_web_documents +from app.engine.loaders.db import DBLoaderConfig, get_db_documents logger = logging.getLogger(__name__) @@ -25,11 +25,13 @@ def get_documents(): ) match loader_type: case "file": - document = get_file_documents(loader_config) + document = get_file_documents(FileLoaderConfig(**loader_config)) case "web": - document = get_web_documents(loader_config) + document = get_web_documents(WebLoaderConfig(**loader_config)) case "db": - document = get_db_documents(loader_config) + document = get_db_documents( + configs=[DBLoaderConfig(**cfg) for cfg in loader_config] + ) case _: raise ValueError(f"Invalid loader type: {loader_type}") documents.extend(document) diff --git a/templates/components/loaders/python/db.py b/templates/components/loaders/python/db.py index e1487a6f..d5c9ffde 100644 --- a/templates/components/loaders/python/db.py +++ b/templates/components/loaders/python/db.py @@ -12,14 +12,11 @@ class DBLoaderConfig(BaseModel): queries: List[str] -def get_db_documents(raw_configs: list[dict]): +def get_db_documents(configs: list[DBLoaderConfig]): from llama_index.readers.database import DatabaseReader - # Parse and validate the config - db_configs = [DBLoaderConfig(**cfg) for cfg in raw_configs] - docs = [] - for entry in db_configs: + for entry in configs: loader = DatabaseReader(uri=entry.uri) for query in entry.queries: logger.info(f"Loading data from database with query: {query}") diff --git a/templates/components/loaders/python/file.py b/templates/components/loaders/python/file.py index 86a5de91..a814b0d0 100644 --- a/templates/components/loaders/python/file.py +++ b/templates/components/loaders/python/file.py @@ -24,12 +24,9 @@ def llama_parse_parser(): return parser -def get_file_documents(raw_config: dict): +def get_file_documents(config: FileLoaderConfig): from llama_index.core.readers import SimpleDirectoryReader - # Parse and validate the config - config = FileLoaderConfig(**raw_config) - reader = SimpleDirectoryReader( config.data_dir, recursive=True, diff --git a/templates/components/loaders/python/web.py b/templates/components/loaders/python/web.py index 8b182254..563e51b5 100644 --- a/templates/components/loaders/python/web.py +++ b/templates/components/loaders/python/web.py @@ -14,14 +14,11 @@ class WebLoaderConfig(BaseModel): urls: list[CrawlUrl] -def get_web_documents(raw_config: dict): +def get_web_documents(config: WebLoaderConfig): from llama_index.readers.web import WholeSiteReader from selenium import webdriver from selenium.webdriver.chrome.options import Options - # Parse and validate the config - config = WebLoaderConfig(**raw_config) - options = Options() driver_arguments = config.driver_arguments or [] for arg in driver_arguments: