From b6db19790121906a2ce3dea7261be44b1e2afe64 Mon Sep 17 00:00:00 2001 From: leehuwuj Date: Mon, 25 Mar 2024 13:39:18 +0700 Subject: [PATCH] rebase and refactor code --- helpers/python.ts | 113 +++++++----------- .../python/file/{loader.py => file.py} | 0 .../llama_parse/{loader.py => llama_parse.py} | 0 templates/components/loaders/python/loader.py | 12 ++ .../loaders/python/web/{loader.py => web.py} | 0 5 files changed, 58 insertions(+), 67 deletions(-) rename templates/components/loaders/python/file/{loader.py => file.py} (100%) rename templates/components/loaders/python/llama_parse/{loader.py => llama_parse.py} (100%) create mode 100644 templates/components/loaders/python/loader.py rename templates/components/loaders/python/web/{loader.py => web.py} (100%) diff --git a/helpers/python.ts b/helpers/python.ts index e01af299..a9633059 100644 --- a/helpers/python.ts +++ b/helpers/python.ts @@ -256,78 +256,57 @@ export const installPythonTemplate = async ({ }); } - // const dataSourceType = dataSource?.type; - // if (dataSourceType !== undefined && dataSourceType !== "none") { - // let loaderFolder: string; - // if (dataSourceType === "file" || dataSourceType === "folder") { - // const dataSourceConfig = dataSource?.config as FileSourceConfig; - // loaderFolder = dataSourceConfig.useLlamaParse ? "llama_parse" : "file"; + if (dataSources.length > 0 && dataSources[0].type !== "none") { + const loaderConfigs: Record = {}; + const loaderPath = path.join(enginePath, "loaders"); - // Copy data source loaders - const loaderConfigs: Record = {}; - const loaderPath = path.join(enginePath, "loaders"); - for (const dataSource of dataSources) { - const sourceType = dataSource.type; - if (sourceType === "file" || sourceType === "folder") { - const sourceConfig = dataSource.config as FileSourceConfig; - const loaderFolder = sourceConfig.useLlamaParse - ? "llama_parse" - : "file"; - await copy("**", loaderPath, { - parents: true, - cwd: path.join(compPath, "loaders", "python", loaderFolder), - }); - } else { - // Write loader configs - if (sourceType === "web") { - const config = dataSource.config as WebSourceConfig[]; - const webLoaderConfig = config.map((c) => { - return { - base_url: c.baseUrl, - prefix: c.prefix || c.baseUrl, - depth: c.depth || 1, - }; - }); - loaderConfigs["web"] = webLoaderConfig; + // Copy loader.py file to enginePath + await copy("loader.py", enginePath, { + parents: true, + cwd: path.join(compPath, "loaders", "python"), + }); + + for (const dataSource of dataSources) { + const sourceType = dataSource.type; + switch (sourceType) { + case "file": + case "folder": { + const sourceConfig = dataSource.config as FileSourceConfig; + const loaderFolder = sourceConfig.useLlamaParse + ? "llama_parse" + : "file"; + await copy("**", loaderPath, { + parents: true, + cwd: path.join(compPath, "loaders", "python", loaderFolder), + }); + break; + } + case "web": { + const config = dataSource.config as WebSourceConfig[]; + // Append web loader config + const webLoaderConfig = config.map((c) => { + return { + base_url: c.baseUrl, + prefix: c.prefix || c.baseUrl, + depth: c.depth || 1, + }; + }); + loaderConfigs["web"] = webLoaderConfig; + await copy("**", loaderPath, { + parents: true, + cwd: path.join(compPath, "loaders", "python", sourceType), + }); + break; + } } - await copy("**", loaderPath, { - parents: true, - cwd: path.join(compPath, "loaders", "python", sourceType), - }); } - } - - // Write loaders config - if (Object.keys(loaderConfigs).length > 0) { - const loaderConfigPath = path.join(root, "config/loaders.json"); - await fs.mkdir(path.join(root, "config"), { recursive: true }); - await fs.writeFile( - loaderConfigPath, - JSON.stringify(loaderConfigs, null, 2), - ); - } - - // Generate loader configs - for (const dataSource of dataSources) { - if (dataSource?.type === "web") { - const config = dataSource.config as WebSourceConfig[]; - const webLoaderConfig = config.map((c) => { - return { - base_url: c.baseUrl, - prefix: c.prefix || c.baseUrl, - depth: c.depth || 1, - }; - }); - const loaderConfigPath = path.join(root, "loaders.json"); + // Write loaders config + if (Object.keys(loaderConfigs).length > 0) { + const loaderConfigPath = path.join(root, "config/loaders.json"); + await fs.mkdir(path.join(root, "config"), { recursive: true }); await fs.writeFile( loaderConfigPath, - JSON.stringify( - { - web: webLoaderConfig, - }, - null, - 2, - ), + JSON.stringify(loaderConfigs, null, 2), ); } } diff --git a/templates/components/loaders/python/file/loader.py b/templates/components/loaders/python/file/file.py similarity index 100% rename from templates/components/loaders/python/file/loader.py rename to templates/components/loaders/python/file/file.py diff --git a/templates/components/loaders/python/llama_parse/loader.py b/templates/components/loaders/python/llama_parse/llama_parse.py similarity index 100% rename from templates/components/loaders/python/llama_parse/loader.py rename to templates/components/loaders/python/llama_parse/llama_parse.py diff --git a/templates/components/loaders/python/loader.py b/templates/components/loaders/python/loader.py new file mode 100644 index 00000000..51306bf5 --- /dev/null +++ b/templates/components/loaders/python/loader.py @@ -0,0 +1,12 @@ +import os +import importlib + + +def get_documents(): + # For each file in .loaders, import the module and call the get_documents function + for loader in os.listdir(os.path.join(os.path.dirname(__file__), "loaders")): + if loader.endswith(".py"): + loader = loader[:-3] + module = importlib.import_module(f"app.engine.loaders.{loader}") + documents = module.get_documents() + yield documents diff --git a/templates/components/loaders/python/web/loader.py b/templates/components/loaders/python/web/web.py similarity index 100% rename from templates/components/loaders/python/web/loader.py rename to templates/components/loaders/python/web/web.py