From 29b17ee32846b129f6ed33b5f9cd667d0497dbcb Mon Sep 17 00:00:00 2001 From: Marcus Schiesser Date: Thu, 4 Apr 2024 09:48:01 +0800 Subject: [PATCH] Allow tools without datasource and clean up (#33) --- .changeset/curvy-candles-mix.md | 2 +- .changeset/eleven-lemons-look.md | 2 +- .changeset/five-flowers-admire.md | 5 + .changeset/forty-ads-tell.md | 2 +- .changeset/healthy-insects-check.md | 2 +- helpers/copy.ts | 17 ++ helpers/datasources.ts | 81 ++++++- helpers/index.ts | 15 ++ helpers/python.ts | 159 +++---------- helpers/tools.ts | 35 +++ helpers/typescript.ts | 213 ++++++++---------- questions.ts | 5 +- .../engines/python/agent/__init__.py | 9 +- .../components/engines/python/agent/tools.py | 10 +- .../engines/python/chat/__init__.py | 8 +- .../engines/typescript/agent/chat.ts | 50 ++-- .../engines/typescript/chat/chat.ts | 5 + .../components/vectordbs/python/none/index.py | 5 +- .../vectordbs/typescript/none/index.ts | 4 +- 19 files changed, 338 insertions(+), 291 deletions(-) create mode 100644 .changeset/five-flowers-admire.md diff --git a/.changeset/curvy-candles-mix.md b/.changeset/curvy-candles-mix.md index 204b65f8..59890ab9 100644 --- a/.changeset/curvy-candles-mix.md +++ b/.changeset/curvy-candles-mix.md @@ -2,4 +2,4 @@ "create-llama": patch --- -Update loaders and tools config to yaml format +Update loaders and tools config to yaml format (for Python) diff --git a/.changeset/eleven-lemons-look.md b/.changeset/eleven-lemons-look.md index 84d3879a..373f1672 100644 --- a/.changeset/eleven-lemons-look.md +++ b/.changeset/eleven-lemons-look.md @@ -2,4 +2,4 @@ "create-llama": patch --- -Add nodes to the response and support Vercel streaming format +Add nodes to the response and support Vercel streaming format (Python) diff --git a/.changeset/five-flowers-admire.md b/.changeset/five-flowers-admire.md new file mode 100644 index 00000000..7f48a4a6 --- /dev/null +++ b/.changeset/five-flowers-admire.md @@ -0,0 +1,5 @@ +--- +"create-llama": patch +--- + +Allow using tools without any data source diff --git a/.changeset/forty-ads-tell.md b/.changeset/forty-ads-tell.md index ef9fed95..e4623b26 100644 --- a/.changeset/forty-ads-tell.md +++ b/.changeset/forty-ads-tell.md @@ -2,4 +2,4 @@ "create-llama": patch --- -Add redirect to documentation page when accessing the base URL +Add redirect to documentation page when accessing the base URL (FastAPI) diff --git a/.changeset/healthy-insects-check.md b/.changeset/healthy-insects-check.md index 00b948e1..6f97eb63 100644 --- a/.changeset/healthy-insects-check.md +++ b/.changeset/healthy-insects-check.md @@ -2,4 +2,4 @@ "create-llama": patch --- -Add Dockerfile template +Add Dockerfile templates for Typescript and Python diff --git a/helpers/copy.ts b/helpers/copy.ts index a5b722ba..d5f6ac00 100644 --- a/helpers/copy.ts +++ b/helpers/copy.ts @@ -48,3 +48,20 @@ export const copy = async ( }), ); }; + +export const assetRelocator = (name: string) => { + switch (name) { + case "gitignore": + case "eslintrc.json": { + return `.${name}`; + } + // README.md is ignored by webpack-asset-relocator-loader used by ncc: + // https://github.com/vercel/webpack-asset-relocator-loader/blob/e9308683d47ff507253e37c9bcbb99474603192b/src/asset-relocator.js#L227 + case "README-template.md": { + return "README.md"; + } + default: { + return name; + } + } +}; diff --git a/helpers/datasources.ts b/helpers/datasources.ts index c8a47e22..e56e2a72 100644 --- a/helpers/datasources.ts +++ b/helpers/datasources.ts @@ -1,6 +1,8 @@ +import fs from "fs/promises"; import path from "path"; +import yaml, { Document } from "yaml"; import { templatesDir } from "./dir"; -import { TemplateDataSource } from "./types"; +import { DbSourceConfig, TemplateDataSource, WebSourceConfig } from "./types"; export const EXAMPLE_FILE: TemplateDataSource = { type: "file", @@ -28,3 +30,80 @@ export function getDataSources( } return dataSources; } + +export async function writeLoadersConfig( + root: string, + dataSources: TemplateDataSource[], + useLlamaParse?: boolean, +) { + if (dataSources.length === 0) return; // no datasources, no config needed + const loaderConfig = new Document({}); + // Web loader config + if (dataSources.some((ds) => ds.type === "web")) { + const webLoaderConfig = new Document({}); + + // Create config for browser driver arguments + const driverArgNodeValue = webLoaderConfig.createNode([ + "--no-sandbox", + "--disable-dev-shm-usage", + ]); + driverArgNodeValue.commentBefore = + " The arguments to pass to the webdriver. E.g.: add --headless to run in headless mode"; + webLoaderConfig.set("driver_arguments", driverArgNodeValue); + + // Create config for urls + const urlConfigs = dataSources + .filter((ds) => ds.type === "web") + .map((ds) => { + const dsConfig = ds.config as WebSourceConfig; + return { + base_url: dsConfig.baseUrl, + prefix: dsConfig.prefix, + depth: dsConfig.depth, + }; + }); + const urlConfigNode = webLoaderConfig.createNode(urlConfigs); + urlConfigNode.commentBefore = ` base_url: The URL to start crawling with + prefix: Only crawl URLs matching the specified prefix + depth: The maximum depth for BFS traversal + You can add more websites by adding more entries (don't forget the - prefix from YAML)`; + webLoaderConfig.set("urls", urlConfigNode); + + // Add web config to the loaders config + loaderConfig.set("web", webLoaderConfig); + } + + // File loader config + if (dataSources.some((ds) => ds.type === "file")) { + // Add documentation to web loader config + const node = loaderConfig.createNode({ + use_llama_parse: useLlamaParse, + }); + node.commentBefore = ` use_llama_parse: Use LlamaParse if \`true\`. Needs a \`LLAMA_CLOUD_API_KEY\` from https://cloud.llamaindex.ai set as environment variable`; + loaderConfig.set("file", node); + } + + // DB loader config + const dbLoaders = dataSources.filter((ds) => ds.type === "db"); + if (dbLoaders.length > 0) { + const dbLoaderConfig = new Document({}); + const configEntries = dbLoaders.map((ds) => { + const dsConfig = ds.config as DbSourceConfig; + return { + uri: dsConfig.uri, + queries: [dsConfig.queries], + }; + }); + + const node = dbLoaderConfig.createNode(configEntries); + node.commentBefore = ` The configuration for the database loader, only supports MySQL and PostgreSQL databases for now. + uri: The URI for the database. E.g.: mysql+pymysql://user:password@localhost:3306/db or postgresql+psycopg2://user:password@localhost:5432/db + query: The query to fetch data from the database. E.g.: SELECT * FROM table`; + loaderConfig.set("db", node); + } + + // Write loaders config + const loaderConfigPath = path.join(root, "config", "loaders.yaml"); + await fs.mkdir(path.join(root, "config"), { recursive: true }); + await fs.writeFile(loaderConfigPath, yaml.stringify(loaderConfig)); +} diff --git a/helpers/index.ts b/helpers/index.ts index fe57ac5c..8b2fead7 100644 --- a/helpers/index.ts +++ b/helpers/index.ts @@ -4,12 +4,14 @@ import path from "path"; import { cyan } from "picocolors"; import fsExtra from "fs-extra"; +import { writeLoadersConfig } from "./datasources"; import { createBackendEnvFile, createFrontendEnvFile } from "./env-variables"; import { PackageManager } from "./get-pkg-manager"; import { installLlamapackProject } from "./llama-pack"; import { isHavingPoetryLockFile, tryPoetryRun } from "./poetry"; import { installPythonTemplate } from "./python"; import { downloadAndExtractRepo } from "./repo"; +import { ConfigFileType, writeToolsConfig } from "./tools"; import { FileSourceConfig, InstallTemplateArgs, @@ -117,10 +119,23 @@ export const installTemplate = async ( if (props.framework === "fastapi") { await installPythonTemplate(props); + // write loaders configuration (currently Python only) + await writeLoadersConfig( + props.root, + props.dataSources, + props.useLlamaParse, + ); } else { await installTSTemplate(props); } + // write tools configuration + await writeToolsConfig( + props.root, + props.tools, + props.framework === "fastapi" ? ConfigFileType.YAML : ConfigFileType.JSON, + ); + if (props.backend) { // This is a backend, so we need to copy the test data and create the env file. diff --git a/helpers/python.ts b/helpers/python.ts index c02cd8f1..56dc463d 100644 --- a/helpers/python.ts +++ b/helpers/python.ts @@ -3,17 +3,15 @@ import path from "path"; import { cyan, red } from "picocolors"; import { parse, stringify } from "smol-toml"; import terminalLink from "terminal-link"; -import yaml, { Document } from "yaml"; -import { copy } from "./copy"; + +import { assetRelocator, copy } from "./copy"; import { templatesDir } from "./dir"; import { isPoetryAvailable, tryPoetryInstall } from "./poetry"; import { Tool } from "./tools"; import { - DbSourceConfig, InstallTemplateArgs, TemplateDataSource, TemplateVectorDB, - WebSourceConfig, } from "./types"; interface Dependency { @@ -217,141 +215,38 @@ export const installPythonTemplate = async ({ await copy("**", root, { parents: true, cwd: templatePath, - rename(name) { - switch (name) { - case "gitignore": { - return `.${name}`; - } - // README.md is ignored by webpack-asset-relocator-loader used by ncc: - // https://github.com/vercel/webpack-asset-relocator-loader/blob/e9308683d47ff507253e37c9bcbb99474603192b/src/asset-relocator.js#L227 - case "README-template.md": { - return "README.md"; - } - default: { - return name; - } - } - }, + rename: assetRelocator, }); const compPath = path.join(templatesDir, "components"); + const enginePath = path.join(root, "app", "engine"); - if (dataSources.length > 0) { - const enginePath = path.join(root, "app", "engine"); - - const vectorDbDirName = vectorDb ?? "none"; - const VectorDBPath = path.join( - compPath, - "vectordbs", - "python", - vectorDbDirName, - ); - await copy("**", enginePath, { - parents: true, - cwd: VectorDBPath, - }); - - // Copy engine code - if (tools !== undefined && tools.length > 0) { - await copy("**", enginePath, { - parents: true, - cwd: path.join(compPath, "engines", "python", "agent"), - }); - // Write tool configs - const configContent: Record = {}; - tools.forEach((tool) => { - configContent[tool.name] = tool.config ?? {}; - }); - const configFilePath = path.join(root, "config/tools.yaml"); - await fs.mkdir(path.join(root, "config"), { recursive: true }); - await fs.writeFile(configFilePath, yaml.stringify(configContent)); - } else { - await copy("**", enginePath, { - parents: true, - cwd: path.join(compPath, "engines", "python", "chat"), - }); - } - - const loaderConfig = new Document({}); - const loaderPath = path.join(enginePath, "loaders"); - - // Copy loaders to enginePath - await copy("**", loaderPath, { - parents: true, - cwd: path.join(compPath, "loaders", "python"), - }); - - // Generate loaders config - // Web loader config - if (dataSources.some((ds) => ds.type === "web")) { - const webLoaderConfig = new Document({}); - - // Create config for browser driver arguments - const driverArgNodeValue = webLoaderConfig.createNode([ - "--no-sandbox", - "--disable-dev-shm-usage", - ]); - driverArgNodeValue.commentBefore = - " The arguments to pass to the webdriver. E.g.: add --headless to run in headless mode"; - webLoaderConfig.set("driver_arguments", driverArgNodeValue); - - // Create config for urls - const urlConfigs = dataSources - .filter((ds) => ds.type === "web") - .map((ds) => { - const dsConfig = ds.config as WebSourceConfig; - return { - base_url: dsConfig.baseUrl, - prefix: dsConfig.prefix, - depth: dsConfig.depth, - }; - }); - const urlConfigNode = webLoaderConfig.createNode(urlConfigs); - urlConfigNode.commentBefore = ` base_url: The URL to start crawling with - prefix: Only crawl URLs matching the specified prefix - depth: The maximum depth for BFS traversal - You can add more websites by adding more entries (don't forget the - prefix from YAML)`; - webLoaderConfig.set("urls", urlConfigNode); - - // Add web config to the loaders config - loaderConfig.set("web", webLoaderConfig); - } - // File loader config - if (dataSources.some((ds) => ds.type === "file")) { - // Add documentation to web loader config - const node = loaderConfig.createNode({ - use_llama_parse: useLlamaParse, - }); - node.commentBefore = ` use_llama_parse: Use LlamaParse if \`true\`. Needs a \`LLAMA_CLOUD_API_KEY\` from https://cloud.llamaindex.ai set as environment variable`; - loaderConfig.set("file", node); - } - - // DB loader config - const dbLoaders = dataSources.filter((ds) => ds.type === "db"); - if (dbLoaders.length > 0) { - const dbLoaderConfig = new Document({}); - const configEntries = dbLoaders.map((ds) => { - const dsConfig = ds.config as DbSourceConfig; - return { - uri: dsConfig.uri, - queries: [dsConfig.queries], - }; - }); + // Copy selected vector DB + await copy("**", enginePath, { + parents: true, + cwd: path.join(compPath, "vectordbs", "python", vectorDb ?? "none"), + }); - const node = dbLoaderConfig.createNode(configEntries); - node.commentBefore = ` The configuration for the database loader, only supports MySQL and PostgreSQL databases for now. - uri: The URI for the database. E.g.: mysql+pymysql://user:password@localhost:3306/db or postgresql+psycopg2://user:password@localhost:5432/db - query: The query to fetch data from the database. E.g.: SELECT * FROM table`; - loaderConfig.set("db", node); - } + // Copy all loaders to enginePath + const loaderPath = path.join(enginePath, "loaders"); + await copy("**", loaderPath, { + parents: true, + cwd: path.join(compPath, "loaders", "python"), + }); - // Write loaders config - if (Object.keys(loaderConfig).length > 0) { - const loaderConfigPath = path.join(root, "config/loaders.yaml"); - await fs.mkdir(path.join(root, "config"), { recursive: true }); - await fs.writeFile(loaderConfigPath, yaml.stringify(loaderConfig)); - } + // Select and copy engine code based on data sources and tools + let engine; + tools = tools ?? []; + if (dataSources.length > 0 && tools.length === 0) { + console.log("\nNo tools selected - use optimized context chat engine\n"); + engine = "chat"; + } else { + engine = "agent"; } + await copy("**", enginePath, { + parents: true, + cwd: path.join(compPath, "engines", "python", engine), + }); const addOnDependencies = dataSources .map((ds) => getAdditionalDependencies(vectorDb, ds, tools)) diff --git a/helpers/tools.ts b/helpers/tools.ts index f2e44bd0..d8b3967d 100644 --- a/helpers/tools.ts +++ b/helpers/tools.ts @@ -1,4 +1,8 @@ +import fs from "fs/promises"; +import path from "path"; import { red } from "picocolors"; +import yaml from "yaml"; +import { makeDir } from "./make-dir"; import { TemplateFramework } from "./types"; export type Tool = { @@ -8,6 +12,7 @@ export type Tool = { dependencies?: ToolDependencies[]; supportedFrameworks?: Array; }; + export type ToolDependencies = { name: string; version?: string; @@ -73,3 +78,33 @@ export const toolsRequireConfig = (tools?: Tool[]): boolean => { } return false; }; + +export enum ConfigFileType { + YAML = "yaml", + JSON = "json", +} + +export const writeToolsConfig = async ( + root: string, + tools: Tool[] = [], + type: ConfigFileType = ConfigFileType.YAML, +) => { + if (tools.length === 0) return; // no tools selected, no config need + const configContent: Record = {}; + tools.forEach((tool) => { + configContent[tool.name] = tool.config ?? {}; + }); + const configPath = path.join(root, "config"); + await makeDir(configPath); + if (type === ConfigFileType.YAML) { + await fs.writeFile( + path.join(configPath, "tools.yaml"), + yaml.stringify(configContent), + ); + } else { + await fs.writeFile( + path.join(configPath, "tools.json"), + JSON.stringify(configContent, null, 2), + ); + } +}; diff --git a/helpers/typescript.ts b/helpers/typescript.ts index 7dcf3ed4..3ffa2131 100644 --- a/helpers/typescript.ts +++ b/helpers/typescript.ts @@ -2,51 +2,12 @@ import fs from "fs/promises"; import os from "os"; import path from "path"; import { bold, cyan } from "picocolors"; -import { copy } from "../helpers/copy"; +import { assetRelocator, copy } from "../helpers/copy"; import { callPackageManager } from "../helpers/install"; import { templatesDir } from "./dir"; import { PackageManager } from "./get-pkg-manager"; -import { makeDir } from "./make-dir"; import { InstallTemplateArgs } from "./types"; -const rename = (name: string) => { - switch (name) { - case "gitignore": - case "eslintrc.json": { - return `.${name}`; - } - // README.md is ignored by webpack-asset-relocator-loader used by ncc: - // https://github.com/vercel/webpack-asset-relocator-loader/blob/e9308683d47ff507253e37c9bcbb99474603192b/src/asset-relocator.js#L227 - case "README-template.md": { - return "README.md"; - } - default: { - return name; - } - } -}; - -export const installTSDependencies = async ( - packageJson: any, - packageManager: PackageManager, - isOnline: boolean, -): Promise => { - console.log("\nInstalling dependencies:"); - for (const dependency in packageJson.dependencies) - console.log(`- ${cyan(dependency)}`); - - console.log("\nInstalling devDependencies:"); - for (const dependency in packageJson.devDependencies) - console.log(`- ${cyan(dependency)}`); - - console.log(); - - await callPackageManager(packageManager, isOnline).catch((error) => { - console.error("Failed to install TS dependencies. Exiting..."); - process.exit(1); - }); -}; - /** * Install a LlamaIndex internal template to a given `root` directory. */ @@ -58,7 +19,6 @@ export const installTSTemplate = async ({ template, framework, ui, - customApiPath, vectorDb, postInstallAction, backend, @@ -79,7 +39,7 @@ export const installTSTemplate = async ({ await copy(copySource, root, { parents: true, cwd: templatePath, - rename, + rename: assetRelocator, }); /** @@ -137,9 +97,6 @@ export const installTSTemplate = async ({ ); } - /** - * Copy the selected chat engine files to the target directory and reference it. - */ const compPath = path.join(templatesDir, "components"); const relativeEngineDestPath = framework === "nextjs" @@ -147,59 +104,33 @@ export const installTSTemplate = async ({ : path.join("src", "controllers"); const enginePath = path.join(root, relativeEngineDestPath, "engine"); - if (dataSources.length === 0) { - // use simple hat engine if user neither select tools nor a data source - console.log("\nUsing simple chat engine\n"); - } else { - if (vectorDb) { - // copy vector db component - console.log("\nUsing vector DB:", vectorDb, "\n"); - const vectorDBPath = path.join( - compPath, - "vectordbs", - "typescript", - vectorDb, - ); - await copy("**", enginePath, { - parents: true, - cwd: vectorDBPath, - }); - } - // copy loader component (TS only supports llama_parse and file for now) - let loaderFolder: string; - loaderFolder = useLlamaParse ? "llama_parse" : "file"; - await copy("**", enginePath, { - parents: true, - cwd: path.join(compPath, "loaders", "typescript", loaderFolder), - }); - if (tools?.length) { - // use agent chat engine if user selects tools - console.log("\nUsing agent chat engine\n"); - await copy("**", enginePath, { - parents: true, - cwd: path.join(compPath, "engines", "typescript", "agent"), - }); + // copy vector db component + console.log("\nUsing vector DB:", vectorDb, "\n"); + await copy("**", enginePath, { + parents: true, + cwd: path.join(compPath, "vectordbs", "typescript", vectorDb ?? "none"), + }); - // Write config/tools.json - const configContent: Record = {}; - tools.forEach((tool) => { - configContent[tool.name] = tool.config ?? {}; - }); - const configPath = path.join(root, "config"); - await makeDir(configPath); - await fs.writeFile( - path.join(configPath, "tools.json"), - JSON.stringify(configContent, null, 2), - ); - } else { - // use context chat engine if user does not select tools - console.log("\nUsing context chat engine\n"); - await copy("**", enginePath, { - parents: true, - cwd: path.join(compPath, "engines", "typescript", "chat"), - }); - } + // copy loader component (TS only supports llama_parse and file for now) + const loaderFolder = useLlamaParse ? "llama_parse" : "file"; + await copy("**", enginePath, { + parents: true, + cwd: path.join(compPath, "loaders", "typescript", loaderFolder), + }); + + // Select and copy engine code based on data sources and tools + let engine; + tools = tools ?? []; + if (dataSources.length > 0 && tools.length === 0) { + console.log("\nNo tools selected - use optimized context chat engine\n"); + engine = "chat"; + } else { + engine = "agent"; } + await copy("**", enginePath, { + parents: true, + cwd: path.join(compPath, "engines", "typescript", engine), + }); /** * Copy the selected UI files to the target directory and reference it. @@ -214,13 +145,54 @@ export const installTSTemplate = async ({ await copy("**", destUiPath, { parents: true, cwd: uiPath, - rename, + rename: assetRelocator, }); } - /** - * Update the package.json scripts. - */ + /** Modify frontend code to use custom API path */ + if (framework === "nextjs" && !backend) { + console.log( + "\nUsing external API for frontend, removing API code and configuration\n", + ); + // remove the default api folder and config folder + await fs.rm(path.join(root, "app", "api"), { recursive: true }); + await fs.rm(path.join(root, "config"), { recursive: true, force: true }); + } + + const packageJson = await updatePackageJson({ + root, + appName, + dataSources, + relativeEngineDestPath, + framework, + ui, + observability, + }); + + if (postInstallAction === "runApp" || postInstallAction === "dependencies") { + await installTSDependencies(packageJson, packageManager, isOnline); + } + + // Copy deployment files for typescript + await copy("**", root, { + cwd: path.join(compPath, "deployments", "typescript"), + }); +}; + +async function updatePackageJson({ + root, + appName, + dataSources, + relativeEngineDestPath, + framework, + ui, + observability, +}: Pick< + InstallTemplateArgs, + "root" | "appName" | "dataSources" | "framework" | "ui" | "observability" +> & { + relativeEngineDestPath: string; +}): Promise { const packageJsonFile = path.join(root, "package.json"); const packageJson: any = JSON.parse( await fs.readFile(packageJsonFile, "utf8"), @@ -228,19 +200,8 @@ export const installTSTemplate = async ({ packageJson.name = appName; packageJson.version = "0.1.0"; - if (framework === "nextjs" && customApiPath) { - console.log( - "\nUsing external API with custom API path:", - customApiPath, - "\n", - ); - // remove the default api folder - const apiPath = path.join(root, "app", "api"); - await fs.rm(apiPath, { recursive: true }); - // modify the dev script to use the custom api path - } - if (dataSources.length > 0 && relativeEngineDestPath) { + // TODO: move script to {root}/scripts for all frameworks // add generate script if using context engine packageJson.scripts = { ...packageJson.scripts, @@ -292,12 +253,26 @@ export const installTSTemplate = async ({ JSON.stringify(packageJson, null, 2) + os.EOL, ); - if (postInstallAction === "runApp" || postInstallAction === "dependencies") { - await installTSDependencies(packageJson, packageManager, isOnline); - } + return packageJson; +} - // Copy deployment files for typescript - await copy("**", root, { - cwd: path.join(compPath, "deployments", "typescript"), +async function installTSDependencies( + packageJson: any, + packageManager: PackageManager, + isOnline: boolean, +): Promise { + console.log("\nInstalling dependencies:"); + for (const dependency in packageJson.dependencies) + console.log(`- ${cyan(dependency)}`); + + console.log("\nInstalling devDependencies:"); + for (const dependency in packageJson.devDependencies) + console.log(`- ${cyan(dependency)}`); + + console.log(); + + await callPackageManager(packageManager, isOnline).catch((error) => { + console.error("Failed to install TS dependencies. Exiting..."); + process.exit(1); }); -}; +} diff --git a/questions.ts b/questions.ts index d81c9726..1e5f3268 100644 --- a/questions.ts +++ b/questions.ts @@ -131,7 +131,7 @@ export const getDataSourceChoices = ( } if (selectedDataSource === undefined || selectedDataSource.length === 0) { choices.push({ - title: "No data, just a simple chat", + title: "No data, just a simple chat or agent", value: "none", }); choices.push({ @@ -783,8 +783,7 @@ export const askQuestions = async ( } } - // TODO: allow tools also without datasources - if (!program.tools && program.dataSources.length > 0) { + if (!program.tools) { if (ciInfo.isCI) { program.tools = getPrefOrDefault("tools"); } else { diff --git a/templates/components/engines/python/agent/__init__.py b/templates/components/engines/python/agent/__init__.py index 52e36394..b2222ad6 100644 --- a/templates/components/engines/python/agent/__init__.py +++ b/templates/components/engines/python/agent/__init__.py @@ -11,11 +11,12 @@ def get_chat_engine(): top_k = os.getenv("TOP_K", "3") tools = [] - # Add query tool + # Add query tool if index exists index = get_index() - query_engine = index.as_query_engine(similarity_top_k=int(top_k)) - query_engine_tool = QueryEngineTool.from_defaults(query_engine=query_engine) - tools.append(query_engine_tool) + if index is not None: + query_engine = index.as_query_engine(similarity_top_k=int(top_k)) + query_engine_tool = QueryEngineTool.from_defaults(query_engine=query_engine) + tools.append(query_engine_tool) # Add additional tools tools += ToolFactory.from_env() diff --git a/templates/components/engines/python/agent/tools.py b/templates/components/engines/python/agent/tools.py index 93f63c44..584947e8 100644 --- a/templates/components/engines/python/agent/tools.py +++ b/templates/components/engines/python/agent/tools.py @@ -1,3 +1,4 @@ +import os import yaml import importlib @@ -26,8 +27,9 @@ def create_tool(tool_name: str, **kwargs) -> list[FunctionTool]: @staticmethod def from_env() -> list[FunctionTool]: tools = [] - with open("config/tools.yaml", "r") as f: - tool_configs = yaml.safe_load(f) - for name, config in tool_configs.items(): - tools += ToolFactory.create_tool(name, **config) + if os.path.exists("config/tools.yaml"): + with open("config/tools.yaml", "r") as f: + tool_configs = yaml.safe_load(f) + for name, config in tool_configs.items(): + tools += ToolFactory.create_tool(name, **config) return tools diff --git a/templates/components/engines/python/chat/__init__.py b/templates/components/engines/python/chat/__init__.py index 65624c32..da1c3cdd 100644 --- a/templates/components/engines/python/chat/__init__.py +++ b/templates/components/engines/python/chat/__init__.py @@ -6,7 +6,13 @@ def get_chat_engine(): system_prompt = os.getenv("SYSTEM_PROMPT") top_k = os.getenv("TOP_K", 3) - return get_index().as_chat_engine( + index = get_index() + if index is None: + raise Exception( + "StorageContext is empty - call 'python app/engine/generate.py' to generate the storage first" + ) + + return index.as_chat_engine( similarity_top_k=int(top_k), system_prompt=system_prompt, chat_mode="condense_plus_context", diff --git a/templates/components/engines/typescript/agent/chat.ts b/templates/components/engines/typescript/agent/chat.ts index 98523674..3de82345 100644 --- a/templates/components/engines/typescript/agent/chat.ts +++ b/templates/components/engines/typescript/agent/chat.ts @@ -1,26 +1,44 @@ -import config from "@/config/tools.json"; -import { OpenAI, OpenAIAgent, QueryEngineTool, ToolFactory } from "llamaindex"; +import { + BaseTool, + OpenAI, + OpenAIAgent, + QueryEngineTool, + ToolFactory, +} from "llamaindex"; +import fs from "node:fs/promises"; +import path from "node:path"; import { STORAGE_CACHE_DIR } from "./constants.mjs"; import { getDataSource } from "./index"; export async function createChatEngine(llm: OpenAI) { + let tools: BaseTool[] = []; + + // Add a query engine tool if we have a data source + // Delete this code if you don't have a data source const index = await getDataSource(llm); - const queryEngine = index.asQueryEngine(); - const queryEngineTool = new QueryEngineTool({ - queryEngine: queryEngine, - metadata: { - name: "data_query_engine", - description: `A query engine for documents in storage folder: ${STORAGE_CACHE_DIR}`, - }, - }); + if (index) { + tools.push( + new QueryEngineTool({ + queryEngine: index.asQueryEngine(), + metadata: { + name: "data_query_engine", + description: `A query engine for documents in storage folder: ${STORAGE_CACHE_DIR}`, + }, + }), + ); + } - const externalTools = await ToolFactory.createTools(config); + try { + // add tools from config file if it exists + const config = JSON.parse( + await fs.readFile(path.join("config", "tools.json"), "utf8"), + ); + tools = tools.concat(await ToolFactory.createTools(config)); + } catch {} - const agent = new OpenAIAgent({ - tools: [queryEngineTool, ...externalTools], - verbose: true, + return new OpenAIAgent({ + tools, llm, + verbose: true, }); - - return agent; } diff --git a/templates/components/engines/typescript/chat/chat.ts b/templates/components/engines/typescript/chat/chat.ts index cf77edb3..2feea01b 100644 --- a/templates/components/engines/typescript/chat/chat.ts +++ b/templates/components/engines/typescript/chat/chat.ts @@ -3,6 +3,11 @@ import { getDataSource } from "./index"; export async function createChatEngine(llm: LLM) { const index = await getDataSource(llm); + if (!index) { + throw new Error( + `StorageContext is empty - call 'npm run generate' to generate the storage first`, + ); + } const retriever = index.asRetriever(); retriever.similarityTopK = 3; diff --git a/templates/components/vectordbs/python/none/index.py b/templates/components/vectordbs/python/none/index.py index 4dcc858a..8b77414a 100644 --- a/templates/components/vectordbs/python/none/index.py +++ b/templates/components/vectordbs/python/none/index.py @@ -11,10 +11,7 @@ def get_index(): # check if storage already exists if not os.path.exists(STORAGE_DIR): - raise Exception( - "StorageContext is empty - call 'python app/engine/generate.py' to generate the storage first" - ) - + return None # load the existing index logger.info(f"Loading index from {STORAGE_DIR}...") storage_context = StorageContext.from_defaults(persist_dir=STORAGE_DIR) diff --git a/templates/components/vectordbs/typescript/none/index.ts b/templates/components/vectordbs/typescript/none/index.ts index 528d6057..f3819b51 100644 --- a/templates/components/vectordbs/typescript/none/index.ts +++ b/templates/components/vectordbs/typescript/none/index.ts @@ -21,9 +21,7 @@ export async function getDataSource(llm: LLM) { (storageContext.docStore as SimpleDocumentStore).toDict(), ).length; if (numberOfDocs === 0) { - throw new Error( - `StorageContext is empty - call 'npm run generate' to generate the storage first`, - ); + return null; } return await VectorStoreIndex.init({ storageContext,