Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow tools without datasource #33

Merged
merged 8 commits into from
Apr 4, 2024
2 changes: 1 addition & 1 deletion .changeset/curvy-candles-mix.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
"create-llama": patch
---

Update loaders and tools config to yaml format
Update loaders and tools config to yaml format (for Python)
2 changes: 1 addition & 1 deletion .changeset/eleven-lemons-look.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
"create-llama": patch
---

Add nodes to the response and support Vercel streaming format
Add nodes to the response and support Vercel streaming format (Python)
5 changes: 5 additions & 0 deletions .changeset/five-flowers-admire.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"create-llama": patch
---

Allow using tools without any data source
2 changes: 1 addition & 1 deletion .changeset/forty-ads-tell.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
"create-llama": patch
---

Add redirect to documentation page when accessing the base URL
Add redirect to documentation page when accessing the base URL (FastAPI)
2 changes: 1 addition & 1 deletion .changeset/healthy-insects-check.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
"create-llama": patch
---

Add Dockerfile template
Add Dockerfile templates for Typescript and Python
17 changes: 17 additions & 0 deletions helpers/copy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,20 @@ export const copy = async (
}),
);
};

export const assetRelocator = (name: string) => {
switch (name) {
case "gitignore":
case "eslintrc.json": {
return `.${name}`;
}
// README.md is ignored by webpack-asset-relocator-loader used by ncc:
// https://github.com/vercel/webpack-asset-relocator-loader/blob/e9308683d47ff507253e37c9bcbb99474603192b/src/asset-relocator.js#L227
case "README-template.md": {
return "README.md";
}
default: {
return name;
}
}
};
81 changes: 80 additions & 1 deletion helpers/datasources.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import fs from "fs/promises";
import path from "path";
import yaml, { Document } from "yaml";
import { templatesDir } from "./dir";
import { TemplateDataSource } from "./types";
import { DbSourceConfig, TemplateDataSource, WebSourceConfig } from "./types";

export const EXAMPLE_FILE: TemplateDataSource = {
type: "file",
Expand Down Expand Up @@ -28,3 +30,80 @@ export function getDataSources(
}
return dataSources;
}

export async function writeLoadersConfig(
root: string,
dataSources: TemplateDataSource[],
useLlamaParse?: boolean,
) {
if (dataSources.length === 0) return; // no datasources, no config needed
const loaderConfig = new Document({});
// Web loader config
if (dataSources.some((ds) => ds.type === "web")) {
const webLoaderConfig = new Document({});

// Create config for browser driver arguments
const driverArgNodeValue = webLoaderConfig.createNode([
"--no-sandbox",
"--disable-dev-shm-usage",
]);
driverArgNodeValue.commentBefore =
" The arguments to pass to the webdriver. E.g.: add --headless to run in headless mode";
webLoaderConfig.set("driver_arguments", driverArgNodeValue);

// Create config for urls
const urlConfigs = dataSources
.filter((ds) => ds.type === "web")
.map((ds) => {
const dsConfig = ds.config as WebSourceConfig;
return {
base_url: dsConfig.baseUrl,
prefix: dsConfig.prefix,
depth: dsConfig.depth,
};
});
const urlConfigNode = webLoaderConfig.createNode(urlConfigs);
urlConfigNode.commentBefore = ` base_url: The URL to start crawling with
prefix: Only crawl URLs matching the specified prefix
depth: The maximum depth for BFS traversal
You can add more websites by adding more entries (don't forget the - prefix from YAML)`;
webLoaderConfig.set("urls", urlConfigNode);

// Add web config to the loaders config
loaderConfig.set("web", webLoaderConfig);
}

// File loader config
if (dataSources.some((ds) => ds.type === "file")) {
// Add documentation to web loader config
const node = loaderConfig.createNode({
use_llama_parse: useLlamaParse,
});
node.commentBefore = ` use_llama_parse: Use LlamaParse if \`true\`. Needs a \`LLAMA_CLOUD_API_KEY\` from https://cloud.llamaindex.ai set as environment variable`;
loaderConfig.set("file", node);
}

// DB loader config
const dbLoaders = dataSources.filter((ds) => ds.type === "db");
if (dbLoaders.length > 0) {
const dbLoaderConfig = new Document({});
const configEntries = dbLoaders.map((ds) => {
const dsConfig = ds.config as DbSourceConfig;
return {
uri: dsConfig.uri,
queries: [dsConfig.queries],
};
});

const node = dbLoaderConfig.createNode(configEntries);
node.commentBefore = ` The configuration for the database loader, only supports MySQL and PostgreSQL databases for now.
uri: The URI for the database. E.g.: mysql+pymysql://user:password@localhost:3306/db or postgresql+psycopg2://user:password@localhost:5432/db
query: The query to fetch data from the database. E.g.: SELECT * FROM table`;
loaderConfig.set("db", node);
}

// Write loaders config
const loaderConfigPath = path.join(root, "config", "loaders.yaml");
await fs.mkdir(path.join(root, "config"), { recursive: true });
await fs.writeFile(loaderConfigPath, yaml.stringify(loaderConfig));
}
15 changes: 15 additions & 0 deletions helpers/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@ import path from "path";
import { cyan } from "picocolors";

import fsExtra from "fs-extra";
import { writeLoadersConfig } from "./datasources";
import { createBackendEnvFile, createFrontendEnvFile } from "./env-variables";
import { PackageManager } from "./get-pkg-manager";
import { installLlamapackProject } from "./llama-pack";
import { isHavingPoetryLockFile, tryPoetryRun } from "./poetry";
import { installPythonTemplate } from "./python";
import { downloadAndExtractRepo } from "./repo";
import { ConfigFileType, writeToolsConfig } from "./tools";
import {
FileSourceConfig,
InstallTemplateArgs,
Expand Down Expand Up @@ -117,10 +119,23 @@ export const installTemplate = async (

if (props.framework === "fastapi") {
await installPythonTemplate(props);
// write loaders configuration (currently Python only)
await writeLoadersConfig(
props.root,
props.dataSources,
props.useLlamaParse,
);
} else {
await installTSTemplate(props);
}

// write tools configuration
await writeToolsConfig(
props.root,
props.tools,
props.framework === "fastapi" ? ConfigFileType.YAML : ConfigFileType.JSON,
);

if (props.backend) {
// This is a backend, so we need to copy the test data and create the env file.

Expand Down
159 changes: 27 additions & 132 deletions helpers/python.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,15 @@ import path from "path";
import { cyan, red } from "picocolors";
import { parse, stringify } from "smol-toml";
import terminalLink from "terminal-link";
import yaml, { Document } from "yaml";
import { copy } from "./copy";

import { assetRelocator, copy } from "./copy";
import { templatesDir } from "./dir";
import { isPoetryAvailable, tryPoetryInstall } from "./poetry";
import { Tool } from "./tools";
import {
DbSourceConfig,
InstallTemplateArgs,
TemplateDataSource,
TemplateVectorDB,
WebSourceConfig,
} from "./types";

interface Dependency {
Expand Down Expand Up @@ -217,141 +215,38 @@ export const installPythonTemplate = async ({
await copy("**", root, {
parents: true,
cwd: templatePath,
rename(name) {
switch (name) {
case "gitignore": {
return `.${name}`;
}
// README.md is ignored by webpack-asset-relocator-loader used by ncc:
// https://github.com/vercel/webpack-asset-relocator-loader/blob/e9308683d47ff507253e37c9bcbb99474603192b/src/asset-relocator.js#L227
case "README-template.md": {
return "README.md";
}
default: {
return name;
}
}
},
rename: assetRelocator,
});

const compPath = path.join(templatesDir, "components");
const enginePath = path.join(root, "app", "engine");

if (dataSources.length > 0) {
const enginePath = path.join(root, "app", "engine");

const vectorDbDirName = vectorDb ?? "none";
const VectorDBPath = path.join(
compPath,
"vectordbs",
"python",
vectorDbDirName,
);
await copy("**", enginePath, {
parents: true,
cwd: VectorDBPath,
});

// Copy engine code
if (tools !== undefined && tools.length > 0) {
await copy("**", enginePath, {
parents: true,
cwd: path.join(compPath, "engines", "python", "agent"),
});
// Write tool configs
const configContent: Record<string, any> = {};
tools.forEach((tool) => {
configContent[tool.name] = tool.config ?? {};
});
const configFilePath = path.join(root, "config/tools.yaml");
await fs.mkdir(path.join(root, "config"), { recursive: true });
await fs.writeFile(configFilePath, yaml.stringify(configContent));
} else {
await copy("**", enginePath, {
parents: true,
cwd: path.join(compPath, "engines", "python", "chat"),
});
}

const loaderConfig = new Document({});
const loaderPath = path.join(enginePath, "loaders");

// Copy loaders to enginePath
await copy("**", loaderPath, {
parents: true,
cwd: path.join(compPath, "loaders", "python"),
});

// Generate loaders config
// Web loader config
if (dataSources.some((ds) => ds.type === "web")) {
const webLoaderConfig = new Document({});

// Create config for browser driver arguments
const driverArgNodeValue = webLoaderConfig.createNode([
"--no-sandbox",
"--disable-dev-shm-usage",
]);
driverArgNodeValue.commentBefore =
" The arguments to pass to the webdriver. E.g.: add --headless to run in headless mode";
webLoaderConfig.set("driver_arguments", driverArgNodeValue);

// Create config for urls
const urlConfigs = dataSources
.filter((ds) => ds.type === "web")
.map((ds) => {
const dsConfig = ds.config as WebSourceConfig;
return {
base_url: dsConfig.baseUrl,
prefix: dsConfig.prefix,
depth: dsConfig.depth,
};
});
const urlConfigNode = webLoaderConfig.createNode(urlConfigs);
urlConfigNode.commentBefore = ` base_url: The URL to start crawling with
prefix: Only crawl URLs matching the specified prefix
depth: The maximum depth for BFS traversal
You can add more websites by adding more entries (don't forget the - prefix from YAML)`;
webLoaderConfig.set("urls", urlConfigNode);

// Add web config to the loaders config
loaderConfig.set("web", webLoaderConfig);
}
// File loader config
if (dataSources.some((ds) => ds.type === "file")) {
// Add documentation to web loader config
const node = loaderConfig.createNode({
use_llama_parse: useLlamaParse,
});
node.commentBefore = ` use_llama_parse: Use LlamaParse if \`true\`. Needs a \`LLAMA_CLOUD_API_KEY\` from https://cloud.llamaindex.ai set as environment variable`;
loaderConfig.set("file", node);
}

// DB loader config
const dbLoaders = dataSources.filter((ds) => ds.type === "db");
if (dbLoaders.length > 0) {
const dbLoaderConfig = new Document({});
const configEntries = dbLoaders.map((ds) => {
const dsConfig = ds.config as DbSourceConfig;
return {
uri: dsConfig.uri,
queries: [dsConfig.queries],
};
});
// Copy selected vector DB
await copy("**", enginePath, {
parents: true,
cwd: path.join(compPath, "vectordbs", "python", vectorDb ?? "none"),
});

const node = dbLoaderConfig.createNode(configEntries);
node.commentBefore = ` The configuration for the database loader, only supports MySQL and PostgreSQL databases for now.
uri: The URI for the database. E.g.: mysql+pymysql://user:password@localhost:3306/db or postgresql+psycopg2://user:password@localhost:5432/db
query: The query to fetch data from the database. E.g.: SELECT * FROM table`;
loaderConfig.set("db", node);
}
// Copy all loaders to enginePath
const loaderPath = path.join(enginePath, "loaders");
await copy("**", loaderPath, {
parents: true,
cwd: path.join(compPath, "loaders", "python"),
});

// Write loaders config
if (Object.keys(loaderConfig).length > 0) {
const loaderConfigPath = path.join(root, "config/loaders.yaml");
await fs.mkdir(path.join(root, "config"), { recursive: true });
await fs.writeFile(loaderConfigPath, yaml.stringify(loaderConfig));
}
// Select and copy engine code based on data sources and tools
let engine;
tools = tools ?? [];
if (dataSources.length > 0 && tools.length === 0) {
console.log("\nNo tools selected - use optimized context chat engine\n");
engine = "chat";
} else {
engine = "agent";
}
await copy("**", enginePath, {
parents: true,
cwd: path.join(compPath, "engines", "python", engine),
});

const addOnDependencies = dataSources
.map((ds) => getAdditionalDependencies(vectorDb, ds, tools))
Expand Down
Loading
Loading