Skip to content

Commit

Permalink
Allow tools without datasource and clean up (#33)
Browse files Browse the repository at this point in the history
  • Loading branch information
marcusschiesser authored Apr 4, 2024
1 parent c06d4af commit 29b17ee
Show file tree
Hide file tree
Showing 19 changed files with 338 additions and 291 deletions.
2 changes: 1 addition & 1 deletion .changeset/curvy-candles-mix.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
"create-llama": patch
---

Update loaders and tools config to yaml format
Update loaders and tools config to yaml format (for Python)
2 changes: 1 addition & 1 deletion .changeset/eleven-lemons-look.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
"create-llama": patch
---

Add nodes to the response and support Vercel streaming format
Add nodes to the response and support Vercel streaming format (Python)
5 changes: 5 additions & 0 deletions .changeset/five-flowers-admire.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"create-llama": patch
---

Allow using tools without any data source
2 changes: 1 addition & 1 deletion .changeset/forty-ads-tell.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
"create-llama": patch
---

Add redirect to documentation page when accessing the base URL
Add redirect to documentation page when accessing the base URL (FastAPI)
2 changes: 1 addition & 1 deletion .changeset/healthy-insects-check.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
"create-llama": patch
---

Add Dockerfile template
Add Dockerfile templates for TypeScript and Python
17 changes: 17 additions & 0 deletions helpers/copy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,20 @@ export const copy = async (
}),
);
};

/**
 * Translates the name of a template asset into the name it should carry
 * after being copied.
 *
 * - `gitignore` and `eslintrc.json` get a leading dot prepended.
 * - `README-template.md` becomes `README.md`.
 * - Every other name is returned unchanged.
 *
 * @param name File name of the asset as found in the template.
 * @returns The file name to use for the copied asset.
 */
export const assetRelocator = (name: string) => {
  const needsLeadingDot = name === "gitignore" || name === "eslintrc.json";
  if (needsLeadingDot) {
    return `.${name}`;
  }

  // README.md is ignored by webpack-asset-relocator-loader used by ncc:
  // https://github.com/vercel/webpack-asset-relocator-loader/blob/e9308683d47ff507253e37c9bcbb99474603192b/src/asset-relocator.js#L227
  if (name === "README-template.md") {
    return "README.md";
  }

  return name;
};
81 changes: 80 additions & 1 deletion helpers/datasources.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import fs from "fs/promises";
import path from "path";
import yaml, { Document } from "yaml";
import { templatesDir } from "./dir";
import { TemplateDataSource } from "./types";
import { DbSourceConfig, TemplateDataSource, WebSourceConfig } from "./types";

export const EXAMPLE_FILE: TemplateDataSource = {
type: "file",
Expand Down Expand Up @@ -28,3 +30,80 @@ export function getDataSources(
}
return dataSources;
}

/**
 * Generates `config/loaders.yaml` below `root` for the given data sources.
 *
 * Depending on which data source types are present, the document contains:
 * - `web`:  `driver_arguments` plus one `urls` entry per web data source
 * - `file`: the `use_llama_parse` flag
 * - `db`:   one entry (`uri`, `queries`) per database data source
 *
 * Nothing is written when `dataSources` is empty.
 *
 * @param root Project root; the file is written to `<root>/config/loaders.yaml`.
 * @param dataSources Data sources to describe in the config.
 * @param useLlamaParse Value written as `use_llama_parse` in the `file` section.
 */
export async function writeLoadersConfig(
  root: string,
  dataSources: TemplateDataSource[],
  useLlamaParse?: boolean,
) {
  if (dataSources.length === 0) return; // no datasources, no config needed
  const loaderConfig = new Document({});

  // Web loader config
  const webSources = dataSources.filter((ds) => ds.type === "web");
  if (webSources.length > 0) {
    const webLoaderConfig = new Document({});

    // Create config for browser driver arguments
    const driverArgNodeValue = webLoaderConfig.createNode([
      "--no-sandbox",
      "--disable-dev-shm-usage",
    ]);
    driverArgNodeValue.commentBefore =
      " The arguments to pass to the webdriver. E.g.: add --headless to run in headless mode";
    webLoaderConfig.set("driver_arguments", driverArgNodeValue);

    // Create config for urls
    const urlConfigs = webSources.map((ds) => {
      const dsConfig = ds.config as WebSourceConfig;
      return {
        base_url: dsConfig.baseUrl,
        prefix: dsConfig.prefix,
        depth: dsConfig.depth,
      };
    });
    const urlConfigNode = webLoaderConfig.createNode(urlConfigs);
    urlConfigNode.commentBefore = ` base_url: The URL to start crawling with
prefix: Only crawl URLs matching the specified prefix
depth: The maximum depth for BFS traversal
You can add more websites by adding more entries (don't forget the - prefix from YAML)`;
    webLoaderConfig.set("urls", urlConfigNode);

    // Add web config to the loaders config
    loaderConfig.set("web", webLoaderConfig);
  }

  // File loader config
  if (dataSources.some((ds) => ds.type === "file")) {
    // Add documentation to file loader config
    const node = loaderConfig.createNode({
      use_llama_parse: useLlamaParse,
    });
    node.commentBefore = ` use_llama_parse: Use LlamaParse if \`true\`. Needs a \`LLAMA_CLOUD_API_KEY\` from https://cloud.llamaindex.ai set as environment variable`;
    loaderConfig.set("file", node);
  }

  // DB loader config
  const dbLoaders = dataSources.filter((ds) => ds.type === "db");
  if (dbLoaders.length > 0) {
    const dbLoaderConfig = new Document({});
    const configEntries = dbLoaders.map((ds) => {
      const dsConfig = ds.config as DbSourceConfig;
      return {
        uri: dsConfig.uri,
        // Each entry carries a list of queries (the emitted key is `queries`).
        queries: [dsConfig.queries],
      };
    });

    const node = dbLoaderConfig.createNode(configEntries);
    // The emitted comment documents the keys actually written above (`uri`, `queries`).
    node.commentBefore = ` The configuration for the database loader, only supports MySQL and PostgreSQL databases for now.
uri: The URI for the database. E.g.: mysql+pymysql://user:password@localhost:3306/db or postgresql+psycopg2://user:password@localhost:5432/db
queries: The queries to fetch data from the database. E.g.: SELECT * FROM table`;
    loaderConfig.set("db", node);
  }

  // Write loaders config (create the config directory first if it is missing)
  const configDir = path.join(root, "config");
  const loaderConfigPath = path.join(configDir, "loaders.yaml");
  await fs.mkdir(configDir, { recursive: true });
  await fs.writeFile(loaderConfigPath, yaml.stringify(loaderConfig));
}
15 changes: 15 additions & 0 deletions helpers/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@ import path from "path";
import { cyan } from "picocolors";

import fsExtra from "fs-extra";
import { writeLoadersConfig } from "./datasources";
import { createBackendEnvFile, createFrontendEnvFile } from "./env-variables";
import { PackageManager } from "./get-pkg-manager";
import { installLlamapackProject } from "./llama-pack";
import { isHavingPoetryLockFile, tryPoetryRun } from "./poetry";
import { installPythonTemplate } from "./python";
import { downloadAndExtractRepo } from "./repo";
import { ConfigFileType, writeToolsConfig } from "./tools";
import {
FileSourceConfig,
InstallTemplateArgs,
Expand Down Expand Up @@ -117,10 +119,23 @@ export const installTemplate = async (

if (props.framework === "fastapi") {
await installPythonTemplate(props);
// write loaders configuration (currently Python only)
await writeLoadersConfig(
props.root,
props.dataSources,
props.useLlamaParse,
);
} else {
await installTSTemplate(props);
}

// write tools configuration
await writeToolsConfig(
props.root,
props.tools,
props.framework === "fastapi" ? ConfigFileType.YAML : ConfigFileType.JSON,
);

if (props.backend) {
// This is a backend, so we need to copy the test data and create the env file.

Expand Down
159 changes: 27 additions & 132 deletions helpers/python.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,15 @@ import path from "path";
import { cyan, red } from "picocolors";
import { parse, stringify } from "smol-toml";
import terminalLink from "terminal-link";
import yaml, { Document } from "yaml";
import { copy } from "./copy";

import { assetRelocator, copy } from "./copy";
import { templatesDir } from "./dir";
import { isPoetryAvailable, tryPoetryInstall } from "./poetry";
import { Tool } from "./tools";
import {
DbSourceConfig,
InstallTemplateArgs,
TemplateDataSource,
TemplateVectorDB,
WebSourceConfig,
} from "./types";

interface Dependency {
Expand Down Expand Up @@ -217,141 +215,38 @@ export const installPythonTemplate = async ({
await copy("**", root, {
parents: true,
cwd: templatePath,
rename(name) {
switch (name) {
case "gitignore": {
return `.${name}`;
}
// README.md is ignored by webpack-asset-relocator-loader used by ncc:
// https://github.com/vercel/webpack-asset-relocator-loader/blob/e9308683d47ff507253e37c9bcbb99474603192b/src/asset-relocator.js#L227
case "README-template.md": {
return "README.md";
}
default: {
return name;
}
}
},
rename: assetRelocator,
});

const compPath = path.join(templatesDir, "components");
const enginePath = path.join(root, "app", "engine");

if (dataSources.length > 0) {
const enginePath = path.join(root, "app", "engine");

const vectorDbDirName = vectorDb ?? "none";
const VectorDBPath = path.join(
compPath,
"vectordbs",
"python",
vectorDbDirName,
);
await copy("**", enginePath, {
parents: true,
cwd: VectorDBPath,
});

// Copy engine code
if (tools !== undefined && tools.length > 0) {
await copy("**", enginePath, {
parents: true,
cwd: path.join(compPath, "engines", "python", "agent"),
});
// Write tool configs
const configContent: Record<string, any> = {};
tools.forEach((tool) => {
configContent[tool.name] = tool.config ?? {};
});
const configFilePath = path.join(root, "config/tools.yaml");
await fs.mkdir(path.join(root, "config"), { recursive: true });
await fs.writeFile(configFilePath, yaml.stringify(configContent));
} else {
await copy("**", enginePath, {
parents: true,
cwd: path.join(compPath, "engines", "python", "chat"),
});
}

const loaderConfig = new Document({});
const loaderPath = path.join(enginePath, "loaders");

// Copy loaders to enginePath
await copy("**", loaderPath, {
parents: true,
cwd: path.join(compPath, "loaders", "python"),
});

// Generate loaders config
// Web loader config
if (dataSources.some((ds) => ds.type === "web")) {
const webLoaderConfig = new Document({});

// Create config for browser driver arguments
const driverArgNodeValue = webLoaderConfig.createNode([
"--no-sandbox",
"--disable-dev-shm-usage",
]);
driverArgNodeValue.commentBefore =
" The arguments to pass to the webdriver. E.g.: add --headless to run in headless mode";
webLoaderConfig.set("driver_arguments", driverArgNodeValue);

// Create config for urls
const urlConfigs = dataSources
.filter((ds) => ds.type === "web")
.map((ds) => {
const dsConfig = ds.config as WebSourceConfig;
return {
base_url: dsConfig.baseUrl,
prefix: dsConfig.prefix,
depth: dsConfig.depth,
};
});
const urlConfigNode = webLoaderConfig.createNode(urlConfigs);
urlConfigNode.commentBefore = ` base_url: The URL to start crawling with
prefix: Only crawl URLs matching the specified prefix
depth: The maximum depth for BFS traversal
You can add more websites by adding more entries (don't forget the - prefix from YAML)`;
webLoaderConfig.set("urls", urlConfigNode);

// Add web config to the loaders config
loaderConfig.set("web", webLoaderConfig);
}
// File loader config
if (dataSources.some((ds) => ds.type === "file")) {
// Add documentation to web loader config
const node = loaderConfig.createNode({
use_llama_parse: useLlamaParse,
});
node.commentBefore = ` use_llama_parse: Use LlamaParse if \`true\`. Needs a \`LLAMA_CLOUD_API_KEY\` from https://cloud.llamaindex.ai set as environment variable`;
loaderConfig.set("file", node);
}

// DB loader config
const dbLoaders = dataSources.filter((ds) => ds.type === "db");
if (dbLoaders.length > 0) {
const dbLoaderConfig = new Document({});
const configEntries = dbLoaders.map((ds) => {
const dsConfig = ds.config as DbSourceConfig;
return {
uri: dsConfig.uri,
queries: [dsConfig.queries],
};
});
// Copy selected vector DB
await copy("**", enginePath, {
parents: true,
cwd: path.join(compPath, "vectordbs", "python", vectorDb ?? "none"),
});

const node = dbLoaderConfig.createNode(configEntries);
node.commentBefore = ` The configuration for the database loader, only supports MySQL and PostgreSQL databases for now.
uri: The URI for the database. E.g.: mysql+pymysql://user:password@localhost:3306/db or postgresql+psycopg2://user:password@localhost:5432/db
query: The query to fetch data from the database. E.g.: SELECT * FROM table`;
loaderConfig.set("db", node);
}
// Copy all loaders to enginePath
const loaderPath = path.join(enginePath, "loaders");
await copy("**", loaderPath, {
parents: true,
cwd: path.join(compPath, "loaders", "python"),
});

// Write loaders config
if (Object.keys(loaderConfig).length > 0) {
const loaderConfigPath = path.join(root, "config/loaders.yaml");
await fs.mkdir(path.join(root, "config"), { recursive: true });
await fs.writeFile(loaderConfigPath, yaml.stringify(loaderConfig));
}
// Select and copy engine code based on data sources and tools
let engine;
tools = tools ?? [];
if (dataSources.length > 0 && tools.length === 0) {
console.log("\nNo tools selected - use optimized context chat engine\n");
engine = "chat";
} else {
engine = "agent";
}
await copy("**", enginePath, {
parents: true,
cwd: path.join(compPath, "engines", "python", engine),
});

const addOnDependencies = dataSources
.map((ds) => getAdditionalDependencies(vectorDb, ds, tools))
Expand Down
Loading

0 comments on commit 29b17ee

Please sign in to comment.