Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add multiple data sources #19

Merged
Merged 17 commits (source and target branch names were not captured in this extract) on Mar 27, 2024.
6 changes: 4 additions & 2 deletions create-app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,9 @@ export async function createApp({
vectorDb,
externalPort,
postInstallAction,
dataSource,
dataSources,
tools,
useLlamaParse,
observability,
}: InstallAppArgs): Promise<void> {
const root = path.resolve(appPath);
Expand Down Expand Up @@ -89,8 +90,9 @@ export async function createApp({
vectorDb,
externalPort,
postInstallAction,
dataSource,
dataSources,
tools,
useLlamaParse,
observability,
};

Expand Down
18 changes: 6 additions & 12 deletions helpers/env-variables.ts
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ export const createBackendEnvFile = async (
model?: string;
embeddingModel?: string;
framework?: TemplateFramework;
dataSource?: TemplateDataSource;
dataSources?: TemplateDataSource[];
port?: number;
},
) => {
Expand All @@ -126,19 +126,13 @@ export const createBackendEnvFile = async (
description: "The OpenAI API key to use.",
value: opts.openAiKey,
},

{
name: "LLAMA_CLOUD_API_KEY",
description: `The Llama Cloud API key.`,
value: opts.llamaCloudKey,
},
// Add vector database environment variables
...(opts.vectorDb ? getVectorDBEnvs(opts.vectorDb) : []),
// Add LlamaCloud API key
...(opts.llamaCloudKey
? [
{
name: "LLAMA_CLOUD_API_KEY",
description: `The Llama Cloud API key.`,
value: opts.llamaCloudKey,
},
]
: []),
];
let envVars: EnvVar[] = [];
if (opts.framework === "fastapi") {
Expand Down
66 changes: 17 additions & 49 deletions helpers/index.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import { copy } from "./copy";
import { callPackageManager } from "./install";

import fs from "fs/promises";
import path from "path";
import { cyan } from "picocolors";

import fsExtra from "fs-extra";
import { templatesDir } from "./dir";
import { createBackendEnvFile, createFrontendEnvFile } from "./env-variables";
import { PackageManager } from "./get-pkg-manager";
Expand All @@ -27,8 +26,8 @@ async function generateContextData(
packageManager?: PackageManager,
openAiKey?: string,
vectorDb?: TemplateVectorDB,
dataSource?: TemplateDataSource,
llamaCloudKey?: string,
useLlamaParse?: boolean,
) {
if (packageManager) {
const runGenerate = `${cyan(
Expand All @@ -37,8 +36,7 @@ async function generateContextData(
: `${packageManager} run generate`,
)}`;
const openAiKeyConfigured = openAiKey || process.env["OPENAI_API_KEY"];
const llamaCloudKeyConfigured = (dataSource?.config as FileSourceConfig)
?.useLlamaParse
const llamaCloudKeyConfigured = useLlamaParse
? llamaCloudKey || process.env["LLAMA_CLOUD_API_KEY"]
: true;
const hasVectorDb = vectorDb && vectorDb !== "none";
Expand Down Expand Up @@ -76,47 +74,16 @@ async function generateContextData(

const copyContextData = async (
root: string,
dataSource?: TemplateDataSource,
dataSources: TemplateDataSource[],
) => {
const destPath = path.join(root, "data");

const dataSourceConfig = dataSource?.config as FileSourceConfig;

// Copy file
if (dataSource?.type === "file") {
if (dataSourceConfig.paths) {
await fs.mkdir(destPath, { recursive: true });
console.log(
"Copying data from files:",
dataSourceConfig.paths.toString(),
);
for (const p of dataSourceConfig.paths) {
await fs.copyFile(p, path.join(destPath, path.basename(p)));
}
} else {
console.log("Missing file path in config");
process.exit(1);
}
return;
}

// Copy folder
if (dataSource?.type === "folder") {
// Example data does not have path config, set the default path
const srcPaths = dataSourceConfig.paths ?? [
path.join(templatesDir, "components", "data"),
];
console.log("Copying data from folders: ", srcPaths);
for (const p of srcPaths) {
const folderName = path.basename(p);
const destFolderPath = path.join(destPath, folderName);
await fs.mkdir(destFolderPath, { recursive: true });
await copy("**", destFolderPath, {
parents: true,
cwd: p,
});
}
return;
for (const dataSource of dataSources) {
const dataSourceConfig = dataSource?.config as FileSourceConfig;
// Copy local data
const dataPath =
dataSourceConfig.path ?? path.join(templatesDir, "components", "data");
const destPath = path.join(root, "data", path.basename(dataPath));
console.log("Copying data from path:", dataPath);
await fsExtra.copy(dataPath, destPath);
}
};

Expand Down Expand Up @@ -166,12 +133,13 @@ export const installTemplate = async (
model: props.model,
embeddingModel: props.embeddingModel,
framework: props.framework,
dataSource: props.dataSource,
dataSources: props.dataSources,
port: props.externalPort,
});

if (props.engine === "context") {
await copyContextData(props.root, props.dataSource);
console.log("\nGenerating context data...\n");
await copyContextData(props.root, props.dataSources);
if (
props.postInstallAction === "runApp" ||
props.postInstallAction === "dependencies"
Expand All @@ -181,14 +149,14 @@ export const installTemplate = async (
props.packageManager,
props.openAiKey,
props.vectorDb,
props.dataSource,
props.llamaCloudKey,
props.useLlamaParse,
);
}
}
} else {
// this is a frontend for a full-stack app, create .env file with model information
createFrontendEnvFile(props.root, {
await createFrontendEnvFile(props.root, {
model: props.model,
customApiPath: props.customApiPath,
});
Expand Down
88 changes: 45 additions & 43 deletions helpers/python.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import { templatesDir } from "./dir";
import { isPoetryAvailable, tryPoetryInstall } from "./poetry";
import { Tool } from "./tools";
import {
FileSourceConfig,
InstallTemplateArgs,
TemplateDataSource,
TemplateVectorDB,
Expand Down Expand Up @@ -65,7 +64,7 @@ const getAdditionalDependencies = (

// Add data source dependencies
const dataSourceType = dataSource?.type;
if (dataSourceType === "file" || dataSourceType === "folder") {
if (dataSourceType === "file") {
// llama-index-readers-file (pdf, excel, csv) is already included in llama_index package
dependencies.push({
name: "docx2txt",
Expand Down Expand Up @@ -180,18 +179,20 @@ export const installPythonTemplate = async ({
framework,
engine,
vectorDb,
dataSource,
dataSources,
tools,
postInstallAction,
useLlamaParse,
}: Pick<
InstallTemplateArgs,
| "root"
| "framework"
| "template"
| "engine"
| "vectorDb"
| "dataSource"
| "dataSources"
| "tools"
| "useLlamaParse"
| "postInstallAction"
>) => {
console.log("\nInitializing Python project with template:", template, "\n");
Expand Down Expand Up @@ -256,51 +257,52 @@ export const installPythonTemplate = async ({
});
}

// Write loader configs
if (dataSource?.type === "web") {
const config = dataSource.config as WebSourceConfig[];
const webLoaderConfig = config.map((c) => {
return {
base_url: c.baseUrl,
prefix: c.prefix || c.baseUrl,
depth: c.depth || 1,
};
});
const loaderConfigPath = path.join(root, "config/loaders.json");
await fs.mkdir(path.join(root, "config"), { recursive: true });
await fs.writeFile(
loaderConfigPath,
JSON.stringify(
{
web: webLoaderConfig,
},
null,
2,
),
);
}
if (dataSources.length > 0) {
const loaderConfigs: Record<string, any> = {};
const loaderPath = path.join(enginePath, "loaders");

const dataSourceType = dataSource?.type;
if (dataSourceType !== undefined && dataSourceType !== "none") {
let loaderFolder: string;
if (dataSourceType === "file" || dataSourceType === "folder") {
const dataSourceConfig = dataSource?.config as FileSourceConfig;
loaderFolder = dataSourceConfig.useLlamaParse ? "llama_parse" : "file";
} else {
loaderFolder = dataSourceType;
}
await copy("**", enginePath, {
// Copy loaders to enginePath
await copy("**", loaderPath, {
parents: true,
cwd: path.join(compPath, "loaders", "python", loaderFolder),
cwd: path.join(compPath, "loaders", "python"),
});

// Generate loaders config
// Web loader config
if (dataSources.some((ds) => ds.type === "web")) {
const webLoaderConfig = dataSources
.filter((ds) => ds.type === "web")
.map((ds) => {
const dsConfig = ds.config as WebSourceConfig;
return {
base_url: dsConfig.baseUrl,
prefix: dsConfig.prefix,
depth: dsConfig.depth,
};
});
loaderConfigs["web"] = webLoaderConfig;
}
// File loader config
if (dataSources.some((ds) => ds.type === "file")) {
loaderConfigs["file"] = {
use_llama_parse: useLlamaParse,
};
}
// Write loaders config
if (Object.keys(loaderConfigs).length > 0) {
const loaderConfigPath = path.join(root, "config/loaders.json");
await fs.mkdir(path.join(root, "config"), { recursive: true });
await fs.writeFile(
loaderConfigPath,
JSON.stringify(loaderConfigs, null, 2),
);
}
}
}

const addOnDependencies = getAdditionalDependencies(
vectorDb,
dataSource,
tools,
);
const addOnDependencies = dataSources
.map((ds) => getAdditionalDependencies(vectorDb, ds, tools))
.flat();
await addDependencies(root, addOnDependencies);

if (postInstallAction === "runApp" || postInstallAction === "dependencies") {
Expand Down
10 changes: 5 additions & 5 deletions helpers/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,19 @@ export type TemplateDataSource = {
type: TemplateDataSourceType;
config: TemplateDataSourceConfig;
};
export type TemplateDataSourceType = "none" | "file" | "folder" | "web";
export type TemplateDataSourceType = "file" | "web";
export type TemplateObservability = "none" | "opentelemetry";
// Config for both file and folder
export type FileSourceConfig = {
paths?: string[];
useLlamaParse?: boolean;
path?: string;
};
export type WebSourceConfig = {
baseUrl?: string;
prefix?: string;
depth?: number;
};

export type TemplateDataSourceConfig = FileSourceConfig | WebSourceConfig[];
export type TemplateDataSourceConfig = FileSourceConfig | WebSourceConfig;

export type CommunityProjectConfig = {
owner: string;
Expand All @@ -46,11 +45,12 @@ export interface InstallTemplateArgs {
framework: TemplateFramework;
engine: TemplateEngine;
ui: TemplateUI;
dataSource?: TemplateDataSource;
dataSources: TemplateDataSource[];
eslint: boolean;
customApiPath?: string;
openAiKey?: string;
llamaCloudKey?: string;
useLlamaParse?: boolean;
model: string;
embeddingModel: string;
communityProjectConfig?: CommunityProjectConfig;
Expand Down
16 changes: 6 additions & 10 deletions helpers/typescript.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import { copy } from "../helpers/copy";
import { callPackageManager } from "../helpers/install";
import { templatesDir } from "./dir";
import { PackageManager } from "./get-pkg-manager";
import { FileSourceConfig, InstallTemplateArgs } from "./types";
import { InstallTemplateArgs } from "./types";

const rename = (name: string) => {
switch (name) {
Expand Down Expand Up @@ -65,7 +65,8 @@ export const installTSTemplate = async ({
backend,
observability,
tools,
dataSource,
dataSources,
useLlamaParse,
}: InstallTemplateArgs & { backend: boolean }) => {
console.log(bold(`Using ${packageManager}.`));

Expand Down Expand Up @@ -173,15 +174,10 @@ export const installTSTemplate = async ({
});

// copy loader component
const dataSourceType = dataSource?.type;
if (dataSourceType && dataSourceType !== "none") {
const dataSourceType = dataSources[0]?.type;
[Review thread on the line above: marked as resolved by marcusschiesser.]
if (dataSourceType) {
let loaderFolder: string;
if (dataSourceType === "file" || dataSourceType === "folder") {
const dataSourceConfig = dataSource?.config as FileSourceConfig;
loaderFolder = dataSourceConfig.useLlamaParse ? "llama_parse" : "file";
} else {
loaderFolder = dataSourceType;
}
loaderFolder = useLlamaParse ? "llama_parse" : dataSourceType;
await copy("**", enginePath, {
parents: true,
cwd: path.join(compPath, "loaders", "typescript", loaderFolder),
Expand Down
3 changes: 2 additions & 1 deletion index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -302,8 +302,9 @@ async function run(): Promise<void> {
vectorDb: program.vectorDb,
externalPort: program.externalPort,
postInstallAction: program.postInstallAction,
dataSource: program.dataSource,
dataSources: program.dataSources,
tools: program.tools,
useLlamaParse: program.useLlamaParse,
observability: program.observability,
});
conf.set("preferences", preferences);
Expand Down
Loading
Loading