Skip to content

Commit

Permalink
feat: Add multiple data sources (#19)
Browse files Browse the repository at this point in the history
  • Loading branch information
leehuwuj authored Mar 27, 2024
1 parent 76aa336 commit c7a978e
Show file tree
Hide file tree
Showing 21 changed files with 373 additions and 356 deletions.
6 changes: 4 additions & 2 deletions create-app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,9 @@ export async function createApp({
vectorDb,
externalPort,
postInstallAction,
dataSource,
dataSources,
tools,
useLlamaParse,
observability,
}: InstallAppArgs): Promise<void> {
const root = path.resolve(appPath);
Expand Down Expand Up @@ -89,8 +90,9 @@ export async function createApp({
vectorDb,
externalPort,
postInstallAction,
dataSource,
dataSources,
tools,
useLlamaParse,
observability,
};

Expand Down
18 changes: 6 additions & 12 deletions helpers/env-variables.ts
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ export const createBackendEnvFile = async (
model?: string;
embeddingModel?: string;
framework?: TemplateFramework;
dataSource?: TemplateDataSource;
dataSources?: TemplateDataSource[];
port?: number;
},
) => {
Expand All @@ -126,19 +126,13 @@ export const createBackendEnvFile = async (
description: "The OpenAI API key to use.",
value: opts.openAiKey,
},

{
name: "LLAMA_CLOUD_API_KEY",
description: `The Llama Cloud API key.`,
value: opts.llamaCloudKey,
},
// Add vector database environment variables
...(opts.vectorDb ? getVectorDBEnvs(opts.vectorDb) : []),
// Add LlamaCloud API key
...(opts.llamaCloudKey
? [
{
name: "LLAMA_CLOUD_API_KEY",
description: `The Llama Cloud API key.`,
value: opts.llamaCloudKey,
},
]
: []),
];
let envVars: EnvVar[] = [];
if (opts.framework === "fastapi") {
Expand Down
66 changes: 17 additions & 49 deletions helpers/index.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import { copy } from "./copy";
import { callPackageManager } from "./install";

import fs from "fs/promises";
import path from "path";
import { cyan } from "picocolors";

import fsExtra from "fs-extra";
import { templatesDir } from "./dir";
import { createBackendEnvFile, createFrontendEnvFile } from "./env-variables";
import { PackageManager } from "./get-pkg-manager";
Expand All @@ -27,8 +26,8 @@ async function generateContextData(
packageManager?: PackageManager,
openAiKey?: string,
vectorDb?: TemplateVectorDB,
dataSource?: TemplateDataSource,
llamaCloudKey?: string,
useLlamaParse?: boolean,
) {
if (packageManager) {
const runGenerate = `${cyan(
Expand All @@ -37,8 +36,7 @@ async function generateContextData(
: `${packageManager} run generate`,
)}`;
const openAiKeyConfigured = openAiKey || process.env["OPENAI_API_KEY"];
const llamaCloudKeyConfigured = (dataSource?.config as FileSourceConfig)
?.useLlamaParse
const llamaCloudKeyConfigured = useLlamaParse
? llamaCloudKey || process.env["LLAMA_CLOUD_API_KEY"]
: true;
const hasVectorDb = vectorDb && vectorDb !== "none";
Expand Down Expand Up @@ -76,47 +74,16 @@ async function generateContextData(

const copyContextData = async (
root: string,
dataSource?: TemplateDataSource,
dataSources: TemplateDataSource[],
) => {
const destPath = path.join(root, "data");

const dataSourceConfig = dataSource?.config as FileSourceConfig;

// Copy file
if (dataSource?.type === "file") {
if (dataSourceConfig.paths) {
await fs.mkdir(destPath, { recursive: true });
console.log(
"Copying data from files:",
dataSourceConfig.paths.toString(),
);
for (const p of dataSourceConfig.paths) {
await fs.copyFile(p, path.join(destPath, path.basename(p)));
}
} else {
console.log("Missing file path in config");
process.exit(1);
}
return;
}

// Copy folder
if (dataSource?.type === "folder") {
// Example data does not have path config, set the default path
const srcPaths = dataSourceConfig.paths ?? [
path.join(templatesDir, "components", "data"),
];
console.log("Copying data from folders: ", srcPaths);
for (const p of srcPaths) {
const folderName = path.basename(p);
const destFolderPath = path.join(destPath, folderName);
await fs.mkdir(destFolderPath, { recursive: true });
await copy("**", destFolderPath, {
parents: true,
cwd: p,
});
}
return;
for (const dataSource of dataSources) {
const dataSourceConfig = dataSource?.config as FileSourceConfig;
// Copy local data
const dataPath =
dataSourceConfig.path ?? path.join(templatesDir, "components", "data");
const destPath = path.join(root, "data", path.basename(dataPath));
console.log("Copying data from path:", dataPath);
await fsExtra.copy(dataPath, destPath);
}
};

Expand Down Expand Up @@ -166,12 +133,13 @@ export const installTemplate = async (
model: props.model,
embeddingModel: props.embeddingModel,
framework: props.framework,
dataSource: props.dataSource,
dataSources: props.dataSources,
port: props.externalPort,
});

if (props.engine === "context") {
await copyContextData(props.root, props.dataSource);
console.log("\nGenerating context data...\n");
await copyContextData(props.root, props.dataSources);
if (
props.postInstallAction === "runApp" ||
props.postInstallAction === "dependencies"
Expand All @@ -181,14 +149,14 @@ export const installTemplate = async (
props.packageManager,
props.openAiKey,
props.vectorDb,
props.dataSource,
props.llamaCloudKey,
props.useLlamaParse,
);
}
}
} else {
// this is a frontend for a full-stack app, create .env file with model information
createFrontendEnvFile(props.root, {
await createFrontendEnvFile(props.root, {
model: props.model,
customApiPath: props.customApiPath,
});
Expand Down
88 changes: 45 additions & 43 deletions helpers/python.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import { templatesDir } from "./dir";
import { isPoetryAvailable, tryPoetryInstall } from "./poetry";
import { Tool } from "./tools";
import {
FileSourceConfig,
InstallTemplateArgs,
TemplateDataSource,
TemplateVectorDB,
Expand Down Expand Up @@ -65,7 +64,7 @@ const getAdditionalDependencies = (

// Add data source dependencies
const dataSourceType = dataSource?.type;
if (dataSourceType === "file" || dataSourceType === "folder") {
if (dataSourceType === "file") {
// llama-index-readers-file (pdf, excel, csv) is already included in llama_index package
dependencies.push({
name: "docx2txt",
Expand Down Expand Up @@ -180,18 +179,20 @@ export const installPythonTemplate = async ({
framework,
engine,
vectorDb,
dataSource,
dataSources,
tools,
postInstallAction,
useLlamaParse,
}: Pick<
InstallTemplateArgs,
| "root"
| "framework"
| "template"
| "engine"
| "vectorDb"
| "dataSource"
| "dataSources"
| "tools"
| "useLlamaParse"
| "postInstallAction"
>) => {
console.log("\nInitializing Python project with template:", template, "\n");
Expand Down Expand Up @@ -256,51 +257,52 @@ export const installPythonTemplate = async ({
});
}

// Write loader configs
if (dataSource?.type === "web") {
const config = dataSource.config as WebSourceConfig[];
const webLoaderConfig = config.map((c) => {
return {
base_url: c.baseUrl,
prefix: c.prefix || c.baseUrl,
depth: c.depth || 1,
};
});
const loaderConfigPath = path.join(root, "config/loaders.json");
await fs.mkdir(path.join(root, "config"), { recursive: true });
await fs.writeFile(
loaderConfigPath,
JSON.stringify(
{
web: webLoaderConfig,
},
null,
2,
),
);
}
if (dataSources.length > 0) {
const loaderConfigs: Record<string, any> = {};
const loaderPath = path.join(enginePath, "loaders");

const dataSourceType = dataSource?.type;
if (dataSourceType !== undefined && dataSourceType !== "none") {
let loaderFolder: string;
if (dataSourceType === "file" || dataSourceType === "folder") {
const dataSourceConfig = dataSource?.config as FileSourceConfig;
loaderFolder = dataSourceConfig.useLlamaParse ? "llama_parse" : "file";
} else {
loaderFolder = dataSourceType;
}
await copy("**", enginePath, {
// Copy all Python loaders into the "loaders" folder under enginePath
await copy("**", loaderPath, {
parents: true,
cwd: path.join(compPath, "loaders", "python", loaderFolder),
cwd: path.join(compPath, "loaders", "python"),
});

// Generate loaders config
// Web loader config
if (dataSources.some((ds) => ds.type === "web")) {
const webLoaderConfig = dataSources
.filter((ds) => ds.type === "web")
.map((ds) => {
const dsConfig = ds.config as WebSourceConfig;
return {
base_url: dsConfig.baseUrl,
prefix: dsConfig.prefix,
depth: dsConfig.depth,
};
});
loaderConfigs["web"] = webLoaderConfig;
}
// File loader config
if (dataSources.some((ds) => ds.type === "file")) {
loaderConfigs["file"] = {
use_llama_parse: useLlamaParse,
};
}
// Write loaders config
if (Object.keys(loaderConfigs).length > 0) {
const loaderConfigPath = path.join(root, "config/loaders.json");
await fs.mkdir(path.join(root, "config"), { recursive: true });
await fs.writeFile(
loaderConfigPath,
JSON.stringify(loaderConfigs, null, 2),
);
}
}
}

const addOnDependencies = getAdditionalDependencies(
vectorDb,
dataSource,
tools,
);
const addOnDependencies = dataSources
.map((ds) => getAdditionalDependencies(vectorDb, ds, tools))
.flat();
await addDependencies(root, addOnDependencies);

if (postInstallAction === "runApp" || postInstallAction === "dependencies") {
Expand Down
10 changes: 5 additions & 5 deletions helpers/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,19 @@ export type TemplateDataSource = {
type: TemplateDataSourceType;
config: TemplateDataSourceConfig;
};
export type TemplateDataSourceType = "none" | "file" | "folder" | "web";
export type TemplateDataSourceType = "file" | "web";
export type TemplateObservability = "none" | "opentelemetry";
// Config for a local data source (the path may point to a file or a folder)
export type FileSourceConfig = {
paths?: string[];
useLlamaParse?: boolean;
path?: string;
};
export type WebSourceConfig = {
baseUrl?: string;
prefix?: string;
depth?: number;
};

export type TemplateDataSourceConfig = FileSourceConfig | WebSourceConfig[];
export type TemplateDataSourceConfig = FileSourceConfig | WebSourceConfig;

export type CommunityProjectConfig = {
owner: string;
Expand All @@ -46,11 +45,12 @@ export interface InstallTemplateArgs {
framework: TemplateFramework;
engine: TemplateEngine;
ui: TemplateUI;
dataSource?: TemplateDataSource;
dataSources: TemplateDataSource[];
eslint: boolean;
customApiPath?: string;
openAiKey?: string;
llamaCloudKey?: string;
useLlamaParse?: boolean;
model: string;
embeddingModel: string;
communityProjectConfig?: CommunityProjectConfig;
Expand Down
16 changes: 6 additions & 10 deletions helpers/typescript.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import { copy } from "../helpers/copy";
import { callPackageManager } from "../helpers/install";
import { templatesDir } from "./dir";
import { PackageManager } from "./get-pkg-manager";
import { FileSourceConfig, InstallTemplateArgs } from "./types";
import { InstallTemplateArgs } from "./types";

const rename = (name: string) => {
switch (name) {
Expand Down Expand Up @@ -65,7 +65,8 @@ export const installTSTemplate = async ({
backend,
observability,
tools,
dataSource,
dataSources,
useLlamaParse,
}: InstallTemplateArgs & { backend: boolean }) => {
console.log(bold(`Using ${packageManager}.`));

Expand Down Expand Up @@ -173,15 +174,10 @@ export const installTSTemplate = async ({
});

// copy loader component
const dataSourceType = dataSource?.type;
if (dataSourceType && dataSourceType !== "none") {
const dataSourceType = dataSources[0]?.type;
if (dataSourceType) {
let loaderFolder: string;
if (dataSourceType === "file" || dataSourceType === "folder") {
const dataSourceConfig = dataSource?.config as FileSourceConfig;
loaderFolder = dataSourceConfig.useLlamaParse ? "llama_parse" : "file";
} else {
loaderFolder = dataSourceType;
}
loaderFolder = useLlamaParse ? "llama_parse" : dataSourceType;
await copy("**", enginePath, {
parents: true,
cwd: path.join(compPath, "loaders", "typescript", loaderFolder),
Expand Down
3 changes: 2 additions & 1 deletion index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -302,8 +302,9 @@ async function run(): Promise<void> {
vectorDb: program.vectorDb,
externalPort: program.externalPort,
postInstallAction: program.postInstallAction,
dataSource: program.dataSource,
dataSources: program.dataSources,
tools: program.tools,
useLlamaParse: program.useLlamaParse,
observability: program.observability,
});
conf.set("preferences", preferences);
Expand Down
Loading

0 comments on commit c7a978e

Please sign in to comment.