Skip to content

Commit

Permalink
chore: Always use file loader as default loader (#279)
Browse files Browse the repository at this point in the history
  • Loading branch information
leehuwuj authored Sep 9, 2024
1 parent 71fbe1b commit b6da3c2
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 77 deletions.
5 changes: 5 additions & 0 deletions .changeset/chilled-mangos-cheat.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"create-llama": patch
---

Ensure the generation script always works
110 changes: 51 additions & 59 deletions helpers/datasources.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,74 +36,66 @@ export async function writeLoadersConfig(
dataSources: TemplateDataSource[],
useLlamaParse?: boolean,
) {
if (dataSources.length === 0) return; // no datasources, no config needed
const loaderConfig = new Document({});
// Web loader config
if (dataSources.some((ds) => ds.type === "web")) {
const webLoaderConfig = new Document({});

// Create config for browser driver arguments
const driverArgNodeValue = webLoaderConfig.createNode([
"--no-sandbox",
"--disable-dev-shm-usage",
]);
driverArgNodeValue.commentBefore =
" The arguments to pass to the webdriver. E.g.: add --headless to run in headless mode";
webLoaderConfig.set("driver_arguments", driverArgNodeValue);

// Create config for urls
const urlConfigs = dataSources
.filter((ds) => ds.type === "web")
.map((ds) => {
const dsConfig = ds.config as WebSourceConfig;
return {
base_url: dsConfig.baseUrl,
prefix: dsConfig.prefix,
depth: dsConfig.depth,
};
});
const urlConfigNode = webLoaderConfig.createNode(urlConfigs);
urlConfigNode.commentBefore = ` base_url: The URL to start crawling with
prefix: Only crawl URLs matching the specified prefix
depth: The maximum depth for BFS traversal
You can add more websites by adding more entries (don't forget the - prefix from YAML)`;
webLoaderConfig.set("urls", urlConfigNode);
const loaderConfig: Record<string, any> = {};

// Add web config to the loaders config
loaderConfig.set("web", webLoaderConfig);
}
// Always set file loader config
loaderConfig.file = createFileLoaderConfig(useLlamaParse);

// File loader config
if (dataSources.some((ds) => ds.type === "file")) {
// Add documentation to web loader config
const node = loaderConfig.createNode({
use_llama_parse: useLlamaParse,
});
node.commentBefore = ` use_llama_parse: Use LlamaParse if \`true\`. Needs a \`LLAMA_CLOUD_API_KEY\` from https://cloud.llamaindex.ai set as environment variable`;
loaderConfig.set("file", node);
if (dataSources.some((ds) => ds.type === "web")) {
loaderConfig.web = createWebLoaderConfig(dataSources);
}

// DB loader config
const dbLoaders = dataSources.filter((ds) => ds.type === "db");
if (dbLoaders.length > 0) {
const dbLoaderConfig = new Document({});
const configEntries = dbLoaders.map((ds) => {
const dsConfig = ds.config as DbSourceConfig;
return {
uri: dsConfig.uri,
queries: [dsConfig.queries],
};
});

const node = dbLoaderConfig.createNode(configEntries);
node.commentBefore = ` The configuration for the database loader, only supports MySQL and PostgreSQL databases for now.
uri: The URI for the database. E.g.: mysql+pymysql://user:password@localhost:3306/db or postgresql+psycopg2://user:password@localhost:5432/db
query: The query to fetch data from the database. E.g.: SELECT * FROM table`;
loaderConfig.set("db", node);
loaderConfig.db = createDbLoaderConfig(dbLoaders);
}

// Create a new Document with the loaderConfig
const yamlDoc = new Document(loaderConfig);

// Write loaders config
const loaderConfigPath = path.join(root, "config", "loaders.yaml");
await fs.mkdir(path.join(root, "config"), { recursive: true });
await fs.writeFile(loaderConfigPath, yaml.stringify(loaderConfig));
await fs.writeFile(loaderConfigPath, yaml.stringify(yamlDoc));
}

function createWebLoaderConfig(dataSources: TemplateDataSource[]): any {
const webLoaderConfig: Record<string, any> = {};

// Create config for browser driver arguments
webLoaderConfig.driver_arguments = [
"--no-sandbox",
"--disable-dev-shm-usage",
];

// Create config for urls
const urlConfigs = dataSources
.filter((ds) => ds.type === "web")
.map((ds) => {
const dsConfig = ds.config as WebSourceConfig;
return {
base_url: dsConfig.baseUrl,
prefix: dsConfig.prefix,
depth: dsConfig.depth,
};
});
webLoaderConfig.urls = urlConfigs;

return webLoaderConfig;
}

function createFileLoaderConfig(useLlamaParse?: boolean): any {
return {
use_llama_parse: useLlamaParse,
};
}

function createDbLoaderConfig(dbLoaders: TemplateDataSource[]): any {
return dbLoaders.map((ds) => {
const dsConfig = ds.config as DbSourceConfig;
return {
uri: dsConfig.uri,
queries: [dsConfig.queries],
};
});
}
37 changes: 19 additions & 18 deletions helpers/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,11 @@ async function generateContextData(
}
}

const copyContextData = async (
const prepareContextData = async (
root: string,
dataSources: TemplateDataSource[],
) => {
await makeDir(path.join(root, "data"));
for (const dataSource of dataSources) {
const dataSourceConfig = dataSource?.config as FileSourceConfig;
// Copy local data
Expand Down Expand Up @@ -174,25 +175,25 @@ export const installTemplate = async (
await createBackendEnvFile(props.root, props);
}

if (props.dataSources.length > 0) {
await prepareContextData(
props.root,
props.dataSources.filter((ds) => ds.type === "file"),
);

if (
props.dataSources.length > 0 &&
(props.postInstallAction === "runApp" ||
props.postInstallAction === "dependencies")
) {
console.log("\nGenerating context data...\n");
await copyContextData(
props.root,
props.dataSources.filter((ds) => ds.type === "file"),
await generateContextData(
props.framework,
props.modelConfig,
props.packageManager,
props.vectorDb,
props.llamaCloudKey,
props.useLlamaParse,
);
if (
props.postInstallAction === "runApp" ||
props.postInstallAction === "dependencies"
) {
await generateContextData(
props.framework,
props.modelConfig,
props.packageManager,
props.vectorDb,
props.llamaCloudKey,
props.useLlamaParse,
);
}
}

// Create outputs directory
Expand Down
1 change: 1 addition & 0 deletions questions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,7 @@ export const askQuestions = async (
type: "db",
config: await prompts(dbPrompts, questionHandlers),
});
break;
}
case "llamacloud": {
program.dataSources.push({
Expand Down

0 comments on commit b6da3c2

Please sign in to comment.