From 38a8be8d124e25a4c52a31a8c8b5cf8d4bc2bc09 Mon Sep 17 00:00:00 2001 From: Thuc Pham <51660321+thucpn@users.noreply.github.com> Date: Thu, 12 Sep 2024 11:34:54 +0700 Subject: [PATCH] fix: filter in mongo vector store (#269) --- .changeset/five-grapes-switch.md | 5 +++++ .../vectordbs/typescript/mongo/generate.ts | 15 +++++++++------ .../vectordbs/typescript/mongo/index.ts | 13 ++++++++++--- .../vectordbs/typescript/mongo/shared.ts | 2 ++ templates/types/streaming/express/package.json | 2 +- templates/types/streaming/nextjs/package.json | 2 +- 6 files changed, 28 insertions(+), 11 deletions(-) create mode 100644 .changeset/five-grapes-switch.md diff --git a/.changeset/five-grapes-switch.md b/.changeset/five-grapes-switch.md new file mode 100644 index 00000000..9b049b9d --- /dev/null +++ b/.changeset/five-grapes-switch.md @@ -0,0 +1,5 @@ +--- +"create-llama": patch +--- + +fix: filter in mongo vector store diff --git a/templates/components/vectordbs/typescript/mongo/generate.ts b/templates/components/vectordbs/typescript/mongo/generate.ts index 0f6f2258..73ff8592 100644 --- a/templates/components/vectordbs/typescript/mongo/generate.ts +++ b/templates/components/vectordbs/typescript/mongo/generate.ts @@ -1,14 +1,11 @@ /* eslint-disable turbo/no-undeclared-env-vars */ import * as dotenv from "dotenv"; -import { - MongoDBAtlasVectorSearch, - VectorStoreIndex, - storageContextFromDefaults, -} from "llamaindex"; +import { storageContextFromDefaults, VectorStoreIndex } from "llamaindex"; +import { MongoDBAtlasVectorSearch } from "llamaindex/storage/vectorStore/MongoDBAtlasVectorStore"; import { MongoClient } from "mongodb"; import { getDocuments } from "./loader"; import { initSettings } from "./settings"; -import { checkRequiredEnvVars } from "./shared"; +import { checkRequiredEnvVars, POPULATED_METADATA_FIELDS } from "./shared"; dotenv.config(); @@ -30,6 +27,12 @@ async function loadAndIndex() { dbName: databaseName, collectionName: vectorCollectionName, // this is where your embeddings will be stored indexName: indexName, // this is the name of the index you will need to create + indexedMetadataFields: POPULATED_METADATA_FIELDS, + embeddingDefinition: { + dimensions: process.env.EMBEDDING_DIM + ? parseInt(process.env.EMBEDDING_DIM) + : 1536, + }, }); // now create an index from all the Documents and store them in Atlas diff --git a/templates/components/vectordbs/typescript/mongo/index.ts b/templates/components/vectordbs/typescript/mongo/index.ts index 5aabb131..75c20fb6 100644 --- a/templates/components/vectordbs/typescript/mongo/index.ts +++ b/templates/components/vectordbs/typescript/mongo/index.ts @@ -1,16 +1,23 @@ /* eslint-disable turbo/no-undeclared-env-vars */ -import { MongoDBAtlasVectorSearch, VectorStoreIndex } from "llamaindex"; +import { VectorStoreIndex } from "llamaindex"; +import { MongoDBAtlasVectorSearch } from "llamaindex/storage/vectorStore/MongoDBAtlasVectorStore"; import { MongoClient } from "mongodb"; -import { checkRequiredEnvVars } from "./shared"; +import { checkRequiredEnvVars, POPULATED_METADATA_FIELDS } from "./shared"; export async function getDataSource(params?: any) { checkRequiredEnvVars(); - const client = new MongoClient(process.env.MONGO_URI!); + const client = new MongoClient(process.env.MONGODB_URI!); const store = new MongoDBAtlasVectorSearch({ mongodbClient: client, dbName: process.env.MONGODB_DATABASE!, collectionName: process.env.MONGODB_VECTORS!, indexName: process.env.MONGODB_VECTOR_INDEX, + indexedMetadataFields: POPULATED_METADATA_FIELDS, + embeddingDefinition: { + dimensions: process.env.EMBEDDING_DIM + ? parseInt(process.env.EMBEDDING_DIM) + : 1536, + }, }); return await VectorStoreIndex.fromVectorStore(store); diff --git a/templates/components/vectordbs/typescript/mongo/shared.ts b/templates/components/vectordbs/typescript/mongo/shared.ts index c6b5f303..5b046f91 100644 --- a/templates/components/vectordbs/typescript/mongo/shared.ts +++ b/templates/components/vectordbs/typescript/mongo/shared.ts @@ -5,6 +5,8 @@ const REQUIRED_ENV_VARS = [ "MONGODB_VECTOR_INDEX", ]; +export const POPULATED_METADATA_FIELDS = ["private", "doc_id"]; // for filtering in MongoDB VectorSearchIndex + export function checkRequiredEnvVars() { const missingEnvVars = REQUIRED_ENV_VARS.filter((envVar) => { return !process.env[envVar]; diff --git a/templates/types/streaming/express/package.json b/templates/types/streaming/express/package.json index 21e9edc9..569a6986 100644 --- a/templates/types/streaming/express/package.json +++ b/templates/types/streaming/express/package.json @@ -20,7 +20,7 @@ "dotenv": "^16.3.1", "duck-duck-scrape": "^2.2.5", "express": "^4.18.2", - "llamaindex": "0.5.20", + "llamaindex": "0.5.24", "pdf2json": "3.0.5", "ajv": "^8.12.0", "@e2b/code-interpreter": "^0.0.5", diff --git a/templates/types/streaming/nextjs/package.json b/templates/types/streaming/nextjs/package.json index a5e1feed..8d81b32d 100644 --- a/templates/types/streaming/nextjs/package.json +++ b/templates/types/streaming/nextjs/package.json @@ -25,7 +25,7 @@ "duck-duck-scrape": "^2.2.5", "formdata-node": "^6.0.3", "got": "^14.4.1", - "llamaindex": "0.5.20", + "llamaindex": "0.5.24", "lucide-react": "^0.294.0", "next": "^14.2.4", "react": "^18.2.0",