From 46da052326a6895b9f4cd361325dbec0a0b3d582 Mon Sep 17 00:00:00 2001
From: Nuno Campos
Date: Thu, 15 Feb 2024 14:14:39 -0800
Subject: [PATCH] Process uploaded files in parallel

Using default runnable batch implementation

---
 backend/app/upload.py | 35 ++++++++++-------------------------
 1 file changed, 10 insertions(+), 25 deletions(-)

diff --git a/backend/app/upload.py b/backend/app/upload.py
index 87168003..c09ae055 100644
--- a/backend/app/upload.py
+++ b/backend/app/upload.py
@@ -9,7 +9,7 @@ from __future__ import annotations
 
 import os
-from typing import Any, BinaryIO, List, Optional
+from typing import BinaryIO, List, Optional
 
 from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter
 from langchain_community.document_loaders.blob_loaders import Blob
@@ -77,30 +77,15 @@ def namespace(self) -> str:
     def invoke(
         self, input: BinaryIO, config: Optional[RunnableConfig] = None
     ) -> List[str]:
-        return self.batch([input], config)
-
-    def batch(
-        self,
-        inputs: List[BinaryIO],
-        config: RunnableConfig | List[RunnableConfig] | None = None,
-        *,
-        return_exceptions: bool = False,
-        **kwargs: Any | None,
-    ) -> List:
-        """Ingest a batch of files into the vectorstore."""
-        ids = []
-        for data in inputs:
-            blob = _convert_ingestion_input_to_blob(data)
-            ids.extend(
-                ingest_blob(
-                    blob,
-                    MIMETYPE_BASED_PARSER,
-                    self.text_splitter,
-                    self.vectorstore,
-                    self.namespace,
-                )
-            )
-        return ids
+        blob = _convert_ingestion_input_to_blob(input)
+        out = ingest_blob(
+            blob,
+            MIMETYPE_BASED_PARSER,
+            self.text_splitter,
+            self.vectorstore,
+            self.namespace,
+        )
+        return out
 
 
 index_schema = {