Skip to content

Commit

Permalink
Merge pull request #188 from langchain-ai/nc/15feb/ingest-parallel
Browse files (browse the repository at this point in the history)
Process uploaded files in parallel
  • Loading branch information
nfcampos authored Apr 19, 2024
2 parents c4590ec + 46da052 commit 825272d
Showing 1 changed file with 10 additions and 25 deletions.
35 changes: 10 additions & 25 deletions backend/app/upload.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -10,7 +10,7 @@
from __future__ import annotations

import os
-from typing import Any, BinaryIO, List, Optional
+from typing import BinaryIO, List, Optional

from langchain_community.document_loaders.blob_loaders.schema import Blob
from langchain_community.vectorstores.pgvector import PGVector
Expand Down Expand Up @@ -107,30 +107,15 @@ def namespace(self) -> str:
def invoke(
self, input: BinaryIO, config: Optional[RunnableConfig] = None
) -> List[str]:
-return self.batch([input], config)
-
-def batch(
-self,
-inputs: List[BinaryIO],
-config: RunnableConfig | List[RunnableConfig] | None = None,
-*,
-return_exceptions: bool = False,
-**kwargs: Any | None,
-) -> List:
-"""Ingest a batch of files into the vectorstore."""
-ids = []
-for data in inputs:
-blob = _convert_ingestion_input_to_blob(data)
-ids.extend(
-ingest_blob(
-blob,
-MIMETYPE_BASED_PARSER,
-self.text_splitter,
-self.vectorstore,
-self.namespace,
-)
-)
-return ids
+blob = _convert_ingestion_input_to_blob(input)
+out = ingest_blob(
+blob,
+MIMETYPE_BASED_PARSER,
+self.text_splitter,
+self.vectorstore,
+self.namespace,
+)
+return out


PG_CONNECTION_STRING = PGVector.connection_string_from_db_params(
Expand Down

0 comments on commit 825272d

Please sign in to comment.