Querent-ai · the-non-expert · Sep 1, 2023 · saraswatpuneet · Sep 1, 2023 · saraswatpuneet
diff --git a/querent/ingestors/pdfs/pdf_ingestor_v1.py b/querent/ingestors/pdfs/pdf_ingestor_v1.py
@@ -40,7 +40,8 @@ async def ingest(
                     if current_file:
                         # Process the collected bytes of the previous file
                         text = await self.extract_and_process_pdf(
-                            CollectedBytes(file=current_file, data=collected_bytes)
+                            CollectedBytes(file=current_file,
+                                           data=collected_bytes)
                         )
                         yield text
                     collected_bytes = b""  # Reset collected bytes for the new file
@@ -56,6 +57,7 @@ async def ingest(
                 yield text
 
         except Exception as e:
+            print(e)
             yield []
 
     async def extract_and_process_pdf(self, collected_bytes: CollectedBytes) -> List[str]:
@@ -66,7 +68,7 @@ async def extract_text_from_pdf(self, collected_bytes: CollectedBytes) -> str:
         pdf = fitz.open(stream=collected_bytes.data, filetype="pdf")
         text = ""
         for page in pdf:
-            text += page.getText()
+            text += page.get_text()
         return text
 
     async def process_data(self, text: str) -> List[str]:

diff --git a/tests/test_pdf_ingestor.py b/tests/test_pdf_ingestor.py
@@ -6,30 +6,33 @@
 from querent.ingestors.ingestor_manager import IngestorFactoryManager
 import pytest
 
+
 @pytest.mark.asyncio
 async def test_collect_and_ingest_pdf():
     # Set up the collector
     collector_factory = FSCollectorFactory()
     uri = Uri("file://" + str(Path("./tests/data/pdf/").resolve()))
     config = FSCollectorConfig(root_path=uri.path)
     collector = collector_factory.resolve(uri, config)
-    
+
     # Set up the ingestor
     ingestor_factory_manager = IngestorFactoryManager()
-    ingestor_factory = await ingestor_factory_manager.get_factory("pdf")  # Notice the use of await here
+    # get_factory() returns an awaitable, so it must be awaited
+    ingestor_factory = await ingestor_factory_manager.get_factory("pdf")
     ingestor = await ingestor_factory.create("pdf", [])
-    
+
     # Collect and ingest the PDF
     ingested_call = ingestor.ingest(collector.poll())
     counter = 0
+
     async def poll_and_print():
         counter = 0
         async for ingested in ingested_call:
             assert ingested is not None
-            if len(ingested)  == 0:
+            if len(ingested) == 0:
                 counter += 1
-        assert counter == 1
-    
+        assert counter == 0
+
     await poll_and_print()  # Notice the use of await here