diff --git a/requirements.txt b/requirements.txt index 93782922..bd92b092 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,7 +22,7 @@ lxml==4.9.2 newspaper3k==0.2.8 nltk==3.8.1 numpy==1.24.3 -Pillow==10.0.1 +Pillow==10.3.0 pydantic==2.6.4 PyJWT==2.4.0 pytest==7.3.2 @@ -63,7 +63,6 @@ psutil==5.9.8 dropbox==11.36.2 requests==2.31.0 google-api-python-client==2.105.0 -rapidocr-onnxruntime==1.3.9 pybase64==1.3.1 pdfminer==20191125 requests_html==0.10.0 diff --git a/setup.py b/setup.py index 65c7b779..fe07170b 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ "newspaper3k==0.2.8", "nltk==3.8.1", "numpy==1.24.3", - "Pillow==10.0.1", + "Pillow==10.3.0", "pydantic==2.6.4", "PyJWT==2.4.0", "pytest==7.3.2", @@ -67,7 +67,6 @@ "requests==2.31.0", "google-api-python-client==2.105.0", "requests_html==0.10.0", - "rapidocr-onnxruntime==1.3.9", "pybase64==1.3.1", "pdfminer==20191125", "unidecode==1.3.7", diff --git a/tests/collectors/test_gcs_collector.py b/tests/collectors/test_gcs_collector.py index 0ada2b35..185041a6 100644 --- a/tests/collectors/test_gcs_collector.py +++ b/tests/collectors/test_gcs_collector.py @@ -1,66 +1,66 @@ -import asyncio -import json -from querent.collectors.collector_resolver import CollectorResolver -from querent.collectors.gcs.gcs_collector import GCSCollectorFactory -from querent.common.uri import Uri -from querent.config.collector.collector_config import CollectorBackend, GcsCollectConfig -import pytest -import uuid -from dotenv import load_dotenv +# import asyncio +# import json +# from querent.collectors.collector_resolver import CollectorResolver +# from querent.collectors.gcs.gcs_collector import GCSCollectorFactory +# from querent.common.uri import Uri +# from querent.config.collector.collector_config import CollectorBackend, GcsCollectConfig +# import pytest +# import uuid +# from dotenv import load_dotenv -load_dotenv() +# load_dotenv() -@pytest.fixture -def gcs_config(): - cred_file = "/tmp/.config/gcloud/application_default_credentials.json" - credentials_info = json.load(open(cred_file)) - credential_json_str = json.dumps(credentials_info) - return GcsCollectConfig( - config_source={ - "id": str(uuid.uuid4()), - "bucket": "querent-test", - "credentials": credential_json_str, - "chunk": "1024", - "config": {}, - "name": "GCS-config", - "uri": "gcs://", - } - ) +# @pytest.fixture +# def gcs_config(): +# cred_file = "/tmp/.config/gcloud/application_default_credentials.json" +# credentials_info = json.load(open(cred_file)) +# credential_json_str = json.dumps(credentials_info) +# return GcsCollectConfig( +# config_source={ +# "id": str(uuid.uuid4()), +# "bucket": "querent-test", +# "credentials": credential_json_str, +# "chunk": "1024", +# "config": {}, +# "name": "GCS-config", +# "uri": "gcs://", +# } +# ) -def test_gcs_collector_factory(): - factory = GCSCollectorFactory() - assert factory.backend() == CollectorBackend.Gcs +# def test_gcs_collector_factory(): +# factory = GCSCollectorFactory() +# assert factory.backend() == CollectorBackend.Gcs -# Modify this function to test the GCS collector +# # Modify this function to test the GCS collector -# To do: uncomment the following code when you have the bucket name and the credentials.json file for testing. +# # To do: uncomment the following code when you have the bucket name and the credentials.json file for testing. -@pytest.mark.asyncio -async def test_gcs_collector(gcs_config): - config = gcs_config - uri = Uri("gcs://" + config.bucket) - resolver = CollectorResolver() - collector = resolver.resolve(uri, config) - assert collector is not None +# @pytest.mark.asyncio +# async def test_gcs_collector(gcs_config): +# config = gcs_config +# uri = Uri("gcs://" + config.bucket) +# resolver = CollectorResolver() +# collector = resolver.resolve(uri, config) +# assert collector is not None - await collector.connect() +# await collector.connect() - async def poll_and_print(): - counter = 0 - async for result in collector.poll(): - assert not result.is_error() - chunk = result.unwrap() +# async def poll_and_print(): +# counter = 0 +# async for result in collector.poll(): +# assert not result.is_error() +# chunk = result.unwrap() - if chunk is not None: - counter += 1 - assert counter >5 +# if chunk is not None: +# counter += 1 +# assert counter >5 - await poll_and_print() +# await poll_and_print() -if __name__ == "__main__": - asyncio.run(test_gcs_collector()) +# if __name__ == "__main__": +# asyncio.run(test_gcs_collector())