From 71a24b2887fed23ac6669adbfbc3a5c3e3965e5f Mon Sep 17 00:00:00 2001 From: shreyanid <42684285+shreyanid@users.noreply.github.com> Date: Wed, 26 Jul 2023 09:56:39 -0700 Subject: [PATCH] Update `partition_via_api` to not post a strategy value if not user specified (#967) * remove default strategy * working on test * fixed test, coordinates param needed to be included * nits * update changelog * lint * update requirements --- CHANGELOG.md | 1 + docs/requirements.txt | 6 +- requirements/base.txt | 8 +- requirements/build.txt | 6 +- requirements/dev.txt | 146 ++++++++++++++---------- requirements/huggingface.txt | 8 +- requirements/ingest-azure.txt | 8 +- requirements/ingest-confluence.txt | 4 +- requirements/ingest-discord.txt | 2 +- requirements/ingest-dropbox.txt | 2 +- requirements/ingest-elasticsearch.txt | 2 +- requirements/ingest-gcs.txt | 5 +- requirements/ingest-github.txt | 4 +- requirements/ingest-gitlab.txt | 2 +- requirements/ingest-google-drive.txt | 4 +- requirements/ingest-onedrive.txt | 8 +- requirements/ingest-reddit.txt | 2 +- requirements/ingest-s3.txt | 2 +- requirements/ingest-wikipedia.txt | 2 +- requirements/local-inference.txt | 16 +-- requirements/test.txt | 22 ++-- test_unstructured/partition/test_api.py | 25 ++++ unstructured/partition/api.py | 12 -- 23 files changed, 169 insertions(+), 128 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7974b6d317..a01434f183 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ has a `text/plain` MIME type. * Enables filters to be passed to `partition_doc` so it doesn't error with LibreOffice7. * Removed old error message that's superseded by `requires_dependencies`. +* Removes using `hi_res` as the default strategy value for `partition_via_api` and `partition_multiple_via_api` ## 0.8.1 diff --git a/docs/requirements.txt b/docs/requirements.txt index c3514039c9..3de8e6c768 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -10,7 +10,7 @@ babel==2.12.1 # via sphinx beautifulsoup4==4.12.2 # via furo -certifi==2023.5.7 +certifi==2023.7.22 # via # -r requirements/build.in # requests @@ -71,7 +71,7 @@ sphinxcontrib-qthelp==1.0.3 # via sphinx sphinxcontrib-serializinghtml==1.1.5 # via sphinx -urllib3==2.0.3 +urllib3==2.0.4 # via requests -zipp==3.16.1 +zipp==3.16.2 # via importlib-metadata diff --git a/requirements/base.txt b/requirements/base.txt index 1bf924f41b..03125fc3c8 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -4,7 +4,7 @@ # # pip-compile requirements/base.in # -certifi==2023.5.7 +certifi==2023.7.22 # via # -c requirements/constraints.in # requests @@ -16,7 +16,7 @@ charset-normalizer==3.2.0 # via # pdfminer-six # requests -click==8.1.5 +click==8.1.6 # via nltk cryptography==41.0.2 # via pdfminer-six @@ -35,7 +35,7 @@ lxml==4.9.3 # -r requirements/base.in # python-docx # python-pptx -markdown==3.4.3 +markdown==3.4.4 # via -r requirements/base.in msg-parser==1.2.0 # via -r requirements/base.in @@ -92,5 +92,5 @@ xlrd==2.0.1 # via -r requirements/base.in xlsxwriter==3.1.2 # via python-pptx -zipp==3.16.1 +zipp==3.16.2 # via importlib-metadata diff --git a/requirements/build.txt b/requirements/build.txt index c3514039c9..3de8e6c768 100644 --- a/requirements/build.txt +++ b/requirements/build.txt @@ -10,7 +10,7 @@ babel==2.12.1 # via sphinx beautifulsoup4==4.12.2 # via furo -certifi==2023.5.7 +certifi==2023.7.22 # via # -r requirements/build.in # requests @@ -71,7 +71,7 @@ sphinxcontrib-qthelp==1.0.3 # via sphinx sphinxcontrib-serializinghtml==1.1.5 # via sphinx -urllib3==2.0.3 +urllib3==2.0.4 # via requests -zipp==3.16.1 +zipp==3.16.2 # via importlib-metadata diff --git a/requirements/dev.txt b/requirements/dev.txt index 59b72c04c6..2937228ac2 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -11,20 +11,21 @@ appnope==0.1.3 # ipykernel # ipython argon2-cffi==21.3.0 - # via - # jupyter-server - # nbclassic - # notebook + # via jupyter-server argon2-cffi-bindings==21.2.0 # via argon2-cffi arrow==1.2.3 # via isoduration asttokens==2.2.1 # via stack-data +async-lru==2.0.3 + # via jupyterlab attrs==23.1.0 # via # jsonschema # referencing +babel==2.12.1 + # via jupyterlab-server backcall==0.2.0 # via ipython beautifulsoup4==4.12.2 @@ -33,13 +34,24 @@ bleach==6.0.0 # via nbconvert build==0.10.0 # via pip-tools +certifi==2023.7.22 + # via + # -c requirements/base.txt + # -c requirements/constraints.in + # -c requirements/test.txt + # requests cffi==1.15.1 # via # -c requirements/base.txt # argon2-cffi-bindings cfgv==3.3.1 # via pre-commit -click==8.1.5 +charset-normalizer==3.2.0 + # via + # -c requirements/base.txt + # -c requirements/test.txt + # requests +click==8.1.6 # via # -c requirements/base.txt # -c requirements/test.txt @@ -52,7 +64,7 @@ decorator==5.1.1 # via ipython defusedxml==0.7.1 # via nbconvert -distlib==0.3.6 +distlib==0.3.7 # via virtualenv exceptiongroup==1.1.2 # via @@ -60,13 +72,13 @@ exceptiongroup==1.1.2 # anyio executing==1.2.0 # via stack-data -fastjsonschema==2.17.1 +fastjsonschema==2.18.0 # via nbformat filelock==3.12.2 # via virtualenv fqdn==1.5.1 # via jsonschema -identify==2.5.24 +identify==2.5.26 # via pre-commit idna==3.4 # via @@ -74,22 +86,27 @@ idna==3.4 # -c requirements/test.txt # anyio # jsonschema + # requests importlib-metadata==6.8.0 # via # -c requirements/base.txt # jupyter-client + # jupyter-lsp + # jupyterlab + # jupyterlab-server # nbconvert importlib-resources==6.0.0 # via # jsonschema # jsonschema-specifications -ipykernel==6.24.0 + # jupyterlab + # notebook +ipykernel==6.25.0 # via # ipywidgets # jupyter # jupyter-console - # nbclassic - # notebook + # jupyterlab # qtconsole ipython==8.12.2 # via @@ -98,10 +115,7 @@ ipython==8.12.2 # ipywidgets # jupyter-console ipython-genutils==0.2.0 - # via - # nbclassic - # notebook - # qtconsole + # via qtconsole ipywidgets==8.0.7 # via jupyter isoduration==20.11.0 @@ -111,16 +125,19 @@ jedi==0.18.2 jinja2==3.1.2 # via # jupyter-server - # nbclassic + # jupyterlab + # jupyterlab-server # nbconvert - # notebook +json5==0.9.14 + # via jupyterlab-server jsonpointer==2.4 # via jsonschema -jsonschema[format-nongpl]==4.18.3 +jsonschema[format-nongpl]==4.18.4 # via # jupyter-events + # jupyterlab-server # nbformat -jsonschema-specifications==2023.6.1 +jsonschema-specifications==2023.7.1 # via jsonschema jupyter==1.0.0 # via -r requirements/dev.in @@ -129,9 +146,7 @@ jupyter-client==8.3.0 # ipykernel # jupyter-console # jupyter-server - # nbclassic # nbclient - # notebook # qtconsole jupyter-console==6.6.3 # via jupyter @@ -142,22 +157,32 @@ jupyter-core==5.3.1 # jupyter-client # jupyter-console # jupyter-server - # nbclassic + # jupyterlab # nbclient # nbconvert # nbformat - # notebook # qtconsole jupyter-events==0.6.3 # via jupyter-server +jupyter-lsp==2.2.0 + # via jupyterlab jupyter-server==2.7.0 # via - # nbclassic + # jupyter-lsp + # jupyterlab + # jupyterlab-server + # notebook # notebook-shim jupyter-server-terminals==0.4.4 # via jupyter-server +jupyterlab==4.0.3 + # via notebook jupyterlab-pygments==0.2.2 # via nbconvert +jupyterlab-server==2.24.0 + # via + # jupyterlab + # notebook jupyterlab-widgets==3.0.8 # via ipywidgets markupsafe==2.1.3 @@ -170,34 +195,27 @@ matplotlib-inline==0.1.6 # ipython mistune==3.0.1 # via nbconvert -nbclassic==1.0.0 - # via notebook nbclient==0.8.0 # via nbconvert -nbconvert==7.6.0 +nbconvert==7.7.3 # via # jupyter # jupyter-server - # nbclassic - # notebook nbformat==5.9.1 # via # jupyter-server - # nbclassic # nbclient # nbconvert - # notebook nest-asyncio==1.5.6 - # via - # ipykernel - # nbclassic - # notebook + # via ipykernel nodeenv==1.8.0 # via pre-commit -notebook==6.5.4 +notebook==7.0.0 # via jupyter notebook-shim==0.2.3 - # via nbclassic + # via + # jupyterlab + # notebook overrides==7.3.1 # via jupyter-server packaging==23.1 @@ -206,6 +224,8 @@ packaging==23.1 # build # ipykernel # jupyter-server + # jupyterlab + # jupyterlab-server # nbconvert # qtconsole # qtpy @@ -217,11 +237,11 @@ pexpect==4.8.0 # via ipython pickleshare==0.7.5 # via ipython -pip-tools==6.14.0 +pip-tools==7.1.0 # via -r requirements/dev.in pkgutil-resolve-name==1.3.10 # via jsonschema -platformdirs==3.8.1 +platformdirs==3.9.1 # via # -c requirements/test.txt # jupyter-core @@ -229,10 +249,7 @@ platformdirs==3.8.1 pre-commit==3.3.3 # via -r requirements/dev.in prometheus-client==0.17.1 - # via - # jupyter-server - # nbclassic - # notebook + # via jupyter-server prompt-toolkit==3.0.39 # via # ipython @@ -265,7 +282,11 @@ python-dateutil==2.8.2 # jupyter-client python-json-logger==2.0.7 # via jupyter-events -pyyaml==6.0 +pytz==2023.3 + # via + # -c requirements/base.txt + # babel +pyyaml==6.0.1 # via # -c requirements/test.txt # jupyter-events @@ -276,17 +297,20 @@ pyzmq==25.1.0 # jupyter-client # jupyter-console # jupyter-server - # nbclassic - # notebook # qtconsole qtconsole==5.4.3 # via jupyter qtpy==2.3.1 # via qtconsole -referencing==0.29.1 +referencing==0.30.0 # via # jsonschema # jsonschema-specifications +requests==2.31.0 + # via + # -c requirements/base.txt + # -c requirements/test.txt + # jupyterlab-server rfc3339-validator==0.1.4 # via # jsonschema @@ -295,15 +319,12 @@ rfc3986-validator==0.1.1 # via # jsonschema # jupyter-events -rpds-py==0.8.10 +rpds-py==0.9.2 # via # jsonschema # referencing send2trash==1.8.2 - # via - # jupyter-server - # nbclassic - # notebook + # via jupyter-server six==1.16.0 # via # -c requirements/base.txt @@ -322,14 +343,13 @@ terminado==0.17.1 # via # jupyter-server # jupyter-server-terminals - # nbclassic - # notebook tinycss2==1.2.1 # via nbconvert tomli==2.0.1 # via # -c requirements/test.txt # build + # jupyterlab # pip-tools # pyproject-hooks tornado==6.3.2 @@ -337,7 +357,7 @@ tornado==6.3.2 # ipykernel # jupyter-client # jupyter-server - # nbclassic + # jupyterlab # notebook # terminado traitlets==5.9.0 @@ -351,20 +371,26 @@ traitlets==5.9.0 # jupyter-core # jupyter-events # jupyter-server + # jupyterlab # matplotlib-inline - # nbclassic # nbclient # nbconvert # nbformat - # notebook # qtconsole typing-extensions==4.7.1 # via # -c requirements/test.txt + # async-lru # ipython uri-template==1.3.0 # via jsonschema -virtualenv==20.23.1 +urllib3==1.26.16 + # via + # -c requirements/base.txt + # -c requirements/constraints.in + # -c requirements/test.txt + # requests +virtualenv==20.24.2 # via pre-commit wcwidth==0.2.6 # via prompt-toolkit @@ -376,13 +402,13 @@ webencodings==0.5.1 # tinycss2 websocket-client==1.6.1 # via jupyter-server -wheel==0.40.0 +wheel==0.41.0 # via # -c requirements/constraints.in # pip-tools widgetsnbextension==4.0.8 # via ipywidgets -zipp==3.16.1 +zipp==3.16.2 # via # -c requirements/base.txt # importlib-metadata diff --git a/requirements/huggingface.txt b/requirements/huggingface.txt index d3f847769d..21686e2a3f 100644 --- a/requirements/huggingface.txt +++ b/requirements/huggingface.txt @@ -4,7 +4,7 @@ # # pip-compile requirements/huggingface.in # -certifi==2023.5.7 +certifi==2023.7.22 # via # -c requirements/base.txt # -c requirements/constraints.in @@ -13,7 +13,7 @@ charset-normalizer==3.2.0 # via # -c requirements/base.txt # requests -click==8.1.5 +click==8.1.6 # via # -c requirements/base.txt # sacremoses @@ -52,7 +52,7 @@ packaging==23.1 # via # huggingface-hub # transformers -pyyaml==6.0 +pyyaml==6.0.1 # via # huggingface-hub # transformers @@ -89,7 +89,7 @@ tqdm==4.65.0 # huggingface-hub # sacremoses # transformers -transformers==4.30.2 +transformers==4.31.0 # via -r requirements/huggingface.in typing-extensions==4.7.1 # via diff --git a/requirements/ingest-azure.txt b/requirements/ingest-azure.txt index facfaba5c6..feba108e96 100644 --- a/requirements/ingest-azure.txt +++ b/requirements/ingest-azure.txt @@ -6,7 +6,7 @@ # adlfs==2023.4.0 # via -r requirements/ingest-azure.in -aiohttp==3.8.4 +aiohttp==3.8.5 # via adlfs aiosignal==1.3.1 # via aiohttp @@ -25,7 +25,7 @@ azure-identity==1.13.0 # via adlfs azure-storage-blob==12.17.0 # via adlfs -certifi==2023.5.7 +certifi==2023.7.22 # via # -c requirements/base.txt # -c requirements/constraints.in @@ -62,7 +62,7 @@ idna==3.4 # yarl isodate==0.6.1 # via azure-storage-blob -msal==1.22.0 +msal==1.23.0 # via # azure-datalake-store # azure-identity @@ -79,7 +79,7 @@ pycparser==2.21 # via # -c requirements/base.txt # cffi -pyjwt[crypto]==2.7.0 +pyjwt[crypto]==2.8.0 # via msal requests==2.31.0 # via diff --git a/requirements/ingest-confluence.txt b/requirements/ingest-confluence.txt index e4cb4d9c92..31a80b3f8d 100644 --- a/requirements/ingest-confluence.txt +++ b/requirements/ingest-confluence.txt @@ -1,12 +1,12 @@ # -# This file is autogenerated by pip-compile with Python 3.10 +# This file is autogenerated by pip-compile with Python 3.8 # by the following command: # # pip-compile requirements/ingest-confluence.in # atlassian-python-api==3.39.0 # via -r requirements/ingest-confluence.in -certifi==2023.5.7 +certifi==2023.7.22 # via # -c requirements/base.txt # -c requirements/constraints.in diff --git a/requirements/ingest-discord.txt b/requirements/ingest-discord.txt index 7983cf17d4..544398b4cd 100644 --- a/requirements/ingest-discord.txt +++ b/requirements/ingest-discord.txt @@ -4,7 +4,7 @@ # # pip-compile requirements/ingest-discord.in # -aiohttp==3.8.4 +aiohttp==3.8.5 # via discord-py aiosignal==1.3.1 # via aiohttp diff --git a/requirements/ingest-dropbox.txt b/requirements/ingest-dropbox.txt index c36a2ef552..1bd06426e5 100644 --- a/requirements/ingest-dropbox.txt +++ b/requirements/ingest-dropbox.txt @@ -4,7 +4,7 @@ # # pip-compile requirements/ingest-dropbox.in # -certifi==2023.5.7 +certifi==2023.7.22 # via # -c requirements/base.txt # -c requirements/constraints.in diff --git a/requirements/ingest-elasticsearch.txt b/requirements/ingest-elasticsearch.txt index e8e55c65b7..78b7d96237 100644 --- a/requirements/ingest-elasticsearch.txt +++ b/requirements/ingest-elasticsearch.txt @@ -4,7 +4,7 @@ # # pip-compile requirements/ingest-elasticsearch.in # -certifi==2023.5.7 +certifi==2023.7.22 # via # -c requirements/base.txt # -c requirements/constraints.in diff --git a/requirements/ingest-gcs.txt b/requirements/ingest-gcs.txt index 854943a65b..129eae20a6 100644 --- a/requirements/ingest-gcs.txt +++ b/requirements/ingest-gcs.txt @@ -4,7 +4,7 @@ # # pip-compile requirements/ingest-gcs.in # -aiohttp==3.8.4 +aiohttp==3.8.5 # via gcsfs aiosignal==1.3.1 # via aiohttp @@ -14,7 +14,7 @@ attrs==23.1.0 # via aiohttp cachetools==5.3.1 # via google-auth -certifi==2023.5.7 +certifi==2023.7.22 # via # -c requirements/base.txt # -c requirements/constraints.in @@ -74,6 +74,7 @@ protobuf==4.23.4 # via # -c requirements/constraints.in # google-api-core + # googleapis-common-protos pyasn1==0.5.0 # via # pyasn1-modules diff --git a/requirements/ingest-github.txt b/requirements/ingest-github.txt index 4dd082c369..3c1504b9c7 100644 --- a/requirements/ingest-github.txt +++ b/requirements/ingest-github.txt @@ -4,7 +4,7 @@ # # pip-compile requirements/ingest-github.in # -certifi==2023.5.7 +certifi==2023.7.22 # via # -c requirements/base.txt # -c requirements/constraints.in @@ -34,7 +34,7 @@ pycparser==2.21 # cffi pygithub==1.58.2 # via -r requirements/ingest-github.in -pyjwt[crypto]==2.7.0 +pyjwt[crypto]==2.8.0 # via pygithub pynacl==1.5.0 # via pygithub diff --git a/requirements/ingest-gitlab.txt b/requirements/ingest-gitlab.txt index 20310f0c8c..dbff64042c 100644 --- a/requirements/ingest-gitlab.txt +++ b/requirements/ingest-gitlab.txt @@ -4,7 +4,7 @@ # # pip-compile requirements/ingest-gitlab.in # -certifi==2023.5.7 +certifi==2023.7.22 # via # -c requirements/base.txt # -c requirements/constraints.in diff --git a/requirements/ingest-google-drive.txt b/requirements/ingest-google-drive.txt index 48e1660fb2..fc48a89ca7 100644 --- a/requirements/ingest-google-drive.txt +++ b/requirements/ingest-google-drive.txt @@ -6,7 +6,7 @@ # cachetools==5.3.1 # via google-auth -certifi==2023.5.7 +certifi==2023.7.22 # via # -c requirements/base.txt # -c requirements/constraints.in @@ -17,7 +17,7 @@ charset-normalizer==3.2.0 # requests google-api-core==2.11.1 # via google-api-python-client -google-api-python-client==2.93.0 +google-api-python-client==2.95.0 # via -r requirements/ingest-google-drive.in google-auth==2.22.0 # via diff --git a/requirements/ingest-onedrive.txt b/requirements/ingest-onedrive.txt index 65c32f34e3..6afcb9036d 100644 --- a/requirements/ingest-onedrive.txt +++ b/requirements/ingest-onedrive.txt @@ -4,7 +4,7 @@ # # pip-compile requirements/ingest-onedrive.in # -certifi==2023.5.7 +certifi==2023.7.22 # via # -c requirements/base.txt # -c requirements/constraints.in @@ -27,17 +27,17 @@ idna==3.4 # via # -c requirements/base.txt # requests -msal==1.22.0 +msal==1.23.0 # via # -r requirements/ingest-onedrive.in # office365-rest-python-client -office365-rest-python-client==2.4.2 +office365-rest-python-client==2.4.3 # via -r requirements/ingest-onedrive.in pycparser==2.21 # via # -c requirements/base.txt # cffi -pyjwt[crypto]==2.7.0 +pyjwt[crypto]==2.8.0 # via msal pytz==2023.3 # via diff --git a/requirements/ingest-reddit.txt b/requirements/ingest-reddit.txt index dc2758ffd8..42bffeaddb 100644 --- a/requirements/ingest-reddit.txt +++ b/requirements/ingest-reddit.txt @@ -4,7 +4,7 @@ # # pip-compile requirements/ingest-reddit.in # -certifi==2023.5.7 +certifi==2023.7.22 # via # -c requirements/base.txt # -c requirements/constraints.in diff --git a/requirements/ingest-s3.txt b/requirements/ingest-s3.txt index 266fdfbbe8..e9801a500c 100644 --- a/requirements/ingest-s3.txt +++ b/requirements/ingest-s3.txt @@ -6,7 +6,7 @@ # aiobotocore==2.5.2 # via s3fs -aiohttp==3.8.4 +aiohttp==3.8.5 # via # aiobotocore # s3fs diff --git a/requirements/ingest-wikipedia.txt b/requirements/ingest-wikipedia.txt index 3001b8c488..7455fb82c4 100644 --- a/requirements/ingest-wikipedia.txt +++ b/requirements/ingest-wikipedia.txt @@ -6,7 +6,7 @@ # beautifulsoup4==4.12.2 # via wikipedia -certifi==2023.5.7 +certifi==2023.7.22 # via # -c requirements/base.txt # -c requirements/constraints.in diff --git a/requirements/local-inference.txt b/requirements/local-inference.txt index 860c7b2cc7..110e2d0a3f 100644 --- a/requirements/local-inference.txt +++ b/requirements/local-inference.txt @@ -6,7 +6,7 @@ # antlr4-python3-runtime==4.9.3 # via omegaconf -certifi==2023.5.7 +certifi==2023.7.22 # via # -c requirements/base.txt # -c requirements/constraints.in @@ -39,7 +39,7 @@ filelock==3.12.2 # transformers flatbuffers==23.5.26 # via onnxruntime -fonttools==4.41.0 +fonttools==4.41.1 # via matplotlib fsspec==2023.6.0 # via huggingface-hub @@ -112,7 +112,7 @@ pdfminer-six==20221105 # via # -c requirements/base.txt # pdfplumber -pdfplumber==0.9.0 +pdfplumber==0.10.1 # via layoutparser pillow==10.0.0 # via @@ -139,6 +139,8 @@ pyparsing==3.0.9 # via # -c requirements/constraints.in # matplotlib +pypdfium2==4.18.0 + # via pdfplumber pytesseract==0.3.10 # via layoutparser python-dateutil==2.8.2 @@ -152,7 +154,7 @@ pytz==2023.3 # via # -c requirements/base.txt # pandas -pyyaml==6.0 +pyyaml==6.0.1 # via # huggingface-hub # layoutparser @@ -204,7 +206,7 @@ tqdm==4.65.0 # huggingface-hub # iopath # transformers -transformers==4.30.2 +transformers==4.31.0 # via unstructured-inference typing-extensions==4.7.1 # via @@ -222,9 +224,7 @@ urllib3==1.26.16 # -c requirements/base.txt # -c requirements/constraints.in # requests -wand==0.6.11 - # via pdfplumber -zipp==3.16.1 +zipp==3.16.2 # via # -c requirements/base.txt # importlib-resources diff --git a/requirements/test.txt b/requirements/test.txt index 9f6091508e..bd78932afc 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -8,7 +8,7 @@ appdirs==1.4.4 # via label-studio-tools black==23.7.0 # via -r requirements/test.in -certifi==2023.5.7 +certifi==2023.7.22 # via # -c requirements/base.txt # -c requirements/constraints.in @@ -17,7 +17,7 @@ charset-normalizer==3.2.0 # via # -c requirements/base.txt # requests -click==8.1.5 +click==8.1.6 # via # -c requirements/base.txt # -r requirements/test.in @@ -32,7 +32,7 @@ flake8==6.0.0 # via -r requirements/test.in freezegun==1.2.2 # via -r requirements/test.in -grpcio==1.56.0 +grpcio==1.56.2 # via -r requirements/test.in idna==3.4 # via @@ -66,13 +66,13 @@ packaging==23.1 # pytest pathspec==0.11.1 # via black -platformdirs==3.8.1 +platformdirs==3.9.1 # via black pluggy==1.2.0 # via pytest pycodestyle==2.10.0 # via flake8 -pydantic==1.10.11 +pydantic==1.10.12 # via label-studio-sdk pyflakes==3.0.1 # via flake8 @@ -88,13 +88,13 @@ python-dateutil==2.8.2 # via # -c requirements/base.txt # freezegun -pyyaml==6.0 +pyyaml==6.0.1 # via vcrpy requests==2.31.0 # via # -c requirements/base.txt # label-studio-sdk -ruff==0.0.278 +ruff==0.0.280 # via -r requirements/test.in six==1.16.0 # via @@ -107,13 +107,13 @@ tomli==2.0.1 # coverage # mypy # pytest -types-markdown==3.4.2.9 +types-markdown==3.4.2.10 # via -r requirements/test.in -types-requests==2.31.0.1 +types-requests==2.31.0.2 # via -r requirements/test.in -types-tabulate==0.9.0.2 +types-tabulate==0.9.0.3 # via -r requirements/test.in -types-urllib3==1.26.25.13 +types-urllib3==1.26.25.14 # via types-requests typing-extensions==4.7.1 # via diff --git a/test_unstructured/partition/test_api.py b/test_unstructured/partition/test_api.py index 5faf83b9e7..1d0ccd9d43 100644 --- a/test_unstructured/partition/test_api.py +++ b/test_unstructured/partition/test_api.py @@ -97,6 +97,31 @@ def test_partition_via_api_raises_with_bad_response(monkeypatch): partition_via_api(filename=filename) +def test_partition_via_api_with_no_strategy(): + filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.jpg") + + elements_no_strategy = partition_via_api(filename=filename, api_key=get_api_key()) + elements_hi_res = partition_via_api(filename=filename, strategy="hi_res", api_key=get_api_key()) + + # confirm that hi_res strategy was not passed as defaukt to partition by comparing outputs + assert elements_no_strategy[0].text.startswith("arXiv") + assert elements_hi_res[0].text.startswith("LayoutParser") + + +def test_partition_via_api_with_image_hi_res_strategy_includes_coordinates(): + filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.jpg") + + # coordinates not included by default to limit payload size + elements = partition_via_api( + filename=filename, + strategy="hi_res", + coordinates="true", + api_key=get_api_key(), + ) + + assert elements[0].metadata.coordinates is not None + + @pytest.mark.skipif(skip_outside_ci, reason="Skipping test run outside of CI") @pytest.mark.skipif(skip_not_on_main, reason="Skipping test run outside of main branch") def test_partition_via_api_valid_request_data_kwargs(): diff --git a/unstructured/partition/api.py b/unstructured/partition/api.py index 5d7e6ae078..1064c06214 100644 --- a/unstructured/partition/api.py +++ b/unstructured/partition/api.py @@ -53,10 +53,6 @@ def partition_via_api( "UNSTRUCTURED-API-KEY": api_key, } - # set default values for kwargs - strategy = request_kwargs.pop("strategy", "hi_res") - request_kwargs["strategy"] = strategy - if filename is not None: with open(filename, "rb") as f: files = [ @@ -118,10 +114,6 @@ def partition_multiple_via_api( A list of file-like object using "rb" mode --> open(filename, "rb"). file_filename When file is not None, the filename (string) to store in element metadata. E.g. "foo.txt" - strategy - The strategy to use for partitioning the PDF. Uses a layout detection model if set - to 'hi_res', otherwise partition_pdf simply extracts the text from the document - and processes it. api_url The URL for the Unstructured API. Defaults to the hosted Unstructured API. api_key @@ -135,10 +127,6 @@ def partition_multiple_via_api( "UNSTRUCTURED-API-KEY": api_key, } - # set default values for kwargs - strategy = request_kwargs.pop("strategy", "hi_res") - request_kwargs["strategy"] = strategy - if filenames is not None: if content_types and len(content_types) != len(filenames): raise ValueError("content_types and filenames must have the same length.")