Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
logan-markewich committed Sep 27, 2024
1 parent 26a17ed commit 1ceee39
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 9 deletions.
12 changes: 8 additions & 4 deletions llama_extract/base.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import asyncio
import os
import time

import pydantic.v1 as pydantic_v1

from io import BufferedIOBase, BufferedReader, BytesIO
from json.decoder import JSONDecodeError
from pathlib import Path
from pydantic import BaseModel, ValidationError
from pydantic import BaseModel, Extra, ValidationError
from typing import List, Optional, Tuple, Type, Union
import urllib.parse

Expand Down Expand Up @@ -212,15 +215,16 @@ async def ainfer_schema(
)

if 200 <= _response.status_code < 300:
return pydantic.parse_obj_as(ExtractionSchema, _response.json()) # type: ignore
return pydantic_v1.parse_obj_as(ExtractionSchema, _response.json())
if _response.status_code == 422:
raise UnprocessableEntityError(
pydantic.parse_obj_as(HttpValidationError, _response.json())
) # type: ignore
pydantic_v1.parse_obj_as(HttpValidationError, _response.json())
)
try:
_response_json = _response.json()
except JSONDecodeError:
raise ApiError(status_code=_response.status_code, body=_response.text)

raise ApiError(status_code=_response.status_code, body=_response_json)

def infer_schema(
Expand Down
7 changes: 3 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "llama-extract"
version = "0.0.4"
version = "0.0.5"
description = "Infer schema and extract data from unstructured files"
authors = ["Logan Markewich <[email protected]>"]
license = "MIT"
Expand All @@ -13,9 +13,8 @@ packages = [{include = "llama_extract"}]

[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
llama-index-core = ">=0.10.29"
llama-cloud = "^0.0.11"
pydantic = ">=1.10"
llama-index-core = "^0.11.0"
llama-cloud = ">=0.1.0"

[tool.poetry.group.dev.dependencies]
pytest = "^8.0.0"
Expand Down
18 changes: 17 additions & 1 deletion tests/test_extract.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,26 @@
import os
import pytest

from llama_extract import LlamaExtract


TEST_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data/test.pdf")


@pytest.mark.skipif(
    not os.environ.get("LLAMA_CLOUD_API_KEY"),
    reason="LLAMA_CLOUD_API_KEY not set",
)
def test_simple() -> None:
    """Smoke-test schema inference followed by extraction on TEST_FILE.

    The test is skipped when the LLAMA_CLOUD_API_KEY environment variable is
    unset or empty. It makes no assertions on the returned values; it only
    checks that both calls complete without raising.
    """
    api_key = os.environ["LLAMA_CLOUD_API_KEY"]
    client = LlamaExtract(api_key=api_key)

    # Infer a schema named "my_schema" from the sample file.
    inferred = client.infer_schema("my_schema", [TEST_FILE])

    # Extract data using the id of the schema inferred above.
    # NOTE(review): the result is never inspected -- consider asserting on it
    # once the return shape of `extract` is confirmed.
    results = client.extract(inferred.id, [TEST_FILE])

0 comments on commit 1ceee39

Please sign in to comment.