Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
Disiok committed Jul 25, 2024
1 parent 011f9d5 commit 7936419
Showing 1 changed file with 93 additions and 25 deletions.
118 changes: 93 additions & 25 deletions examples/demo_basic.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Apply `nest_asyncio` and bring your own LlamaCloud API key:"
"Bring your own LlamaCloud API key:"
]
},
{
Expand All @@ -51,11 +51,6 @@
"metadata": {},
"outputs": [],
"source": [
"# llama-extract is async-first, running the sync code in a notebook requires the use of nest_asyncio\n",
"import nest_asyncio\n",
"\n",
"nest_asyncio.apply()\n",
"\n",
"import os\n",
"\n",
"os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"llx-...\""
Expand All @@ -77,9 +72,16 @@
"source": [
"from llama_extract import LlamaExtract\n",
"\n",
"extractor = LlamaExtract()\n",
"\n",
"extraction_schema = extractor.infer_schema(\n",
"extractor = LlamaExtract()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"extraction_schema = await extractor.ainfer_schema(\n",
" \"Test Schema\", [\"./data/noisebridge_receipt.pdf\", \"./data/parallels_invoice.pdf\"]\n",
")"
]
Expand All @@ -97,15 +99,62 @@
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'type': 'object', 'properties': {'Invoice': {'type': 'object', 'properties': {'total': {'type': 'string'}, 'products': {'type': 'string'}, 'salesTax': {'type': 'string'}, 'subtotal': {'type': 'string'}, 'invoiceDate': {'type': 'string'}, 'invoiceNumber': {'type': 'string'}, 'billingAddress': {'type': 'object', 'properties': {'city': {'type': 'string'}, 'name': {'type': 'string'}, 'country': {'type': 'string'}, 'postalCode': {'type': 'string'}}}, 'paymentDetails': {'type': 'object', 'properties': {'taxId': {'type': 'string'}, 'merchant': {'type': 'string'}, 'merchantAddress': {'type': 'object', 'properties': {'city': {'type': 'string'}, 'suite': {'type': 'string'}, 'street': {'type': 'string'}, 'country': {'type': 'string'}, 'postalCode': {'type': 'string'}}}, 'creditCardLastFour': {'type': 'string'}}}, 'referenceNumber': {'type': 'string'}}}}}\n"
]
"data": {
"text/plain": [
"{'type': 'object',\n",
" 'properties': {'Invoice': {'type': 'object',\n",
" 'properties': {'total': {'type': 'string'},\n",
" 'products': {'type': 'string'},\n",
" 'salesTax': {'type': 'string'},\n",
" 'subtotal': {'type': 'string'},\n",
" 'invoiceDate': {'type': 'string'},\n",
" 'invoiceNumber': {'type': 'string'},\n",
" 'billingAddress': {'type': 'object',\n",
" 'properties': {'city': {'type': 'string'},\n",
" 'name': {'type': 'string'},\n",
" 'country': {'type': 'string'},\n",
" 'postalCode': {'type': 'string'}}},\n",
" 'paymentDetails': {'type': 'object',\n",
" 'properties': {'taxId': {'type': 'string'},\n",
" 'merchant': {'type': 'string'},\n",
" 'merchantAddress': {'type': 'object',\n",
" 'properties': {'city': {'type': 'string'},\n",
" 'suite': {'type': 'string'},\n",
" 'street': {'type': 'string'},\n",
" 'country': {'type': 'string'},\n",
" 'postalCode': {'type': 'string'}}},\n",
" 'creditCardLastFour': {'type': 'string'}}},\n",
" 'referenceNumber': {'type': 'string'}}},\n",
" 'Receipt': {'type': 'object',\n",
" 'properties': {'items': {'type': 'array',\n",
" 'items': {'type': 'object',\n",
" 'properties': {'amount': {'type': 'string'},\n",
" 'quantity': {'type': 'integer'},\n",
" 'unitPrice': {'type': 'string'},\n",
" 'description': {'type': 'string'}}}},\n",
" 'total': {'type': 'string'},\n",
" 'datePaid': {'type': 'string'},\n",
" 'subtotal': {'type': 'string'},\n",
" 'amountPaid': {'type': 'string'},\n",
" 'paymentMethod': {'type': 'string'},\n",
" 'receiptNumber': {'type': 'string'},\n",
" 'billingAddress': {'type': 'object',\n",
" 'properties': {'city': {'type': 'string'},\n",
" 'name': {'type': 'string'},\n",
" 'email': {'type': 'string'},\n",
" 'street': {'type': 'string'},\n",
" 'country': {'type': 'string'},\n",
" 'postalCode': {'type': 'string'},\n",
" 'phoneNumber': {'type': 'string'}}}}}}}"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(extraction_schema.data_schema)"
"extraction_schema.data_schema"
]
},
{
Expand All @@ -125,12 +174,12 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Extracting files: 100%|██████████| 2/2 [00:14<00:00, 7.11s/it]\n"
"Extracting files: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:06<00:00, 3.10s/it]\n"
]
}
],
"source": [
"extractions = extractor.extract(\n",
"extractions = await extractor.aextract(\n",
" extraction_schema.id,\n",
" [\"./data/noisebridge_receipt.pdf\", \"./data/parallels_invoice.pdf\"],\n",
")"
Expand All @@ -149,21 +198,40 @@
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'Invoice': {'total': '$119.99', 'products': 'Parallels Desktop for Mac Pro Edition (1 Year)', 'salesTax': '$0.00', 'subtotal': '$119.99', 'invoiceDate': 'Jul 23, 2024', 'invoiceNumber': 'BKD-73649835575', 'billingAddress': {'city': 'California', 'name': 'Laurie Voss', 'country': 'United States', 'postalCode': '94110'}, 'paymentDetails': {'taxId': '20-4503251', 'merchant': 'Cleverbridge, Inc.', 'merchantAddress': {'city': 'Chicago', 'suite': 'Suite 700', 'street': '350 N Clark', 'country': 'United States', 'postalCode': '60654'}, 'creditCardLastFour': '4469'}, 'referenceNumber': '474534804'}}\n"
]
"data": {
"text/plain": [
"{'Receipt': {'items': [{'amount': '$10.00',\n",
" 'quantity': 1,\n",
" 'unitPrice': '$10.00',\n",
" 'description': '$10 / month'}],\n",
" 'total': '$10.00',\n",
" 'datePaid': 'July 19, 2024',\n",
" 'subtotal': '$10.00',\n",
" 'amountPaid': '$10.00',\n",
" 'paymentMethod': 'Visa - 7267',\n",
" 'receiptNumber': '2721 5058',\n",
" 'billingAddress': {'city': 'San Francisco',\n",
" 'name': 'Noisebridge',\n",
" 'email': '[email protected]',\n",
" 'street': '272 Capp St',\n",
" 'country': 'United States',\n",
" 'postalCode': '94110',\n",
" 'phoneNumber': '1 650 701 7829'}}}"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(extractions[0].data)"
"extractions[0].data"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "llama-extract-tm5usU00-py3.11",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -180,5 +248,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}

0 comments on commit 7936419

Please sign in to comment.