From 7936419a4b1c7794f984938fc4461d29acf63061 Mon Sep 17 00:00:00 2001 From: Simon Suo Date: Thu, 25 Jul 2024 10:52:18 -0700 Subject: [PATCH] wip --- examples/demo_basic.ipynb | 118 ++++++++++++++++++++++++++++++-------- 1 file changed, 93 insertions(+), 25 deletions(-) diff --git a/examples/demo_basic.ipynb b/examples/demo_basic.ipynb index 9802d9d..7dc3c2f 100644 --- a/examples/demo_basic.ipynb +++ b/examples/demo_basic.ipynb @@ -42,7 +42,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Apply `nest_asyncio` and bring your own LlamaCloud API key:" + "Bring your own LlamaCloud API key:" ] }, { @@ -51,11 +51,6 @@ "metadata": {}, "outputs": [], "source": [ - "# llama-extract is async-first, running the sync code in a notebook requires the use of nest_asyncio\n", - "import nest_asyncio\n", - "\n", - "nest_asyncio.apply()\n", - "\n", "import os\n", "\n", "os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"llx-...\"" @@ -77,9 +72,16 @@ "source": [ "from llama_extract import LlamaExtract\n", "\n", - "extractor = LlamaExtract()\n", - "\n", - "extraction_schema = extractor.infer_schema(\n", + "extractor = LlamaExtract()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "extraction_schema = await extractor.ainfer_schema(\n", " \"Test Schema\", [\"./data/noisebridge_receipt.pdf\", \"./data/parallels_invoice.pdf\"]\n", ")" ] @@ -97,15 +99,62 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'type': 'object', 'properties': {'Invoice': {'type': 'object', 'properties': {'total': {'type': 'string'}, 'products': {'type': 'string'}, 'salesTax': {'type': 'string'}, 'subtotal': {'type': 'string'}, 'invoiceDate': {'type': 'string'}, 'invoiceNumber': {'type': 'string'}, 'billingAddress': {'type': 'object', 'properties': {'city': {'type': 'string'}, 'name': {'type': 'string'}, 'country': {'type': 'string'}, 'postalCode': {'type': 'string'}}}, 'paymentDetails': {'type': 'object', 'properties': {'taxId': {'type': 'string'}, 'merchant': {'type': 'string'}, 'merchantAddress': {'type': 'object', 'properties': {'city': {'type': 'string'}, 'suite': {'type': 'string'}, 'street': {'type': 'string'}, 'country': {'type': 'string'}, 'postalCode': {'type': 'string'}}}, 'creditCardLastFour': {'type': 'string'}}}, 'referenceNumber': {'type': 'string'}}}}}\n" - ] + "data": { + "text/plain": [ + "{'type': 'object',\n", + " 'properties': {'Invoice': {'type': 'object',\n", + " 'properties': {'total': {'type': 'string'},\n", + " 'products': {'type': 'string'},\n", + " 'salesTax': {'type': 'string'},\n", + " 'subtotal': {'type': 'string'},\n", + " 'invoiceDate': {'type': 'string'},\n", + " 'invoiceNumber': {'type': 'string'},\n", + " 'billingAddress': {'type': 'object',\n", + " 'properties': {'city': {'type': 'string'},\n", + " 'name': {'type': 'string'},\n", + " 'country': {'type': 'string'},\n", + " 'postalCode': {'type': 'string'}}},\n", + " 'paymentDetails': {'type': 'object',\n", + " 'properties': {'taxId': {'type': 'string'},\n", + " 'merchant': {'type': 'string'},\n", + " 'merchantAddress': {'type': 'object',\n", + " 'properties': {'city': {'type': 'string'},\n", + " 'suite': {'type': 'string'},\n", + " 'street': {'type': 'string'},\n", + " 'country': {'type': 'string'},\n", + " 'postalCode': {'type': 'string'}}},\n", + " 'creditCardLastFour': {'type': 'string'}}},\n", + " 'referenceNumber': {'type': 'string'}}},\n", + " 'Receipt': {'type': 'object',\n", + " 'properties': {'items': {'type': 'array',\n", + " 'items': {'type': 'object',\n", + " 'properties': {'amount': {'type': 'string'},\n", + " 'quantity': {'type': 'integer'},\n", + " 'unitPrice': {'type': 'string'},\n", + " 'description': {'type': 'string'}}}},\n", + " 'total': {'type': 'string'},\n", + " 'datePaid': {'type': 'string'},\n", + " 'subtotal': {'type': 'string'},\n", + " 'amountPaid': {'type': 'string'},\n", + " 'paymentMethod': {'type': 'string'},\n", + " 'receiptNumber': {'type': 'string'},\n", + " 'billingAddress': {'type': 'object',\n", + " 'properties': {'city': {'type': 'string'},\n", + " 'name': {'type': 'string'},\n", + " 'email': {'type': 'string'},\n", + " 'street': {'type': 'string'},\n", + " 'country': {'type': 'string'},\n", + " 'postalCode': {'type': 'string'},\n", + " 'phoneNumber': {'type': 'string'}}}}}}}" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print(extraction_schema.data_schema)" + "extraction_schema.data_schema" ] }, { @@ -125,12 +174,12 @@ "name": "stderr", "output_type": "stream", "text": [ - "Extracting files: 100%|██████████| 2/2 [00:14<00:00, 7.11s/it]\n" + "Extracting files: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:06<00:00, 3.10s/it]\n" ] } ], "source": [ - "extractions = extractor.extract(\n", + "extractions = await extractor.aextract(\n", " extraction_schema.id,\n", " [\"./data/noisebridge_receipt.pdf\", \"./data/parallels_invoice.pdf\"],\n", ")" @@ -149,21 +198,40 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'Invoice': {'total': '$119.99', 'products': 'Parallels Desktop for Mac Pro Edition (1 Year)', 'salesTax': '$0.00', 'subtotal': '$119.99', 'invoiceDate': 'Jul 23, 2024', 'invoiceNumber': 'BKD-73649835575', 'billingAddress': {'city': 'California', 'name': 'Laurie Voss', 'country': 'United States', 'postalCode': '94110'}, 'paymentDetails': {'taxId': '20-4503251', 'merchant': 'Cleverbridge, Inc.', 'merchantAddress': {'city': 'Chicago', 'suite': 'Suite 700', 'street': '350 N Clark', 'country': 'United States', 'postalCode': '60654'}, 'creditCardLastFour': '4469'}, 'referenceNumber': '474534804'}}\n" - ] + "data": { + "text/plain": [ + "{'Receipt': {'items': [{'amount': '$10.00',\n", + " 'quantity': 1,\n", + " 'unitPrice': '$10.00',\n", + " 'description': '$10 / month'}],\n", + " 'total': '$10.00',\n", + " 'datePaid': 'July 19, 2024',\n", + " 'subtotal': '$10.00',\n", + " 'amountPaid': '$10.00',\n", + " 'paymentMethod': 'Visa - 7267',\n", + " 'receiptNumber': '2721 5058',\n", + " 'billingAddress': {'city': 'San Francisco',\n", + " 'name': 'Noisebridge',\n", + " 'email': 'noisebridge@seldo.com',\n", + " 'street': '272 Capp St',\n", + " 'country': 'United States',\n", + " 'postalCode': '94110',\n", + " 'phoneNumber': '1 650 701 7829'}}}" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print(extractions[0].data)" + "extractions[0].data" ] } ], "metadata": { "kernelspec": { - "display_name": "llama-extract-tm5usU00-py3.11", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -180,5 +248,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 }