Skip to content

Commit

Permalink
Merge pull request #5 from run-llama/feat/examples
Browse files Browse the repository at this point in the history
Add more example notebooks
  • Loading branch information
Disiok authored Jul 25, 2024
2 parents e769622 + e878ab8 commit 5f81c52
Show file tree
Hide file tree
Showing 5 changed files with 265 additions and 5 deletions.
Binary file added examples/data/noisebridge_receipt.pdf
Binary file not shown.
Binary file added examples/data/parallels_invoice.pdf
Binary file not shown.
15 changes: 10 additions & 5 deletions examples/demo_basic.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@
"\n",
"extractor = LlamaExtract()\n",
"\n",
"extraction_schema = extractor.infer_schema(\"Test Schema\", [...])"
"extraction_schema = extractor.infer_schema(\n",
" \"Test Schema\", [\"./data/noisebridge_receipt.pdf\", \"./data/parallels_invoice.pdf\"]\n",
")"
]
},
{
Expand All @@ -54,7 +56,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'type': 'object', 'properties': {'Contact': {'type': 'object', 'properties': {'name': {'type': 'string'}, 'email': {'type': 'string'}, 'telephone': {'type': 'string'}}}, 'PaymentNotice': {'type': 'object', 'properties': {'investor': {'type': 'object', 'properties': {'name': {'type': 'string'}}}, 'dealCusip': {'type': 'string'}, 'loanShare': {'type': 'string'}, 'reference': {'type': 'string'}, 'bankDetails': {'type': 'object', 'properties': {'aba': {'type': 'string'}, 'bankName': {'type': 'string'}, 'attention': {'type': 'string'}, 'accountNumber': {'type': 'string'}, 'bankToBankInfo': {'type': 'string'}}}, 'paymentType': {'type': 'string'}, 'effectiveDate': {'type': 'string'}, 'facilityCusip': {'type': 'string'}, 'remittedAmount': {'type': 'string'}, 'interestPayment': {'type': 'string'}, 'paymentDescription': {'type': 'string'}}}, 'LoanRepricingNotice': {'type': 'object', 'properties': {'dealIsin': {'type': 'string'}, 'allInRate': {'type': 'string'}, 'dealCusip': {'type': 'string'}, 'rateBasis': {'type': 'string'}, 'yourShare': {'type': 'string'}, 'loanAmount': {'type': 'string'}, 'description': {'type': 'string'}, 'facilityIsin': {'type': 'string'}, 'lookBackDays': {'type': 'integer'}, 'baseRateFloor': {'type': 'string'}, 'effectiveDate': {'type': 'string'}, 'facilityCusip': {'type': 'string'}, 'referenceRate': {'type': 'string'}, 'repricingDate': {'type': 'string'}, 'spreadAdjustment': {'type': 'string'}, 'nextRepricingDate': {'type': 'string'}, 'legacyBaseRateFloor': {'type': 'string'}, 'projectedInterestDue': {'type': 'string'}}}}}\n"
"{'type': 'object', 'properties': {'Invoice': {'type': 'object', 'properties': {'total': {'type': 'string'}, 'products': {'type': 'string'}, 'salesTax': {'type': 'string'}, 'subtotal': {'type': 'string'}, 'invoiceDate': {'type': 'string'}, 'invoiceNumber': {'type': 'string'}, 'billingAddress': {'type': 'object', 'properties': {'city': {'type': 'string'}, 'name': {'type': 'string'}, 'country': {'type': 'string'}, 'postalCode': {'type': 'string'}}}, 'paymentDetails': {'type': 'object', 'properties': {'taxId': {'type': 'string'}, 'merchant': {'type': 'string'}, 'merchantAddress': {'type': 'object', 'properties': {'city': {'type': 'string'}, 'suite': {'type': 'string'}, 'street': {'type': 'string'}, 'country': {'type': 'string'}, 'postalCode': {'type': 'string'}}}, 'creditCardLastFour': {'type': 'string'}}}, 'referenceNumber': {'type': 'string'}}}}}\n"
]
}
],
Expand All @@ -71,12 +73,15 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Extracting files: 100%|██████████| 2/2 [01:02<00:00, 31.19s/it]\n"
"Extracting files: 100%|██████████| 2/2 [00:14<00:00, 7.11s/it]\n"
]
}
],
"source": [
"extractions = extractor.extract(extraction_schema.id, [...])"
"extractions = extractor.extract(\n",
" extraction_schema.id,\n",
" [\"./data/noisebridge_receipt.pdf\", \"./data/parallels_invoice.pdf\"],\n",
")"
]
},
{
Expand All @@ -88,7 +93,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'Contact': {'name': 'HASHNEET KAUR', 'email': '[email protected]', 'telephone': '+1-302-634-1253'}, 'PaymentNotice': {'investor': {'name': 'FARALLON CAPITAL (AM) INVESTORS, L.P.'}, 'dealCusip': 'CORELOGIC INC. $4BN 6/4/21 MUCY*', 'loanShare': 'USD 17,773.30', 'reference': 'CORELOGIC INC. $4BN 6/4/21 MUCY*, CME Term SOFR Reference Rates - Effective Repricing', 'bankDetails': {'aba': '021000089', 'bankName': 'CITIBANK NA', 'attention': 'Goldman Sachs & Co New York', 'accountNumber': '30649484', 'bankToBankInfo': 'FFC 002458339 Farallon Capital AM Investors LP'}, 'paymentType': 'Interest Payment', 'effectiveDate': '31-Oct-2023', 'facilityCusip': None, 'remittedAmount': 'USD 132.39', 'interestPayment': 'USD 132.39', 'paymentDescription': 'Your share of the CME Term SOFR Reference Rates - Effective interest payment'}, 'LoanRepricingNotice': {'dealIsin': None, 'allInRate': '8.938600%', 'dealCusip': None, 'rateBasis': None, 'yourShare': 'USD 17,773.30', 'loanAmount': None, 'description': None, 'facilityIsin': None, 'lookBackDays': None, 'baseRateFloor': None, 'effectiveDate': '31-Oct-2023', 'facilityCusip': None, 'referenceRate': None, 'repricingDate': '29-Nov-2023', 'spreadAdjustment': None, 'nextRepricingDate': None, 'legacyBaseRateFloor': None, 'projectedInterestDue': None}}\n"
"{'Invoice': {'total': '$119.99', 'products': 'Parallels Desktop for Mac Pro Edition (1 Year)', 'salesTax': '$0.00', 'subtotal': '$119.99', 'invoiceDate': 'Jul 23, 2024', 'invoiceNumber': 'BKD-73649835575', 'billingAddress': {'city': 'California', 'name': 'Laurie Voss', 'country': 'United States', 'postalCode': '94110'}, 'paymentDetails': {'taxId': '20-4503251', 'merchant': 'Cleverbridge, Inc.', 'merchantAddress': {'city': 'Chicago', 'suite': 'Suite 700', 'street': '350 N Clark', 'country': 'United States', 'postalCode': '60654'}, 'creditCardLastFour': '4469'}, 'referenceNumber': '474534804'}}\n"
]
}
],
Expand Down
123 changes: 123 additions & 0 deletions examples/demo_existent_schema.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Extracting data from files using an existing schema"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install llama-extract"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# llama-extract is async-first, so running its sync code in a notebook requires nest_asyncio\n",
"import nest_asyncio\n",
"\n",
"nest_asyncio.apply()\n",
"\n",
"import os\n",
"\n",
"os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"llx-...\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from llama_extract import LlamaExtract\n",
"\n",
"extractor = LlamaExtract()\n",
"\n",
"extraction_schema = extractor.get_schema(\"schema_id...\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"id='88ea0633-937b-42f1-a35d-7da19c2db74e' created_at=datetime.datetime(2024, 7, 24, 19, 48, 49, 968786, tzinfo=datetime.timezone.utc) updated_at=datetime.datetime(2024, 7, 24, 19, 48, 49, 968786, tzinfo=datetime.timezone.utc) name='Test Schema' project_id='b1be5ffd-3f90-4fd1-9742-ca7c0a30f6f7' data_schema={'type': 'object', 'properties': {'date': {'type': 'string'}, 'amount': {'type': 'number'}, 'number': {'type': 'string'}}}\n"
]
}
],
"source": [
"print(extraction_schema)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Extracting files: 100%|██████████| 2/2 [00:03<00:00, 1.71s/it]\n"
]
}
],
"source": [
"extractions = extractor.extract(\n",
" extraction_schema.id,\n",
" [\"./data/noisebridge_receipt.pdf\", \"./data/parallels_invoice.pdf\"],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'date': 'Jul 23, 2024', 'amount': '119.99', 'number': 'BKD-73649835575'}\n"
]
}
],
"source": [
"print(extractions[1].data)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "llama-extract-tm5usU00-py3.11",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
132 changes: 132 additions & 0 deletions examples/demo_manual.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Extracting data from files using a manually defined schema"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install llama-extract"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# llama-extract is async-first, so running its sync code in a notebook requires nest_asyncio\n",
"import nest_asyncio\n",
"\n",
"nest_asyncio.apply()\n",
"\n",
"import os\n",
"\n",
"os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"llx-...\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from llama_extract import LlamaExtract\n",
"\n",
"extractor = LlamaExtract()\n",
"\n",
"data_schema = {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"number\": {\"type\": \"string\"},\n",
" \"date\": {\"type\": \"string\"},\n",
" \"amount\": {\"type\": \"number\"},\n",
" },\n",
"}\n",
"\n",
"extraction_schema = extractor.create_schema(\"Test Schema\", data_schema)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"id='88ea0633-937b-42f1-a35d-7da19c2db74e' created_at=datetime.datetime(2024, 7, 24, 19, 48, 49, 968786, tzinfo=datetime.timezone.utc) updated_at=datetime.datetime(2024, 7, 24, 19, 48, 49, 968786, tzinfo=datetime.timezone.utc) name='Test Schema' project_id='b1be5ffd-3f90-4fd1-9742-ca7c0a30f6f7' data_schema={'type': 'object', 'properties': {'date': {'type': 'string'}, 'amount': {'type': 'number'}, 'number': {'type': 'string'}}}\n"
]
}
],
"source": [
"print(extraction_schema)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Extracting files: 100%|██████████| 2/2 [00:06<00:00, 3.31s/it]\n"
]
}
],
"source": [
"extractions = extractor.extract(\n",
" extraction_schema.id,\n",
" [\"./data/noisebridge_receipt.pdf\", \"./data/parallels_invoice.pdf\"],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'date': 'Jul 23, 2024', 'amount': '119.99', 'number': 'BKD-73649835575'}\n"
]
}
],
"source": [
"print(extractions[0].data)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "llama-extract-tm5usU00-py3.11",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 5f81c52

Please sign in to comment.