diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c5ff1e2920..e475ec9f02 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -255,6 +255,10 @@ jobs: source .venv/bin/activate mkdir "$NLTK_DATA" make install-ci + - name: Setup docker-compose + uses: KengoTODA/actions-setup-docker-compose@v1 + with: + version: '2.22.0' - name: Test Ingest (unit) run: | source .venv/bin/activate @@ -293,6 +297,7 @@ jobs: AZURE_SEARCH_API_KEY: ${{ secrets.AZURE_SEARCH_API_KEY }} TABLE_OCR: "tesseract" ENTIRE_PAGE_OCR: "tesseract" + CI: "true" run: | source .venv/bin/activate sudo apt-get update diff --git a/.github/workflows/ingest-test-fixtures-update-pr.yml b/.github/workflows/ingest-test-fixtures-update-pr.yml index 7ca7d242f3..499a1f7593 100644 --- a/.github/workflows/ingest-test-fixtures-update-pr.yml +++ b/.github/workflows/ingest-test-fixtures-update-pr.yml @@ -9,7 +9,7 @@ env: jobs: setup: - runs-on: ubuntu-latest + runs-on: ubuntu-latest-m if: | github.event_name == 'workflow_dispatch' || (github.event_name == 'push' && contains(github.event.head_commit.message, 'ingest-test-fixtures-update')) @@ -37,7 +37,7 @@ jobs: make install-ci update-fixtures-and-pr: - runs-on: ubuntu-latest + runs-on: ubuntu-latest-m env: NLTK_DATA: ${{ github.workspace }}/nltk_data needs: [setup] @@ -56,6 +56,10 @@ jobs: source .venv/bin/activate mkdir "$NLTK_DATA" make install-ci + - name: Setup docker-compose + uses: KengoTODA/actions-setup-docker-compose@v1 + with: + version: '2.22.0' - name: Update test fixtures env: AIRTABLE_PERSONAL_ACCESS_TOKEN: ${{ secrets.AIRTABLE_PERSONAL_ACCESS_TOKEN }} @@ -91,6 +95,7 @@ jobs: TABLE_OCR: "tesseract" ENTIRE_PAGE_OCR: "tesseract" OVERWRITE_FIXTURES: "true" + CI: "true" run: | source .venv/bin/activate sudo apt-get update diff --git a/CHANGELOG.md b/CHANGELOG.md index 242dcfcdbc..a642da044a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,17 +1,44 @@ -## 0.10.17-dev9 +## 0.10.19-dev10 ### Enhancements +* **bump 
`unstructured-inference` to `0.6.6`** The updated version of `unstructured-inference` makes table extraction in `hi_res` mode configurable to fine tune table extraction performance; it also improves element detection by adding a deduplication post processing step in the `hi_res` partitioning of pdfs and images. +* **Detect text in HTML Heading Tags as Titles** This will increase the accuracy of hierarchies in HTML documents and provide more accurate element categorization. If text is in an HTML heading tag and is not a list item, address, or narrative text, categorize it as a title. +* **Update python-based docs** Refactor docs to use the actual unstructured code rather than using the subprocess library to run the cli command itself. * **Adds data source properties to SharePoint, Outlook, Onedrive, Reddit, and Slack connectors** These properties (date_created, date_modified, version, source_url, record_locator) are written to element metadata during ingest, mapping elements to information about the document source from which they derive. This functionality enables downstream applications to reveal source document applications, e.g. a link to a GDrive doc, Salesforce record, etc. +* **Adds Table support for the `add_chunking_strategy` decorator to partition functions.** In addition to combining elements under Title elements, users can now specify the `max_characters=` argument to chunk Table elements into TableChunk elements with `text` and `text_as_html` of length characters. This means partitioned Table results are ready for use in downstream applications without any post processing. +* **Expose endpoint url for s3 connectors** By allowing for the endpoint url to be explicitly overwritten, this allows for any non-AWS data providers supporting the s3 protocol to be supported (e.g. minio).
+* **change default `hi_res` model for pdf/image partition to `yolox`** Now partitioning pdf/image using `hi_res` strategy utilizes `yolox_quantized` model instead of `detectron2_onnx` model. This new default model has better recall for tables and produces more detailed categories for elements. + +### Features + +### Fixes + +* **Fixes partition_pdf is_alnum reference bug** Problem: When `partition_pdf` attempted to get the bounding box from an element, it experienced a reference before assignment error when the first object was not text extractable. Fix: Switched to a flag when the condition is met. Importance: Crucial to be able to partition with pdf. +* **Fix various cases of HTML text missing after partition** + Problem: Under certain circumstances, text immediately after some HTML tags will be missing from partition result. + Fix: Updated code to deal with these cases. + Importance: This will ensure the correctness when partitioning HTML and Markdown documents. + +## 0.10.18 + +### Enhancements + +* **Better detection of natural reading order in images and PDF's** The elements returned by partition better reflect natural reading order in some cases, particularly in complicated multi-column layouts, leading to better chunking and retrieval for downstream applications. Achieved by improving the `xy-cut` sorting to preprocess bboxes, shrinking all bounding boxes by 90% along x and y axes (still centered around the same center point), which allows projection lines to be drawn where not possible before if layout bboxes overlapped. +* **Improves `partition_xml` to be faster and more memory efficient when partitioning large XML files** The new behavior is to partition iteratively to prevent loading the entire XML tree into memory at once in most use cases.
+* **Adds data source properties to SharePoint, Outlook, Onedrive, Reddit, Slack, and DeltaTable connectors** These properties (date_created, date_modified, version, source_url, record_locator) are written to element metadata during ingest, mapping elements to information about the document source from which they derive. This functionality enables downstream applications to reveal source document applications, e.g. a link to a GDrive doc, Salesforce record, etc. * **Add functionality to save embedded images in PDF's separately as images** This allows users to save embedded images in PDF's separately as images, given some directory path. The saved image path is written to the metadata for the Image element. Downstream applications may benefit by providing users with image links from relevant "hits." * **Azure Cognite Search destination connector** New Azure Cognitive Search destination connector added to ingest CLI. Users may now use `unstructured-ingest` to write partitioned data from over 20 data sources (so far) to an Azure Cognitive Search index. * **Improves salesforce partitioning** Partitions Salesforce data as xlm instead of text for improved detail and flexibility. Partitions htmlbody instead of textbody for Salesforce emails. Importance: Allows all Salesforce fields to be ingested and gives Salesforce emails more detailed partitioning. * **Add document level language detection functionality.** Introduces the "auto" default for the languages param, which then detects the languages present in the document using the `langdetect` package. Adds the document languages as ISO 639-3 codes to the element metadata. Implemented only for the partition_text function to start. * **PPTX partitioner refactored in preparation for enhancement.** Behavior should be unchanged except that shapes enclosed in a group-shape are now included, as many levels deep as required (a group-shape can itself contain a group-shape). 
* **Embeddings support for the SharePoint SourceConnector via unstructured-ingest CLI** The SharePoint connector can now optionally create embeddings from the elements it pulls out during partition and upload those embeddings to Azure Cognitive Search index. +* **Improves hierarchy from docx files by leveraging natural hierarchies built into docx documents** Hierarchy can now be detected from an indentation level for list bullets/numbers and by style name (e.g. Heading 1, List Bullet 2, List Number). +* **Chunking support for the SharePoint SourceConnector via unstructured-ingest CLI** The SharePoint connector can now optionally chunk the elements pulled out during partition via the chunking unstructured brick. This can be used as a stage before creating embeddings. ### Features +* **Adds `links` metadata in `partition_pdf` for `fast` strategy.** Problem: PDF files contain rich information and hyperlinks that Unstructured did not capture earlier. Feature: `partition_pdf` now can capture embedded links within the file along with its associated text and page number. Importance: Providing depth in extracted elements gives users a better understanding and richer context of documents. This also enables users to map to other elements within the document if the hyperlink is referred to internally. * **Adds the embedding module to be able to embed Elements** Problem: Many NLP applications require the ability to represent parts of documents in a semantic way. Until now, Unstructured did not have text embedding ability within the core library. Feature: This embedding module is able to track embeddings related data with a class, embed a list of elements, and return an updated list of Elements with the *embeddings* property. The module is also able to embed query strings.
Importance: Ability to embed documents or parts of documents will enable users to make use of these semantic representations in different NLP applications, such as search, retrieval, and retrieval augmented generation. ### Fixes @@ -22,9 +49,12 @@ * **Fixes SharePoint connector failures if any document has an unsupported filetype** Problem: Currently the entire connector ingest run fails if a single IngestDoc has an unsupported filetype. This is because a ValueError is raised in the IngestDoc's `__post_init__`. Fix: Adds a try/catch when the IngestConnector runs get_ingest_docs such that the error is logged but all processable documents->IngestDocs are still instantiated and returned. Importance: Allows users to ingest SharePoint content even when some files with unsupported filetypes exist there. * **Fixes Sharepoint connector server_path issue** Problem: Server path for the Sharepoint Ingest Doc was incorrectly formatted, causing issues while fetching pages from the remote source. Fix: changes formatting of remote file path before instantiating SharepointIngestDocs and appends a '/' while fetching pages from the remote source. Importance: Allows users to fetch pages from Sharepoint Sites. * **Fixes badly initialized Formula** Problem: YoloX contain new types of elements, when loading a document that contain formulas a new element of that class -should be generated, however the Formula class inherits from Element instead of Text. After this change the element is correctly created with the correct class +should be generated, however the Formula class inherits from Element instead of Text. After this change the element is correctly created with the correct class allowing the document to be loaded. Fix: Change parent class for Formula to Text. Importance: Crucial to be able to load documents that contain formulas. * **Fixes Sphinx errors.** Fixes errors when running Sphinx `make html` and installs library to suppress warnings. 
+* **Fixes a metadata backwards compatibility error** Problem: When calling `partition_via_api`, the hosted api may return an element schema that's newer than the current `unstructured`. In this case, metadata fields were added which did not exist in the local `ElementMetadata` dataclass, and `__init__()` threw an error. Fix: remove nonexistent fields before instantiating in `ElementMetadata.from_json()`. Importance: Crucial to avoid breaking changes when adding fields. +* **Fixes issue with Discord connector when a channel returns `None`** Problem: Getting the `jump_url` from a nonexistent Discord `channel` fails. Fix: property `jump_url` is now retrieved within the same context as the messages from the channel. Importance: Avoids cascading issues when the connector fails to fetch information about a Discord channel. +* **Fixes occasional SIGABRT when writing table with `deltalake` on Linux** Problem: occasionally on Linux ingest can throw a `SIGABRT` when writing a `deltalake` table even though the table was written correctly. Fix: put the writing function into a `Process` to ensure its execution to the fullest extent before returning to the main process.
Importance: Improves stability of connectors using `deltalake` ## 0.10.16 diff --git a/Dockerfile b/Dockerfile index e4accbddec..0bc9faebbc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # syntax=docker/dockerfile:experimental -FROM quay.io/unstructured-io/base-images:rocky9.2-4@sha256:b1063ffbf08c3037ee211620f011dd05bd2da9287c6e6a3473b15c1597724e4b as base +FROM quay.io/unstructured-io/base-images:rocky9.2-5@sha256:1721c3b0711e4e90587e3b4917f1b616e4603ddf5b4986bfaa68d02d82a13aba as base # NOTE(crag): NB_USER ARG for mybinder.org compat: # https://mybinder.readthedocs.io/en/latest/tutorials/dockerfile.html diff --git a/docs/source/api.rst b/docs/source/api.rst index 7ade12ab32..3d682940d8 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -108,7 +108,7 @@ When elements are extracted from PDFs or images, it may be useful to get their b file_path = "/Path/To/File" file_data = {'files': open(file_path, 'rb')} - response = requests.post(url, headers=headers, files=files, data=data) + response = requests.post(url, headers=headers, files=file_data, data=data) file_data['files'].close() @@ -155,7 +155,7 @@ You can specify the encoding to use to decode the text input. 
If no value is pro file_path = "/Path/To/File" file_data = {'files': open(file_path, 'rb')} - response = requests.post(url, headers=headers, files=files, data=data) + response = requests.post(url, headers=headers, files=file_data, data=data) file_data['files'].close() @@ -204,7 +204,7 @@ You can also specify what languages to use for OCR with the ``ocr_languages`` kw file_path = "/Path/To/File" file_data = {'files': open(file_path, 'rb')} - response = requests.post(url, headers=headers, files=files, data=data) + response = requests.post(url, headers=headers, files=file_data, data=data) file_data['files'].close() @@ -250,7 +250,7 @@ By default the result will be in ``json``, but it can be set to ``text/csv`` to file_path = "/Path/To/File" file_data = {'files': open(file_path, 'rb')} - response = requests.post(url, headers=headers, files=files, data=data) + response = requests.post(url, headers=headers, files=file_data, data=data) file_data['files'].close() @@ -296,7 +296,7 @@ Pass the `include_page_breaks` parameter to `true` to include `PageBreak` elemen file_path = "/Path/To/File" file_data = {'files': open(file_path, 'rb')} - response = requests.post(url, headers=headers, files=files, data=data) + response = requests.post(url, headers=headers, files=file_data, data=data) file_data['files'].close() @@ -345,7 +345,7 @@ On the other hand, ``hi_res`` is the better choice for PDFs that may have text w file_path = "/Path/To/File" file_data = {'files': open(file_path, 'rb')} - response = requests.post(url, headers=headers, files=files, data=data) + response = requests.post(url, headers=headers, files=file_data, data=data) file_data['files'].close() @@ -398,7 +398,7 @@ To use the ``hi_res`` strategy with **Chipper** model, pass the argument for ``h file_path = "/Path/To/File" file_data = {'files': open(file_path, 'rb')} - response = requests.post(url, headers=headers, files=files, data=data) + response = requests.post(url, headers=headers, files=file_data, data=data) 
file_data['files'].close() @@ -451,7 +451,7 @@ To extract the table structure from PDF files using the ``hi_res`` strategy, ens file_path = "/Path/To/File" file_data = {'files': open(file_path, 'rb')} - response = requests.post(url, headers=headers, files=files, data=data) + response = requests.post(url, headers=headers, files=file_data, data=data) file_data['files'].close() @@ -499,7 +499,7 @@ We also provide support for enabling and disabling table extraction for file typ file_path = "/Path/To/File" file_data = {'files': open(file_path, 'rb')} - response = requests.post(url, headers=headers, files=files, data=data) + response = requests.post(url, headers=headers, files=file_data, data=data) file_data['files'].close() @@ -545,7 +545,7 @@ When processing XML documents, set the ``xml_keep_tags`` parameter to ``true`` t file_path = "/Path/To/File" file_data = {'files': open(file_path, 'rb')} - response = requests.post(url, headers=headers, files=files, data=data) + response = requests.post(url, headers=headers, files=file_data, data=data) file_data['files'].close() diff --git a/docs/source/source_connectors/airtable.rst b/docs/source/source_connectors/airtable.rst index 66939236d0..839ec9acff 100644 --- a/docs/source/source_connectors/airtable.rst +++ b/docs/source/source_connectors/airtable.rst @@ -29,29 +29,21 @@ Run Locally .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "airtable", - "--metadata-exclude", "filename,file_directory,metadata.data_source.date_processed", - "--personal-access-token", "$AIRTABLE_PERSONAL_ACCESS_TOKEN", - "--output-dir", "airtable-ingest-output" - "--num-processes", "2", - "--reprocess", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. 
Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.airtable import airtable + + if __name__ == "__main__": + airtable( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="airtable-ingest-output", + num_processes=2, + ), + personal_access_token=os.getenv("AIRTABLE_PERSONAL_ACCESS_TOKEN"), + ) Run via the API --------------- @@ -78,31 +70,23 @@ You can also use upstream connectors with the ``unstructured`` API. For this you .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "airtable", - "--metadata-exclude", "filename,file_directory,metadata.data_source.date_processed", - "--personal-access-token", "$AIRTABLE_PERSONAL_ACCESS_TOKEN", - "--output-dir", "airtable-ingest-output" - "--num-processes", "2", - "--reprocess", - "--partition-by-api", - "--api-key", "", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.airtable import airtable + + if __name__ == "__main__": + airtable( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="airtable-ingest-output", + num_processes=2, + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + ), + personal_access_token=os.getenv("AIRTABLE_PERSONAL_ACCESS_TOKEN"), + ) Additionally, you will need to pass the ``--partition-endpoint`` if you're running the API locally. You can find more information about the ``unstructured`` API `here `_. 
diff --git a/docs/source/source_connectors/azure.rst b/docs/source/source_connectors/azure.rst index e78ad11e70..479f4e1d58 100644 --- a/docs/source/source_connectors/azure.rst +++ b/docs/source/source_connectors/azure.rst @@ -28,28 +28,20 @@ Run Locally .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "azure", - "--remote-url", "abfs://container1/", - "--account-name", "azureunstructured1" - "--output-dir", "/Output/Path/To/Files", - "--num-processes", "2", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.azure import azure + + if __name__ == "__main__": + azure( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="azure-ingest-output", + num_processes=2, + ), + remote_url="abfs://container1/", + account_name="azureunstructured1", + ) Run via the API --------------- @@ -62,43 +54,24 @@ You can also use upstream connectors with the ``unstructured`` API. For this you .. code:: shell - unstructured-ingest \ - azure \ - --remote-url abfs://container1/ \ - --account-name azureunstructured1 \ - --output-dir azure-ingest-output \ - --num-processes 2 \ - --partition-by-api \ - --api-key "" - - .. tab:: Python - - .. 
code:: python - - import subprocess - - command = [ - "unstructured-ingest", - "azure", - "--remote-url", "abfs://container1/", - "--account-name", "azureunstructured1" - "--output-dir", "/Output/Path/To/Files", - "--num-processes", "2", - "--partition-by-api", - "--api-key", "", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.azure import azure + + if __name__ == "__main__": + azure( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="azure-ingest-output", + num_processes=2, + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + ), + remote_url="abfs://container1/", + account_name="azureunstructured1", + ) Additionally, you will need to pass the ``--partition-endpoint`` if you're running the API locally. You can find more information about the ``unstructured`` API `here `_. diff --git a/docs/source/source_connectors/biomed.rst b/docs/source/source_connectors/biomed.rst index 8cbd579c26..cec1deab6c 100644 --- a/docs/source/source_connectors/biomed.rst +++ b/docs/source/source_connectors/biomed.rst @@ -29,29 +29,21 @@ Run Locally .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "biomed", - "--path", "oa_pdf/07/07/sbaa031.073.PMC7234218.pdf", - "--output-dir", "/Output/Path/To/Files", - "--num-processes", "2", - "--verbose", - "--preserve-downloads", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. 
Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.biomed import biomed + + if __name__ == "__main__": + biomed( + verbose=True, + read_config=ReadConfig( + preserve_downloads=True, + ), + partition_config=PartitionConfig( + output_dir="biomed-ingest-output-path", + num_processes=2, + ), + path="oa_pdf/07/07/sbaa031.073.PMC7234218.pdf", + ) Run via the API --------------- @@ -78,31 +70,25 @@ You can also use upstream connectors with the ``unstructured`` API. For this you .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "biomed", - "--path", "oa_pdf/07/07/sbaa031.073.PMC7234218.pdf", - "--output-dir", "/Output/Path/To/Files", - "--num-processes", "2", - "--verbose", - "--preserve-downloads", - "--partition-by-api", - "--api-key", "", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.biomed import biomed + + if __name__ == "__main__": + biomed( + verbose=True, + read_config=ReadConfig( + preserve_downloads=True, + ), + partition_config=PartitionConfig( + output_dir="biomed-ingest-output-path", + num_processes=2, + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + ), + path="oa_pdf/07/07/sbaa031.073.PMC7234218.pdf", + ) Additionally, you will need to pass the ``--partition-endpoint`` if you're running the API locally. You can find more information about the ``unstructured`` API `here `_. 
diff --git a/docs/source/source_connectors/box.rst b/docs/source/source_connectors/box.rst index bf42ea512c..c075566db9 100644 --- a/docs/source/source_connectors/box.rst +++ b/docs/source/source_connectors/box.rst @@ -30,30 +30,23 @@ Run Locally .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "box", - "--box_app_config", "$BOX_APP_CONFIG_PATH" - "--remote-url", "box://utic-test-ingest-fixtures" - "--output-dir", "box-output" - "--num-processes", "2" - "--recursive", - "--verbose", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.box import box + + if __name__ == "__main__": + box( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="box-output", + num_processes=2, + ), + box_app_config=os.getenv("BOX_APP_CONFIG_PATH"), + recursive=True, + remote_url="box://utic-test-ingest-fixtures", + ) Run via the API --------------- @@ -81,32 +74,25 @@ You can also use upstream connectors with the ``unstructured`` API. For this you .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "box", - "--box_app_config", "$BOX_APP_CONFIG_PATH" - "--remote-url", "box://utic-test-ingest-fixtures" - "--output-dir", "box-output" - "--num-processes", "2" - "--recursive", - "--verbose", - "--partition-by-api", - "--api-key", "", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. 
Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.box import box + + if __name__ == "__main__": + box( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="box-output", + num_processes=2, + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + ), + box_app_config=os.getenv("BOX_APP_CONFIG_PATH"), + recursive=True, + remote_url="box://utic-test-ingest-fixtures", + ) Additionally, you will need to pass the ``--partition-endpoint`` if you're running the API locally. You can find more information about the ``unstructured`` API `here `_. diff --git a/docs/source/source_connectors/confluence.rst b/docs/source/source_connectors/confluence.rst index b9606d7c6c..83c3eda7e6 100644 --- a/docs/source/source_connectors/confluence.rst +++ b/docs/source/source_connectors/confluence.rst @@ -30,30 +30,22 @@ Run Locally .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "confluence", - "--metadata-exclude", "filename,file_directory,metadata.data_source.date_processed", - "--url", "https://unstructured-ingest-test.atlassian.net", - "--user-email", "12345678@unstructured.io", - "--api-token", "ABCDE1234ABDE1234ABCDE1234", - "--output-dir", "confluence-ingest-output", - "--num-processes", "2", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. 
Error:') - print(error.decode()) + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.confluence import confluence + + if __name__ == "__main__": + confluence( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="confluence-ingest-output", + num_processes=2, + metadata_exclude=["filename", "file_directory", "metadata.data_source.date_processed"], + ), + url="https://unstructured-ingest-test.atlassian.net", + user_email="12345678@unstructured.io", + api_token="ABCDE1234ABDE1234ABCDE1234", + ) Run via the API --------------- @@ -81,32 +73,26 @@ You can also use upstream connectors with the ``unstructured`` API. For this you .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "confluence", - "--metadata-exclude", "filename,file_directory,metadata.data_source.date_processed", - "--url", "https://unstructured-ingest-test.atlassian.net", - "--user-email", "12345678@unstructured.io", - "--api-token", "ABCDE1234ABDE1234ABCDE1234", - "--output-dir", "confluence-ingest-output", - "--num-processes", "2", - "--partition-by-api", - "--api-key", "", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. 
Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.confluence import confluence + + if __name__ == "__main__": + confluence( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="confluence-ingest-output", + num_processes=2, + metadata_exclude=["filename", "file_directory", "metadata.data_source.date_processed"], + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + ), + url="https://unstructured-ingest-test.atlassian.net", + user_email="12345678@unstructured.io", + api_token="ABCDE1234ABDE1234ABCDE1234", + ) Additionally, you will need to pass the ``--partition-endpoint`` if you're running the API locally. You can find more information about the ``unstructured`` API `here `_. diff --git a/docs/source/source_connectors/delta_table.rst b/docs/source/source_connectors/delta_table.rst index 62b504206c..b8d18d94f4 100644 --- a/docs/source/source_connectors/delta_table.rst +++ b/docs/source/source_connectors/delta_table.rst @@ -29,30 +29,20 @@ Run Locally .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "delta-table", - "--table-uri", "s3://utic-dev-tech-fixtures/sample-delta-lake-data/deltatable/", - "--download-dir", "delta-table-ingest-download", - "--output-dir", "delta-table-example", - "--preserve-downloads", - "--storage_options", "AWS_REGION=us-east-2,AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY", - "--verbose", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. 
Error:') - print(error.decode()) + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.delta_table import delta_table + + if __name__ == "__main__": + delta_table( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="delta-table-example", + num_processes=2, + ), + table_uri="s3://utic-dev-tech-fixtures/sample-delta-lake-data/deltatable/", + storage_options="AWS_REGION=us-east-2,AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY" + ) Run via the API @@ -79,32 +69,24 @@ You can also use upstream connectors with the ``unstructured`` API. For this you .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "delta-table", - "--table-uri", "s3://utic-dev-tech-fixtures/sample-delta-lake-data/deltatable/", - "--download-dir", "delta-table-ingest-download", - "--output-dir", "delta-table-example", - "--preserve-downloads", - "--storage_options", "AWS_REGION=us-east-2,AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY", - "--verbose", - "--partition-by-api", - "--api-key", "", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. 
Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.delta_table import delta_table + + if __name__ == "__main__": + delta_table( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="delta-table-example", + num_processes=2, + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + ), + table_uri="s3://utic-dev-tech-fixtures/sample-delta-lake-data/deltatable/", + storage_options="AWS_REGION=us-east-2,AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY" + ) Additionally, you will need to pass the ``--partition-endpoint`` if you're running the API locally. You can find more information about the ``unstructured`` API `here `_. diff --git a/docs/source/source_connectors/discord.rst b/docs/source/source_connectors/discord.rst index cb2c4829a4..9455b7eeb3 100644 --- a/docs/source/source_connectors/discord.rst +++ b/docs/source/source_connectors/discord.rst @@ -30,30 +30,26 @@ Run Locally .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "discord", - "--channels", "12345678", - "--token", "$DISCORD_TOKEN", - "--download-dir", "discord-ingest-download", - "--output-dir", "discord-example", - "--preserve-downloads", - "--verbose", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. 
Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.discord import discord + + if __name__ == "__main__": + discord( + verbose=True, + read_config=ReadConfig( + download_dir="discord-ingest-download", + preserve_downloads=True, + ), + partition_config=PartitionConfig( + output_dir="discord-example", + num_processes=2, + ), + channels=["12345678"], + token=os.getenv("DISCORD_TOKEN"), + period=None, + ) Run via the API --------------- @@ -81,32 +77,28 @@ You can also use upstream connectors with the ``unstructured`` API. For this you .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "discord", - "--channels", "12345678", - "--token", "$DISCORD_TOKEN", - "--download-dir", "discord-ingest-download", - "--output-dir", "discord-example", - "--preserve-downloads", - "--verbose", - "--partition-by-api", - "--api-key", "", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.discord import discord + + if __name__ == "__main__": + discord( + verbose=True, + read_config=ReadConfig( + download_dir="discord-ingest-download", + preserve_downloads=True, + ), + partition_config=PartitionConfig( + output_dir="discord-example", + num_processes=2, + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + ), + channels=["12345678"], + token=os.getenv("DISCORD_TOKEN"), + period=None, + ) Additionally, you will need to pass the ``--partition-endpoint`` if you're running the API locally. You can find more information about the ``unstructured`` API `here `_. 
diff --git a/docs/source/source_connectors/dropbox.rst b/docs/source/source_connectors/dropbox.rst index 515b23912b..f8e3d9c867 100644 --- a/docs/source/source_connectors/dropbox.rst +++ b/docs/source/source_connectors/dropbox.rst @@ -30,30 +30,23 @@ Run Locally .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "dropbox", - "--remote-url", "dropbox:// /", - "--output-dir", "dropbox-output", - "--token", "$DROPBOX_TOKEN", - "--num-processes", "2", - "--recursive", - "--verbose", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.dropbox import dropbox + + if __name__ == "__main__": + dropbox( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="dropbox-output", + num_processes=2, + ), + remote_url="dropbox:// /", + token=os.getenv("DROPBOX_TOKEN"), + recursive=True, + ) Run via the API --------------- @@ -81,32 +74,25 @@ You can also use upstream connectors with the ``unstructured`` API. For this you .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "dropbox", - "--remote-url", "dropbox:// /", - "--output-dir", "dropbox-output", - "--token", "$DROPBOX_TOKEN", - "--num-processes", "2", - "--recursive", - "--verbose", - "--partition-by-api", - "--api-key", "", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. 
Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.dropbox import dropbox + + if __name__ == "__main__": + dropbox( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="dropbox-output", + num_processes=2, + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + ), + remote_url="dropbox:// /", + token=os.getenv("DROPBOX_TOKEN"), + recursive=True, + ) Additionally, you will need to pass the ``--partition-endpoint`` if you're running the API locally. You can find more information about the ``unstructured`` API `here `_. diff --git a/docs/source/source_connectors/elasticsearch.rst b/docs/source/source_connectors/elasticsearch.rst index fd4238ab16..f8f7391ffb 100644 --- a/docs/source/source_connectors/elasticsearch.rst +++ b/docs/source/source_connectors/elasticsearch.rst @@ -30,30 +30,22 @@ Run Locally .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "elasticsearch", - "--metadata-exclude", "filename,file_directory,metadata.data_source.date_processed", - "--url", "http://localhost:9200", - "--index-name", "movies", - "--jq-query", "{ethnicity, director, plot}", - "--output-dir", "elasticsearch-ingest-output", - "--num-processes", "2" - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. 
Error:') - print(error.decode()) + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.elasticsearch import elasticsearch + + if __name__ == "__main__": + elasticsearch( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="elasticsearch-ingest-output", + num_processes=2, + metadata_exclude=["filename", "file_directory", "metadata.data_source.date_processed"], + ), + url="http://localhost:9200", + index_name="movies", + jq_query="{ethnicity, director, plot}", + ) Run via the API --------------- @@ -81,32 +73,26 @@ You can also use upstream connectors with the ``unstructured`` API. For this you .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "elasticsearch", - "--metadata-exclude", "filename,file_directory,metadata.data_source.date_processed", - "--url", "http://localhost:9200", - "--index-name", "movies", - "--jq-query", "{ethnicity, director, plot}", - "--output-dir", "elasticsearch-ingest-output", - "--num-processes", "2", - "--partition-by-api", - "--api-key", "", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. 
Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.elasticsearch import elasticsearch + + if __name__ == "__main__": + elasticsearch( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="elasticsearch-ingest-output", + num_processes=2, + metadata_exclude=["filename", "file_directory", "metadata.data_source.date_processed"], + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + ), + url="http://localhost:9200", + index_name="movies", + jq_query="{ethnicity, director, plot}", + ) Additionally, you will need to pass the ``--partition-endpoint`` if you're running the API locally. You can find more information about the ``unstructured`` API `here `_. diff --git a/docs/source/source_connectors/github.rst b/docs/source/source_connectors/github.rst index 0b08cac62c..a8ba7c52ca 100644 --- a/docs/source/source_connectors/github.rst +++ b/docs/source/source_connectors/github.rst @@ -29,29 +29,20 @@ Run Locally .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "github", - "--url", "Unstructured-IO/unstructured", - "--git-branch", "main", - "--output-dir", "github-ingest-output", - "--num-processes", "2", - "--verbose", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. 
Error:') - print(error.decode()) + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.github import github + + if __name__ == "__main__": + github( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="github-ingest-output", + num_processes=2, + ), + url="Unstructured-IO/unstructured", + git_branch="main", + ) Run via the API --------------- @@ -78,31 +69,24 @@ You can also use upstream connectors with the ``unstructured`` API. For this you .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "github", - "--url", "Unstructured-IO/unstructured", - "--git-branch", "main", - "--output-dir", "github-ingest-output", - "--num-processes", "2", - "--verbose", - "--partition-by-api", - "--api-key", "", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.github import github + + if __name__ == "__main__": + github( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="github-ingest-output", + num_processes=2, + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + ), + url="Unstructured-IO/unstructured", + git_branch="main", + ) Additionally, you will need to pass the ``--partition-endpoint`` if you're running the API locally. You can find more information about the ``unstructured`` API `here `_. 
diff --git a/docs/source/source_connectors/gitlab.rst b/docs/source/source_connectors/gitlab.rst index e0f722c205..646fedb687 100644 --- a/docs/source/source_connectors/gitlab.rst +++ b/docs/source/source_connectors/gitlab.rst @@ -29,29 +29,20 @@ Run Locally .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "gitlab", - "--url", "Unstructured-IO/unstructured", - "--git-branch", "v0.0.7", - "--output-dir", "gitlab-ingest-output", - "--num-processes", "2", - "--verbose", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.gitlab import gitlab + + if __name__ == "__main__": + gitlab( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="gitlab-ingest-output", + num_processes=2, + ), + url="https://gitlab.com/gitlab-com/content-sites/docsy-gitlab", + git_branch="v0.0.7", + ) Run via the API --------------- @@ -78,31 +69,24 @@ You can also use upstream connectors with the ``unstructured`` API. For this you .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "gitlab", - "--url", "Unstructured-IO/unstructured", - "--git-branch", "v0.0.7", - "--output-dir", "gitlab-ingest-output", - "--num-processes", "2", - "--verbose", - "--partition-by-api", - "--api-key", "", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. 
Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.gitlab import gitlab + + if __name__ == "__main__": + gitlab( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="gitlab-ingest-output", + num_processes=2, + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + ), + url="https://gitlab.com/gitlab-com/content-sites/docsy-gitlab", + git_branch="v0.0.7", + ) Additionally, you will need to pass the ``--partition-endpoint`` if you're running the API locally. You can find more information about the ``unstructured`` API `here `_. diff --git a/docs/source/source_connectors/google_cloud_storage.rst b/docs/source/source_connectors/google_cloud_storage.rst index 96af2c968c..54e009fe8d 100644 --- a/docs/source/source_connectors/google_cloud_storage.rst +++ b/docs/source/source_connectors/google_cloud_storage.rst @@ -29,29 +29,20 @@ Run Locally .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "gcs", - "--remote-url", "gs://utic-test-ingest-fixtures-public/", - "--output-dir", "dropbox-output", - "--num-processes", "2", - "--recursive", - "--verbose", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. 
Error:') - print(error.decode()) + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.gcs import gcs + + if __name__ == "__main__": + gcs( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="gcs-output", + num_processes=2, + ), + remote_url="gs://utic-test-ingest-fixtures-public/", + recursive=True, + ) Run via the API --------------- @@ -76,29 +67,24 @@ You can also use upstream connectors with the ``unstructured`` API. For this you .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "gcs", - "--remote-url", "gs://utic-test-ingest-fixtures-public/", - "--output-dir", "dropbox-output", - "--num-processes", "2", - "--recursive", - "--verbose", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.gcs import gcs + + if __name__ == "__main__": + gcs( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="gcs-output", + num_processes=2, + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + ), + remote_url="gs://utic-test-ingest-fixtures-public/", + recursive=True, + ) Additionally, you will need to pass the ``--partition-endpoint`` if you're running the API locally. You can find more information about the ``unstructured`` API `here `_. 
diff --git a/docs/source/source_connectors/google_drive.rst b/docs/source/source_connectors/google_drive.rst index 9ec9724285..869f680798 100644 --- a/docs/source/source_connectors/google_drive.rst +++ b/docs/source/source_connectors/google_drive.rst @@ -30,28 +30,21 @@ Run Locally .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "google-drive", - "--drive-id", "", - "--service-account-key",, "Path/To/Your/Service/Account/Key" - "--output-dir", "/Output/Path/To/Files", - "--num-processes", "2", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.google_drive import gdrive + + if __name__ == "__main__": + gdrive( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="google-drive-ingest-output", + num_processes=2, + ), + drive_id="POPULATE WITH FILE OR FOLDER ID", + service_account_key="POPULATE WITH DRIVE SERVICE ACCOUNT KEY", + recursive=True, + ) Run via the API --------------- @@ -79,30 +72,25 @@ You can also use upstream connectors with the ``unstructured`` API. For this you .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "google-drive", - "--drive-id", "", - "--service-account-key",, "Path/To/Your/Service/Account/Key" - "--output-dir", "/Output/Path/To/Files", - "--num-processes", "2", - "--partition-by-api", - "--api-key", "", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. 
Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.google_drive import gdrive + + if __name__ == "__main__": + gdrive( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="google-drive-ingest-output", + num_processes=2, + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + ), + drive_id="POPULATE WITH FILE OR FOLDER ID", + service_account_key="POPULATE WITH DRIVE SERVICE ACCOUNT KEY", + recursive=True, + ) Additionally, you will need to pass the ``--partition-endpoint`` if you're running the API locally. You can find more information about the ``unstructured`` API `here `_. diff --git a/docs/source/source_connectors/jira.rst b/docs/source/source_connectors/jira.rst index 37b2056e62..f93bb26f40 100644 --- a/docs/source/source_connectors/jira.rst +++ b/docs/source/source_connectors/jira.rst @@ -31,30 +31,22 @@ Run Locally .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "jira", - "--metadata-exclude", "filename,file_directory,metadata.data_source.date_processed", - "--url", "https://unstructured-jira-connector-test.atlassian.net", - "--user-email", "12345678@unstructured.io", - "--api-token", "ABCDE1234ABDE1234ABCDE1234", - "--output-dir", "jira-ingest-output", - "--num-processes", "2", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. 
Error:') - print(error.decode()) + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.jira import jira + + if __name__ == "__main__": + jira( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="jira-ingest-output", + num_processes=2, + metadata_exclude=["filename", "file_directory", "metadata.data_source.date_processed"], + ), + url="https://unstructured-jira-connector-test.atlassian.net", + user_email="12345678@unstructured.io", + api_token="ABCDE1234ABDE1234ABCDE1234", + ) Run via the API --------------- @@ -82,32 +74,26 @@ You can also use upstream connectors with the ``unstructured`` API. For this you .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "jira", - "--metadata-exclude", "filename,file_directory,metadata.data_source.date_processed", - "--url", "https://unstructured-jira-connector-test.atlassian.net", - "--user-email", "12345678@unstructured.io", - "--api-token", "ABCDE1234ABDE1234ABCDE1234", - "--output-dir", "jira-ingest-output", - "--num-processes", "2", - "--partition-by-api", - "--api-key", "", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. 
Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.jira import jira + + if __name__ == "__main__": + jira( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="jira-ingest-output", + num_processes=2, + metadata_exclude=["filename", "file_directory", "metadata.data_source.date_processed"], + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + ), + url="https://unstructured-jira-connector-test.atlassian.net", + user_email="12345678@unstructured.io", + api_token="ABCDE1234ABDE1234ABCDE1234", + ) Additionally, you will need to pass the ``--partition-endpoint`` if you're running the API locally. You can find more information about the ``unstructured`` API `here `_. diff --git a/docs/source/source_connectors/local_connector.rst b/docs/source/source_connectors/local_connector.rst index daa6645e52..b93ab589bf 100644 --- a/docs/source/source_connectors/local_connector.rst +++ b/docs/source/source_connectors/local_connector.rst @@ -23,29 +23,20 @@ Run Locally .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "local", - "--input-path", "example-docs", - "--output-dir", "dropbox-output", - "--num-processes", "2", - "--recursive", - "--verbose", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. 
Error:') - print(error.decode()) + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.local import local + + if __name__ == "__main__": + local( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="local-ingest-output", + num_processes=2, + ), + input_path="example-docs", + recursive=True, + ) Run via the API --------------- @@ -72,31 +63,24 @@ You can also use upstream connectors with the ``unstructured`` API. For this you .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "local", - "--input-path", "example-docs", - "--output-dir", "dropbox-output", - "--num-processes", "2", - "--recursive", - "--verbose", - "--partition-by-api", - "--api-key", "", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.local import local + + if __name__ == "__main__": + local( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="local-ingest-output", + num_processes=2, + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + ), + input_path="example-docs", + recursive=True, + ) Additionally, you will need to pass the ``--partition-endpoint`` if you're running the API locally. You can find more information about the ``unstructured`` API `here `_. diff --git a/docs/source/source_connectors/notion.rst b/docs/source/source_connectors/notion.rst index a79bd2d251..3036a01924 100644 --- a/docs/source/source_connectors/notion.rst +++ b/docs/source/source_connectors/notion.rst @@ -30,30 +30,22 @@ Run Locally .. 
code:: python - import subprocess - - command = [ - "unstructured-ingest", - "notion", - "--api-key", "", - "--output-dir", "notion-ingest-output", - "--page-ids", "", - "--database-ids", """", - "--num-processes", "2", - "--verbose", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.notion import notion + + if __name__ == "__main__": + notion( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="notion-ingest-output", + num_processes=2, + ), + api_key="POPULATE API KEY", + page_ids=["LIST", "OF", "PAGE", "IDS"], + database_ids=["LIST", "OF", "DATABASE", "IDS"], + recursive=False, + ) Run via the API --------------- @@ -81,32 +73,26 @@ You can also use upstream connectors with the ``unstructured`` API. For this you .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "notion", - "--api-key", "", - "--output-dir", "notion-ingest-output", - "--page-ids", "", - "--database-ids", """", - "--num-processes", "2", - "--verbose", - "--partition-by-api", - "--api-key", "", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. 
Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.notion import notion + + if __name__ == "__main__": + notion( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="notion-ingest-output", + num_processes=2, + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + ), + api_key="POPULATE API KEY", + page_ids=["LIST", "OF", "PAGE", "IDS"], + database_ids=["LIST", "OF", "DATABASE", "IDS"], + recursive=False, + ) Additionally, you will need to pass the ``--partition-endpoint`` if you're running the API locally. You can find more information about the ``unstructured`` API `here `_. diff --git a/docs/source/source_connectors/onedrive.rst b/docs/source/source_connectors/onedrive.rst index 90241d0800..592a49313d 100644 --- a/docs/source/source_connectors/onedrive.rst +++ b/docs/source/source_connectors/onedrive.rst @@ -33,33 +33,25 @@ Run Locally .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "onedrive", - "--client-id", "", - "--client-cred", "", - "--authority-url", "", - "--tenant", "", - "--user-pname", "", - "--path", "", - "--output-dir", "onedrive-ingest-output", - "--num-processes", "2", - "--verbose" - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. 
Error:') - print(error.decode()) + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.onedrive import onedrive + + if __name__ == "__main__": + onedrive( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="onedrive-ingest-output", + num_processes=2, + ), + client_id="", + client_cred="", + authority_url="", + tenant="", + user_pname="", + path="", + recursive=False, + ) Run via the API --------------- @@ -90,35 +82,29 @@ You can also use upstream connectors with the ``unstructured`` API. For this you .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "onedrive", - "--client-id", "", - "--client-cred", "", - "--authority-url", "", - "--tenant", "", - "--user-pname", "", - "--path", "", - "--output-dir", "onedrive-ingest-output", - "--num-processes", "2", - "--verbose", - "--partition-by-api", - "--api-key", "", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.onedrive import onedrive + + if __name__ == "__main__": + onedrive( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="onedrive-ingest-output", + num_processes=2, + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + ), + client_id="", + client_cred="", + authority_url="", + tenant="", + user_pname="", + path="", + recursive=False, + ) Additionally, you will need to pass the ``--partition-endpoint`` if you're running the API locally. You can find more information about the ``unstructured`` API `here `_. 
diff --git a/docs/source/source_connectors/outlook.rst b/docs/source/source_connectors/outlook.rst index ce3c1f29f0..0e78738614 100644 --- a/docs/source/source_connectors/outlook.rst +++ b/docs/source/source_connectors/outlook.rst @@ -33,33 +33,26 @@ Run Locally .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "outlook", - "--client-id", "$MS_CLIENT_ID", - "--client-cred", "$MS_CLIENT_CRED", - "--tenant", "", - "--user-email", "$MS_USER_EMAIL", - "--outlook-folders", "Inbox,Sent Items", - "--output-dir", "onedrive-ingest-output", - "--num-processes", "2", - "--recursive", - "--verbose", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.outlook import outlook + + if __name__ == "__main__": + outlook( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="outlook-output", + num_processes=2, + ), + client_id=os.getenv("MS_CLIENT_ID"), + client_cred=os.getenv("MS_CLIENT_CRED"), + tenant=os.getenv("MS_TENANT_ID"), + user_email=os.getenv("MS_USER_EMAIL"), + outlook_folders=["Inbox", "Sent Items"], + recursive=True, + ) Run via the API --------------- @@ -86,31 +79,28 @@ You can also use upstream connectors with the ``unstructured`` API. For this you .. 
code:: python - import subprocess - - command = [ - "unstructured-ingest", - "airtable", - "--metadata-exclude", "filename,file_directory,metadata.data_source.date_processed", - "--personal-access-token", "$AIRTABLE_PERSONAL_ACCESS_TOKEN", - "--output-dir", "airtable-ingest-output" - "--num-processes", "2", - "--reprocess", - "--partition-by-api", - "--api-key", "", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.outlook import outlook + + if __name__ == "__main__": + outlook( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="outlook-output", + num_processes=2, + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + ), + client_id=os.getenv("MS_CLIENT_ID"), + client_cred=os.getenv("MS_CLIENT_CRED"), + tenant=os.getenv("MS_TENANT_ID"), + user_email=os.getenv("MS_USER_EMAIL"), + outlook_folders=["Inbox", "Sent Items"], + recursive=True, + ) Additionally, you will need to pass the ``--partition-endpoint`` if you're running the API locally. You can find more information about the ``unstructured`` API `here `_. diff --git a/docs/source/source_connectors/reddit.rst b/docs/source/source_connectors/reddit.rst index f31d0d55ba..4d7c82be82 100644 --- a/docs/source/source_connectors/reddit.rst +++ b/docs/source/source_connectors/reddit.rst @@ -33,33 +33,24 @@ Run Locally .. 
code:: python - import subprocess - - command = [ - "unstructured-ingest", - "reddit", - "--subreddit-name", "machinelearning", - "--client-id", "", - "--client-secret", "", - "--user-agent", "Unstructured Ingest Subreddit fetcher by \\u\\...", - "--search-query", "Unstructured", - "--num-posts", "10", - "--output-dir", "reddit-ingest-output", - "--num-processes", "2", - "--verbose" - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.reddit import reddit + + if __name__ == "__main__": + reddit( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="reddit-ingest-output", + num_processes=2, + ), + subreddit_name="machinelearning", + client_id="", + client_secret="", + user_agent=r"Unstructured Ingest Subreddit fetcher by \\u\...", + search_query="Unstructured", + num_posts=10, + ) Run via the API --------------- @@ -90,35 +81,28 @@ You can also use upstream connectors with the ``unstructured`` API. For this you .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "reddit", - "--subreddit-name", "machinelearning", - "--client-id", "", - "--client-secret", "", - "--user-agent", "Unstructured Ingest Subreddit fetcher by \\u\\...", - "--search-query", "Unstructured", - "--num-posts", "10", - "--output-dir", "reddit-ingest-output", - "--num-processes", "2", - "--verbose" - "--partition-by-api", - "--api-key", "", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. 
Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.reddit import reddit + + if __name__ == "__main__": + reddit( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="reddit-ingest-output", + num_processes=2, + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + ), + subreddit_name="machinelearning", + client_id="", + client_secret="", + user_agent=r"Unstructured Ingest Subreddit fetcher by \\u\...", + search_query="Unstructured", + num_posts=10, + ) Additionally, you will need to pass the ``--partition-endpoint`` if you're running the API locally. You can find more information about the ``unstructured`` API `here `_. diff --git a/docs/source/source_connectors/s3.rst b/docs/source/source_connectors/s3.rst index d2f16ad77f..483937a842 100644 --- a/docs/source/source_connectors/s3.rst +++ b/docs/source/source_connectors/s3.rst @@ -28,28 +28,20 @@ Run Locally .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "s3", - "--remote-url", "s3://utic-dev-tech-fixtures/small-pdf-set/", - "--anonymous", - "--output-dir", "s3-small-batch-output", - "--num-processes", "2" - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. 
Error:') - print(error.decode()) + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.s3 import s3 + + if __name__ == "__main__": + s3( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="s3-small-batch-output", + num_processes=2, + ), + remote_url="s3://utic-dev-tech-fixtures/small-pdf-set/", + anonymous=True, + ) Run via the API --------------- @@ -75,30 +67,24 @@ You can also use upstream connectors with the ``unstructured`` API. For this you .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "s3", - "--remote-url", "s3://utic-dev-tech-fixtures/small-pdf-set/", - "--anonymous", - "--output-dir", "s3-small-batch-output", - "--num-processes", "2", - "--partition-by-api", - "--api-key", "", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.s3 import s3 + + if __name__ == "__main__": + s3( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="s3-small-batch-output", + num_processes=2, + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + ), + remote_url="s3://utic-dev-tech-fixtures/small-pdf-set/", + anonymous=True, + ) Additionally, you will need to pass the ``--partition-endpoint`` if you're running the API locally. You can find more information about the ``unstructured`` API `here `_. 
diff --git a/docs/source/source_connectors/salesforce.rst b/docs/source/source_connectors/salesforce.rst index 04183ed7a9..fd52ad6d6a 100644 --- a/docs/source/source_connectors/salesforce.rst +++ b/docs/source/source_connectors/salesforce.rst @@ -32,32 +32,25 @@ Run Locally .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "salesforce", - "--username" "$SALESFORCE_USERNAME" - "--consumer-key" "$SALESFORCE_CONSUMER_KEY" - "--private-key-path" "$SALESFORCE_PRIVATE_KEY_PATH" - "--categories" "EmailMessage,Account,Lead,Case,Campaign" - "--output-dir" "salesforce-output" - "--num-processes", "2" - "--recursive", - "--verbose", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.salesforce import salesforce + + if __name__ == "__main__": + salesforce( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="salesforce-output", + num_processes=2, + ), + username=os.getenv("SALESFORCE_USERNAME"), + consumer_key=os.getenv("SALESFORCE_CONSUMER_KEY"), + private_key_path=os.getenv("SALESFORCE_PRIVATE_KEY_PATH"), + categories=["EmailMessage", "Account", "Lead", "Case", "Campaign"], + recursive=True, + ) Run via the API --------------- @@ -87,34 +80,27 @@ You can also use upstream connectors with the ``unstructured`` API. For this you .. 
code:: python - import subprocess - - command = [ - "unstructured-ingest", - "salesforce", - "--username" "$SALESFORCE_USERNAME" - "--consumer-key" "$SALESFORCE_CONSUMER_KEY" - "--private-key-path" "$SALESFORCE_PRIVATE_KEY_PATH" - "--categories" "EmailMessage,Account,Lead,Case,Campaign" - "--output-dir" "salesforce-output" - "--num-processes", "2" - "--recursive", - "--verbose", - "--partition-by-api", - "--api-key", "", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.salesforce import salesforce + + if __name__ == "__main__": + salesforce( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="salesforce-output", + num_processes=2, + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + ), + username=os.getenv("SALESFORCE_USERNAME"), + consumer_key=os.getenv("SALESFORCE_CONSUMER_KEY"), + private_key_path=os.getenv("SALESFORCE_PRIVATE_KEY_PATH"), + categories=["EmailMessage", "Account", "Lead", "Case", "Campaign"], + recursive=True, + ) Additionally, you will need to pass the ``--partition-endpoint`` if you're running the API locally. You can find more information about the ``unstructured`` API `here `_. 
diff --git a/docs/source/source_connectors/sharepoint.rst b/docs/source/source_connectors/sharepoint.rst index 67ffe626d9..bc0b144dd9 100644 --- a/docs/source/source_connectors/sharepoint.rst +++ b/docs/source/source_connectors/sharepoint.rst @@ -25,37 +25,32 @@ Run Locally --files-only "Flag to process only files within the site(s)" \ --output-dir sharepoint-ingest-output \ --num-processes 2 \ + --path "Shared Documents" \ --verbose .. tab:: Python .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "sharepoint", - "--client-id", "", - "--client-cred", "", - "--site", "", - "--files-only", "Flag to process only files within the site(s)", - "--output-dir", "sharepoint-ingest-output", - "--num-processes", "2", - "--verbose", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.sharepoint import sharepoint + + if __name__ == "__main__": + sharepoint( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="sharepoint-ingest-output", + num_processes=2, + ), + client_id="", + client_cred="", + site="", + # Flag to process only files within the site(s) + files_only=True, + path="Shared Documents", + recursive=False, + ) Run via the API --------------- @@ -77,6 +72,7 @@ You can also use upstream connectors with the ``unstructured`` API. For this you --output-dir sharepoint-ingest-output \ --num-processes 2 \ --verbose \ + --path "Shared Documents" \ --partition-by-api \ --api-key "" @@ -84,33 +80,29 @@ You can also use upstream connectors with the ``unstructured`` API. For this you .. 
code:: python - import subprocess - - command = [ - "unstructured-ingest", - "sharepoint", - "--client-id", "", - "--client-cred", "", - "--site", "", - "--files-only", "Flag to process only files within the site(s)", - "--output-dir", "sharepoint-ingest-output", - "--num-processes", "2", - "--verbose", - "--partition-by-api", - "--api-key", "", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.sharepoint import sharepoint + + if __name__ == "__main__": + sharepoint( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="sharepoint-ingest-output", + num_processes=2, + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + ), + client_id="", + client_cred="", + site="", + # Flag to process only files within the site(s) + files_only=True, + path="Shared Documents", + recursive=False, + ) Additionally, you will need to pass the ``--partition-endpoint`` if you're running the API locally. You can find more information about the ``unstructured`` API `here `_. diff --git a/docs/source/source_connectors/slack.rst b/docs/source/source_connectors/slack.rst index dcb4700e60..53da13a257 100644 --- a/docs/source/source_connectors/slack.rst +++ b/docs/source/source_connectors/slack.rst @@ -30,30 +30,22 @@ Run Locally .. 
code:: python - import subprocess - - command = [ - "unstructured-ingest", - "slack", - "--channels", "12345678", - "--token", "12345678", - "--download-dir", "slack-ingest-download", - "--output-dir", "slack-ingest-output", - "--start-date", "2023-04-01T01:00:00-08:00", - "--end-date", "2023-04-02" - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.slack import slack + + if __name__ == "__main__": + slack( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="slack-ingest-download", + num_processes=2, + ), + channels=["12345678"], + token="12345678", + start_date="2023-04-01T01:00:00-08:00", + end_date="2023-04-02", + ) Run via the API --------------- @@ -81,32 +73,26 @@ You can also use upstream connectors with the ``unstructured`` API. For this you .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "slack", - "--channels", "12345678", - "--token", "12345678", - "--download-dir", "slack-ingest-download", - "--output-dir", "slack-ingest-output", - "--start-date", "2023-04-01T01:00:00-08:00", - "--end-date", "2023-04-02", - "--partition-by-api", - "--api-key", "", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed.
Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.slack import slack + + if __name__ == "__main__": + slack( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="slack-ingest-download", + num_processes=2, + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + ), + channels=["12345678"], + token="12345678", + start_date="2023-04-01T01:00:00-08:00", + end_date="2023-04-02", + ) Additionally, you will need to pass the ``--partition-endpoint`` if you're running the API locally. You can find more information about the ``unstructured`` API `here `_. diff --git a/docs/source/source_connectors/wikipedia.rst b/docs/source/source_connectors/wikipedia.rst index 7d81160994..cf6a6af061 100644 --- a/docs/source/source_connectors/wikipedia.rst +++ b/docs/source/source_connectors/wikipedia.rst @@ -28,28 +28,21 @@ Run Locally .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "wikipedia", - "--page-title", "Open Source Software", - "--output-dir", "dropbox-output", - "--num-processes", "2", - "--verbose", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed.
Error:') - print(error.decode()) + from unstructured.ingest.runner.wikipedia import wikipedia + from unstructured.ingest.interfaces import ReadConfig, PartitionConfig + + + if __name__ == "__main__": + wikipedia( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="wikipedia-ingest-output", + num_processes=2 + ), + page_title="Open Source Software", + auto_suggest=False, + ) Run via the API --------------- @@ -75,30 +68,24 @@ You can also use upstream connectors with the ``unstructured`` API. For this you .. code:: python - import subprocess - - command = [ - "unstructured-ingest", - "wikipedia", - "--page-title", "Open Source Software", - "--output-dir", "dropbox-output", - "--num-processes", "2", - "--verbose", - "--partition-by-api", - "--api-key", "", - ] - - # Run the command - process = subprocess.Popen(command, stdout=subprocess.PIPE) - output, error = process.communicate() - - # Print output - if process.returncode == 0: - print('Command executed successfully. Output:') - print(output.decode()) - else: - print('Command failed. Error:') - print(error.decode()) + import os + + from unstructured.ingest.interfaces import PartitionConfig, ReadConfig + from unstructured.ingest.runner.wikipedia import wikipedia + + if __name__ == "__main__": + wikipedia( + verbose=True, + read_config=ReadConfig(), + partition_config=PartitionConfig( + output_dir="wikipedia-ingest-output", + num_processes=2, + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + ), + page_title="Open Source Software", + auto_suggest=False, + ) Additionally, you will need to pass the ``--partition-endpoint`` if you're running the API locally. You can find more information about the ``unstructured`` API `here `_. 
diff --git a/example-docs/category-level.docx b/example-docs/category-level.docx new file mode 100644 index 0000000000..2611c2cd29 Binary files /dev/null and b/example-docs/category-level.docx differ diff --git a/example-docs/embedded-link.pdf b/example-docs/embedded-link.pdf new file mode 100644 index 0000000000..4aa0d21324 Binary files /dev/null and b/example-docs/embedded-link.pdf differ diff --git a/example-docs/emphasis-text.pdf b/example-docs/emphasis-text.pdf new file mode 100644 index 0000000000..6508772086 Binary files /dev/null and b/example-docs/emphasis-text.pdf differ diff --git a/example-docs/interface-config-guide-p93.pdf b/example-docs/interface-config-guide-p93.pdf new file mode 100644 index 0000000000..db41a7cae4 Binary files /dev/null and b/example-docs/interface-config-guide-p93.pdf differ diff --git a/examples/ingest/sharepoint/ingest.sh b/examples/ingest/sharepoint/ingest.sh index 4a73ca65fb..53a1218207 100644 --- a/examples/ingest/sharepoint/ingest.sh +++ b/examples/ingest/sharepoint/ingest.sh @@ -25,4 +25,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \ --files-only "Flag to process only files within the site(s)" \ --output-dir sharepoint-ingest-output \ --num-processes 2 \ + --path "Shared Documents" \ --verbose diff --git a/requirements/base.in b/requirements/base.in index 736dd1324e..4a20b179c3 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -11,6 +11,4 @@ emoji dataclasses-json python-iso639 langdetect -# (Trevor): This is a simple hello world package that is used to track -# download count for this package using scarf. 
-https://packages.unstructured.io/scarf.tgz +numpy \ No newline at end of file diff --git a/requirements/base.txt b/requirements/base.txt index 62be4fd36e..cfc1b241da 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -36,6 +36,10 @@ mypy-extensions==1.0.0 # via typing-inspect nltk==3.8.1 # via -r requirements/base.in +numpy==1.24.4 + # via + # -c requirements/constraints.in + # -r requirements/base.in packaging==23.1 # via marshmallow python-iso639==2023.6.15 @@ -46,8 +50,6 @@ regex==2023.8.8 # via nltk requests==2.31.0 # via -r requirements/base.in -scarf @ https://packages.unstructured.io/scarf.tgz - # via -r requirements/base.in six==1.16.0 # via langdetect soupsieve==2.5 diff --git a/requirements/constraints.in b/requirements/constraints.in index 59f1d35dc2..19a6775177 100644 --- a/requirements/constraints.in +++ b/requirements/constraints.in @@ -39,5 +39,8 @@ matplotlib==3.7.2 # NOTE(crag) - pin to available pandas for python 3.8 (at least in CI) fsspec==2023.9.1 pandas<2.0.4 -# langchain limits this to 3.1.7 -anyio==3.1.7 +# langchain limits anyio to below 4.0 +anyio<4.0 +# pinned in unstructured paddleocr +opencv-python==4.8.0.76 +opencv-contrib-python==4.8.0.76 diff --git a/requirements/dev.txt b/requirements/dev.txt index f785ea00bd..b90b4776d6 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -4,8 +4,10 @@ # # pip-compile requirements/dev.in # -anyio==4.0.0 - # via jupyter-server +anyio==3.7.1 + # via + # -c requirements/constraints.in + # jupyter-server appnope==0.1.3 # via # ipykernel @@ -42,7 +44,7 @@ certifi==2023.7.22 # -c requirements/constraints.in # -c requirements/test.txt # requests -cffi==1.15.1 +cffi==1.16.0 # via argon2-cffi-bindings cfgv==3.4.0 # via pre-commit @@ -151,7 +153,7 @@ jupyter-client==8.3.1 # qtconsole jupyter-console==6.6.3 # via jupyter -jupyter-core==5.3.1 +jupyter-core==5.3.2 # via # -c requirements/constraints.in # ipykernel @@ -393,7 +395,7 @@ urllib3==1.26.16 # requests virtualenv==20.24.5 # 
via pre-commit -wcwidth==0.2.6 +wcwidth==0.2.7 # via prompt-toolkit webcolors==1.13 # via jsonschema diff --git a/requirements/extra-csv.txt b/requirements/extra-csv.txt index b121fa5298..b015ffa6f9 100644 --- a/requirements/extra-csv.txt +++ b/requirements/extra-csv.txt @@ -6,6 +6,7 @@ # numpy==1.24.4 # via + # -c requirements/base.txt # -c requirements/constraints.in # pandas pandas==2.0.3 diff --git a/requirements/extra-paddleocr.txt b/requirements/extra-paddleocr.txt index 3fff2566e5..1f028530d2 100644 --- a/requirements/extra-paddleocr.txt +++ b/requirements/extra-paddleocr.txt @@ -33,7 +33,7 @@ cssselect==1.2.0 # via premailer cssutils==2.7.1 # via premailer -cycler==0.11.0 +cycler==0.12.0 # via matplotlib cython==3.0.2 # via unstructured-paddleocr @@ -95,6 +95,7 @@ networkx==3.1 # via scikit-image numpy==1.24.4 # via + # -c requirements/base.txt # -c requirements/constraints.in # contourpy # imageio @@ -111,9 +112,12 @@ numpy==1.24.4 # unstructured-paddleocr # visualdl opencv-contrib-python==4.8.0.76 - # via unstructured-paddleocr + # via + # -c requirements/constraints.in + # unstructured-paddleocr opencv-python==4.8.0.76 # via + # -c requirements/constraints.in # imgaug # unstructured-paddleocr openpyxl==3.1.2 diff --git a/requirements/extra-pdf-image.in b/requirements/extra-pdf-image.in index f22311f875..fa9cbcda5a 100644 --- a/requirements/extra-pdf-image.in +++ b/requirements/extra-pdf-image.in @@ -5,7 +5,7 @@ pdf2image pdfminer.six # Do not move to contsraints.in, otherwise unstructured-inference will not be upgraded # when unstructured library is. 
-unstructured-inference==0.5.31 +unstructured-inference==0.6.6 # unstructured fork of pytesseract that provides an interface to allow for multiple output formats # from one tesseract call unstructured.pytesseract>=0.3.12 diff --git a/requirements/extra-pdf-image.txt b/requirements/extra-pdf-image.txt index 6c60f99dee..679ffef54c 100644 --- a/requirements/extra-pdf-image.txt +++ b/requirements/extra-pdf-image.txt @@ -11,7 +11,7 @@ certifi==2023.7.22 # -c requirements/base.txt # -c requirements/constraints.in # requests -cffi==1.15.1 +cffi==1.16.0 # via cryptography charset-normalizer==3.2.0 # via @@ -24,7 +24,7 @@ contourpy==1.1.1 # via matplotlib cryptography==41.0.4 # via pdfminer-six -cycler==0.11.0 +cycler==0.12.0 # via matplotlib effdet==0.4.1 # via layoutparser @@ -74,6 +74,7 @@ networkx==3.1 # via torch numpy==1.24.4 # via + # -c requirements/base.txt # -c requirements/constraints.in # contourpy # layoutparser @@ -94,6 +95,7 @@ onnxruntime==1.16.0 # via unstructured-inference opencv-python==4.8.0.76 # via + # -c requirements/constraints.in # layoutparser # unstructured-inference packaging==23.1 @@ -212,7 +214,7 @@ tqdm==4.66.1 # huggingface-hub # iopath # transformers -transformers==4.33.2 +transformers==4.33.3 # via unstructured-inference typing-extensions==4.8.0 # via @@ -223,7 +225,7 @@ typing-extensions==4.8.0 # torch tzdata==2023.3 # via pandas -unstructured-inference==0.5.31 +unstructured-inference==0.6.6 # via -r requirements/extra-pdf-image.in unstructured-pytesseract==0.3.12 # via diff --git a/requirements/extra-xlsx.txt b/requirements/extra-xlsx.txt index d07490c0dd..5f1a3d6517 100644 --- a/requirements/extra-xlsx.txt +++ b/requirements/extra-xlsx.txt @@ -8,6 +8,7 @@ et-xmlfile==1.1.0 # via openpyxl numpy==1.24.4 # via + # -c requirements/base.txt # -c requirements/constraints.in # pandas openpyxl==3.1.2 diff --git a/requirements/huggingface.txt b/requirements/huggingface.txt index 3bbc556b4e..00ba71293a 100644 --- a/requirements/huggingface.txt +++ 
b/requirements/huggingface.txt @@ -50,6 +50,7 @@ networkx==3.1 # via torch numpy==1.24.4 # via + # -c requirements/base.txt # -c requirements/constraints.in # transformers packaging==23.1 @@ -96,7 +97,7 @@ tqdm==4.66.1 # huggingface-hub # sacremoses # transformers -transformers==4.33.2 +transformers==4.33.3 # via -r requirements/huggingface.in typing-extensions==4.8.0 # via diff --git a/requirements/ingest-airtable.txt b/requirements/ingest-airtable.txt index 1b535a5db6..db7e92a6a1 100644 --- a/requirements/ingest-airtable.txt +++ b/requirements/ingest-airtable.txt @@ -21,7 +21,7 @@ inflection==0.5.1 # via pyairtable pyairtable==2.1.0.post1 # via -r requirements/ingest-airtable.in -pydantic==1.10.12 +pydantic==1.10.13 # via # -c requirements/constraints.in # pyairtable diff --git a/requirements/ingest-azure.txt b/requirements/ingest-azure.txt index e9eadb8deb..e682d29422 100644 --- a/requirements/ingest-azure.txt +++ b/requirements/ingest-azure.txt @@ -30,7 +30,7 @@ certifi==2023.7.22 # -c requirements/base.txt # -c requirements/constraints.in # requests -cffi==1.15.1 +cffi==1.16.0 # via # azure-datalake-store # cryptography diff --git a/requirements/ingest-box.txt b/requirements/ingest-box.txt index bc022a226c..79268b6b3d 100644 --- a/requirements/ingest-box.txt +++ b/requirements/ingest-box.txt @@ -15,7 +15,7 @@ certifi==2023.7.22 # -c requirements/base.txt # -c requirements/constraints.in # requests -cffi==1.15.1 +cffi==1.16.0 # via cryptography charset-normalizer==3.2.0 # via diff --git a/requirements/ingest-delta-table.txt b/requirements/ingest-delta-table.txt index 2c83b64e9a..d3c45a04d5 100644 --- a/requirements/ingest-delta-table.txt +++ b/requirements/ingest-delta-table.txt @@ -12,6 +12,7 @@ fsspec==2023.9.1 # -r requirements/ingest-delta-table.in numpy==1.24.4 # via + # -c requirements/base.txt # -c requirements/constraints.in # pyarrow pyarrow==12.0.0 diff --git a/requirements/ingest-gcs.txt b/requirements/ingest-gcs.txt index 5533294fbc..4f6d048137 
100644 --- a/requirements/ingest-gcs.txt +++ b/requirements/ingest-gcs.txt @@ -47,7 +47,7 @@ google-api-core==2.12.0 # via # google-cloud-core # google-cloud-storage -google-auth==2.23.0 +google-auth==2.23.2 # via # gcsfs # google-api-core @@ -107,7 +107,6 @@ urllib3==1.26.16 # via # -c requirements/base.txt # -c requirements/constraints.in - # google-auth # requests yarl==1.9.2 # via aiohttp diff --git a/requirements/ingest-github.txt b/requirements/ingest-github.txt index 865778e014..ad5ac2a7a0 100644 --- a/requirements/ingest-github.txt +++ b/requirements/ingest-github.txt @@ -9,7 +9,7 @@ certifi==2023.7.22 # -c requirements/base.txt # -c requirements/constraints.in # requests -cffi==1.15.1 +cffi==1.16.0 # via # cryptography # pynacl diff --git a/requirements/ingest-google-drive.txt b/requirements/ingest-google-drive.txt index 993a01d200..9f90bcc9ca 100644 --- a/requirements/ingest-google-drive.txt +++ b/requirements/ingest-google-drive.txt @@ -19,7 +19,7 @@ google-api-core==2.12.0 # via google-api-python-client google-api-python-client==2.101.0 # via -r requirements/ingest-google-drive.in -google-auth==2.23.0 +google-auth==2.23.2 # via # google-api-core # google-api-python-client @@ -63,5 +63,4 @@ urllib3==1.26.16 # via # -c requirements/base.txt # -c requirements/constraints.in - # google-auth # requests diff --git a/requirements/ingest-notion.txt b/requirements/ingest-notion.txt index fadccceea2..b200c2562e 100644 --- a/requirements/ingest-notion.txt +++ b/requirements/ingest-notion.txt @@ -4,33 +4,35 @@ # # pip-compile requirements/ingest-notion.in # -certifi==2023.7.22 +anyio==3.7.1 # via - # -c requirements/base.txt # -c requirements/constraints.in - # httpx -charset-normalizer==3.2.0 + # httpcore +certifi==2023.7.22 # via # -c requirements/base.txt + # -c requirements/constraints.in + # httpcore # httpx -h11==0.12.0 +exceptiongroup==1.1.3 + # via anyio +h11==0.14.0 # via httpcore htmlbuilder==1.0.0 # via -r requirements/ingest-notion.in -httpcore==0.13.3 
+httpcore==0.18.0 # via httpx -httpx==0.20.0 +httpx==0.25.0 # via notion-client idna==3.4 # via # -c requirements/base.txt + # anyio # httpx - # rfc3986 notion-client==2.0.0 # via -r requirements/ingest-notion.in -rfc3986[idna2008]==1.5.0 - # via httpx sniffio==1.3.0 # via + # anyio # httpcore # httpx diff --git a/requirements/ingest-onedrive.txt b/requirements/ingest-onedrive.txt index cb5c5903cb..2d9627f1d4 100644 --- a/requirements/ingest-onedrive.txt +++ b/requirements/ingest-onedrive.txt @@ -15,7 +15,7 @@ certifi==2023.7.22 # -c requirements/base.txt # -c requirements/constraints.in # requests -cffi==1.15.1 +cffi==1.16.0 # via cryptography charset-normalizer==3.2.0 # via diff --git a/requirements/ingest-openai.txt b/requirements/ingest-openai.txt index c6620e6579..d7846c0a08 100644 --- a/requirements/ingest-openai.txt +++ b/requirements/ingest-openai.txt @@ -10,6 +10,10 @@ aiohttp==3.8.5 # openai aiosignal==1.3.1 # via aiohttp +anyio==3.7.1 + # via + # -c requirements/constraints.in + # langchain async-timeout==4.0.3 # via # aiohttp @@ -30,6 +34,8 @@ dataclasses-json==0.6.1 # via # -c requirements/base.txt # langchain +exceptiongroup==1.1.3 + # via anyio frozenlist==1.4.0 # via # aiohttp @@ -37,11 +43,16 @@ frozenlist==1.4.0 idna==3.4 # via # -c requirements/base.txt + # anyio # requests # yarl -langchain==0.0.298 +jsonpatch==1.33 + # via langchain +jsonpointer==2.4 + # via jsonpatch +langchain==0.0.304 # via -r requirements/ingest-openai.in -langsmith==0.0.40 +langsmith==0.0.41 # via langchain marshmallow==3.20.1 # via @@ -59,6 +70,7 @@ numexpr==2.8.6 # via langchain numpy==1.24.4 # via + # -c requirements/base.txt # -c requirements/constraints.in # langchain # numexpr @@ -68,7 +80,7 @@ packaging==23.1 # via # -c requirements/base.txt # marshmallow -pydantic==1.10.12 +pydantic==1.10.13 # via # -c requirements/constraints.in # langchain @@ -86,6 +98,8 @@ requests==2.31.0 # langsmith # openai # tiktoken +sniffio==1.3.0 + # via anyio sqlalchemy==2.0.21 # via 
langchain tenacity==8.2.3 diff --git a/requirements/ingest-outlook.txt b/requirements/ingest-outlook.txt index 508d7573dd..ccef36d349 100644 --- a/requirements/ingest-outlook.txt +++ b/requirements/ingest-outlook.txt @@ -9,7 +9,7 @@ certifi==2023.7.22 # -c requirements/base.txt # -c requirements/constraints.in # requests -cffi==1.15.1 +cffi==1.16.0 # via cryptography charset-normalizer==3.2.0 # via diff --git a/requirements/ingest-salesforce.txt b/requirements/ingest-salesforce.txt index 116a00eab0..a6c31b1014 100644 --- a/requirements/ingest-salesforce.txt +++ b/requirements/ingest-salesforce.txt @@ -11,7 +11,7 @@ certifi==2023.7.22 # -c requirements/base.txt # -c requirements/constraints.in # requests -cffi==1.15.1 +cffi==1.16.0 # via cryptography charset-normalizer==3.2.0 # via diff --git a/requirements/ingest-sharepoint.txt b/requirements/ingest-sharepoint.txt index 7a209b1042..99d1efbfde 100644 --- a/requirements/ingest-sharepoint.txt +++ b/requirements/ingest-sharepoint.txt @@ -9,7 +9,7 @@ certifi==2023.7.22 # -c requirements/base.txt # -c requirements/constraints.in # requests -cffi==1.15.1 +cffi==1.16.0 # via cryptography charset-normalizer==3.2.0 # via diff --git a/requirements/test.txt b/requirements/test.txt index fe8dc02504..98d40fd188 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -74,7 +74,7 @@ pluggy==1.3.0 # via pytest pycodestyle==2.11.0 # via flake8 -pydantic==1.10.12 +pydantic==1.10.13 # via # -c requirements/constraints.in # -r requirements/test.in @@ -113,7 +113,7 @@ types-click==7.1.8 # via -r requirements/test.in types-markdown==3.4.2.10 # via -r requirements/test.in -types-requests==2.31.0.5 +types-requests==2.31.0.6 # via -r requirements/test.in types-tabulate==0.9.0.3 # via -r requirements/test.in diff --git a/scripts/elasticsearch-test-helpers/create-and-check-es.sh b/scripts/elasticsearch-test-helpers/create-and-check-es.sh index 62f7cb6b66..dc06c21a16 100755 --- 
a/scripts/elasticsearch-test-helpers/create-and-check-es.sh +++ b/scripts/elasticsearch-test-helpers/create-and-check-es.sh @@ -1,37 +1,14 @@ #!/usr/bin/env bash -SCRIPT_DIR=$(dirname "$(realpath "$0")") - -# Create the Elasticsearch cluster and get the container id -docker run -d --rm -p 9200:9200 -p 9300:9300 -e "xpack.security.enabled=false" -e "discovery.type=single-node" --name es-test docker.elastic.co/elasticsearch/elasticsearch:8.7.0 +set -e -# Wait for Elasticsearch container to start -echo "Waiting for Elasticsearch container to start..." -sleep 1 - -url="http://localhost:9200/_cluster/health" -status_code=0 -retry_count=0 -max_retries=6 +SCRIPT_DIR=$(dirname "$(realpath "$0")") -# Check the cluster status repeatedly until it becomes live or maximum retries are reached -while [ "$status_code" -ne 200 ] && [ "$retry_count" -lt "$max_retries" ]; do - # Send a GET request to the cluster health API - response=$(curl -s -o /dev/null -w "%{http_code}" "$url") - status_code="$response" +# Create the Elasticsearch cluster +docker compose version +docker compose -f "$SCRIPT_DIR"/docker-compose.yaml up --wait +docker compose -f "$SCRIPT_DIR"/docker-compose.yaml ps - # Process the files only when the Elasticsearch cluster is live - if [ "$status_code" -eq 200 ]; then - echo "Cluster is live." - python "$SCRIPT_DIR/create_and_fill_es.py" - else - ((retry_count++)) - echo "Cluster is not available. Retrying in 5 seconds... (Attempt $retry_count)" - sleep 5 - fi -done -# If the cluster has not become live, exit after a certain number of tries -if [ "$status_code" -ne 200 ]; then - echo "Cluster took an unusually long time to create (>25 seconds). Expected time is around 10 seconds. Exiting." -fi +echo "Cluster is live." 
+"$SCRIPT_DIR"/create_and_fill_es.py diff --git a/scripts/elasticsearch-test-helpers/create_and_fill_es.py b/scripts/elasticsearch-test-helpers/create_and_fill_es.py old mode 100644 new mode 100755 index a63bd222d2..a761255741 --- a/scripts/elasticsearch-test-helpers/create_and_fill_es.py +++ b/scripts/elasticsearch-test-helpers/create_and_fill_es.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 + import pandas as pd from elasticsearch import Elasticsearch from elasticsearch.helpers import bulk @@ -10,12 +12,14 @@ ) print("Connecting to the Elasticsearch cluster.") -es = Elasticsearch(CLUSTER_URL) +es = Elasticsearch(CLUSTER_URL, request_timeout=30) print(es.info()) df = pd.read_csv(DATA_PATH).dropna().reset_index() print("Creating an Elasticsearch index for testing elasticsearch ingest.") -es.indices.create(index=INDEX_NAME, mappings=MAPPINGS) +response = es.options(max_retries=5).indices.create(index=INDEX_NAME, mappings=MAPPINGS) +if response.meta.status != 200: + raise RuntimeError("failed to create index") print("Loading data into the index.") bulk_data = [] diff --git a/scripts/elasticsearch-test-helpers/docker-compose.yaml b/scripts/elasticsearch-test-helpers/docker-compose.yaml new file mode 100644 index 0000000000..47cb93ae1f --- /dev/null +++ b/scripts/elasticsearch-test-helpers/docker-compose.yaml @@ -0,0 +1,15 @@ +services: + elasticsearch: + image: docker.elastic.co/elasticsearch/elasticsearch:8.7.0 + container_name: es-test + ports: + - 9200:9200 + - 9300:9300 + environment: + - xpack.security.enabled=false + - discovery.type=single-node + healthcheck: + test: ["CMD-SHELL", "curl --silent --fail localhost:9200/_cluster/health || exit 1"] + interval: 30s + timeout: 30s + retries: 3 diff --git a/scripts/minio-test-helpers/create-and-check-minio.sh b/scripts/minio-test-helpers/create-and-check-minio.sh new file mode 100755 index 0000000000..09089a944a --- /dev/null +++ b/scripts/minio-test-helpers/create-and-check-minio.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash 
+ +SCRIPT_DIR=$(dirname "$(realpath "$0")") + +secret_key=minioadmin +access_key=minioadmin +region=us-east-2 +endpoint_url=http://localhost:9000 +bucket_name=utic-dev-tech-fixtures + +function upload(){ + echo "Uploading test content to new bucket in minio" + AWS_REGION=$region AWS_SECRET_ACCESS_KEY=$secret_key AWS_ACCESS_KEY_ID=$access_key \ + aws --output json --endpoint-url $endpoint_url s3api create-bucket --bucket $bucket_name | jq + AWS_REGION=$region AWS_SECRET_ACCESS_KEY=$secret_key AWS_ACCESS_KEY_ID=$access_key \ + aws --endpoint-url $endpoint_url s3 cp "$SCRIPT_DIR"/wiki_movie_plots_small.csv s3://$bucket_name/ +} + +# Create Minio single server +docker compose version +docker compose -f "$SCRIPT_DIR"/docker-compose.yaml up --wait +docker compose -f "$SCRIPT_DIR"/docker-compose.yaml ps + +echo "Cluster is live." +upload diff --git a/scripts/minio-test-helpers/docker-compose.yaml b/scripts/minio-test-helpers/docker-compose.yaml new file mode 100644 index 0000000000..acc3ec9b48 --- /dev/null +++ b/scripts/minio-test-helpers/docker-compose.yaml @@ -0,0 +1,13 @@ +services: + minio: + image: quay.io/minio/minio + container_name: minio-test + ports: + - 9000:9000 + - 9001:9001 + command: server --console-address ":9001" /data + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] + interval: 5s + timeout: 20s + retries: 3 diff --git a/scripts/minio-test-helpers/wiki_movie_plots_small.csv b/scripts/minio-test-helpers/wiki_movie_plots_small.csv new file mode 100644 index 0000000000..2fbb2b49bb --- /dev/null +++ b/scripts/minio-test-helpers/wiki_movie_plots_small.csv @@ -0,0 +1,31 @@ +Release Year,Title,Origin/Ethnicity,Director,Cast,Genre,Wiki Page,Plot +1901,Kansas Saloon Smashers,American,Unknown,,unknown,https://en.wikipedia.org/wiki/Kansas_Saloon_Smashers,"A bartender is working at a saloon, serving drinks to customers. 
After he fills a stereotypically Irish man's bucket with beer, Carrie Nation and her followers burst inside. They assault the Irish man, pulling his hat over his eyes and then dumping the beer over his head. The group then begin wrecking the bar, smashing the fixtures, mirrors, and breaking the cash register. The bartender then sprays seltzer water in Nation's face before a group of policemen appear and order everybody to leave.[1]" +1901,Love by the Light of the Moon,American,Unknown,,unknown,https://en.wikipedia.org/wiki/Love_by_the_Light_of_the_Moon,"The moon, painted with a smiling face hangs over a park at night. A young couple walking past a fence learn on a railing and look up. The moon smiles. They embrace, and the moon's smile gets bigger. They then sit down on a bench by a tree. The moon's view is blocked, causing him to frown. In the last scene, the man fans the woman with his hat because the moon has left the sky and is perched over her shoulder to see everything better." +1901,The Martyred Presidents,American,Unknown,,unknown,https://en.wikipedia.org/wiki/The_Martyred_Presidents,"The film, just over a minute long, is composed of two shots. In the first, a girl sits at the base of an altar or tomb, her face hidden from the camera. At the center of the altar, a viewing portal displays the portraits of three U.S. Presidents—Abraham Lincoln, James A. Garfield, and William McKinley—each victims of assassination. +In the second shot, which runs just over eight seconds long, an assassin kneels feet of Lady Justice." +1901,"Terrible Teddy, the Grizzly King",American,Unknown,,unknown,"https://en.wikipedia.org/wiki/Terrible_Teddy,_the_Grizzly_King","Lasting just 61 seconds and consisting of two shots, the first shot is set in a wood during winter. The actor representing then vice-president Theodore Roosevelt enthusiastically hurries down a hillside towards a tree in the foreground. He falls once, but rights himself and cocks his rifle. 
Two other men, bearing signs reading ""His Photographer"" and ""His Press Agent"" respectively, follow him into the shot; the photographer sets up his camera. ""Teddy"" aims his rifle upward at the tree and fells what appears to be a common house cat, which he then proceeds to stab. ""Teddy"" holds his prize aloft, and the press agent takes notes. The second shot is taken in a slightly different part of the wood, on a path. ""Teddy"" rides the path on his horse towards the camera and out to the left of the shot, followed closely by the press agent and photographer, still dutifully holding their signs." +1902,Jack and the Beanstalk,American,"George S. Fleming, Edwin S. Porter",,unknown,https://en.wikipedia.org/wiki/Jack_and_the_Beanstalk_(1902_film),"The earliest known adaptation of the classic fairytale, this films shows Jack trading his cow for the beans, his mother forcing him to drop them in the front yard, and beig forced upstairs. As he sleeps, Jack is visited by a fairy who shows him glimpses of what will await him when he ascends the bean stalk. In this version, Jack is the son of a deposed king. When Jack wakes up, he finds the beanstalk has grown and he climbs to the top where he enters the giant's home. The giant finds Jack, who narrowly escapes. The giant chases Jack down the bean stalk, but Jack is able to cut it down before the giant can get to safety. He falls and is killed as Jack celebrates. The fairy then reveals that Jack may return home as a prince." +1903,Alice in Wonderland,American,Cecil Hepworth,May Clark,unknown,https://en.wikipedia.org/wiki/Alice_in_Wonderland_(1903_film),"Alice follows a large white rabbit down a ""Rabbit-hole"". She finds a tiny door. When she finds a bottle labeled ""Drink me"", she does, and shrinks, but not enough to pass through the door. She then eats something labeled ""Eat me"" and grows larger. She finds a fan when enables her to shrink enough to get into the ""Garden"" and try to get a ""Dog"" to play with her. 
She enters the ""White Rabbit's tiny House,"" but suddenly resumes her normal size. In order to get out, she has to use the ""magic fan."" +She enters a kitchen, in which there is a cook and a woman holding a baby. She persuades the woman to give her the child and takes the infant outside after the cook starts throwing things around. The baby then turns into a pig and squirms out of her grip. ""The Duchess's Cheshire Cat"" appears and disappears a couple of times to Alice and directs her to the Mad Hatter's ""Mad Tea-Party."" After a while, she leaves. +The Queen invites Alice to join the ""ROYAL PROCESSION"": a parade of marching playing cards and others headed by the White Rabbit. When Alice ""unintentionally offends the Queen"", the latter summons the ""Executioner"". Alice ""boxes the ears"", then flees when all the playing cards come for her. Then she wakes up and realizes it was all a dream." +1903,The Great Train Robbery,American,Edwin S. Porter,,western,https://en.wikipedia.org/wiki/The_Great_Train_Robbery_(1903_film),"The film opens with two bandits breaking into a railroad telegraph office, where they force the operator at gunpoint to have a train stopped and to transmit orders for the engineer to fill the locomotive's tender at the station's water tank. They then knock the operator out and tie him up. As the train stops it is boarded by the bandits‍—‌now four. Two bandits enter an express car, kill a messenger and open a box of valuables with dynamite; the others kill the fireman and force the engineer to halt the train and disconnect the locomotive. The bandits then force the passengers off the train and rifle them for their belongings. One passenger tries to escape but is instantly shot down. Carrying their loot, the bandits escape in the locomotive, later stopping in a valley where their horses had been left. +Meanwhile, back in the telegraph office, the bound operator awakens, but he collapses again. 
His daughter arrives bringing him his meal and cuts him free, and restores him to consciousness by dousing him with water. +There is some comic relief at a dance hall, where an Eastern stranger is forced to dance while the locals fire at his feet. The door suddenly opens and the telegraph operator rushes in to tell them of the robbery. The men quickly form a posse, which overtakes the bandits, and in a final shootout kills them all and recovers the stolen mail." +1904,The Suburbanite,American,Wallace McCutcheon,,comedy,https://en.wikipedia.org/wiki/The_Suburbanite,"The film is about a family who move to the suburbs, hoping for a quiet life. Things start to go wrong, and the wife gets violent and starts throwing crockery, leading to her arrest." +1905,The Little Train Robbery,American,Edwin Stanton Porter,,unknown,https://en.wikipedia.org/wiki/The_Little_Train_Robbery,"The opening scene shows the interior of the robbers' den. The walls are decorated with the portraits of notorious criminals and pictures illustrating the exploits of famous bandits. Some of the gang are lounging about, while others are reading novels and illustrated papers. Although of youthful appearance, each is dressed like a typical Western desperado. The ""Bandit Queen,"" leading a blindfolded new recruit, now enters the room. He is led to the center of the room, raises his right hand and is solemnly sworn in. When the bandage is removed from his eyes he finds himself looking into the muzzles of a dozen or more 45's. The gang then congratulates the new member and heartily shake his hand. The ""Bandit Queen"" who is evidently the leader of the gang, now calls for volunteers to hold up a train. All respond, but she picks out seven for the job who immediately leave the cabin. +The next scene shows the gang breaking into a barn. They steal ponies and ride away. 
Upon reaching the place agreed upon they picket their ponies and leaving them in charge of a trusted member proceed to a wild mountain spot in a bend of the railroad, where the road runs over a steep embankment. The spot is an ideal one for holding up a train. Cross ties are now placed on the railroad track and the gang hide in some bushes close by and wait for the train. The train soon approaches and is brought to a stop. The engineer leaves his engine and proceeds to remove the obstruction on the track. While he is bending over one of the gang sneaks up behind them and hits him on the head with an axe, and knocks him senseless down the embankment, while the gang surround the train and hold up the passengers. After securing all the ""valuables,"" consisting principally of candy and dolls, the robbers uncouple the engine and one car and make their escape just in time to avoid a posse of police who appear on the scene. Further up the road they abandon the engine and car, take to the woods and soon reach their ponies. +In the meantime the police have learned the particulars of the hold-up from the frightened passengers and have started up the railroad tracks after the fleeing robbers. The robbers are next seen riding up the bed of a shallow stream and finally reach their den, where the remainder of the gang have been waiting for them. Believing they have successfully eluded their pursuers, they proceed to divide the ""plunder."" The police, however, have struck the right trail and are in close pursuit. While the ""plunder"" is being divided a sentry gives the alarm and the entire gang, abandoning everything, rush from the cabin barely in time to escape capture. The police make a hurried search and again start in pursuit. The robbers are so hard pressed that they are unable to reach their ponies, and are obliged to take chances on foot. The police now get in sight of the fleeing robbers and a lively chase follows through tall weeds, over a bridge and up a steep hill. 
Reaching a pond the police are close on their heels. The foremost robbers jump in clothes and all and strike out for the opposite bank. Two hesitate and are captured. Boats are secured and after an exciting tussle the entire gang is rounded up. In the mix up one of the police is dragged overboard. The final scene shows the entire gang of bedraggled and crestfallen robbers tied together with a rope and being led away by the police. Two of the police are loaded down with revolvers, knives and cartridge belts, and resemble walking aresenals. As a fitting climax a confederate steals out of the woods, cuts the rope and gallantly rescues the ""Bandit Queen.""" +1905,The Night Before Christmas,American,Edwin Stanton Porter,,unknown,https://en.wikipedia.org/wiki/The_Night_Before_Christmas_(1905_film),"Scenes are introduced using lines of the poem.[2] Santa Claus, played by Harry Eytinge, is shown feeding real reindeer[4] and finishes his work in the workshop. Meanwhile, the children of a city household hang their stockings and go to bed, but unable to sleep they engage in a pillow fight. Santa Claus leaves his home on a sleigh with his reindeer. He enters the children's house through the chimney, and leaves the presents. The children come down the stairs and enjoy their presents." +1906,Dream of a Rarebit Fiend,American,Wallace McCutcheon and Edwin S. Porter,,short,https://en.wikipedia.org/wiki/Dream_of_a_Rarebit_Fiend_(1906_film),"The Rarebit Fiend gorges on Welsh rarebit at a restaurant. When he leaves, he begins to get dizzy as he starts to hallucinate. He desperately tries to hang onto a lamppost as the world spins all around him. A man helps him get home. He falls into bed and begins having more hallucinatory dreams. During a dream sequence, the furniture begins moving around the room. Imps emerge from a floating Welsh rarebit container and begin poking his head as he sleeps. 
His bed then begins dancing and spinning wildly around the room before flying out the window with the Fiend in it. The bed floats across the city as the Fiend floats up and off the bed. He hangs off the back and eventually gets caught on a weathervane atop a steeple. His bedclothes tear and he falls from the sky, crashing through his bedroom ceiling. The Fiend awakens from the dream after falling out of his bed." +1906,From Leadville to Aspen: A Hold-Up in the Rockies,American,Francis J. Marion and Wallace McCutcheon,,short action/crime western,https://en.wikipedia.org/wiki/From_Leadville_to_Aspen:_A_Hold-Up_in_the_Rockies,The film features a train traveling through the Rockies and a hold up created by two thugs placing logs on the line. They systematically rob the wealthy occupants at gunpoint and then make their getaway along the tracks and later by a hi-jacked horse and cart. +1906,Kathleen Mavourneen,American,Edwin S. Porter,,short film,https://en.wikipedia.org/wiki/Kathleen_Mavourneen_(1906_film),"Irish villager Kathleen is a tenant of Captain Clearfield, who controls local judges and criminals. Her father owes Clearfield a large debt. Terence O'More saves the village from Clearfield, causing a large celebration. +Film historian Charles Musser writes of Porter's adaptation, ""O'More not only rescues Kathleen from the villain but, through marriage, renews the family for another generation.""[1]" +1907,Daniel Boone,American,Wallace McCutcheon and Ediwin S. Porter,"William Craven, Florence Lawrence",biographical,https://en.wikipedia.org/wiki/Daniel_Boone_(1907_film),"Boone's daughter befriends an Indian maiden as Boone and his companion start out on a hunting expedition. While he is away, Boone's cabin is attacked by the Indians, who set it on fire and abduct Boone's daughter. Boone returns, swears vengeance, then heads out on the trail to the Indian camp. His daughter escapes but is chased. 
The Indians encounter Boone, which sets off a huge fight on the edge of a cliff. A burning arrow gets shot into the Indian camp. Boone gets tied to the stake and tortured. The burning arrow sets the Indian camp on fire, causing panic. Boone is rescued by his horse, and Boone has a knife fight in which he kills the Indian chief.[2]" +1907,How Brown Saw the Baseball Game,American,Unknown,Unknown,comedy,https://en.wikipedia.org/wiki/How_Brown_Saw_the_Baseball_Game,"Before heading out to a baseball game at a nearby ballpark, sports fan Mr. Brown drinks several highball cocktails. He arrives at the ballpark to watch the game, but has become so inebriated that the game appears to him in reverse, with the players running the bases backwards and the baseball flying back into the pitcher's hand. After the game is over, Mr. Brown is escorted home by one of his friends. When they arrive at Brown's house, they encounter his wife who becomes furious with the friend and proceeds to physically assault him, believing he is responsible for her husband's severe intoxication.[1]" +1907,Laughing Gas,American,Edwin Stanton Porter,"Bertha Regustus, Edward Boulden",comedy,https://en.wikipedia.org/wiki/Laughing_Gas_(film)#1907_Film,"The plot is that of a black woman going to the dentist for a toothache and being given laughing gas. On her way walking home, and in other situations, she can't stop laughing, and everyone she meets ""catches"" the laughter from her, including a vendor and police officers." +1908,The Adventures of Dollie,American,D. W. Griffith,"Arthur V. Johnson, Linda Arvidson",drama,https://en.wikipedia.org/wiki/The_Adventures_of_Dollie,"On a beautiful summer day a father and mother take their daughter Dollie on an outing to the river. The mother refuses to buy a gypsy's wares. The gypsy tries to rob the mother, but the father drives him off. The gypsy returns to the camp and devises a plan. They return and kidnap Dollie while her parents are distracted. 
A rescue crew is organized, but the gypsy takes Dollie to his camp. They gag Dollie and hide her in a barrel before the rescue party gets to the camp. Once they leave the gypsies and escapes in their wagon. As the wagon crosses the river, the barrel falls into the water. Still sealed in the barrel, Dollie is swept downstream in dangerous currents. A boy who is fishing in the river finds the barrel, and Dollie is reunited safely with her parents." +1908,The Black Viper,American,D. W. Griffith,D. W. Griffith,drama,https://en.wikipedia.org/wiki/The_Black_Viper,"A thug accosts a girl as she leaves her workplace but a man rescues her. The thug vows revenge and, with the help of two friends, attacks the girl and her rescuer again as they're going for a walk. This time they succeed in kidnapping the rescuer. He is bound and gagged and taken away in a cart. The girl runs home and gets help from several neighbors. They track the ruffians down to a cabin in the mountains where the gang has trapped their victim and set the cabin on fire. A thug and Rescuer fight on the roof of the house." +1908,A Calamitous Elopement,American,D.W. Griffith,"Harry Solter, Linda Arvidson",comedy,https://en.wikipedia.org/wiki/A_Calamitous_Elopement,"A young couple decides to elope after being caught in the midst of a romantic moment by the woman's angry father. They make plans to leave, but a thief discovers their plans and hides in their trunk and waits for the right moment to steal their belongings." +1908,The Call of the Wild,American,D. W. Griffith,Charles Inslee,adventure,https://en.wikipedia.org/wiki/The_Call_of_the_Wild_(1908_film),"A white girl (Florence Lawrence) rejects a proposal from an Indian brave (Charles Inslee) in this early one-reel Western melodrama. Despite the rejection, the Indian still comes to the girl's defense when she is abducted by his warring tribe. 
In her first year in films, Florence Lawrence was already the most popular among the Biograph Company's anonymous stock company players. By 1909, she was known the world over as ""The Biograph Girl.""" +1908,A Christmas Carol,American,Unknown,Tom Ricketts,drama,https://en.wikipedia.org/wiki/A_Christmas_Carol_(1908_film),"No prints of the first American film adaptation of A Christmas Carol are known to exist,[1] but The Moving Picture World magazine provided a scene-by-scene description before the film's release.[2] Scrooge goes into his office and begins working. His nephew, along with three women who wish for Scrooge to donate enter. However, Scrooge dismisses them. On the night of Christmas Eve, his long-dead partner Jacob Marley comes as a ghost, warning him of a horrible fate if he does not change his ways. Scrooge meets three spirits that show Scrooge the real meaning of Christmas, along with his grave, the result of his parsimonious ways. The next morning, he wakes and realizes the error of his ways. Scrooge was then euphoric and generous for the rest of his life." +1908,The Fight for Freedom,American,D. W. Griffith,"Florence Auer, John G. Adolfi",western,https://en.wikipedia.org/wiki/The_Fight_for_Freedom,"The film opens in a town on the Mexican border. A poker game is going on in the local saloon. One of the players cheats and is shot dead by another of the players, a Mexican named Pedro. In the uproar that follows Pedro is wounded as he escapes from the saloon. The sheriff is called, who tracks Pedro to his home but Pedro kills the sherriff too. While Pedro hides, his wife Juanita, is arrested on suspicion of murdering the sheriff. Pedro rescues her from the town jail and the two head for the Mexican border. Caught by the posse before they reach the border, Juanita is killed and the film ends with Pedro being arrested and taken back to town." 
diff --git a/scripts/performance/benchmark.sh b/scripts/performance/benchmark.sh index 0cbf7f4337..878f22f4dd 100755 --- a/scripts/performance/benchmark.sh +++ b/scripts/performance/benchmark.sh @@ -44,9 +44,9 @@ if [[ "$DOCKER_TEST" == "true" ]]; then -e GIT_HASH="$GIT_HASH" \ -e SLOW_FILES="${SLOW_FILES[*]}" \ -e HI_RES_STRATEGY_FILES="${HI_RES_STRATEGY_FILES[*]}" \ - -v "${SCRIPT_DIR}":/home/scripts/performance \ + -v "${SCRIPT_DIR}":/home/notebook-user/scripts/performance \ unstructured:perf-test \ - bash /home/scripts/performance/benchmark-local.sh 2>&1 | tee >(while IFS= read -r line; do + bash /home/notebook-user/scripts/performance/benchmark-local.sh 2>&1 | tee >(while IFS= read -r line; do read_benchmark_logs_for_results done) else diff --git a/test_unstructured/chunking/test_title.py b/test_unstructured/chunking/test_title.py index 8ccfde5af8..bc8bdcc6b0 100644 --- a/test_unstructured/chunking/test_title.py +++ b/test_unstructured/chunking/test_title.py @@ -31,7 +31,7 @@ def test_split_elements_by_title_and_table(): Text("It is storming outside."), CheckBox(), ] - sections = _split_elements_by_title_and_table(elements, combine_under_n_chars=0) + sections = _split_elements_by_title_and_table(elements, combine_text_under_n_chars=0) assert sections == [ [ @@ -75,7 +75,7 @@ def test_chunk_by_title(): Text("It is storming outside."), CheckBox(), ] - chunks = chunk_by_title(elements, combine_under_n_chars=0) + chunks = chunk_by_title(elements, combine_text_under_n_chars=0) assert chunks == [ CompositeElement( @@ -112,7 +112,7 @@ def test_chunk_by_title_respects_section_change(): Text("It is storming outside."), CheckBox(), ] - chunks = chunk_by_title(elements, combine_under_n_chars=0) + chunks = chunk_by_title(elements, combine_text_under_n_chars=0) assert chunks == [ CompositeElement( @@ -147,7 +147,7 @@ def test_chunk_by_title_separates_by_page_number(): Text("It is storming outside."), CheckBox(), ] - chunks = chunk_by_title(elements, 
multipage_sections=False, combine_under_n_chars=0) + chunks = chunk_by_title(elements, multipage_sections=False, combine_text_under_n_chars=0) assert chunks == [ CompositeElement( @@ -182,7 +182,7 @@ def test_chunk_by_title_groups_across_pages(): Text("It is storming outside."), CheckBox(), ] - chunks = chunk_by_title(elements, multipage_sections=True, combine_under_n_chars=0) + chunks = chunk_by_title(elements, multipage_sections=True, combine_text_under_n_chars=0) assert chunks == [ CompositeElement( @@ -212,24 +212,32 @@ def test_add_chunking_strategy_on_partition_html_respects_multipage(): filename, chunking_strategy="by_title", multipage_sections=False, - combine_under_n_chars=0, + combine_text_under_n_chars=0, + new_after_n_chars=300, + max_characters=400, ) partitioned_elements_multipage_true_combine_chars_0 = partition_html( filename, chunking_strategy="by_title", multipage_sections=True, - combine_under_n_chars=0, + combine_text_under_n_chars=0, + new_after_n_chars=300, + max_characters=400, ) elements = partition_html(filename) cleaned_elements_multipage_false_combine_chars_0 = chunk_by_title( elements, multipage_sections=False, - combine_under_n_chars=0, + combine_text_under_n_chars=0, + new_after_n_chars=300, + max_characters=400, ) cleaned_elements_multipage_true_combine_chars_0 = chunk_by_title( elements, multipage_sections=True, - combine_under_n_chars=0, + combine_text_under_n_chars=0, + new_after_n_chars=300, + max_characters=400, ) assert ( partitioned_elements_multipage_false_combine_chars_0 @@ -244,7 +252,21 @@ def test_add_chunking_strategy_on_partition_html_respects_multipage(): ) -def test_add_chunking_strategy_raises_error_for_invalid_n_chars(): +@pytest.mark.parametrize( + ("combine_text_under_n_chars", "new_after_n_chars", "max_characters"), + [ + (-1, -1, -1), + (0, 0, 0), + (-5666, -6777, -8999), + (-5, 40, 50), + (50, 100, 20), + ], +) +def test_add_chunking_strategy_raises_error_for_invalid_n_chars( + combine_text_under_n_chars, + 
new_after_n_chars, + max_characters, +): elements = [ Title("A Great Day"), Text("Today is a great day."), @@ -258,7 +280,12 @@ def test_add_chunking_strategy_raises_error_for_invalid_n_chars(): CheckBox(), ] with pytest.raises(ValueError): - chunk_by_title(elements, combine_under_n_chars=1, new_after_n_chars=0) + chunk_by_title( + elements, + combine_text_under_n_chars=combine_text_under_n_chars, + new_after_n_chars=new_after_n_chars, + max_characters=max_characters, + ) def test_chunk_by_title_drops_extra_metadata(): @@ -335,7 +362,7 @@ def test_chunk_by_title_drops_extra_metadata(): ), ] - chunks = chunk_by_title(elements, combine_under_n_chars=0) + chunks = chunk_by_title(elements, combine_text_under_n_chars=0) assert str(chunks[0]) == str( CompositeElement("A Great Day\n\nToday is a great day.\n\nIt is sunny outside."), diff --git a/test_unstructured/documents/test_elements.py b/test_unstructured/documents/test_elements.py index 270e16fb68..c85f6c8495 100644 --- a/test_unstructured/documents/test_elements.py +++ b/test_unstructured/documents/test_elements.py @@ -14,6 +14,7 @@ UUID, CoordinatesMetadata, Element, + ElementMetadata, NoID, Text, ) @@ -186,3 +187,27 @@ def test_element_to_dict(): "element_id": "awt32t1", } assert element.to_dict() == expected + + +def test_metadata_from_dict_extra_fields(): + """ + Assert that the metadata classes ignore nonexistent fields. + This can be an issue when elements_from_json gets a schema + from the future. 
+ """ + element_metadata = { + "new_field": "hello", + "data_source": { + "new_field": "world", + }, + "coordinates": { + "new_field": "foo", + }, + } + + metadata = ElementMetadata.from_dict(element_metadata) + metadata_dict = metadata.to_dict() + + assert "new_field" not in metadata_dict + assert "new_field" not in metadata_dict["coordinates"] + assert "new_field" not in metadata_dict["data_source"] diff --git a/test_unstructured/documents/test_html.py b/test_unstructured/documents/test_html.py index d6d236f08f..02f6d6bc72 100644 --- a/test_unstructured/documents/test_html.py +++ b/test_unstructured/documents/test_html.py @@ -17,6 +17,7 @@ from unstructured.documents.html import ( HEADING_TAGS, LIST_ITEM_TAGS, + SECTION_TAGS, TABLE_TAGS, TEXT_TAGS, HTMLDocument, @@ -41,8 +42,15 @@ TAGS = TAGS.replace(">", "").split("<")[1:] -INCLUDED_TAGS = TEXT_TAGS + HEADING_TAGS + LIST_ITEM_TAGS + ["div"] -EXCLUDED_TAGS = "tag", [tag for tag in TAGS if tag not in INCLUDED_TAGS] +VOID_TAGS = "

" +VOID_TAGS = VOID_TAGS.replace(">", "").split("<")[1:] + +INCLUDED_TAGS = TEXT_TAGS + HEADING_TAGS + LIST_ITEM_TAGS + SECTION_TAGS +EXCLUDED_TAGS = [ + tag + for tag in TAGS + if tag not in (INCLUDED_TAGS + TABLE_TAGS + VOID_TAGS + ["html", "head", "body"]) +] @pytest.fixture() @@ -685,3 +693,31 @@ def test_sample_doc_with_emoji(): # NOTE(robinson) - unclear why right now, but the output is the emoji on the test runners # and the byte string representation when running locally on mac assert doc.elements[0].text in ["Hello again ð\x9f\x98\x80", "Hello again 😀"] + + +def test_only_plain_text_in_body(): + raw_html = "Hello" + doc = HTMLDocument.from_string(raw_html) + assert doc.elements[0].text == "Hello" + + +def test_plain_text_before_anything_in_body(): + raw_html = "Hello

World

" + doc = HTMLDocument.from_string(raw_html) + assert doc.elements[0].text == "Hello" + assert doc.elements[1].text == "World" + + +def test_line_break_in_container(): + raw_html = "
Hello
World
" + doc = HTMLDocument.from_string(raw_html) + assert doc.elements[0].text == "Hello" + assert doc.elements[1].text == "World" + + +@pytest.mark.parametrize("tag", TEXT_TAGS) +def test_line_break_in_text_tag(tag): + raw_html = f"<{tag}>Hello
World" + doc = HTMLDocument.from_string(raw_html) + assert doc.elements[0].text == "Hello" + assert doc.elements[1].text == "World" diff --git a/test_unstructured/partition/csv/test_csv.py b/test_unstructured/partition/csv/test_csv.py index 050c2c2567..3f3d5e4ae0 100644 --- a/test_unstructured/partition/csv/test_csv.py +++ b/test_unstructured/partition/csv/test_csv.py @@ -8,6 +8,7 @@ EXPECTED_TEXT, EXPECTED_TEXT_WITH_EMOJI, ) +from unstructured.chunking.title import chunk_by_title from unstructured.cleaners.core import clean_extra_whitespace from unstructured.documents.elements import Table from unstructured.partition.csv import partition_csv @@ -189,3 +190,18 @@ def test_partition_csv_with_json(filename, expected_text, expected_table): assert elements[0].metadata.filename == test_elements[0].metadata.filename for i in range(len(elements)): assert elements[i] == test_elements[i] + + +def test_add_chunking_strategy_to_partition_csv_non_default(): + filename = "example-docs/stanley-cups.csv" + + elements = partition_csv(filename=filename) + chunk_elements = partition_csv( + filename, + chunking_strategy="by_title", + max_characters=9, + combine_text_under_n_chars=0, + ) + chunks = chunk_by_title(elements, max_characters=9, combine_text_under_n_chars=0) + assert chunk_elements != elements + assert chunk_elements == chunks diff --git a/test_unstructured/partition/docx/test_docx.py b/test_unstructured/partition/docx/test_docx.py index cbda549188..c622c390b4 100644 --- a/test_unstructured/partition/docx/test_docx.py +++ b/test_unstructured/partition/docx/test_docx.py @@ -1,20 +1,24 @@ # pyright: reportPrivateUsage=false import os +import pathlib from tempfile import SpooledTemporaryFile from typing import Dict, List import docx import pytest +from docx.document import Document from unstructured.chunking.title import chunk_by_title from unstructured.documents.elements import ( Address, + Element, Footer, Header, ListItem, NarrativeText, Table, + TableChunk, Text, Title, ) 
@@ -51,6 +55,14 @@ def mock_document(): return document +@pytest.fixture() +def mock_document_filename(mock_document: Document, tmp_path: pathlib.Path) -> str: + filename = str(tmp_path / "mock_document.docx") + print(f"filename = {filename}") + mock_document.save(filename) + return filename + + @pytest.fixture() def expected_elements(): return [ @@ -85,11 +97,12 @@ def expected_emphasized_text_tags(): return ["b", "i", "b", "i"] -def test_partition_docx_from_filename(mock_document, expected_elements, tmpdir): - filename = os.path.join(tmpdir.dirname, "mock_document.docx") - mock_document.save(filename) +def test_partition_docx_from_filename( + mock_document_filename: str, + expected_elements: List[Element], +): + elements = partition_docx(filename=mock_document_filename) - elements = partition_docx(filename=filename) assert elements == expected_elements assert elements[0].metadata.page_number is None for element in elements: @@ -410,9 +423,96 @@ def test_partition_docx_with_json(mock_document, expected_elements, tmpdir): assert elements[i] == test_elements[i] -def test_add_chunking_strategy_on_partition_docx(filename="example-docs/handbook-1p.docx"): +def test_parse_category_depth_by_style(): + partitioner = _DocxPartitioner("example-docs/category-level.docx", None, None, False, None) + + # Category depths are 0-indexed and relative to the category type + # Title, list item, bullet, narrative text, etc. 
+ test_cases = [ + (0, "Call me Ishmael."), + (0, "A Heading 1"), + (0, "Whenever I find myself growing grim"), + (0, "A top level list item"), + (1, "Next level"), + (1, "Same"), + (0, "Second top-level list item"), + (0, "whenever I find myself involuntarily"), + (0, ""), # Empty paragraph + (1, "A Heading 2"), + (0, "This is my substitute for pistol and ball"), + (0, "Another Heading 1"), + (0, "There now is your insular city"), + ] + + paragraphs = partitioner._document.paragraphs + for idx, (depth, text) in enumerate(test_cases): + paragraph = paragraphs[idx] + actual_depth = partitioner._parse_category_depth_by_style(paragraph) + assert text in paragraph.text, f"paragraph[{[idx]}].text does not contain {text}" + assert ( + actual_depth == depth + ), f"expected paragraph[{idx}] to have depth=={depth}, got {actual_depth}" + + +def test_parse_category_depth_by_style_name(): + partitioner = _DocxPartitioner(None, None, None, False, None) + + test_cases = [ + (0, "Heading 1"), + (1, "Heading 2"), + (2, "Heading 3"), + (1, "Subtitle"), + (0, "List"), + (1, "List 2"), + (2, "List 3"), + (0, "List Bullet"), + (1, "List Bullet 2"), + (2, "List Bullet 3"), + (0, "List Number"), + (1, "List Number 2"), + (2, "List Number 3"), + ] + + for idx, (depth, text) in enumerate(test_cases): + assert ( + partitioner._parse_category_depth_by_style_name(text) == depth + ), f"test case {test_cases[idx]} failed" + + +def test_parse_category_depth_by_style_ilvl(): + partitioner = _DocxPartitioner(None, None, None, False, None) + assert partitioner._parse_category_depth_by_style_ilvl() == 0 + + +def test_add_chunking_strategy_on_partition_docx_default_args( + filename="example-docs/handbook-1p.docx", +): chunk_elements = partition_docx(filename, chunking_strategy="by_title") elements = partition_docx(filename) chunks = chunk_by_title(elements) + + assert chunk_elements != elements + assert chunk_elements == chunks + + +def test_add_chunking_strategy_on_partition_docx( + 
filename="example-docs/fake-doc-emphasized-text.docx", +): + chunk_elements = partition_docx( + filename, + chunking_strategy="by_title", + max_characters=9, + combine_text_under_n_chars=5, + ) + elements = partition_docx(filename) + chunks = chunk_by_title(elements, max_characters=9, combine_text_under_n_chars=5) + # remove the last element of the TableChunk list because it will be the leftover slice + # and not necessarily the max_characters len + table_chunks = [chunk for chunk in chunks if isinstance(chunk, TableChunk)][:-1] + other_chunks = [chunk for chunk in chunks if not isinstance(chunk, TableChunk)] + for table_chunk in table_chunks: + assert len(table_chunk.text) == 9 + for chunk in other_chunks: + assert len(chunk.text) >= 5 assert chunk_elements != elements assert chunk_elements == chunks diff --git a/test_unstructured/partition/epub/test_epub.py b/test_unstructured/partition/epub/test_epub.py index 7d0e741899..991ec1991f 100644 --- a/test_unstructured/partition/epub/test_epub.py +++ b/test_unstructured/partition/epub/test_epub.py @@ -193,3 +193,24 @@ def test_add_chunking_strategy_on_partition_epub( chunks = chunk_by_title(elements) assert chunk_elements != elements assert chunk_elements == chunks + + +def test_add_chunking_strategy_on_partition_epub_non_default( + filename=os.path.join(DIRECTORY, "..", "..", "..", "example-docs", "winter-sports.epub"), +): + elements = partition_epub(filename=filename) + chunk_elements = partition_epub( + filename, + chunking_strategy="by_title", + max_characters=5, + new_after_n_chars=5, + combine_text_under_n_chars=0, + ) + chunks = chunk_by_title( + elements, + max_characters=5, + new_after_n_chars=5, + combine_text_under_n_chars=0, + ) + assert chunk_elements != elements + assert chunk_elements == chunks diff --git a/test_unstructured/partition/markdown/test_md.py b/test_unstructured/partition/markdown/test_md.py index 33d131b7a3..c73247998c 100644 --- a/test_unstructured/partition/markdown/test_md.py +++ 
b/test_unstructured/partition/markdown/test_md.py @@ -276,7 +276,7 @@ def test_partition_md_with_json( assert elements[i] == test_elements[i] -def test_add_chunking_strategy_on_partition_md( +def test_add_chunking_strategy_by_title_on_partition_md( filename="example-docs/README.md", ): elements = partition_md(filename=filename) diff --git a/test_unstructured/partition/msg/test_msg.py b/test_unstructured/partition/msg/test_msg.py index 6e179987a2..7678a6cda5 100644 --- a/test_unstructured/partition/msg/test_msg.py +++ b/test_unstructured/partition/msg/test_msg.py @@ -285,7 +285,7 @@ def test_partition_msg_with_pgp_encrypted_message( assert "Encrypted email detected" in caplog.text -def test_add_chunking_strategy_on_partition_msg( +def test_add_chunking_strategy_by_title_on_partition_msg( filename=os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake-email.msg"), ): elements = partition_msg(filename=filename) diff --git a/test_unstructured/partition/odt/test_odt.py b/test_unstructured/partition/odt/test_odt.py index 9fe9b4b99d..982a11f9b4 100644 --- a/test_unstructured/partition/odt/test_odt.py +++ b/test_unstructured/partition/odt/test_odt.py @@ -2,7 +2,7 @@ import pathlib from unstructured.chunking.title import chunk_by_title -from unstructured.documents.elements import Table, Title +from unstructured.documents.elements import Table, TableChunk, Title from unstructured.partition.json import partition_json from unstructured.partition.odt import partition_odt from unstructured.staging.base import elements_to_json @@ -169,3 +169,24 @@ def test_add_chunking_strategy_on_partition_odt( chunks = chunk_by_title(elements) assert chunk_elements != elements assert chunk_elements == chunks + + +def test_add_chunking_strategy_on_partition_odt_non_default(): + filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake.odt") + elements = partition_odt(filename=filename) + chunk_elements = partition_odt( + filename, + chunking_strategy="by_title", + max_characters=7, + 
combine_text_under_n_chars=5, + ) + chunks = chunk_by_title( + elements, + max_characters=7, + combine_text_under_n_chars=5, + ) + for chunk in chunk_elements: + if isinstance(chunk, TableChunk): + assert len(chunk.text) <= 7 + assert chunk_elements != elements + assert chunk_elements == chunks diff --git a/test_unstructured/partition/pdf-image/test_image.py b/test_unstructured/partition/pdf-image/test_image.py index ce026767a1..bdbb24df8f 100644 --- a/test_unstructured/partition/pdf-image/test_image.py +++ b/test_unstructured/partition/pdf-image/test_image.py @@ -440,7 +440,7 @@ def test_partition_image_formats_languages_for_tesseract(): ocr_languages="jpn_vert", ocr_mode="entire_page", extract_tables=False, - model_name=None, + model_name=pdf.default_hi_res_model(), ) @@ -460,6 +460,25 @@ def test_add_chunking_strategy_on_partition_image( assert chunk_elements == chunks +def test_add_chunking_strategy_on_partition_image_hi_res( + filename="example-docs/layout-parser-paper-with-table.jpg", +): + elements = image.partition_image( + filename=filename, + strategy="hi_res", + infer_table_structure=True, + ) + chunk_elements = image.partition_image( + filename, + strategy="hi_res", + infer_table_structure=True, + chunking_strategy="by_title", + ) + chunks = chunk_by_title(elements) + assert chunk_elements != elements + assert chunk_elements == chunks + + def test_partition_image_uses_model_name(): with mock.patch.object( pdf, diff --git a/test_unstructured/partition/pdf-image/test_pdf.py b/test_unstructured/partition/pdf-image/test_pdf.py index 7178248231..b33b1ca337 100644 --- a/test_unstructured/partition/pdf-image/test_pdf.py +++ b/test_unstructured/partition/pdf-image/test_pdf.py @@ -407,7 +407,7 @@ def test_partition_pdf_with_dpi(): ocr_languages="eng", ocr_mode="entire_page", extract_tables=False, - model_name=None, + model_name=pdf.default_hi_res_model(), pdf_image_dpi=100, ) @@ -479,7 +479,7 @@ def test_partition_pdf_fast_groups_text_in_text_box(): 
system=expected_coordinate_system_3, ), ) - assert elements[3] == Text("2.5", metadata=expected_elem_metadata_3) + assert elements[2] == Text("2.5", metadata=expected_elem_metadata_3) def test_partition_pdf_with_metadata_filename( @@ -838,7 +838,7 @@ def test_partition_pdf_with_ocr_coordinates_are_not_nan_from_file( assert point[1] is not math.nan -def test_add_chunking_strategy_on_partition_pdf( +def test_add_chunking_strategy_by_title_on_partition_pdf( filename="example-docs/layout-parser-paper-fast.pdf", ): elements = pdf.partition_pdf(filename=filename) @@ -858,7 +858,7 @@ def test_partition_pdf_formats_languages_for_tesseract(): ocr_languages="eng", ocr_mode="entire_page", extract_tables=False, - model_name=None, + model_name=pdf.default_hi_res_model(), ) @@ -875,7 +875,7 @@ def test_partition_pdf_or_image_warns_with_ocr_languages(caplog): def test_partition_categorization_backup(): - text = "This is Clearly a Title." + text = "This is Clearly a Title" with mock.patch.object(pdf, "_partition_pdf_or_image_local", return_value=[Text(text)]): elements = pdf.partition_pdf_or_image( "example-docs/layout-parser-paper-fast.pdf", @@ -898,7 +898,45 @@ def test_combine_numbered_list(filename): first_list_element = element break assert len(elements) < 28 - assert first_list_element.text.endswith("(Section 3)") + assert first_list_element.text.endswith( + "character recognition, and other DIA tasks (Section 3)", + ) + + +@pytest.mark.parametrize( + "filename", + ["example-docs/layout-parser-paper-fast.pdf"], +) +def test_partition_pdf_hyperlinks(filename): + elements = pdf.partition_pdf(filename=filename, strategy="auto") + links = [ + { + "text": "8", + "url": "cite.gardner2018allennlp", + "start_index": 138, + }, + { + "text": "34", + "url": "cite.wolf2019huggingface", + "start_index": 141, + }, + { + "text": "35", + "url": "cite.wu2019detectron2", + "start_index": 168, + }, + ] + assert elements[-1].metadata.links == links + + +@pytest.mark.parametrize( + "filename", + 
["example-docs/embedded-link.pdf"], +) +def test_partition_pdf_hyperlinks_multiple_lines(filename): + elements = pdf.partition_pdf(filename=filename, strategy="auto") + assert elements[-1].metadata.links[-1]["text"] == "capturing" + assert len(elements[-1].metadata.links) == 2 def test_partition_pdf_uses_model_name(): @@ -915,3 +953,13 @@ def test_partition_pdf_uses_model_name(): mockpartition.assert_called_once() assert "model_name" in mockpartition.call_args.kwargs assert mockpartition.call_args.kwargs["model_name"] + + +def test_partition_pdf_word_bbox_not_char( + filename="example-docs/interface-config-guide-p93.pdf", +): + try: + elements = pdf.partition_pdf(filename=filename) + except Exception as e: + raise ("Partitioning fail: %s" % e) + assert len(elements) == 17 diff --git a/test_unstructured/partition/pptx/test_ppt.py b/test_unstructured/partition/pptx/test_ppt.py index 1662002ddd..3750e0e9c6 100644 --- a/test_unstructured/partition/pptx/test_ppt.py +++ b/test_unstructured/partition/pptx/test_ppt.py @@ -174,7 +174,7 @@ def test_partition_ppt_with_json( assert elements[i] == test_elements[i] -def test_add_chunking_strategy_on_partition_ppt( +def test_add_chunking_strategy_by_title_on_partition_ppt( filename=os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake-power-point.ppt"), ): elements = partition_ppt(filename=filename) diff --git a/test_unstructured/partition/pptx/test_pptx.py b/test_unstructured/partition/pptx/test_pptx.py index e4c0e75426..37e9b7ce3e 100644 --- a/test_unstructured/partition/pptx/test_pptx.py +++ b/test_unstructured/partition/pptx/test_pptx.py @@ -112,7 +112,7 @@ def it_recurses_into_group_shapes(self): elements = cast( Iterator[Text], _PptxPartitioner( - get_test_file_path("group-shapes-nested.pptx") + get_test_file_path("group-shapes-nested.pptx"), )._iter_presentation_elements(), ) @@ -371,8 +371,8 @@ def test_partition_pptx_with_json(): assert elements[i] == test_elements[i] -def test_add_chunking_strategy_on_partition_pptx(): - filename 
= os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake-power-point.pptx") +def test_add_chunking_strategy_by_title_on_partition_pptx(): + filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "science-exploration-1p.pptx") elements = partition_pptx(filename=filename) chunk_elements = partition_pptx(filename, chunking_strategy="by_title") chunks = chunk_by_title(elements) diff --git a/test_unstructured/partition/pypandoc/test_org.py b/test_unstructured/partition/pypandoc/test_org.py index 9017c5e86f..81ad6d4ed2 100644 --- a/test_unstructured/partition/pypandoc/test_org.py +++ b/test_unstructured/partition/pypandoc/test_org.py @@ -136,7 +136,7 @@ def test_partition_org_with_json(filename="example-docs/README.org"): assert elements[i] == test_elements[i] -def test_add_chunking_strategy_on_partition_org( +def test_add_chunking_strategy_by_title_on_partition_org( filename="example-docs/README.org", ): elements = partition_org(filename=filename) diff --git a/test_unstructured/partition/test_auto.py b/test_unstructured/partition/test_auto.py index f3e91e6a6b..e8404537c1 100644 --- a/test_unstructured/partition/test_auto.py +++ b/test_unstructured/partition/test_auto.py @@ -17,6 +17,7 @@ ListItem, NarrativeText, Table, + TableChunk, Text, Title, ) @@ -24,6 +25,7 @@ from unstructured.partition import auto from unstructured.partition.auto import _get_partition_with_extras, partition from unstructured.partition.common import convert_office_doc +from unstructured.partition.pdf import default_hi_res_model from unstructured.staging.base import elements_to_json DIRECTORY = pathlib.Path(__file__).parent.resolve() @@ -381,7 +383,7 @@ def test_auto_partition_formats_languages_for_tesseract(): ocr_languages="chi_sim+chi_sim_vert+chi_tra+chi_tra_vert", ocr_mode="entire_page", extract_tables=False, - model_name=None, + model_name=default_hi_res_model(), ) @@ -937,37 +939,45 @@ def test_get_partition_with_extras_prompts_for_install_if_missing(): def test_add_chunking_strategy_on_partition_auto(): 
filename = "example-docs/example-10k-1p.html" - chunk_elements = partition(filename, chunking_strategy="by_title") elements = partition(filename) + chunk_elements = partition(filename, chunking_strategy="by_title") chunks = chunk_by_title(elements) assert chunk_elements != elements assert chunk_elements == chunks -def test_add_chunking_strategy_on_partition_auto_respects_multipage(): +def test_add_chunking_strategy_title_on_partition_auto_respects_multipage(): filename = "example-docs/example-10k-1p.html" partitioned_elements_multipage_false_combine_chars_0 = partition( filename, chunking_strategy="by_title", multipage_sections=False, - combine_under_n_chars=0, + combine_text_under_n_chars=0, + new_after_n_chars=300, + max_characters=400, ) partitioned_elements_multipage_true_combine_chars_0 = partition( filename, chunking_strategy="by_title", multipage_sections=True, - combine_under_n_chars=0, + combine_text_under_n_chars=0, + new_after_n_chars=300, + max_characters=400, ) elements = partition(filename) cleaned_elements_multipage_false_combine_chars_0 = chunk_by_title( elements, multipage_sections=False, - combine_under_n_chars=0, + combine_text_under_n_chars=0, + new_after_n_chars=300, + max_characters=400, ) cleaned_elements_multipage_true_combine_chars_0 = chunk_by_title( elements, multipage_sections=True, - combine_under_n_chars=0, + combine_text_under_n_chars=0, + new_after_n_chars=300, + max_characters=400, ) assert ( partitioned_elements_multipage_false_combine_chars_0 @@ -980,3 +990,69 @@ def test_add_chunking_strategy_on_partition_auto_respects_multipage(): assert len(partitioned_elements_multipage_true_combine_chars_0) != len( partitioned_elements_multipage_false_combine_chars_0, ) + + +def test_add_chunking_strategy_on_partition_auto_respects_max_chars(): + filename = "example-docs/example-10k-1p.html" + + # default chunk size in chars is 200 + partitioned_table_elements_200_chars = [ + e + for e in partition( + filename, + chunking_strategy="by_title", 
+ max_characters=200, + combine_text_under_n_chars=5, + ) + if isinstance(e, (Table, TableChunk)) + ] + + partitioned_table_elements_5_chars = [ + e + for e in partition( + filename, + chunking_strategy="by_title", + max_characters=5, + combine_text_under_n_chars=5, + ) + if isinstance(e, (Table, TableChunk)) + ] + + elements = partition(filename) + + table_elements = [e for e in elements if isinstance(e, Table)] + + assert len(partitioned_table_elements_5_chars) != len(table_elements) + assert len(partitioned_table_elements_200_chars) != len(table_elements) + + assert len(partitioned_table_elements_5_chars[0].text) == 5 + assert len(partitioned_table_elements_5_chars[0].metadata.text_as_html) == 5 + + # the first table element is under 200 chars so doesn't get chunked! + assert table_elements[0] == partitioned_table_elements_200_chars[0] + assert len(partitioned_table_elements_200_chars[0].text) < 200 + assert len(partitioned_table_elements_200_chars[1].text) == 200 + assert len(partitioned_table_elements_200_chars[1].metadata.text_as_html) == 200 + + +def test_add_chunking_strategy_chars_on_partition_auto_adds_is_continuation(): + filename = "example-docs/example-10k-1p.html" + + # default chunk size in chars is 200 + partitioned_table_elements_200_chars = [ + e + for e in partition( + filename, + chunking_strategy="by_num_characters", + ) + if isinstance(e, Table) + ] + + i = 0 + for table in partitioned_table_elements_200_chars: + # have to reset the counter to 0 here when we encounter a Table element + if isinstance(table, Table): + i = 0 + if i > 0 and isinstance(table, TableChunk): + assert table.metadata.is_continuation is True + i += 1 diff --git a/test_unstructured/partition/test_html_partition.py b/test_unstructured/partition/test_html_partition.py index 48934f73f4..82976a2621 100644 --- a/test_unstructured/partition/test_html_partition.py +++ b/test_unstructured/partition/test_html_partition.py @@ -8,7 +8,7 @@ from unstructured.chunking.title import 
chunk_by_title from unstructured.cleaners.core import clean_extra_whitespace -from unstructured.documents.elements import ListItem, NarrativeText, Table, Title +from unstructured.documents.elements import EmailAddress, ListItem, NarrativeText, Table, Title from unstructured.documents.html import HTMLTitle from unstructured.partition.html import partition_html from unstructured.partition.json import partition_json @@ -645,3 +645,25 @@ def test_add_chunking_strategy_on_partition_html( chunks = chunk_by_title(elements) assert chunk_elements != elements assert chunk_elements == chunks + + +def test_html_heading_title_detection(): + html_text = """ +

This is a section of narrative text, it's long, flows and has meaning

+

This is a section of narrative text, it's long, flows and has meaning

+

A heading that is at the second level

+

Finally, the third heading

+

December 1-17, 2017

+

email@example.com

+

  • - bulleted item
  • + """ + elements = partition_html(text=html_text) + assert elements == [ + NarrativeText("This is a section of narrative text, it's long, flows and has meaning"), + Title("This is a section of narrative text, it's long, flows and has meaning"), + Title("A heading that is at the second level"), + Title("Finally, the third heading"), + Title("December 1-17, 2017"), + EmailAddress("email@example.com"), + ListItem("- bulleted item"), + ] diff --git a/test_unstructured/partition/test_xml_partition.py b/test_unstructured/partition/test_xml_partition.py index cebff68b64..a301a46870 100644 --- a/test_unstructured/partition/test_xml_partition.py +++ b/test_unstructured/partition/test_xml_partition.py @@ -38,7 +38,7 @@ def test_partition_xml_from_filename_with_metadata_filename(): ) def test_partition_xml_from_file(filename): file_path = os.path.join(DIRECTORY, "..", "..", "example-docs", filename) - with open(file_path) as f: + with open(file_path, "rb") as f: elements = partition_xml(file=f, xml_keep_tags=False, metadata_filename=file_path) assert elements[0].text == "United States" @@ -47,7 +47,7 @@ def test_partition_xml_from_file(filename): def test_partition_xml_from_file_with_metadata_filename(): file_path = os.path.join(DIRECTORY, "..", "..", "example-docs", "factbook.xml") - with open(file_path) as f: + with open(file_path, "rb") as f: elements = partition_xml(file=f, xml_keep_tags=False, metadata_filename="test") assert elements[0].text == "United States" @@ -158,7 +158,7 @@ def test_partition_xml_from_filename_exclude_metadata(filename): ) def test_partition_xml_from_file_exclude_metadata(filename): file_path = os.path.join(DIRECTORY, "..", "..", "example-docs", filename) - with open(file_path) as f: + with open(file_path, "rb") as f: elements = partition_xml( file=f, xml_keep_tags=False, diff --git a/test_unstructured/partition/utils/test_sorting.py b/test_unstructured/partition/utils/test_sorting.py index 7bcf7a25d0..2000b4e3a3 100644 --- 
a/test_unstructured/partition/utils/test_sorting.py +++ b/test_unstructured/partition/utils/test_sorting.py @@ -5,10 +5,19 @@ from unstructured.partition.utils.constants import SORT_MODE_BASIC, SORT_MODE_XY_CUT from unstructured.partition.utils.sorting import ( coord_has_valid_points, + coordinates_to_bbox, + shrink_bbox, sort_page_elements, ) +class MockCoordinatesMetadata(CoordinatesMetadata): + def __init__(self, points): + system = PixelSpace(width=300, height=500) + + super().__init__(points, system) + + def test_coord_valid_coordinates(): coordinates = CoordinatesMetadata([(1, 2), (3, 4), (5, 6), (7, 8)], PixelSpace) assert coord_has_valid_points(coordinates) is True @@ -98,3 +107,21 @@ def test_sort_basic_pos_coordinates(): sorted_elem_text = " ".join([str(elem.text) for elem in sorted_page_elements]) assert sorted_elem_text == "7 8 9" + + +def test_coordinates_to_bbox(): + coordinates_data = MockCoordinatesMetadata([(10, 20), (10, 200), (100, 200), (100, 20)]) + expected_result = (10, 20, 100, 200) + assert coordinates_to_bbox(coordinates_data) == expected_result + + +def test_shrink_bbox(): + bbox = (0, 0, 100, 100) + shrink_factor = 0.5 + expected_result = (25, 25, 75, 75) + assert shrink_bbox(bbox, shrink_factor) == expected_result + + bbox = (0, 0, 200, 100) + shrink_factor = 0.9 + expected_result = (10, 5, 190, 95) + assert shrink_bbox(bbox, shrink_factor) == expected_result diff --git a/test_unstructured_ingest/expected-structured-output/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf.json b/test_unstructured_ingest/expected-structured-output/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf.json index f3eba38c14..79e18453bf 100644 --- a/test_unstructured_ingest/expected-structured-output/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf.json @@ -1,7 +1,7 @@ [ { "type": "Title", - "element_id": 
"cf66bb0e9e68e3a82a99b5621e4394f8", + "element_id": "0b8804afbc4722108e877480e28462a6", "metadata": { "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", @@ -16,30 +16,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Core Skills for Biomedical Data" + "text": "Core Skills for Biomedical Data Scientists" }, { - "type": "Title", - "element_id": "733383a5f0f5bdea71d6d48805365e6f", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 1 - }, - "text": "Scientists" - }, - { - "type": "Title", - "element_id": "64b2134f054446d473fce1b05d4d4c94", + "type": "NarrativeText", + "element_id": "46b1e4dae5ffd7cdcb2a6ed9f206a8ee", "metadata": { "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", @@ -76,160 +57,8 @@ "text": "Lisa Federer, MLIS, Data Science Training Coordinator" }, { - "type": "Title", - "element_id": "f089eaef57aba315bc0e1455985c0c8e", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 1 - }, - "text": "Michael" - }, - { - "type": "UncategorizedText", - "element_id": "fd0a559e715a134218c73276dc57d463", - "metadata": { - "data_source": { - "url": 
"abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 1 - }, - "text": "F." - }, - { - "type": "UncategorizedText", - "element_id": "44be44eccd482217c097571ddfa61f49", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 1 - }, - "text": "Huerta," - }, - { - "type": "Title", - "element_id": "394df19f0626f36d12da449624b691f9", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 1 - }, - "text": "PhD, Associate" - }, - { - "type": "Title", - "element_id": "4f5a6389c571b0d01690b1db0349c1b4", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - 
"date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 1 - }, - "text": "Director of" - }, - { - "type": "Title", - "element_id": "aecfc6e5b6c0de37a2c06c2fb1d71c82", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 1 - }, - "text": "NLM" - }, - { - "type": "Title", - "element_id": "237622d8c80fbdbe790b92d500aa7b00", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 1 - }, - "text": "for Program Development and" - }, - { - "type": "Title", - "element_id": "aecfc6e5b6c0de37a2c06c2fb1d71c82", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 1 - }, - "text": "NLM" - }, - { - "type": "Title", - "element_id": "ba490653e1ad81f341c35ae470c1b825", + "type": 
"NarrativeText", + "element_id": "d9644fb4b85468d186b132c91ca64f31", "metadata": { "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", @@ -244,11 +73,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Coordinator of Data Science and Open Science Initiatives" + "text": "Michael F. Huerta, PhD, Associate Director of NLM for Program Development and NLM Coordinator of Data Science and Open Science Initiatives" }, { "type": "Title", - "element_id": "53d548aa01fc3eb72da15a5be7f235e2", + "element_id": "c8e51fdc53c202393adad77f7f93ee5a", "metadata": { "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", @@ -267,45 +96,7 @@ }, { "type": "NarrativeText", - "element_id": "2364a6d2f9a3858d51d91b817732e6c9", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 1 - }, - "text": "This report provides recommendations for a scientists based on analysis that draws on opinions of data scientists, curricula for existing science requirements science jobs." 
- }, - { - "type": "Title", - "element_id": "6712d87f1d156abf6171f700e2875889", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 1 - }, - "text": "biomedical" - }, - { - "type": "Title", - "element_id": "3a6eb0790f39ac87c94f3856b2dd2c5d", + "element_id": "d6df9cd66da09d30c16d194e877766ca", "metadata": { "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", @@ -320,11 +111,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "data" + "text": "This report provides recommendations for a minimal set of core skills for biomedical data scientists based on analysis that draws on opinions of data scientists, curricula for existing biomedical data science programs, and requirements for biomedical data science jobs. Suggested high-level core skills include:" }, { - "type": "UncategorizedText", - "element_id": "50e891aa619a7ccbeab043789ca5dd1a", + "type": "ListItem", + "element_id": "04ff84b51fab69c07381ac794b740243", "metadata": { "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", @@ -339,11 +130,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "programs," + "text": "1. 
General biomedical subject matter knowledge: biomedical data scientists should have a general working knowledge of the principles of biology, bioinformatics, and basic clinical science;" }, { - "type": "Title", - "element_id": "6201111b83a0cb5b0922cb37cc442b9a", + "type": "ListItem", + "element_id": "0b2857001b1a9eba5e46e26cba08e2ac", "metadata": { "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", @@ -358,11 +149,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "and" + "text": "2. Programming language expertise: biomedical data scientists should be fluent in at least one programming language (typically R and/or Python);" }, { - "type": "Title", - "element_id": "a703788f832056626d71b7db4d805524", + "type": "ListItem", + "element_id": "8b02f539eb8ccee5b3fc24f66858188c", "metadata": { "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", @@ -377,11 +168,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "minimal" + "text": "3. Predictive analytics, modeling, and machine learning: while a range of statistical methods may be useful, predictive analytics, modeling, and machine learning emerged as especially important skills in biomedical data science;" }, { - "type": "Title", - "element_id": "6ee0eb490ff832101cf82a3d387c35f2", + "type": "ListItem", + "element_id": "469e981f34d1e6f2b420574ed8e932d2", "metadata": { "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", @@ -396,11 +187,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "set" + "text": "4. Team science and scientific communication: “soft” skills, like the ability to work well on teams and communicate effectively in both verbal and written venues, may be as important as the more technical skills typically associated with data science." 
}, { - "type": "Title", - "element_id": "10c22bcf4c768b515be4e94bcafc71bf", + "type": "ListItem", + "element_id": "4b8fc76cbba0e2fef79ff8bc668b1401", "metadata": { "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", @@ -415,11 +206,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "for" + "text": "5. Responsible data stewardship: a successful data scientist must be able to implement best practices for data management and stewardship, as well as conduct research in an ethical manner that maintains data security and privacy." }, { - "type": "Title", - "element_id": "28391d3bc64ec15cbb090426b04aa6b7", + "type": "NarrativeText", + "element_id": "69da7754428f154ee3b2906214d31ad9", "metadata": { "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", @@ -434,11 +225,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "of" + "text": "The report further details specific skills and expertise relevant to biomedical data scientists." }, { "type": "Title", - "element_id": "6712d87f1d156abf6171f700e2875889", + "element_id": "37486ef32cbf05082d5dbff0581db762", "metadata": { "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", @@ -453,11 +244,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "biomedical" + "text": "Motivation" }, { - "type": "Title", - "element_id": "0d45f5fd462b8c70bffb10021ac1bcff", + "type": "NarrativeText", + "element_id": "cfe4cc76625dc82267d95ec1dc7e7813", "metadata": { "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", @@ -472,11 +263,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "core" + "text": "Training a biomedical data science (BDS) workforce is a central theme in NLM’s Strategic Plan for the coming decade. 
That commitment is echoed in the NIH-wide Big Data to Knowledge (BD2K) initiative, which invested $61 million between FY2014 and FY2017 in training programs for the development and use of biomedical big data science methods and tools. In line with" }, { - "type": "Title", - "element_id": "50c5080f67ea1f9eff473e46e6314fd2", + "type": "UncategorizedText", + "element_id": "68431de56564c6ad6aa3e6c02b78c89c", "metadata": { "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", @@ -489,13 +280,13 @@ "date_modified": "2023-03-10T09:32:44+00:00" }, "filetype": "application/pdf", - "page_number": 1 + "page_number": 2 }, - "text": "skills for biomedical" + "text": "Core Skills for Biomedical Data Scientists _____________________________________________________________________________________________" }, { - "type": "Title", - "element_id": "3a6eb0790f39ac87c94f3856b2dd2c5d", + "type": "NarrativeText", + "element_id": "edd5f2f5a60a83c8899e533ac8bcd03c", "metadata": { "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", @@ -508,13 +299,13 @@ "date_modified": "2023-03-10T09:32:44+00:00" }, "filetype": "application/pdf", - "page_number": 1 + "page_number": 2 }, - "text": "data" + "text": "this commitment, a recent report to the NLM Director recommended working across NIH to identify and develop core skills required of a biomedical data scientist to consistency across the cohort of NIH-trained data scientists. This report provides a set of recommended core skills based on analysis of current BD2K-funded training programs, biomedical data science job ads, and practicing members of the current data science workforce." 
}, { "type": "Title", - "element_id": "3a6eb0790f39ac87c94f3856b2dd2c5d", + "element_id": "3c36cd10b2e64b9f2169f05abddd4981", "metadata": { "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", @@ -527,13 +318,13 @@ "date_modified": "2023-03-10T09:32:44+00:00" }, "filetype": "application/pdf", - "page_number": 1 + "page_number": 2 }, - "text": "data" + "text": "Methodology" }, { "type": "NarrativeText", - "element_id": "18f107bf25f694db07b6aba0a5aaf321", + "element_id": "987542acede56f098db655f02fb814a7", "metadata": { "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", @@ -546,13 +337,13 @@ "date_modified": "2023-03-10T09:32:44+00:00" }, "filetype": "application/pdf", - "page_number": 1 + "page_number": 2 }, - "text": "Suggested high-level core skills include:" + "text": "The Workforce Excellence team took a three-pronged approach to identifying core skills required of a biomedical data scientist (BDS), drawing from:" }, { "type": "ListItem", - "element_id": "8f90f5970c85f335b1bf50af611ce5c5", + "element_id": "2e3cec7bff1e8c8d8e0087f0bcfa89f0", "metadata": { "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", @@ -565,13 +356,13 @@ "date_modified": "2023-03-10T09:32:44+00:00" }, "filetype": "application/pdf", - "page_number": 1 + "page_number": 2 }, - "text": "1. General biomedical subject matter knowledge: biomedical data scientists should have a general working knowledge of the principles of biology, bioinformatics, and basic clinical science;" + "text": "a) Responses to a 2017 Kaggle1 survey2 of over 16,000 self-identified data scientists working across many industries. Analysis of the Kaggle survey responses from the current data science workforce provided insights into the current generation of data scientists, including how they were trained and what programming and analysis skills they use." 
}, { "type": "ListItem", - "element_id": "d1a5bb898aee8de0fbdf048c7a9fb01d", + "element_id": "c6865d507571ccb14d37791134f27f61", "metadata": { "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", @@ -584,13 +375,13 @@ "date_modified": "2023-03-10T09:32:44+00:00" }, "filetype": "application/pdf", - "page_number": 1 + "page_number": 2 }, - "text": "2. Programming language expertise: biomedical data scientists should be fluent in at" + "text": "b) Data science skills taught in BD2K-funded training programs. A qualitative content analysis was applied to the descriptions of required courses offered under the 12 BD2K-funded training programs. Each course was coded using qualitative data analysis software, with each skill that was present in the description counted once. The coding schema of data science-related skills was inductively developed and was organized into four major categories: (1) statistics and math skills; (2) computer science; (3) subject knowledge; (4) general skills, like communication and teamwork. The coding schema is detailed in Appendix A." }, { - "type": "Title", - "element_id": "18e42d24d6449a9b52fc65fc3f9710b4", + "type": "ListItem", + "element_id": "3f14cc0782485365bad0539f7b1bbb22", "metadata": { "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", @@ -603,13 +394,13 @@ "date_modified": "2023-03-10T09:32:44+00:00" }, "filetype": "application/pdf", - "page_number": 1 + "page_number": 2 }, - "text": "least one programming language (typically R and/or Python);" + "text": "c) Desired skills identified from data science-related job ads. 59 job ads from government (8.5%), academia (42.4%), industry (33.9%), and the nonprofit sector (15.3%) were sampled from websites like Glassdoor, Linkedin, and Ziprecruiter. The content analysis methodology and coding schema utilized in analyzing the training programs were applied to the job descriptions. 
Because many job ads mentioned the same skill more than once, each occurrence of the skill was coded, therefore weighting important skills that were mentioned multiple times in a single ad." }, { - "type": "ListItem", - "element_id": "c6be5389b7bd00746d39b7bac468dea0", + "type": "NarrativeText", + "element_id": "c2e95867ed0f25e3d9fe1a6b97447ab9", "metadata": { "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", @@ -622,13 +413,13 @@ "date_modified": "2023-03-10T09:32:44+00:00" }, "filetype": "application/pdf", - "page_number": 1 + "page_number": 2 }, - "text": "3. Predictive analytics, modeling, and machine learning: while a range of statistical methods may be useful, predictive analytics, modeling, and machine learning emerged as especially important skills in biomedical data science;" + "text": "Analysis of the above data provided insights into the current state of biomedical data science training, as well as a view into data science-related skills likely to be needed to prepare the BDS workforce to succeed in the future. Together, these analyses informed recommendations for core skills necessary for a competitive biomedical data scientist." }, { - "type": "ListItem", - "element_id": "1b8039583cbc15f654c89f2141eb6e10", + "type": "NarrativeText", + "element_id": "f39ddfa6365e505947527153b0ea60d8", "metadata": { "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", @@ -641,13 +432,13 @@ "date_modified": "2023-03-10T09:32:44+00:00" }, "filetype": "application/pdf", - "page_number": 1 + "page_number": 2 }, - "text": "4. Team science and scientific communication: “soft” skills, like the ability to work well on teams and communicate effectively in both verbal and written venues, may be as important as the more technical skills typically associated with data science." 
+ "text": "1 Kaggle is an online community for data scientists, serving as a platform for collaboration, competition, and learning: http://kaggle.com 2 In August 2017, Kaggle conducted an industry-wide survey to gain a clearer picture of the state of data science and machine learning. A standard set of questions were asked of all respondents, with more specific questions related to work for employed data scientists and questions related to learning for data scientists in training. Methodology and results: https://www.kaggle.com/kaggle/kaggle-survey-2017" }, { - "type": "ListItem", - "element_id": "2f87757b1d497a32c077be543632ed7d", + "type": "Footer", + "element_id": "d4735e3a265e16eee03f59718b9b5d03", "metadata": { "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", @@ -660,902 +451,9 @@ "date_modified": "2023-03-10T09:32:44+00:00" }, "filetype": "application/pdf", - "page_number": 1 + "page_number": 2 }, - "text": "5. Responsible data stewardship: a successful data scientist must be able to implement best practices for data management and stewardship, as well as conduct research in an ethical manner that maintains data security and privacy." - }, - { - "type": "UncategorizedText", - "element_id": "34b28172088bba51c6764df6d4e87674", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 1 - }, - "text": "The report further details specific skills and expertise relevant to biomedical data scientists." 
- }, - { - "type": "Title", - "element_id": "89b1f4c3df983454e25b233320781610", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 1 - }, - "text": "Motivation" - }, - { - "type": "NarrativeText", - "element_id": "3d8fbacaba9067faef48850d43801268", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 1 - }, - "text": "Training a biomedical data science (BDS) workforce is a central theme in NLM’s Strategic Plan for the coming decade. That commitment is echoed in the NIH-wide Big Data to Knowledge (BD2k) initiative, which invested $61 million between FY2014 and FY2017 in training programs for the development and use of biomedical big data science methods and tools. 
In line with" - }, - { - "type": "UncategorizedText", - "element_id": "68431de56564c6ad6aa3e6c02b78c89c", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "Core Skills for Biomedical Data Scientists _____________________________________________________________________________________________" - }, - { - "type": "NarrativeText", - "element_id": "326e7d081e9418423ea62bf3802caaa3", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "this commitment, recent report to the NLM Director recommended working across identify and develop core skills required of a biomedical data scientist to consistency across the cohort of NIH-trained data scientists. This report provides a set of recommended core skills based on analysis of current BD2K-funded training programs, biomedical data science job ads, and practicing members of the current data science workforce." 
- }, - { - "type": "Title", - "element_id": "ca978112ca1bbdcafac231b39a23dc4d", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "a" - }, - { - "type": "Title", - "element_id": "acc8586a874eb74f10c3f90620f20617", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "NIH to" - }, - { - "type": "Title", - "element_id": "f26d07e6b71e42596791a241e2417931", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "Methodology" - }, - { - "type": "Title", - "element_id": "b344d80e24a3679999fa964450b34bc2", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": 
"abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "The" - }, - { - "type": "NarrativeText", - "element_id": "cdc3773cb12cf99d302b9f00c48ae1e8", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "required of" - }, - { - "type": "Title", - "element_id": "aa3b88196a6407c3866c85acdcc8c981", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "Workforce" - }, - { - "type": "NarrativeText", - "element_id": "b72b62f1295c66f199256c1190177ce6", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": 
"three-pronged approach biomedical data scientist (BDS), drawing from:" - }, - { - "type": "Title", - "element_id": "3d366201f5b88bcbfafb078aee5f2a55", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "Excellence" - }, - { - "type": "Title", - "element_id": "ca8b22d0db83a22db163b560b3e4e515", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "team" - }, - { - "type": "NarrativeText", - "element_id": "e0a6230e370d20dece7ca96c77611cb0", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "took" - }, - { - "type": "Title", - "element_id": "ca978112ca1bbdcafac231b39a23dc4d", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 
167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "a" - }, - { - "type": "Title", - "element_id": "663ea1bfffe5038f3f0cf667f14c4257", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "to" - }, - { - "type": "NarrativeText", - "element_id": "a5bed2020bd1f4ea3eca933398c4f0d0", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "identifying" - }, - { - "type": "Title", - "element_id": "0d45f5fd462b8c70bffb10021ac1bcff", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - 
"filetype": "application/pdf", - "page_number": 2 - }, - "text": "core" - }, - { - "type": "Title", - "element_id": "32c1cf49a2feee269ed74dd860f72644", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "skills" - }, - { - "type": "NarrativeText", - "element_id": "a24acaf1cb5d6f8a0a0af0e81949765b", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "a) Responses to a 2017 Kaggle1 survey2 of over 16,000 self-identified data scientists working across many industries. Analysis of the Kaggle survey responses from the current data science workforce provided insights into the current generation of data scientists, including how they were trained and what programming and analysis skills they use." 
- }, - { - "type": "Title", - "element_id": "301d35f1042e1eac9fdef8839fd13a4e", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "b)" - }, - { - "type": "NarrativeText", - "element_id": "1117af46b0a22dd02d3869ab9738a8a8", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "Data science skills taught in BD2K-funded training programs. A qualitative content analysis applied to the descriptions of required offered under the BD2kK-funded training programs. Each course was coded using qualitative data analysis software, with each skill that was present in the description counted once. The coding schema of data science-related skills was inductively developed and was organized four major categories: (1) statistics and math skills; (2) computer science; (3) subject knowledge; (4) general skills, like communication and teamwork. The coding schema is detailed in Appendix A." 
- }, - { - "type": "Title", - "element_id": "6b847a0ed0b2c484c73f2749e29b4db5", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "into" - }, - { - "type": "NarrativeText", - "element_id": "b63b99f6383ba713b57ddfc77737c5f7", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "was" - }, - { - "type": "Title", - "element_id": "936e5cc5021d8a075f91b7864bf0cec8", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "courses" - }, - { - "type": "UncategorizedText", - "element_id": "6b51d431df5d7f141cbececcf79edf3d", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - 
"protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "12" - }, - { - "type": "Title", - "element_id": "2d2e9ceb1db2bc94a266f3e8b24b8f55", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "c)" - }, - { - "type": "NarrativeText", - "element_id": "961a38da2886c3cc25091d912769aa0d", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "job job government (8.5%), (42.4%), industry (83.9%), and nonprofit (15.3%) were sampled from websites like Glassdoor, Linkedin, and Ziprecruiter. The content analysis methodology and coding schema in analyzing the training programs were applied to the job descriptions. Because many job ads mentioned the same skill more than once, each occurrence of the skill was coded, therefore weighting single ad." 
- }, - { - "type": "NarrativeText", - "element_id": "f9c94ebffe2ab721a096cf42b7a9cff9", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "important skills that were mentioned multiple times in" - }, - { - "type": "Title", - "element_id": "6d0607a7a2ac9823f9fb2a62ea2b7385", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "Desired" - }, - { - "type": "Title", - "element_id": "32c1cf49a2feee269ed74dd860f72644", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "skills" - }, - { - "type": "NarrativeText", - "element_id": "a486fbc90cd5a32fe44275f5948b2066", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 
167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "identified" - }, - { - "type": "Title", - "element_id": "de98e5ea566225a14a9a6b3086253f6d", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "academia" - }, - { - "type": "Title", - "element_id": "75857a45899985be4c4d941e90b6b396", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "from" - }, - { - "type": "Title", - "element_id": "3a6eb0790f39ac87c94f3856b2dd2c5d", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - 
"filetype": "application/pdf", - "page_number": 2 - }, - "text": "data" - }, - { - "type": "Title", - "element_id": "8b3a4555f5297c340e5fdff392fe5a5b", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "science-related" - }, - { - "type": "Title", - "element_id": "26f8fe3e12ff690c91f73b24bb45ed01", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "utilized" - }, - { - "type": "Title", - "element_id": "b510c96f289ebcf388da7d2dea6a1e73", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "ads." 
- }, - { - "type": "Title", - "element_id": "b9776d7ddf459c9ad5b0e1d6ac61e27b", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "the" - }, - { - "type": "UncategorizedText", - "element_id": "3e1e967e9b793e908f8eae83c74dba9b", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "59" - }, - { - "type": "Title", - "element_id": "788eb2efc52660fe41472319f0d2c623", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "ads" - }, - { - "type": "Title", - "element_id": "9d5d7fcf3aa35a4809f92551aed1f26e", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": 
"abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "sector" - }, - { - "type": "Title", - "element_id": "75857a45899985be4c4d941e90b6b396", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "from" - }, - { - "type": "Title", - "element_id": "9f25a5b0f5e247294ebcf6723c2169b2", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "for core skills necessary for" - }, - { - "type": "NarrativeText", - "element_id": "f7f4976ebe430b482f073e28add58182", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - 
"text": "Analysis of the above data provided insights into the current state of biomedical data science training, as well as a view into data science-related skills likely to be needed to prepare the BDS workforce to succeed in the future. Together, these analyses informed recommendations competitive biomedical data scientist." - }, - { - "type": "NarrativeText", - "element_id": "4a99b0f26eb7267230c6994d9ab7d60b", - "metadata": { - "data_source": { - "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": 167189396509615428390709838081557906335, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "date_created": "2023-03-10T09:32:44+00:00", - "date_modified": "2023-03-10T09:32:44+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "' Kaggle is an online community for data scientists, serving as a platform for collaboration, competition, and learning: http://kaggle.com ? In August 2017, Kaggle conducted an industry-wide survey to gain clearer picture of the state of data science and machine learning. A standard set of questions were asked of all respondents, with more specific questions related to work for employed data scientists and questions related to learning for data scientists in training. 
Methodology and results: https://www.kaggle.com/kaggle/kaggle-survey-2017" + "text": "2" }, { "type": "UncategorizedText", diff --git a/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.pdf.json b/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.pdf.json index 4355d36569..1aec242c3e 100644 --- a/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.pdf.json @@ -1,7 +1,7 @@ [ { "type": "Title", - "element_id": "720a6f5640af3333283ae0a2b6ef5d4d", + "element_id": "8b115710b659086909de658b116dd719", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -16,11 +16,543 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "a Department of the Treasury Internal Revenue Service" + "text": "a Department of the Treasury Internal Revenue Service Instructions for Form 3115 (Rev. November 1987) Application for Change in Accounting Method" + }, + { + "type": "NarrativeText", + "element_id": "766cf1d1243ef2cdbb0db5ad32d7f9c9", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "(Section references are to the Internal Revenue Code unless otherwise noted.)" + }, + { + "type": "Title", + "element_id": "61ed58fa51293f429f87e8cf1896c9e4", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": 
"2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "Paperwork Reduction Act Notice" + }, + { + "type": "NarrativeText", + "element_id": "b00492d57199616b7b5459cdf57a58d2", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "Internal Revenue laws of the United States. We need it to ensure that taxpayers are complying with these laws and to allow us to figure and collect the right amount of tax. You are required to us this information." + }, + { + "type": "NarrativeText", + "element_id": "5d18f0234e23bc96198c9fb19601056a", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "We ask for this information to carry out the" + }, + { + "type": "NarrativeText", + "element_id": "0895a532e404a5c9ea96eac7982d268f", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "give" + }, + { + "type": "Title", + "element_id": "a1547a4ed1611eee44b15e99120fb978", + "metadata": { + "data_source": { + 
"url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "General Instructions" + }, + { + "type": "Title", + "element_id": "68a3289177b49b285e133a5267eb355f", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "Purpose of Form" + }, + { + "type": "NarrativeText", + "element_id": "fdb8017fc73bdc12f7200dece8b76c99", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "File this form to request a change in your accounting method, including the accounting treatment of any item. If you are requesting a change in accounting period, use Form 1128, Application for Change in Accounting Period. For more information, see Publication 538, Accounting Periods and Methods." 
+ }, + { + "type": "NarrativeText", + "element_id": "7e3ae97a65f12ef0bb8b4d6b5f721f54", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "filing taxpayers are reminded to determine if IRS has published a ruling or procedure dealing with the specific type of change since November 1987 (the current revision date of Form 3115)," + }, + { + "type": "Title", + "element_id": "cf9c7aa24a26aac4f0ec4b6395cbfdcc", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "When" + }, + { + "type": "UncategorizedText", + "element_id": "2127f2ab4fc4feb4d32460c8317bf02f", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "Form 3115," + }, + { + "type": "UncategorizedText", + "element_id": "e53657178cb6855ac4b2029197a64b0c", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": 
"container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "A." + }, + { + "type": "NarrativeText", + "element_id": "faf2673a7d6b6f7c5bf7cae6770a4130", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "Generally, applicants must complete Section In addition, complete the appropriate sections (B-1 through H) for which a change Is desired." + }, + { + "type": "NarrativeText", + "element_id": "bf2a070cb9d03d056e70b26bebf1ef79", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "You must give all relevant facts, including a detailed description of your present and proposed methods. You must also state the reason(s) you believe approval to make the requested change should be granted. Attach additional pages if more space is needed for explanations. Each page should show your name, address, and identifying number." 
+ }, + { + "type": "NarrativeText", + "element_id": "10626f80b0f7b25e661f8f82f5d7c454", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "State whether you desire a conference in National Office if the Service proposes to disapprove your application." + }, + { + "type": "Title", + "element_id": "b9776d7ddf459c9ad5b0e1d6ac61e27b", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "the" + }, + { + "type": "Title", + "element_id": "242a9dba10a04654d4adef9c58ff96f6", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "Changes to Accounting Methods Required Under the Tax Reform Act of 1986" + }, + { + "type": "NarrativeText", + "element_id": "582deac2def308ecc5250773e1683052", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + 
"date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "Uniform capitalization rules and limitation on cash method.—If you are required to change your method of accounting under section,263A (relating to the capitalization and inclusion in inventory costs of certain expenses) or 448 (limiting the use of the cash method of accounting by certain taxpayers) as added by the Tax Reform Act of 1986 (“Act”), the change 1s treated as initiated by the taxpayer, approved by the Commissioner, and the period for taking the adjustments under section 481(a) into account will not exceed 4 years. (Hospitals required to change from the cash method under section 448 have 10 years to take the adjustrnents into account.) Complete Section A and the appropriate sections (B-1 or C and D) for which the change is required." + }, + { + "type": "NarrativeText", + "element_id": "550f9e99054c657264fb9bb26d3023de", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "Disregard the instructions under Time and Place for Filing and Late Applications. Instead, attach Form 3115 to your income tax return for the year of change; do not file it separately. Also include on a separate statement accompanying the Form 3115 the period over which the section 481(a) adjustment will be taken into account and the basis for that conclusion. Identify the automatic change being made at the top of page 1 of Form 3115 (e.g., “Automatic Change to Accrual Method—Section 448\"). See Temporary Regulations sections 1.263A-1T and 1.448-1T for additional information." 
+ }, + { + "type": "NarrativeText", + "element_id": "c7c37f80c11190ab9416495a0d9b7c6e", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "you change your method of accounting for long-term contracts under section 460, see Notice 87-61 (9/21/87), 1987-38 IRB 40, for the notification procedures that must be followed." + }, + { + "type": "Title", + "element_id": "093856d810a56c1557ce2b24c65abf3d", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "Long-term contracts. 
—If" + }, + { + "type": "NarrativeText", + "element_id": "4a1ba7ce20dde03bf464633002f14b10", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "are required to" + }, + { + "type": "NarrativeText", + "element_id": "6272a6df76820c927d081a1041e3c079", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "Other methods.—Unless the Service has published a regulation or procedure to the contrary, all other changes !n accounting methods required by the Act are automatically considered to be approved by the Commissioner. Examples of method changes automatically approved by the Commissioner are those changes required to effect: (1) the repeal of the reserve method for bad debts of taxpayers other than financial institutions (Act section 805); (2) the repeal of the installment method for sales under a revolving credit plan (Act section 812); (3) the Inclusion of income attributable to the sale or furnishing of utility services no later than the year In which the services were provided to customers (Act section 821); and (4) the repeal of the deduction for qualified discount coupons (Act section 823). 
Do not file Form 3115 for these" + }, + { + "type": "Title", + "element_id": "d3eda7d7ed44b4b43fcbfa6f83f6fad3", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "changes." + }, + { + "type": "Title", + "element_id": "5756fb398995bb6518a87637f24f426e", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "Time and Place for Filing" + }, + { + "type": "NarrativeText", + "element_id": "af8bdf713f162b09567c8d1a3a2d4de7", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "Generally, applicants must file this form within the first 180 days of the tax year in which it is desired to make the change." 
+ }, + { + "type": "NarrativeText", + "element_id": "9dda11db48254f5e0d0000afb5d1dd9b", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "Taxpayers, other than exempt organizations, should file Form 3115 with the Commissioner of Internal Revenue, Attention: CC:C:4, 1111 Constitution Avenue, NW, Washington, DC 20224, Exempt organizations should file with the Assistant Commissioner (Employee Plans and Exempt Organizations), 1111 Constitution Avenue, NW, Washington, DC 20224." + }, + { + "type": "NarrativeText", + "element_id": "4d063cdbd131401fa29e1d0e824dc017", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "You should normally receive an acknowledgment of receipt of your application within 30 days. If you do not hear from IRS within 30 days of submitting your completed Form 3115, you may inquire as to the receipt of your application by writing to: Control Clerk, CC:C:4, Internal Revenue Service, Room 5040, 1111 Constitution Avenue, NW, Washington, DC 20224." 
}, { "type": "Title", - "element_id": "88591a76b54e47215c0827ae8838ec13", + "element_id": "ea325d761f98c6b73320e442b67f2a35", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -35,11 +567,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Instructions for Form 3115 (Rev. November 1987)" + "text": "an" }, { "type": "NarrativeText", - "element_id": "4a17cc01a68e2bf011ba1458d70f369a", + "element_id": "c56ebb2883fe0c95b8564fa3969f7010", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -54,11 +586,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Application for Change in Accounting Method" + "text": "See section 5.03 of Rev. Proc. 84-74 for filing early application." }, { "type": "NarrativeText", - "element_id": "766cf1d1243ef2cdbb0db5ad32d7f9c9", + "element_id": "12f877f0bd47f9b761ed7e74be1afacd", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -73,11 +605,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "(Section references are to the Internal Revenue Code unless otherwise noted.)" + "text": "Note: /f this form is being filed in accordance with Rev. Proc. 74-11, see Section G below." }, { "type": "Title", - "element_id": "61ed58fa51293f429f87e8cf1896c9e4", + "element_id": "a4316c02df07840f1beb56609cb09735", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -92,11 +624,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Paperwork Reduction Act Notice" + "text": "Late Applications" }, { "type": "NarrativeText", - "element_id": "828767cbc922e731b59894afba55fe10", + "element_id": "02dd043b5686a46b2f03cfe8cf56aae9", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -111,11 +643,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "We ask for this information to carry out the Internal Revenue laws of the United States. 
We need it to ensure that taxpayers are complying with these laws and to allow us to figure and collect the right amount of tax. You are required to give us this information." + "text": "If your application is filed after the 180-day period, it is late. The application will be considered for processing only upon a showing of “good cause” and if it can be shown to the satisfaction of the Commissioner that granting you an extension will not jeopardize the Government's interests. For further information, see Rev, Proc. 79-63." }, { "type": "Title", - "element_id": "a1547a4ed1611eee44b15e99120fb978", + "element_id": "025a65465b6fd9635316e92633b24c7e", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -130,11 +662,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "General Instructions" + "text": "Identifying Number" }, { - "type": "Title", - "element_id": "68a3289177b49b285e133a5267eb355f", + "type": "NarrativeText", + "element_id": "8605ee209656c311cec7ce4b001caab2", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -149,11 +681,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Purpose of Form" + "text": "Individuals.—An individual should enter his or her social security number in this block. If the application is made on behalf of a husband and wife who file their income tax return jointly, enter the social security numbers of both." }, { - "type": "NarrativeText", - "element_id": "2ef3cbc8d359155433a0028e73251f95", + "type": "Title", + "element_id": "ea325d761f98c6b73320e442b67f2a35", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -168,11 +700,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "File this form to request a change in your accounting method, including the accounting treatment of any item. If you are requesting a change in accounting period, use Form 1128, Application for Change in Accounting Period. 
For more information, see Publication 538, Accounting Periods and Methods. When filing Form 3115, taxpayers are reminded to determine if IRS has published a ruling or procedure dealing with the specific type of change since November 1987 (the current revision date of Form 3115)," + "text": "an" }, { "type": "NarrativeText", - "element_id": "84e7e32f584e2ee9f47ba593bf86c559", + "element_id": "7d82c5876c5c1a3596338ae8cfbd1a50", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -187,11 +719,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Generally, applicants must complete Section A. In addition, complete the appropriate sections (B-1 through H) for which a change Is desired." + "text": "Others.-—The employer identification number applicant other than an individual should be entered in this block." }, { - "type": "NarrativeText", - "element_id": "ed7dba38aff5b289c7b6c8a58e800279", + "type": "Title", + "element_id": "28391d3bc64ec15cbb090426b04aa6b7", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -206,11 +738,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "You must give all relevant facts, including a detailed description of your present and proposed methods. You must also state the reason(s) you believe approval to make the requested change should be granted. Attach additional pages if more space is needed for explanations. Each page should show your name, address, and identifying number. State whether you desire a conference in the National Office if the Service proposes to disapprove your application." 
+ "text": "of" }, { "type": "Title", - "element_id": "242a9dba10a04654d4adef9c58ff96f6", + "element_id": "f1a73e2204a114077f988c9da98d7f8b", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -225,11 +757,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Changes to Accounting Methods Required Under the Tax Reform Act of 1986" + "text": "Signature" }, { "type": "NarrativeText", - "element_id": "eb076cfd3d47e546c28611750afedc49", + "element_id": "dc1531183c8e3f45a78f110ec1efe15f", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -244,11 +776,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Place for Filing and Late Applications. Instead, attach Form 3115 to your income tax return for the year of change; do not file it separately. Also include on a separate statement accompanying the Form 3115 the period over which the section 481(a) adjustment will be taken into account and" + "text": "Individuals. —An individual desiring the change should sign the application. If the application pertains to a husband and wife filing a joint income tax return, the names of both should appear in the heading and both should sign." 
}, { "type": "NarrativeText", - "element_id": "0b320308ba52d4a9625d29cadfc941a9", + "element_id": "7d3a67d75914a504a52ec53998b796af", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -263,11 +795,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Uniform capitalization rules and limitation on cash method.—If you are required to change your method of accounting under section,263A (relating to the capitalization and inclusion in inventory costs of certain expenses) or 448 (limiting the use of the cash method of accounting by certain taxpayers) as added by the Tax Reform Act of 1986 (“Act”), the change 1s treated as initiated by the taxpayer, approved by the Commissioner, and the period for taking the adjustments under section 481(a) into account will not exceed 4 years. (Hospitals required to change from the cash method under section 448 have 10 years to take the adjustrnents into account.) Complete Section A and the appropriate sections (B-1 or C and D) for which the change is required. Disregard the instructions under Time and" + "text": "Partnerships.—The form should be signed with the partnership name followed by the signature of one of the general partners and the words “General Partner.”" }, { "type": "NarrativeText", - "element_id": "ee134711b01cac75692565ae4f785fd4", + "element_id": "9de285e8e3b042aa9ac86edde98a21a9", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -282,11 +814,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "the basis for that conclusion. Identify the automatic change being made at the top of page 1 of Form 3115 (e.g., “Automatic Change to Accrual Method—Section 448\"). See Temporary Regulations sections 1.263A-1T and 1.448-1T for additional information." 
+ "text": "Corporations, cooperatives, and insurance companies.—The form should show the name of the corporation, cooperative, or insurance company and the signature of the president, vice president, treasurer, assistant treasurer, or chief accounting officer (such as tax officer) authorized to sign, and his or her official title. Receivers, trustees, or assignees must sign any application they are required to file. For a subsidiary corporation filing a consolidated return with its parent, the form should be signed by an officer of the parent corporation." }, { - "type": "ListItem", - "element_id": "7b7c33680de5c4a7cb165c103752579e", + "type": "Title", + "element_id": "f5ea55c27511707a88f8efadcdf50b55", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -301,11 +833,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Long-term contracts. —If you are required to change your method of accounting for long-term contracts under section 460, see Notice 87-61 (9/21/87), 1987-38 IRB 40, for the notification procedures that must be followed. Other methods.—Unless the Service has published a regulation or procedure to the contrary, all other changes !n accounting methods required by the Act are automatically considered to be approved by the Commissioner. Examples of method changes automatically approved by the Commissioner are those changes required to effect: (1) the repeal of the reserve method for bad debts of taxpayers other than financial institutions (Act section 805); (2) the repeal of the installment method for sales under a revolving credit plan (Act section 812); (3) the Inclusion of income attributable to the sale or furnishing of utility services no later than the year In which the services were provided to customers (Act section 821); and (4) the repeal of the deduction for qualified discount coupons (Act section 823). Do not file Form 3115 for these changes." 
+ "text": "Fiduciaries.—The-form" }, { - "type": "Title", - "element_id": "af8bdf713f162b09567c8d1a3a2d4de7", + "type": "NarrativeText", + "element_id": "ca02af326f3caed052e30728481fc4fe", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -320,11 +852,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Generally, applicants must file this form within the first 180 days of the tax year in which it is desired to make the change." + "text": "should show the name of the estate or trust and be signed by the fiduciary, personal representative, executor, executrix, administrator, administratrix, etc., having legal authority to sign, and his or her title." }, { - "type": "Title", - "element_id": "5756fb398995bb6518a87637f24f426e", + "type": "NarrativeText", + "element_id": "52e2b8e4b8527ae448e9db2dfd0c43c7", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -339,11 +871,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Time and Place for Filing" + "text": "Preparer other than partner, officer, etc.—The signature of the individual preparing the application should appear in the space provided on page 6." }, { - "type": "NarrativeText", - "element_id": "2aebd5bbfbc983d52ed7aee8eb7bc7cc", + "type": "Title", + "element_id": "ca978112ca1bbdcafac231b39a23dc4d", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -358,11 +890,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Taxpayers, other than exempt organizations, should file Form 3115 with the Commissioner of Internal Revenue, Attention: CC:C:4, 1111 Constitution Avenue, NW, Washington, DC 20224, Exempt organizations should file with the Assistant Commissioner (Employee Plans and Exempt Organizations), 1111 Constitution Avenue, NW, Washington, DC 20224. You should normally receive an acknowledgment of receipt of your application within 30 days. 
If you do not hear from IRS within 30 days of submitting your completed Form 3115, you may inquire as to the receipt of your application by writing to: Control Clerk, CC:C:4, Internal Revenue Service, Room 5040, 1111 Constitution Avenue, NW, Washington, DC 20224." + "text": "a" }, { "type": "NarrativeText", - "element_id": "0ec978b05caa71414e2f4429b1d18f09", + "element_id": "12a24aabbcef2cabc07babe12d9c82c5", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -377,11 +909,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "See section 5.03 of Rev. Proc. 84-74 for filing an early application." + "text": "If the individual or firm is also authorized to represent the applicant before the IRS, receive copy of the requested ruling, or perform any other act(s), the power of attorney must reflect such authorization(s)." }, { "type": "Title", - "element_id": "12f877f0bd47f9b761ed7e74be1afacd", + "element_id": "8b06cd6e2bf7fc15130d5d9ed7e66283", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -396,11 +928,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Note: /f this form is being filed in accordance with Rev. Proc. 74-11, see Section G below." 
+ "text": "Affiliated Groups" }, { - "type": "Title", - "element_id": "a4316c02df07840f1beb56609cb09735", + "type": "NarrativeText", + "element_id": "58e977f2200b46ac8b372586dfd781bf", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -415,11 +947,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Late Applications" + "text": "Taxpayers that are members of an affiliated group filing a consolidated return that seeks to change to the same accounting method for more than one member of the group must file a separate Form 3115 for each such member," }, { - "type": "NarrativeText", - "element_id": "02dd043b5686a46b2f03cfe8cf56aae9", + "type": "Title", + "element_id": "8b838d95f7d4f66b5453307de1353ff4", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -434,11 +966,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "If your application is filed after the 180-day period, it is late. The application will be considered for processing only upon a showing of “good cause” and if it can be shown to the satisfaction of the Commissioner that granting you an extension will not jeopardize the Government's interests. For further information, see Rev, Proc. 79-63." 
+ "text": "Specific Instructions" }, { "type": "Title", - "element_id": "025a65465b6fd9635316e92633b24c7e", + "element_id": "bc272940e494acf9441070d3eb4b79f6", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -453,11 +985,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Identifying Number" + "text": "Section A" }, { "type": "NarrativeText", - "element_id": "8605ee209656c311cec7ce4b001caab2", + "element_id": "b57b7502430c59194bb865cfa1bcfab5", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -472,11 +1004,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Individuals.—An individual should enter his or her social security number in this block. If the application is made on behalf of a husband and wife who file their income tax return jointly, enter the social security numbers of both." + "text": "Item 5a, page 1.—“Taxable income or (loss) from operations” is to be entered before application of any net operating loss deduction under section 172(a)." }, { "type": "NarrativeText", - "element_id": "742730130f9c14403ad272eec208a456", + "element_id": "9eefeb9556d95a8dd563ff3270cae7f4", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -491,11 +1023,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Others.-—The employer identification number of an applicant other than an individual should be entered in this block." + "text": "Item 6, page 2.—The term “gross receipts” includes total sales (net of returns and allowances) and all amounts received for services. In addition, gross receipts include any income from investments and from incidental or outside sources (e.g., interest, dividends, rents, royalties, and annuities). However, if you are a resaler of personal property, exclude from gross receipts any amounts not derived in the ordinary course of a trade or business. 
Gross receipts do not include amounts received for sales taxes if, under the applicable state or local law, the tax is legally imposed on the purchaser of the good or service, and the taxpayer merely collects and remits the tax to the taxing authority." }, { - "type": "ListItem", - "element_id": "ede9004eceddf828c2c928f62d0687a0", + "type": "NarrativeText", + "element_id": "3e63f740940cd3ab94c17d2bbf48b13a", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -510,11 +1042,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Signature Individuals. —An individual desiring the change should sign the application. If the application pertains to a husband and wife filing a joint income tax return, the names of both should appear in the heading and both should sign. Partnerships.—The form should be signed with the partnership name followed by the signature of one of the general partners and the words “General Partner.” Corporations, cooperatives, and insurance companies.—The form should show the name of the corporation, cooperative, or insurance company and the signature of the president, vice president, treasurer, assistant treasurer, or chief accounting officer (such as tax officer) authorized to sign, and his or her official title. Receivers, trustees, or assignees must sign any application they are required to file. For a subsidiary corporation filing a consolidated return with its parent, the form should be signed by an officer of the parent corporation. Fiduciaries.—The-form should show the name of the estate or trust and be signed by the fiduciary, personal representative, executor, executrix, administrator, administratrix, etc., having legal authority to sign, and his or her title. Preparer other than partner, officer, etc.—The signature of the individual preparing the application should appear in the space provided on page 6. 
If the individual or firm is also authorized to" + "text": "Item 7b, page 2.—If item 7b 1s “Yes,” indicate on a separate sheet the following for each separate trade or business: Nature of business" }, { - "type": "Title", - "element_id": "f1a73e2204a114077f988c9da98d7f8b", + "type": "NarrativeText", + "element_id": "3db206c935841c3dcd5b3a1d41e56b84", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -527,13 +1059,13 @@ "date_modified": "2023-03-10T09:36:30+00:00" }, "filetype": "application/pdf", - "page_number": 1 + "page_number": 2 }, - "text": "Signature" + "text": "(manufacturing, retailer, wholesaler, etc.), employer identification number, overall method of accounting, and whether, in the last 6 years, that business has changed its accounting method, or is also changing its accounting method as part of this request or as a separate request." }, { - "type": "Title", - "element_id": "1df7107903f249d938fbf3710f50283a", + "type": "NarrativeText", + "element_id": "48ddf405e03a362566cdbc32cc5cd11c", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -546,13 +1078,13 @@ "date_modified": "2023-03-10T09:36:30+00:00" }, "filetype": "application/pdf", - "page_number": 1 + "page_number": 2 }, - "text": "If the individual or firm is also authorized to represent the applicant before the IRS, receive a copy of the requested ruling, or perform any other act(s), the power of attorney must reflect such authorization(s)." 
+ "text": "Item 11, page 2.—If you cannot provide the requested information, you may sign a statement under penalties of perjury that:" }, { "type": "Title", - "element_id": "8b06cd6e2bf7fc15130d5d9ed7e66283", + "element_id": "28391d3bc64ec15cbb090426b04aa6b7", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -565,13 +1097,13 @@ "date_modified": "2023-03-10T09:36:30+00:00" }, "filetype": "application/pdf", - "page_number": 1 + "page_number": 2 }, - "text": "Affiliated Groups" + "text": "of" }, { "type": "NarrativeText", - "element_id": "58e977f2200b46ac8b372586dfd781bf", + "element_id": "81f087b1fcf4c9870324336c6bc0de78", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -584,13 +1116,13 @@ "date_modified": "2023-03-10T09:36:30+00:00" }, "filetype": "application/pdf", - "page_number": 1 + "page_number": 2 }, - "text": "Taxpayers that are members of an affiliated group filing a consolidated return that seeks to change to the same accounting method for more than one member of the group must file a separate Form 3115 for each such member," + "text": "(1) Gives your best estimate of the percentage the section 481(a) adjustment that would have been required if the requested change had been made for each of the 3 preceding years; and" }, { "type": "Title", - "element_id": "58703de56debc34a1d68e6ed6f8fd067", + "element_id": "b9776d7ddf459c9ad5b0e1d6ac61e27b", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -603,13 +1135,13 @@ "date_modified": "2023-03-10T09:36:30+00:00" }, "filetype": "application/pdf", - "page_number": 1 + "page_number": 2 }, - "text": "Specific Instructions Section A" + "text": "the" }, { "type": "NarrativeText", - "element_id": "33b0dd2cec2ea60810343af08d53ded2", + "element_id": "cde0777402fde810d0fb24b15df92b2b", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -622,13 +1154,13 @@ "date_modified": "2023-03-10T09:36:30+00:00" }, 
"filetype": "application/pdf", - "page_number": 1 + "page_number": 2 }, - "text": "Item 5a, page 1.—“Taxable income or (loss) from operations” is to be entered before application of any net operating loss deduction under section 172(a). Item 6, page 2.—The term “gross receipts” includes total sales (net of returns and allowances) and all amounts received for services. In addition, gross receipts include any income from investments and from incidental or outside sources (e.g., interest, dividends, rents, royalties, and annuities). However, if you are a resaler of personal property, exclude from gross receipts any amounts not derived in the ordinary course of a trade or business. Gross receipts do not include amounts received for sales taxes if, under the applicable state or local law, the tax is legally imposed on the purchaser of the good or service, and the taxpayer merely collects and remits the tax to the taxing authority. Item 7b, page 2.—If item 7b 1s “Yes,” indicate on a separate sheet the following for each separate trade or business: Nature of business" + "text": "(2) Explains in detail why you cannot provide requested information." }, { "type": "NarrativeText", - "element_id": "c51052c424ee3b8b5a219015f66d4846", + "element_id": "c855d896f610600602f04d9e31253c91", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -643,11 +1175,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "(manufacturing, retailer, wholesaler, etc.), employer identification number, overall method of accounting, and whether, in the last 6 years, that business has changed its accounting method, or is also changing its accounting method as part of this request or as a separate request. Item 11, page 2.—If you cannot provide the requested information, you may sign a statement under penalties of perjury that:" + "text": "See section 5.06(2) of Rev. Proc. 84-74 for required perjury statement that must be attached." 
}, { - "type": "NarrativeText", - "element_id": "1bbe995811e9fd4c3ce1b218cb641f4e", + "type": "Title", + "element_id": "b9776d7ddf459c9ad5b0e1d6ac61e27b", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -662,11 +1194,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "(1) Gives your best estimate of the percentage of the section 481(a) adjustment that would have been required if the requested change had been made for each of the 3 preceding years; and" + "text": "the" }, { - "type": "NarrativeText", - "element_id": "f7872ac379aa024934461d08fa31ebd9", + "type": "Title", + "element_id": "b9776d7ddf459c9ad5b0e1d6ac61e27b", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -681,11 +1213,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "(2) Explains in detail why you cannot provide the requested information." + "text": "the" }, { "type": "NarrativeText", - "element_id": "2de8f0b5003bcb8c12a4dc59c8e1f740", + "element_id": "1734a701c8a3139ddcb5b857f697318f", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -700,11 +1232,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "See section 5.06(2) of Rev. Proc. 84-74 for the required perjury statement that must be attached." + "text": "If IRS later examines your return for the year change or for later years, it has the right to verify your statement at that time." }, { - "type": "NarrativeText", - "element_id": "751abc8c6a0fa412c3e8c18345f57f95", + "type": "Title", + "element_id": "28391d3bc64ec15cbb090426b04aa6b7", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -719,11 +1251,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "Item 13, page 2.—Insert the actual number of tax years. Use of the term “since inception” 1s not acceptable. However, “more than 6 years” Is acceptable." 
+ "text": "of" }, { "type": "NarrativeText", - "element_id": "678ecc0340dc8848f891bf12a555a3fd", + "element_id": "751abc8c6a0fa412c3e8c18345f57f95", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -738,7 +1270,7 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "If IRS later examines your return for the year of the change or for later years, it has the right to verify your statement at that time." + "text": "Item 13, page 2.—Insert the actual number of tax years. Use of the term “since inception” 1s not acceptable. However, “more than 6 years” Is acceptable." }, { "type": "Title", @@ -798,8 +1330,27 @@ "text": "Sections B-2 and B-3" }, { - "type": "Title", - "element_id": "4688916bf1d6b205af02a0e954156688", + "type": "NarrativeText", + "element_id": "eac562ca19f6198691856c695e2790bd", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 2 + }, + "text": "Limitation on the Use of the Cash Method of Accounting. —Except as provided below, C corporations, partnerships with a C corporation as a partner, and tax shelters may not use the cash method of accounting. For purposes of this limitation, a trust subject to the tax on unrelated business income under section 511 1s treated as aC corporation with respect to its unrelated trade or business activities." + }, + { + "type": "NarrativeText", + "element_id": "e5bed7fe04dd22cabe5e5c0362d37743", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -814,11 +1365,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "Limitation on the Use of the Cash Method of Accounting. 
—Except as provided below, C" + "text": "The limitation on the use of the cash method (except for tax shelters) does not apply to—" }, { "type": "NarrativeText", - "element_id": "aaf93c2be8f4f2db87bd760783fedfa5", + "element_id": "69bd87b2ad5873c030748e62adf61b89", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -833,11 +1384,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "corporations, partnerships with a C corporation as a partner, and tax shelters may not use the cash method of accounting. For purposes of this limitation, a trust subject to the tax on unrelated business income under section 511 1s treated as aC corporation with respect to its unrelated trade or business activities." + "text": "(1) Farming businesses.—F or this purpose, the term “farming business” 1s defined in section 263A(e)(4), but it also includes the raising, harvesting, or growing of trees to which section 263A(c)(5) applies. Notwithstanding this exception, section 447 requires certain C corporations and partnerships with a C corporation as a partner to use the accrual method." }, { "type": "NarrativeText", - "element_id": "e5bed7fe04dd22cabe5e5c0362d37743", + "element_id": "44902073e7cc4fa753f25d40e009dcef", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -852,11 +1403,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "The limitation on the use of the cash method (except for tax shelters) does not apply to—" + "text": "substantially all of the stock of which is owned by employees performing the services, retired employees who had performed the services, any estate of any individual who had performed the services listed above, or any person who acquired stock of the corporation as a result of the death of an employee or retiree described above if the acquisition occurred within 2 years of death." 
}, { - "type": "ListItem", - "element_id": "69bd87b2ad5873c030748e62adf61b89", + "type": "NarrativeText", + "element_id": "b68a5b5b0d59122e0df42a96d68d2b5e", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -871,11 +1422,30 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "(1) Farming businesses.—F or this purpose, the term “farming business” 1s defined in section 263A(e)(4), but it also includes the raising, harvesting, or growing of trees to which section 263A(c)(5) applies. Notwithstanding this exception, section 447 requires certain C corporations and partnerships with a C corporation as a partner to use the accrual method." + "text": "(3) Entities with gross receipts of $5,000,000 or less. —To qualify for this exception, the C corporation's or partnership’s annual average gross receipts for the three years ending with the prior tax year may not exceed $5,000,000. If the corporation or partnership was not in existence for the entire 3-year period, the period of existence is used to determine whether the corporation or partnership qualifies. If any tax year in the 3-year period is a short tax year, the corporation or partnership must annualize the gross receipts by multiplying the gross receipts by 12 and dividing the result by the number of months in the short period." }, { "type": "NarrativeText", - "element_id": "0607edfa2419dd0cdc80f457872fe238", + "element_id": "a50ed92585ec98497171f56bc829c16a", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 2 + }, + "text": "For more information, see section 448 and Temporary Regulations section 1.448-1T." 
+ }, + { + "type": "Title", + "element_id": "53e33d10c9df4a570490182ccef0cd95", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -890,11 +1460,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "(2) Qualified personal service corporations. — A “qualified personal service corporation” is any corporation: (a) substantially all of the activities of which involve the performance of services in the fields of health, law," + "text": "Section C" }, { "type": "NarrativeText", - "element_id": "50d16fd6b40a428c3befaf6dd19c2dcd", + "element_id": "a9e8c96063f3fea7ea05eb3cd41ebe7a", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -909,11 +1479,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "engineering, architecture, accounting, actuarial science, performing arts, or consulting, and (b)" + "text": "Applicants must give complete details about the present method of valuing inventory and the proposed method. State whether all or part of your inventory ts involved in the change." }, { "type": "NarrativeText", - "element_id": "64758ada28beed36481b14ce8dc67472", + "element_id": "7e90b155b5cdb2481b1dfbb1118142c5", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -928,11 +1498,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "substantially all of the stock of which is owned by employees performing the services, retired employees who had performed the services, any estate of any individual who had performed the services listed above, or any person who acquired stock of the corporation as a result of the death of an employee or retiree described above if the acquisition occurred within 2 years of death. (3) Entities with gross receipts of $5,000,000 or less. —To qualify for this exception, the C corporation's or partnership’s annual average gross receipts for the three years ending with the prior tax year may not exceed $5,000,000. 
If the corporation or partnership was not in existence for the entire 3-year period, the period of existence is used to determine whether the corporation or partnership qualifies. If any tax year in the 3-year period is a short tax year, the corporation or partnership must annualize the gross receipts by multiplying the gross receipts by 12 and dividing the result by the number of months in the short period. For more information, see section 448 and Temporary Regulations section 1.448-1T." + "text": "Inventories of retail merchants.—The retail method of pricing inventories does not contemplate valuation of goods at the retail selling price. The retail selling price of goods on hand must be reduced to approximate cost or cost or market, whichever Is lower, by the adjustments required in Regulations section 1.471-8." }, { "type": "Title", - "element_id": "53e33d10c9df4a570490182ccef0cd95", + "element_id": "1e3abf61a37e3cad36b11b459b1cc39e", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -947,11 +1517,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "Section C" + "text": "If" }, { "type": "NarrativeText", - "element_id": "6d2d2cfa00e5a8caec71ba799f60f8c6", + "element_id": "bbd0f86d34b7622cfff546da0c15584d", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -966,11 +1536,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "Applicants must give complete details about the present method of valuing inventory and the proposed method. State whether all or part of your inventory ts involved in the change. Inventories of retail merchants.—The retail method of pricing inventories does not contemplate valuation of goods at the retail selling price. The retail selling price of goods on hand must be reduced to approximate cost or cost or market, whichever Is lower, by the adjustments required in Regulations section 1.471-8." 
+ "text": "LIFO inventory changes.—Attach a schedule with all the required computations when changing the method of figuring LIFO inventories. you are changing from LIFO to a non-LIFO method, attach a schedule with the following additional information:" }, { "type": "NarrativeText", - "element_id": "357d52f500b965abc29ea60039de4fd8", + "element_id": "347f638641329c72c971a522ec07f6b1", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -985,11 +1555,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "LIFO inventory changes.—Attach a schedule with all the required computations when changing the method of figuring LIFO inventories. If you are changing from LIFO to a non-LIFO method, attach a schedule with the following additional information:" + "text": "(1) The specific types and classes of goods in the LIFO inventories involved in the proposed changes and the comparative value of such Inventories as of the end of the tax year preceding the year of change determined by: (a) the LIFO method, and (b) the proposed method and basis (such as FIFO cost or lower of cost or market)." }, { "type": "NarrativeText", - "element_id": "1ac3e7aa5a6139bd80f05a7ac1f63ddf", + "element_id": "aca21cfeadca7d527dd36f01005ff44a", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1004,11 +1574,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "(1) The specific types and classes of goods in the LIFO inventories involved in the proposed changes and the comparative value of such Inventories as of the end of the tax year preceding the year of change determined by: (a) the LIFO method, and (b) the proposed method and basis (such as FIFO cost or lower of cost or market). (2) State whether the proposed identification and valuation methods conform to the inventory method currently used with respect to non-LIFO Inventories, if any, or how such method is otherwise consistent with Regulations section 1.4726." 
+ "text": "(2) proposed and valuation methods conform to the inventory method currently used with respect to non-LIFO Inventories, if any, or how such method is otherwise consistent with Regulations section 1.4726." }, { - "type": "NarrativeText", - "element_id": "6028c579dc843bb5aa2c704f46085914", + "type": "Title", + "element_id": "e850deb3f1e65c13e7cd728279a472bf", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1023,11 +1593,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "(3) The termination event statement required by section 5.10 of Rev. Proc. 84-74 and an explanation if there has been a termination event." + "text": "State whether the" }, { "type": "Title", - "element_id": "92e21a61e1d872dbbe3e3221a920b409", + "element_id": "fd3dfa76050e048e229d35a01da6974a", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1042,11 +1612,49 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "Section D" + "text": "identification" + }, + { + "type": "Title", + "element_id": "a7e2d26e8d15814dd9c6a1bdc90585c8", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 2 + }, + "text": "by" }, { "type": "NarrativeText", - "element_id": "a8e72799229bc2d754f44ea167a6e7d6", + "element_id": "4a9430201a20b0868ab81c8c9e71b881", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + 
"filetype": "application/pdf", + "page_number": 2 + }, + "text": "(3) The termination event statement required section 5.10 of Rev. Proc. 84-74 and an explanation if there has been a termination event." + }, + { + "type": "Title", + "element_id": "92e21a61e1d872dbbe3e3221a920b409", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1061,7 +1669,7 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "Applicants requesting to change their method of valuing property produced, property acquired for resale, or long-term contracts under section 263A or 460 MUST complete section D showing the treatment under both the present and proposed methods." + "text": "Section D" }, { "type": "Title", @@ -1083,7 +1691,7 @@ "text": "Section E" }, { - "type": "ListItem", + "type": "NarrativeText", "element_id": "86fab9f7b35d56a2d48baf0782b7c53d", "metadata": { "data_source": { @@ -1102,8 +1710,27 @@ "text": "Section 460(f) provides that the term “long-term contract” means any contract for the manufacturing, building, installation, or construction of property that is not completed within the tax year in which it 1s entered into. However, a manufacturing contract will not qualify as a long-term contract unless the contract involves the manufacture of: (1) a unique item not normally included in your finished goods inventory, or (2) any item that normally requires more than 12 calendar months to complete." 
}, { - "type": "ListItem", - "element_id": "84cea2af17bb3760234b42f4ea78e175", + "type": "NarrativeText", + "element_id": "825f9197a40400f76d2a527e8d7a2c71", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 2 + }, + "text": "All long-term contracts entered into after February 28, 1986, except for real property construction contracts expected to be completed within 2 years by contractors whose average annual gross receipts for the 3 prior tax years do not exceed $10,000,000, must be accounted for using either the percentage of completion- capitalized cost method or the percentage of completion method. See section 460." + }, + { + "type": "NarrativeText", + "element_id": "dcf589bb37d079ecce4b375abc332606", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1118,7 +1745,7 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "All long-term contracts entered into after February 28, 1986, except for real property construction contracts expected to be completed within 2 years by contractors whose average annual gross receipts for the 3 prior tax years do not exceed $10,000,000, must be accounted for using either the percentage of completion- capitalized cost method or the percentage of completion method. See section 460. Caution: At the time these instructions were printed, Congress was considering legislation that would repeal the use of the percentage of completion-capitalized cost method for certain long-term contracts." 
+ "text": "Caution: At the time these instructions were printed, Congress was considering legislation that would repeal the use of the percentage of completion-capitalized cost method for certain long-term contracts." }, { "type": "Title", @@ -1141,7 +1768,45 @@ }, { "type": "NarrativeText", - "element_id": "fa41a857716f30d6bbee384eada72a90", + "element_id": "cf5e2bc86b7c77533924eb940fd522d5", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 2 + }, + "text": "This section Is to be used only to request a change in a method of accounting for depreciation under section 167." + }, + { + "type": "NarrativeText", + "element_id": "b8355dc568ea042f9da586188b404bca", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 2 + }, + "text": "Rev. Proc. 74-11 provides a procedure whereby applicants are considered to have obtained the consent of the Commissioner to change their method of accounting for depreciation. You must file Form 3115 with the Service Center where your return will be filed within the first 180 days of the tax year in which it is desired to make the change. Attach a copy of the form to the income tax return for the tax year of the change." 
+ }, + { + "type": "NarrativeText", + "element_id": "319882ba6726e29222f5522c53887960", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1156,7 +1821,7 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "This section Is to be used only to request a change in a method of accounting for depreciation under section 167. Rev. Proc. 74-11 provides a procedure whereby applicants are considered to have obtained the consent of the Commissioner to change their method of accounting for depreciation. You must file Form 3115 with the Service Center where your return will be filed within the first 180 days of the tax year in which it is desired to make the change. Attach a copy of the form to the income tax return for the tax year of the change. Note: Do not use Form 3115 to make an election under section 168. Such an election may be made only on the tax return for the year in which the property 1s placed in service. In addition, Form 3115 is not to be used to request approval to revoke an election made under section 168. Such a request must be made in accordance with Rev. Proc. 87-1 (updated annually)." + "text": "Note: Do not use Form 3115 to make an election under section 168. Such an election may be made only on the tax return for the year in which the property 1s placed in service. In addition, Form 3115 is not to be used to request approval to revoke an election made under section 168. Such a request must be made in accordance with Rev. Proc. 87-1 (updated annually)." }, { "type": "Title", @@ -1216,8 +1881,8 @@ "text": "If you are making an election under section 458, show the applicable information under Regulations section 1.458-10." 
}, { - "type": "UncategorizedText", - "element_id": "c0a5f5aa4012d18970939d7bb8299e38", + "type": "NarrativeText", + "element_id": "df67e4b3a4a1352209c2648b87d675e2", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1232,11 +1897,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "% U.S." + "text": "(2) Qualified personal service corporations. — A “qualified personal service corporation” is any corporation: (a) substantially all of the activities of which involve the performance of services in the fields of health, law, engineering, architecture, accounting, actuarial science, performing arts, or consulting, and (b)" }, { - "type": "Title", - "element_id": "c71e90d2f497062ba8d068af0bed2a3d", + "type": "NarrativeText", + "element_id": "3167823c1d2039b4c48efe2f6c89b5c2", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1251,11 +1916,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "Government" + "text": "Applicants requesting change valuing property produced, property acquired for resale, or long-term contracts under section 263A or 460 MUST complete section D showing the treatment under both the present and proposed methods." 
}, { "type": "Title", - "element_id": "c0f169737344e28e87eb123df627ba6a", + "element_id": "663ea1bfffe5038f3f0cf667f14c4257", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1270,11 +1935,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "Printing" + "text": "to" }, { "type": "Title", - "element_id": "749720aad1daf3c5dfeda1d87555ff87", + "element_id": "7574058dd32c12eb33bc649b5e36bdcb", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1289,11 +1954,30 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "Office:" + "text": "their method of" }, { "type": "UncategorizedText", - "element_id": "de444aa0e8db0c05d86ad56e28d5fb26", + "element_id": "bbf3f11cb5b43e700273a78d12de55e4", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.pdf", + "version": 307846589923949318200712033143133817358, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.pdf" + }, + "date_created": "2023-03-10T09:36:30+00:00", + "date_modified": "2023-03-10T09:36:30+00:00" + }, + "filetype": "application/pdf", + "page_number": 2 + }, + "text": "%" + }, + { + "type": "NarrativeText", + "element_id": "4bde94dc330268d2f63a09423409c6d4", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1308,7 +1992,7 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "1987—201-993/60166" + "text": "U.S. 
Government Printing Office: 1987—201-993/60166" }, { "type": "Title", diff --git a/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.png.json b/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.png.json index f89aa759ad..6874f8d561 100644 --- a/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.png.json +++ b/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.png.json @@ -1,7 +1,7 @@ [ { "type": "Title", - "element_id": "9e4a454d91ac1f220324c6d1a0377093", + "element_id": "92405c82f76df8b2cbbc6047bd10e0ff", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -16,11 +16,11 @@ "filetype": "image/png", "page_number": 1 }, - "text": "rh Department of the Treasury Internal Revenue Service" + "text": "rh Department of the Treasury Internal Revenue Service Instructions for Form 3115 (Rev. November 1987) Application for Change in Accoun ig Method" }, { - "type": "Title", - "element_id": "88591a76b54e47215c0827ae8838ec13", + "type": "NarrativeText", + "element_id": "766cf1d1243ef2cdbb0db5ad32d7f9c9", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -35,11 +35,11 @@ "filetype": "image/png", "page_number": 1 }, - "text": "Instructions for Form 3115 (Rev. November 1987)" + "text": "(Section references are to the Internal Revenue Code unless otherwise noted.)" }, { - "type": "Title", - "element_id": "f91d5fcc0fb964060b132e98f23cf182", + "type": "UncategorizedText", + "element_id": "e16bce609163ec96985ae522ca81502a", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -54,11 +54,11 @@ "filetype": "image/png", "page_number": 1 }, - "text": "Application for Change in Accoun ig Method" + "text": "‘A." 
}, { "type": "NarrativeText", - "element_id": "766cf1d1243ef2cdbb0db5ad32d7f9c9", + "element_id": "c9bc33e913a25aaffa8367aa11bc8ed9", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -73,7 +73,7 @@ "filetype": "image/png", "page_number": 1 }, - "text": "(Section references are to the Internal Revenue Code unless otherwise noted.)" + "text": "Internal Revenue laws of the United States. We need it to ensure that taxpayers are complying with these laws an¢ to allow us to figure and collect the nght amount of tax. You are required to this information." }, { "type": "Title", @@ -96,7 +96,7 @@ }, { "type": "NarrativeText", - "element_id": "4660422c06dddc914ab634c5e4045dec", + "element_id": "5d18f0234e23bc96198c9fb19601056a", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -111,7 +111,26 @@ "filetype": "image/png", "page_number": 1 }, - "text": "We ask for this information to carry out the Internal Revenue laws of the United States. We need it to ensure that taxpayers are complying with these laws an¢ to allow us to figure and collect the nght amount of tax. You are required to give us this information." 
+ "text": "We ask for this information to carry out the" + }, + { + "type": "NarrativeText", + "element_id": "84ab8a2c9ef5f989df144a0ca4576c45", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.png", + "version": 328871203465633719836776597535876541325, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.png" + }, + "date_created": "2023-03-10T09:44:55+00:00", + "date_modified": "2023-03-10T09:44:55+00:00" + }, + "filetype": "image/png", + "page_number": 1 + }, + "text": "give us" }, { "type": "Title", @@ -172,7 +191,7 @@ }, { "type": "NarrativeText", - "element_id": "b3859f2f29884b1d3ba0892e52859a99", + "element_id": "06658399dddcd1d4d4fda8f9fa90fd53", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -187,11 +206,49 @@ "filetype": "image/png", "page_number": 1 }, - "text": "When filing Form 3115, taxpayers are reminded to determine if IRS has published a ruling or procedure dealing with the specific type of change since November 1987 (the current. revision date of Form 3115)" + "text": "filing taxpayers are reminded to determine if IRS has published a ruling or procedure dealing with the specific type of change since November 1987 (the current. 
revision date of Form 3115)" + }, + { + "type": "Title", + "element_id": "cf9c7aa24a26aac4f0ec4b6395cbfdcc", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.png", + "version": 328871203465633719836776597535876541325, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.png" + }, + "date_created": "2023-03-10T09:44:55+00:00", + "date_modified": "2023-03-10T09:44:55+00:00" + }, + "filetype": "image/png", + "page_number": 1 + }, + "text": "When" + }, + { + "type": "UncategorizedText", + "element_id": "2127f2ab4fc4feb4d32460c8317bf02f", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.png", + "version": 328871203465633719836776597535876541325, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.png" + }, + "date_created": "2023-03-10T09:44:55+00:00", + "date_modified": "2023-03-10T09:44:55+00:00" + }, + "filetype": "image/png", + "page_number": 1 + }, + "text": "Form 3115," }, { "type": "NarrativeText", - "element_id": "e5a95dc10d4071983b70898a21f11175", + "element_id": "067f3707c33a901f968188d9592065e9", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -206,7 +263,7 @@ "filetype": "image/png", "page_number": 1 }, - "text": "Generally, applicants must complete Section ‘A. In addition, complete the appropriate sections (B:1 through H) for which a change is desired." + "text": "Generally, applicants must complete Section In addition, complete the appropriate sections (B:1 through H) for which a change is desired." 
}, { "type": "NarrativeText", @@ -267,7 +324,7 @@ }, { "type": "NarrativeText", - "element_id": "b07efea243933525e9ec04a90622508d", + "element_id": "c10c0c63b05172dff854d1d0e570c588", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -282,11 +339,11 @@ "filetype": "image/png", "page_number": 1 }, - "text": "cash method.—If you are required to change your method of accounting under section, 263A (relating to the capitalization and inclusion in inventory costs of certain expenses) or 448 (imiting the use of the cash method of accounting by certain taxpayers) as added by the Tax Reform Act of 1986 (\"Act\"), the change is treated as initiated by the taxpayer, approved by the Commissioner, and the period for taking the adjustments under section 481(a) into account will not exceed 4 years. (Hospitals required to cchange from the cash method under section 448 have 10 years to take the adjustments into account.) Complete Section A and the appropriate sections (B-1 or C and D) for which the change is required" + "text": "Uniform capitalization rules and limitation on cash method.—If you are required to change your method of accounting under section, 263A (relating to the capitalization and inclusion in inventory costs of certain expenses) or 448 (imiting the use of the cash method of accounting by certain taxpayers) as added by the Tax Reform Act of 1986 (\"Act\"), the change is treated as initiated by the taxpayer, approved by the Commissioner, and the period for taking the adjustments under section 481(a) into account will not exceed 4 years. (Hospitals required to cchange from the cash method under section 448 have 10 years to take the adjustments into account.) 
Complete Section A and the appropriate sections (B-1 or C and D) for which the change is required" }, { - "type": "Title", - "element_id": "11c98a9cbd6a200fbc5b93fed15007ac", + "type": "NarrativeText", + "element_id": "fc2252774c86adc22225761fc0bee985", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -301,11 +358,11 @@ "filetype": "image/png", "page_number": 1 }, - "text": "Uniform capitalization rules and limitation on" + "text": "Disregard the instructions under Time and Place for Filing and Late Applications. instead, attach Form 3115 to your income tax return for the year of change; do not file it separately. Also include on a separate statement accompanying the Form 3115 the period over which the section 481(2) adjustment will be taken into account and the basis for that conclusion. Identify the automatic change being made at the top of page 1 of Form 3118 eg. “Automatic Change to Accrual Method—Section 448”). See Temporary Regulations sections 1.263A-1T and 1.448-1T for additional information" }, { "type": "NarrativeText", - "element_id": "39458f370b98a606db29ac6dee975e07", + "element_id": "dbf06d87f9be9871dfd64bd0a7bba567", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -320,11 +377,11 @@ "filetype": "image/png", "page_number": 1 }, - "text": "Disregard the instructions under Time and Place for Filing and Late Applications. instead, attach Form 3115 to your income tax return for the year of change; do not file it separately. Also include on a separate statement accompanying the Form 3115 the period over which the section 481(2) adjustment will be taken into account and" + "text": "change your method of accounting for long-term contracts under section 460, see Notice 87-61 (9/21/87), 1987-38 IRB 40, for the notification procedures that must be followed." 
}, { "type": "NarrativeText", - "element_id": "663dd3791cc24190a45998ca7914f88e", + "element_id": "03c4a83e399f2f669047b3fcfeae5867", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -339,11 +396,11 @@ "filetype": "image/png", "page_number": 1 }, - "text": "the basis for that conclusion. Identify the automatic change being made at the top of page 1 of Form 3118 eg. “Automatic Change to Accrual Method—Section 448”). See Temporary Regulations sections 1.263A-1T and 1.448-1T for additional information" + "text": "Long-term contracts.—If you are required to" }, { - "type": "ListItem", - "element_id": "4e4069c49822cae18add18758619535b", + "type": "NarrativeText", + "element_id": "463ce4107785bb9854ad10b81d93dc7f", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -358,11 +415,11 @@ "filetype": "image/png", "page_number": 1 }, - "text": "Long-term contracts.—If you are required to change your method of accounting for long-term contracts under section 460, see Notice 87-61 (9/21/87), 1987-38 IRB 40, for the notification procedures that must be followed. Other methods. —Unless the Service has published a regulation or procedure to the contrary, all other changes in accounting methods required by the Act are automatically considered to be approved by the Commissioner. Examples of method changes automatically approved by the Commissioner are those changes required to effect: (1) the repeal of the reserve method for bad debts of taxpayers other than financial institutions (Act section 805); (2) the repeal of the installment method for sales under a revolving credit plan (Act section 812); (3) the Inclusion of mcome attributable to the sale or furnishing of utility services no later than the year in which the services were provided to customers (Act section 821); and (4) the repeal of the deduction for qualified discount coupons (Act section 823). Do not file Form 3115 for these changes." + "text": "Other methods. 
—Unless the Service has published a regulation or procedure to the contrary, all other changes in accounting methods required by the Act are automatically considered to be approved by the Commissioner. Examples of method changes automatically approved by the Commissioner are those changes required to effect: (1) the repeal of the reserve method for bad debts of taxpayers other than financial institutions (Act section 805); (2) the repeal of the installment method for sales under a revolving credit plan (Act section 812); (3) the Inclusion of mcome attributable to the sale or furnishing of utility services no later than the year in which the services were provided to customers (Act section 821); and (4) the repeal of the deduction for qualified discount coupons (Act section 823). Do not file Form 3115 for these" }, { - "type": "NarrativeText", - "element_id": "7685df2334a5f6c8c8099dea61a8f1b4", + "type": "Title", + "element_id": "d3eda7d7ed44b4b43fcbfa6f83f6fad3", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -377,7 +434,7 @@ "filetype": "image/png", "page_number": 1 }, - "text": "Long-term contracts.—If you are required to change your method of accounting for long-term contracts under section 460, see Notice 87-61 (9/21/87), 1987-38 IRB 40, for the notification procedures that must be followed." + "text": "changes." 
}, { "type": "Title", @@ -400,7 +457,45 @@ }, { "type": "NarrativeText", - "element_id": "83042962477fa38e403e861f8edfdd4b", + "element_id": "7941057d83c91b25cee4374b3ab06eaa", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.png", + "version": 328871203465633719836776597535876541325, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.png" + }, + "date_created": "2023-03-10T09:44:55+00:00", + "date_modified": "2023-03-10T09:44:55+00:00" + }, + "filetype": "image/png", + "page_number": 1 + }, + "text": "Generally, applicants must file this form within the first 180 days of the tax year in which itis desired to make the change." + }, + { + "type": "NarrativeText", + "element_id": "9dda11db48254f5e0d0000afb5d1dd9b", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.png", + "version": 328871203465633719836776597535876541325, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.png" + }, + "date_created": "2023-03-10T09:44:55+00:00", + "date_modified": "2023-03-10T09:44:55+00:00" + }, + "filetype": "image/png", + "page_number": 1 + }, + "text": "Taxpayers, other than exempt organizations, should file Form 3115 with the Commissioner of Internal Revenue, Attention: CC:C:4, 1111 Constitution Avenue, NW, Washington, DC 20224, Exempt organizations should file with the Assistant Commissioner (Employee Plans and Exempt Organizations), 1111 Constitution Avenue, NW, Washington, DC 20224." + }, + { + "type": "NarrativeText", + "element_id": "4d063cdbd131401fa29e1d0e824dc017", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -415,11 +510,30 @@ "filetype": "image/png", "page_number": 1 }, - "text": "Generally, applicants must file this form within the first 180 days of the tax year in which itis desired to make the change. 
Taxpayers, other than exempt organizations, should file Form 3115 with the Commissioner of Internal Revenue, Attention: CC:C:4, 1111 Constitution Avenue, NW, Washington, DC 20224, Exempt organizations should file with the Assistant Commissioner (Employee Plans and Exempt Organizations), 1111 Constitution Avenue, NW, Washington, DC 20224. You should normally receive an acknowledgment of receipt of your application within 30 days. If you do not hear from IRS within 30 days of submitting your completed Form 3115, you may inquire as to the receipt of your application by writing to: Control Clerk, CC:C:4, Internal Revenue Service, Room 5040, 1111 Constitution Avenue, NW, Washington, DC 20224." + "text": "You should normally receive an acknowledgment of receipt of your application within 30 days. If you do not hear from IRS within 30 days of submitting your completed Form 3115, you may inquire as to the receipt of your application by writing to: Control Clerk, CC:C:4, Internal Revenue Service, Room 5040, 1111 Constitution Avenue, NW, Washington, DC 20224." + }, + { + "type": "Title", + "element_id": "ea325d761f98c6b73320e442b67f2a35", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.png", + "version": 328871203465633719836776597535876541325, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.png" + }, + "date_created": "2023-03-10T09:44:55+00:00", + "date_modified": "2023-03-10T09:44:55+00:00" + }, + "filetype": "image/png", + "page_number": 1 + }, + "text": "an" }, { "type": "NarrativeText", - "element_id": "df0e66d1a434e95e4051ddcb968c94c9", + "element_id": "e3e2ccf4f0d1524d4f5ce42e8f2d1efa", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -434,7 +548,26 @@ "filetype": "image/png", "page_number": 1 }, - "text": "See section 5.03 of Rev. Proc. 84-74 for filing an early application, Note: If this form is being filed in accordance with Rey. Proc. 
74-11, see Section G below." + "text": "See section 5.03 of Rev. Proc. 84-74 for filing early application," + }, + { + "type": "NarrativeText", + "element_id": "11cb901986e9621aadbd76e6f7400809", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.png", + "version": 328871203465633719836776597535876541325, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.png" + }, + "date_created": "2023-03-10T09:44:55+00:00", + "date_modified": "2023-03-10T09:44:55+00:00" + }, + "filetype": "image/png", + "page_number": 1 + }, + "text": "Note: If this form is being filed in accordance with Rey. Proc. 74-11, see Section G below." }, { "type": "Title", @@ -474,6 +607,25 @@ }, "text": "If your application is filed after the 180-day period, itis late. The application will be considered for processing only upon a showing of “good cause” and if it can be shown to the satisfaction of the Commissioner that granting you an extension will not jeopardize the Government's interests. For further information, see Rev. Proc. 79-63." 
}, + { + "type": "Title", + "element_id": "025a65465b6fd9635316e92633b24c7e", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.png", + "version": 328871203465633719836776597535876541325, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.png" + }, + "date_created": "2023-03-10T09:44:55+00:00", + "date_modified": "2023-03-10T09:44:55+00:00" + }, + "filetype": "image/png", + "page_number": 1 + }, + "text": "Identifying Number" + }, { "type": "NarrativeText", "element_id": "ec3c2d03b846d2a186fc9a8f318f688b", @@ -495,7 +647,7 @@ }, { "type": "Title", - "element_id": "025a65465b6fd9635316e92633b24c7e", + "element_id": "ea325d761f98c6b73320e442b67f2a35", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -510,11 +662,11 @@ "filetype": "image/png", "page_number": 1 }, - "text": "Identifying Number" + "text": "an" }, { "type": "NarrativeText", - "element_id": "9240bfa889b87dc2fb3fa746ca4eeeb4", + "element_id": "e72d9c8a779a47796c4362b7885aa80b", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -529,11 +681,11 @@ "filetype": "image/png", "page_number": 1 }, - "text": "Others.-—The employer identification number of an applicant other than an individual should be entered in this block," + "text": "Others.-—The employer identification number applicant other than an individual should be entered in this block," }, { - "type": "ListItem", - "element_id": "f8e8c87d2e958a23153d7f25b159f0ee", + "type": "Title", + "element_id": "28391d3bc64ec15cbb090426b04aa6b7", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -548,11 +700,11 @@ "filetype": "image/png", "page_number": 1 }, - "text": "Individuals.—An individual desiring the change should sign the application. 
Ifthe application pertains to a husband and wife filing a joint Income tax return, the names of both should appear in the heading and both should sign Partnerships.—The form should be signed with the partnership name followed by the signature of one of the general partners and the words “General Partner.” Corporations, cooperatives, and insurance companies.—The form should show the name of the corporation, cooperative, or insurance Company and the signature of the president, vice president, treasurer, assistant treasurer, or chief accounting officer (such as tax officer) authorized tosign, and his or her official title. Receivers, trustees, or assignees must sign any application they are required to file, For a subsidiary corporation filing a consolidated return with its parent, the form should be signed by an officer of the parent corporation, Fiduciaries.—The-form should show the name of the estate or trust and be signed by the fiduciary, personal representative, executor, executrix, administrator, administratrx, etc’, having legal authority to'sign, and his or her ttle. Preparer other than partner, officer, etc.—The signature of the individual preparing the application should appear in the space provided on page 6." 
+ "text": "of" }, { "type": "Title", - "element_id": "55d4f33b09f24dd3b27865a5f34bfeb9", + "element_id": "6a7faddb3ac8b6e14dad65f081428865", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -567,11 +719,11 @@ "filetype": "image/png", "page_number": 1 }, - "text": "Signature tea" + "text": "Signature Individieale" }, { "type": "NarrativeText", - "element_id": "35f1273e073cf159019550bc35b6692c", + "element_id": "48cd565f152ff17bab8eba19eb23db34", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -586,7 +738,159 @@ "filetype": "image/png", "page_number": 1 }, - "text": "Ifthe individual or firm is also authorized to represent the applicant before the IRS, receive a copy of the requested ruling, or perform any other act(s), the power of attorney must reflect such authorization(s)." + "text": "Individuals.—An individual desiring the change should sign the application. Ifthe application pertains to a husband and wife filing a joint Income tax return, the names of both should appear in the heading and both should" + }, + { + "type": "Title", + "element_id": "0b6f395ca14ac202374d5cff678b7115", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.png", + "version": 328871203465633719836776597535876541325, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.png" + }, + "date_created": "2023-03-10T09:44:55+00:00", + "date_modified": "2023-03-10T09:44:55+00:00" + }, + "filetype": "image/png", + "page_number": 1 + }, + "text": "sign" + }, + { + "type": "NarrativeText", + "element_id": "7d3a67d75914a504a52ec53998b796af", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.png", + "version": 328871203465633719836776597535876541325, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.png" + }, + "date_created": "2023-03-10T09:44:55+00:00", + "date_modified": "2023-03-10T09:44:55+00:00" + }, + 
"filetype": "image/png", + "page_number": 1 + }, + "text": "Partnerships.—The form should be signed with the partnership name followed by the signature of one of the general partners and the words “General Partner.”" + }, + { + "type": "NarrativeText", + "element_id": "ee6a9bcef7e5e33bc26f419812e2c77a", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.png", + "version": 328871203465633719836776597535876541325, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.png" + }, + "date_created": "2023-03-10T09:44:55+00:00", + "date_modified": "2023-03-10T09:44:55+00:00" + }, + "filetype": "image/png", + "page_number": 1 + }, + "text": "Corporations, cooperatives, and insurance companies.—The form should show the name of the corporation, cooperative, or insurance Company and the signature of the president, vice president, treasurer, assistant treasurer, or chief accounting officer (such as tax officer) authorized tosign, and his or her official title. 
Receivers, trustees, or assignees must sign any application they are required to file, For a subsidiary corporation filing a consolidated return with its parent, the form should be signed by an officer of the parent corporation," + }, + { + "type": "NarrativeText", + "element_id": "ba7f9dc18be2bf9219e020112b426526", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.png", + "version": 328871203465633719836776597535876541325, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.png" + }, + "date_created": "2023-03-10T09:44:55+00:00", + "date_modified": "2023-03-10T09:44:55+00:00" + }, + "filetype": "image/png", + "page_number": 1 + }, + "text": "Fiduciaries.—The-form should show the" + }, + { + "type": "NarrativeText", + "element_id": "e3c8d21cabd10cc36b53107e58a5be8d", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.png", + "version": 328871203465633719836776597535876541325, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.png" + }, + "date_created": "2023-03-10T09:44:55+00:00", + "date_modified": "2023-03-10T09:44:55+00:00" + }, + "filetype": "image/png", + "page_number": 1 + }, + "text": "name of the estate or trust and be signed by the fiduciary, personal representative, executor, executrix, administrator, administratrx, etc’, having legal authority to'sign, and his or her ttle." 
+ }, + { + "type": "NarrativeText", + "element_id": "52e2b8e4b8527ae448e9db2dfd0c43c7", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.png", + "version": 328871203465633719836776597535876541325, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.png" + }, + "date_created": "2023-03-10T09:44:55+00:00", + "date_modified": "2023-03-10T09:44:55+00:00" + }, + "filetype": "image/png", + "page_number": 1 + }, + "text": "Preparer other than partner, officer, etc.—The signature of the individual preparing the application should appear in the space provided on page 6." + }, + { + "type": "Title", + "element_id": "ca978112ca1bbdcafac231b39a23dc4d", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.png", + "version": 328871203465633719836776597535876541325, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.png" + }, + "date_created": "2023-03-10T09:44:55+00:00", + "date_modified": "2023-03-10T09:44:55+00:00" + }, + "filetype": "image/png", + "page_number": 1 + }, + "text": "a" + }, + { + "type": "NarrativeText", + "element_id": "8200352b4e91b1be4f14e9248d50380a", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.png", + "version": 328871203465633719836776597535876541325, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.png" + }, + "date_created": "2023-03-10T09:44:55+00:00", + "date_modified": "2023-03-10T09:44:55+00:00" + }, + "filetype": "image/png", + "page_number": 1 + }, + "text": "Ifthe individual or firm is also authorized to represent the applicant before the IRS, receive copy of the requested ruling, or perform any other act(s), the power of attorney must reflect such authorization(s)." 
}, { "type": "Title", @@ -628,7 +932,64 @@ }, { "type": "Title", - "element_id": "58703de56debc34a1d68e6ed6f8fd067", + "element_id": "8b838d95f7d4f66b5453307de1353ff4", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.png", + "version": 328871203465633719836776597535876541325, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.png" + }, + "date_created": "2023-03-10T09:44:55+00:00", + "date_modified": "2023-03-10T09:44:55+00:00" + }, + "filetype": "image/png", + "page_number": 1 + }, + "text": "Specific Instructions" + }, + { + "type": "Title", + "element_id": "bc272940e494acf9441070d3eb4b79f6", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.png", + "version": 328871203465633719836776597535876541325, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.png" + }, + "date_created": "2023-03-10T09:44:55+00:00", + "date_modified": "2023-03-10T09:44:55+00:00" + }, + "filetype": "image/png", + "page_number": 1 + }, + "text": "Section A" + }, + { + "type": "NarrativeText", + "element_id": "a6c53a8898025076b8c0397178f95fa3", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.png", + "version": 328871203465633719836776597535876541325, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.png" + }, + "date_created": "2023-03-10T09:44:55+00:00", + "date_modified": "2023-03-10T09:44:55+00:00" + }, + "filetype": "image/png", + "page_number": 1 + }, + "text": "Item 5a, page 1.—“Taxable income or (loss) from operations” is to be entered before application of any net operating loss deduction under section 172(a)" + }, + { + "type": "NarrativeText", + "element_id": "e9278d083996ccb1f39236b8064b28cd", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -643,11 +1004,11 @@ "filetype": "image/png", "page_number": 1 }, - "text": "Specific Instructions Section 
A" + "text": "Item 6, page 2.—The term “gross receipts” Includes total sales (net of returns and allowances) and all amounts received for services. in addition, gross receipts include any income from investments and from incidental or outside sources (e.g., interest, dividends, rents, royalties, and annuities). However, if you area resaler of personal property, exclude from gross receipts any amounts not derived in the ordinary course of a trade or business. Gross receipts do not include amounts received for sales taxes if, tunder the applicable state or local law, the taxis legally imposed on the purchaser of the good or service, and the taxpayer merely collects and remits the tax to the taxing authority." }, { "type": "NarrativeText", - "element_id": "5e7793489f88d7c9187dad66e787898f", + "element_id": "4b4424f821633ea87deab36702d4c113", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -662,6 +1023,6 @@ "filetype": "image/png", "page_number": 1 }, - "text": "Item 5a, page 1.—“Taxable income or (loss) from operations” is to be entered before application of any net operating loss deduction under section 172(a) Item 6, page 2.—The term “gross receipts” Includes total sales (net of returns and allowances) and all amounts received for services. in addition, gross receipts include any income from investments and from incidental or outside sources (e.g., interest, dividends, rents, royalties, and annuities). However, if you area resaler of personal property, exclude from gross receipts any amounts not derived in the ordinary course of a trade or business. Gross receipts do not include amounts received for sales taxes if, tunder the applicable state or local law, the taxis legally imposed on the purchaser of the good or service, and the taxpayer merely collects and remits the tax to the taxing authority. 
Item 7b, page 2.—If item 7b 1s \"Yes,\" indicate ona separate sheet the following for each separate trade or business: Nature of business" + "text": "Item 7b, page 2.—If item 7b 1s \"Yes,\" indicate ona separate sheet the following for each separate trade or business: Nature of business" } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/biomed-api/65/11/main.PMC6312790.pdf.json b/test_unstructured_ingest/expected-structured-output/biomed-api/65/11/main.PMC6312790.pdf.json index af074dbe60..399ba6c1ab 100644 --- a/test_unstructured_ingest/expected-structured-output/biomed-api/65/11/main.PMC6312790.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/biomed-api/65/11/main.PMC6312790.pdf.json @@ -1,7 +1,7 @@ [ { - "type": "UncategorizedText", - "element_id": "0e58869830c7b4461a4d1879223e4139", + "type": "Header", + "element_id": "c1f4b5ba045830c1866db8f8aa0b54ac", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -11,7 +11,7 @@ }, { "type": "NarrativeText", - "element_id": "b0658ce9dccc0acba9a472c2bb992cc9", + "element_id": "869adddb184177031536477262e0dde0", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -21,7 +21,7 @@ }, { "type": "Title", - "element_id": "f2fe9c33b7e8535efebf7c20ebce297c", + "element_id": "e6fa42b5b4d85001b900e47c050b645b", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -40,8 +40,8 @@ "text": "journal homepage: www.elsevier.com/locate/dib" }, { - "type": "Title", - "element_id": "0ccb3a9876bbc64a1ca09fa40c4f844d", + "type": "NarrativeText", + "element_id": "ac01687ab870e4bb6e7313db4654928a", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -70,7 +70,7 @@ "text": "(Jee" }, { - "type": "NarrativeText", + "type": "Title", "element_id": "4f14d967ea87a75ad1acee27ff34e59e", "metadata": { "data_source": {}, @@ -90,114 +90,114 @@ "text": "Department of Chemical, Metallurgical and Materials Engineering, Tshwane University 
of Technology, Pretoria, South Africa" }, { - "type": "NarrativeText", - "element_id": "fbd221e3c1f82c8601661213b98b0962", + "type": "Title", + "element_id": "3d71760ba4f1cc95873ee36178f97d82", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 1 }, - "text": "a r t i c l e i n f o" + "text": "ARTICLE INFO" }, { "type": "NarrativeText", - "element_id": "d6923075e35e5f3296e0d24ceb70a2bb", + "element_id": "fbd221e3c1f82c8601661213b98b0962", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 1 }, - "text": "a b s t r a c t" + "text": "a r t i c l e i n f o" }, { - "type": "UncategorizedText", - "element_id": "c382dd715a85d683f056834c4af7be85", + "type": "Title", + "element_id": "3d1626989d3e923485561f1e5bdeaa58", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 1 }, - "text": "Article history: Received 31 August 2018 Received in revised form 17 November 2018 Accepted 27 November 2018 Available online 30 November 2018" + "text": "ABSTRACT" }, { - "type": "Title", - "element_id": "abe4641521caf8385f30e81099f3a8c6", + "type": "NarrativeText", + "element_id": "d6923075e35e5f3296e0d24ceb70a2bb", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 1 }, - "text": "Keywords: Corrosion Stainless steel Inhibitor Sulphuric acid" + "text": "a b s t r a c t" }, { "type": "NarrativeText", - "element_id": "26c73759c3d3cc29d683910c034432da", + "element_id": "4a03002c97925cd9397927ac823369e7", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 1 }, - "text": "This data article contains data related to the research article entitled “enhanced corrosion resistance of stainless steel Type 316 in sulphuric acid solution using eco-friendly waste product” (Sanni et al., 2018). 
In this data article, a comprehensive effect of waste product and optimized process parameter of the inhibitor in 0.5 M H2SO4 solution was presented using weight loss and potentiody- the inhibitor namic polarization techniques. The presence of (egg shell powder) influenced corrosion resistance of stainless steel. Inhibition efficiency value of 94.74% was recorded as a result of inhibition of the steel by the ionized molecules of the inhibiting compound of the egg shell powder influencing the redox mechan- ism reactions responsible for corrosion and surface deterioration." + "text": "Article history: Received 31 August 2018 Received in revised form 17 November 2018 Accepted 27 November 2018 Available online 30 November 2018" }, { "type": "NarrativeText", - "element_id": "260cf1397ece5718c2d35900917688de", + "element_id": "08bb309957586c280660c11c337dc6d7", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 1 }, - "text": "& 2018 Published by Elsevier Inc. This is an open access article under the CC BY-NC-ND license (http://creativecommons.org/licenses/by-nc-nd/4.0/)." + "text": "Keywords: Corrosion Stainless steel Inhibitor Sulphuric acid" }, { - "type": "Title", - "element_id": "8c625bd30cfb1b77c8ba8d4e863d0bb3", + "type": "NarrativeText", + "element_id": "26c73759c3d3cc29d683910c034432da", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 1 }, - "text": "Specification table" + "text": "This data article contains data related to the research article entitled “enhanced corrosion resistance of stainless steel Type 316 in sulphuric acid solution using eco-friendly waste product” (Sanni et al., 2018). In this data article, a comprehensive effect of waste product and optimized process parameter of the inhibitor in 0.5 M H2SO4 solution was presented using weight loss and potentiody- the inhibitor namic polarization techniques. The presence of (egg shell powder) influenced corrosion resistance of stainless steel. 
Inhibition efficiency value of 94.74% was recorded as a result of inhibition of the steel by the ionized molecules of the inhibiting compound of the egg shell powder influencing the redox mechan- ism reactions responsible for corrosion and surface deterioration." }, { - "type": "Title", - "element_id": "b877cc5d670d770084dcc0bb41ac73a0", + "type": "NarrativeText", + "element_id": "62e4907f12a32a7b9ccd57ed477eb54a", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 1 }, - "text": "Subject area More specific subject area Type of data" + "text": "reactions responsible for corrosion and surface deterioration. © 2018 Published by Elsevier Inc. This is an open access article under the CC BY-NC-ND license" }, { - "type": "Title", - "element_id": "b27e559f6c00d2bde61efba5db252e31", + "type": "NarrativeText", + "element_id": "260cf1397ece5718c2d35900917688de", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 1 }, - "text": "Materials engineering" + "text": "& 2018 Published by Elsevier Inc. This is an open access article under the CC BY-NC-ND license (http://creativecommons.org/licenses/by-nc-nd/4.0/)." }, { "type": "Title", - "element_id": "a2c3879ecb580742973c6a914fb905bb", + "element_id": "5abba9b1f2c341e0b299fa43a90d0e14", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 1 }, - "text": "Surface science and engineering" + "text": "Specification table" }, { - "type": "Title", - "element_id": "1064dcef42380cfdb90c668aa3a670a3", + "type": "NarrativeText", + "element_id": "ac89a2886224c42ad15982cd34421ff8", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 1 }, - "text": "Table and figure" + "text": "Subject area More specific subject area Surface science and engineering Type of data" }, { "type": "Title", @@ -210,14 +210,14 @@ "text": "n Corresponding author. 
tayo.sanni@yahoo.com; SanniO@tut.ac.za" }, { - "type": "Title", - "element_id": "e102dc7c1db28c29d5e4bde8062592ed", + "type": "ListItem", + "element_id": "6190ca95b973d4a03fdf4c3b0b260af0", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 1 }, - "text": "E-mail address: tayo.sanni@yahoo.com (O. Sanni)." + "text": "Corresponding author. tayo.sanni@yahoo.com; SanniO@tut.ac.za E-mail address: tayo.sanni@yahoo.com (O. Sanni)." }, { "type": "NarrativeText", @@ -230,8 +230,8 @@ "text": "https://doi.org/10.1016/j.dib.2018.11.134 2352-3409/& 2018 Published by Elsevier Inc. This is an open access article under the CC BY-NC-ND license (http://creativecommons.org/licenses/by-nc-nd/4.0/)." }, { - "type": "UncategorizedText", - "element_id": "549a2fac47d713cc00f2db498ad6b557", + "type": "Header", + "element_id": "78f135d64d5e1307cac651608256a418", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -249,16 +249,6 @@ }, "text": "O. Sanni, A.P.I. Popoola / Data in Brief 22 (2019) 451–457" }, - { - "type": "NarrativeText", - "element_id": "6928b78d26af54b6acb804ed319b5c05", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "How data were acquired" - }, { "type": "Title", "element_id": "41e0fa358cefcadbb2633ec45ff2d129", @@ -289,235 +279,115 @@ }, "text": "Accessibility Related research article" }, - { - "type": "ListItem", - "element_id": "82bf7851faa53c3a4965d4cdfe8d0bce", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "The cleaned and weighed specimen was suspended in beakers con- taining 0.5 M H2SO4 solution of different concentrations of egg shell powder. The pre-weighed stainless steel samples were retrieved from the test solutions after every 24 h, cleaned appropriately, dried and reweighed. 
Raw, analyzed The difference between the weight at a given time and the initial weight of the specimen was taken as the weight loss, which was used to calculate the corrosion rate and inhibition efficiency. Inhibitor concentration, exposure time Department of Chemical, Metallurgical and Materials Engineering, Tshwane University of Technology, Pretoria, South Africa Data are available within this article O. Sanni, A. P. I. Popoola, and O. S. I. Fayomi, Enhanced corrosion resistance of stainless steel type 316 in sulphuric acid solution using eco-friendly waste product, Results in Physics, 9 (2018) 225–230." - }, - { - "type": "Title", - "element_id": "596eda178f8c5adefbae7cfe1bec78c3", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "Value of the data" - }, - { - "type": "NarrativeText", - "element_id": "682e6210329b84f8b00548088196ffc9", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "(cid:1) Data presented here provide optimum conditions of waste material as inhibitor for stainless steel Type 316 in 0.5 M H2SO4 medium. The given data describe the inhibitive performance of eco-friendly egg shell powder on austenitic stainless steel Type 316 corrosion in sulphuric acid environment." - }, - { - "type": "NarrativeText", - "element_id": "1d61e3468bc681ba1a7e647000c6828c", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "(cid:1) The data obtained for the inhibition of waste product (egg shell powder) on stainless steel Type 316 can be used as basis in determining the inhibitive performance of the same inhibitor in other environments." 
- }, { "type": "NarrativeText", - "element_id": "39b6040280a179e1f8e4f4fb5ec4ae05", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "(cid:1) The data can be used to examine the relationship between the process variable as it affect the" - }, - { - "type": "Title", - "element_id": "1ddde62c3188f81dfc835b6f036f1734", + "element_id": "6928b78d26af54b6acb804ed319b5c05", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "nature of inhibition of metals." + "text": "How data were acquired" }, { - "type": "Title", - "element_id": "1c3f3de4e65aae5bd147f84779712a65", + "type": "Table", + "element_id": "5eb814dac721c11581f011fbca57a17e", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "1. Data" + "text": "How data were acquired Data format Experimental factors Experimental features Data source location Accessibility Related research article The cleaned and weighed specimen was suspended in beakers con- taining 0.5 M H2SO, solution of different concentrations of egg shell powder. The pre-weighed stainless steel samples were retrieved from the test solutions after every 24h, cleaned appropriately, dried and reweighed. Raw, analyzed The difference between the weight at a given time and the initial weight of the specimen was taken as the weight loss, which was used to calculate the corrosion rate and inhibition efficiency. Inhibitor concentration, exposure time Department of Chemical, Metallurgical and Materials Engineering, Tshwane University of Technology, Pretoria, South Africa Data are available within this article O. Sanni, A. P. I. Popoola, and O. S. I. Fayomi, Enhanced corrosion resistance of stainless steel type 316 in sulphuric acid solution using eco-friendly waste product, Results in Physics, 9 (2018) 225-230." 
}, { "type": "NarrativeText", - "element_id": "5034c7315aface0b263361d0eae1dd15", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "The results of the experiment are presented in this session. The results obtained from weight loss method for stainless steel Type 316 immersed in 0.5 M H2SO4 solution in the absence and presence of different concentrations of egg shell powder (ES) are presented in Figs. 1–3 respectively. It can be seen clearly from these Figures that the efficiency of egg shell powder increase with the inhibitor con- centration, The increase in its efficiency could be as a result of increase in the constituent molecule" - }, - { - "type": "Title", - "element_id": "e28e0dc941accc8694040c63091b580c", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": ") g m" - }, - { - "type": "UncategorizedText", - "element_id": "32ebb1abcc1c601ceb9c4e3c4faba0ca", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "(" - }, - { - "type": "Title", - "element_id": "b780e72bd4f737713ae202feb46b5d55", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "s s o" - }, - { - "type": "Title", - "element_id": "acac86c0e609ca906f632b0e2dacccb2", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "l" - }, - { - "type": "Title", - "element_id": "1bd621f0b71079e0948b0aad011a7f4b", + "element_id": "7ce6ee1aa50d28b85acf544f1db3e25c", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "t h g e W" + "text": "The cleaned and weighed specimen was suspended in beakers con- taining 0.5 M H2SO4 solution of different concentrations of egg shell powder. The pre-weighed stainless steel samples were retrieved from the test solutions after every 24 h, cleaned appropriately, dried and reweighed. 
Raw, analyzed The difference between the weight at a given time and the initial weight of the specimen was taken as the weight loss, which was used to calculate the corrosion rate and inhibition efficiency. Inhibitor concentration, exposure time Department of Chemical, Metallurgical and Materials Engineering, Tshwane University of Technology, Pretoria, South Africa Data are available within this article O. Sanni, A. P. I. Popoola, and O. S. I. Fayomi, Enhanced corrosion resistance of stainless steel type 316 in sulphuric acid solution using eco-friendly waste product, Results in Physics, 9 (2018) 225–230." }, { "type": "Title", - "element_id": "de7d1b721a1e0632b7cf04edf5032c8e", + "element_id": "e63f0ed399f0537c9ffeadfcae3baed6", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "i" + "text": "Value of the data" }, { - "type": "Title", - "element_id": "b30b3a63451a0f3f43bad0781c1e9ad8", + "type": "ListItem", + "element_id": "1daeb29ccbc793481f453c7f76b8795b", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "(mg)" + "text": "(cid:1) Data presented here provide optimum conditions of waste material as inhibitor for stainless steel Type 316 in 0.5 M H2SO4 medium. The given data describe the inhibitive performance of eco-friendly egg shell powder on austenitic stainless steel Type 316 corrosion in sulphuric acid environment." }, { - "type": "UncategorizedText", - "element_id": "624b60c58c9d8bfb6ff1886c2fd605d2", + "type": "ListItem", + "element_id": "7aad924d1c00e3d50bc0c24beb00a9e5", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "30" + "text": "(cid:1) The data obtained for the inhibition of waste product (egg shell powder) on stainless steel Type 316 can be used as basis in determining the inhibitive performance of the same inhibitor in other environments." 
}, { - "type": "UncategorizedText", - "element_id": "f5ca38f748a1d6eaf726b8a42fb575c3", + "type": "NarrativeText", + "element_id": "39b6040280a179e1f8e4f4fb5ec4ae05", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "20" + "text": "(cid:1) The data can be used to examine the relationship between the process variable as it affect the" }, { - "type": "UncategorizedText", - "element_id": "4a44dc15364204a80fe80e9039455cc1", + "type": "ListItem", + "element_id": "b6cdef9ac2c39caf23c7413dcdb3c227", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "10" + "text": "© The data can be used to examine the relationship between the process variable as it affect the nature of inhibition of metals." }, { "type": "Title", - "element_id": "d300d49efc4cd0982dd6bc3377759ae8", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "10g 8g 6g 4g 2g Control" - }, - { - "type": "UncategorizedText", - "element_id": "98010bd9270f9b100b6214a21754fd33", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "48" - }, - { - "type": "UncategorizedText", - "element_id": "7b1a278f5abe8e9da907fc9c29dfd432", + "element_id": "1c3f3de4e65aae5bd147f84779712a65", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "96" + "text": "1. Data" }, { - "type": "UncategorizedText", - "element_id": "5ec1a0c99d428601ce42b407ae9c675e", + "type": "NarrativeText", + "element_id": "5034c7315aface0b263361d0eae1dd15", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "144" + "text": "The results of the experiment are presented in this session. 
The results obtained from weight loss method for stainless steel Type 316 immersed in 0.5 M H2SO4 solution in the absence and presence of different concentrations of egg shell powder (ES) are presented in Figs. 1–3 respectively. It can be seen clearly from these Figures that the efficiency of egg shell powder increase with the inhibitor con- centration, The increase in its efficiency could be as a result of increase in the constituent molecule" }, { - "type": "UncategorizedText", - "element_id": "eb3be230bbd2844b1f5d8f2e4fab9ffb", + "type": "Image", + "element_id": "6cbfbefb10bbbc9b57cd22704824934e", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "192" + "text": "Weight loss (mg) 96 144 192 Exposure Time (Hours)" }, { "type": "Title", @@ -530,28 +400,18 @@ "text": "Exposure Time (Hours)" }, { - "type": "UncategorizedText", - "element_id": "25db7b1d2f5780559e1034d72bcb4050", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "Fig. 1. Weight loss versus exposure time for stainless steel presence of ES." - }, - { - "type": "NarrativeText", - "element_id": "cbd563dd2fcd7d0b5a0b2173465fd328", + "type": "FigureCaption", + "element_id": "45cd54c64e38abe8c1128a5979ca8cd5", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "immersed in 0.5 M H2SO4 solution in the absence and" + "text": "Fig. 1. Weight loss versus exposure time for stainless steel immersed in 0.5M H2SO, solution in the absence and presence of ES." }, { "type": "NarrativeText", - "element_id": "9ca201e648ed74cfc838b6661f59addf", + "element_id": "8a54dcaa0e2720786903e26e84bd9e93", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -560,8 +420,8 @@ "text": "O. Sanni, A.P.I. 
Popoola / Data in Brief 22 (2019) 451–457" }, { - "type": "UncategorizedText", - "element_id": "d83c7ee736be931d85b78a4a60881ced", + "type": "Header", + "element_id": "135be522765ce267b8ca6debeeec6dc4", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -569,26 +429,6 @@ }, "text": "453" }, - { - "type": "NarrativeText", - "element_id": "e5d46bc8ceb17f88e1cff33ecac97067", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "Fig. 2. Corrosion rate versus exposure time for stainless steel immersed in 0.5 M H2SO4 solution in the absence and presence of ES." - }, - { - "type": "UncategorizedText", - "element_id": "bbf3f11cb5b43e700273a78d12de55e4", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "%" - }, { "type": "NarrativeText", "element_id": "4f0139b605dfdd9eb93e920a6115e1b5", @@ -599,16 +439,6 @@ }, "text": ") r a e y / m m" }, - { - "type": "UncategorizedText", - "element_id": "32ebb1abcc1c601ceb9c4e3c4faba0ca", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "(" - }, { "type": "NarrativeText", "element_id": "49e7364ce1027887460959b2a757b184", @@ -630,797 +460,337 @@ "text": "i" }, { - "type": "Title", - "element_id": "de7d1b721a1e0632b7cf04edf5032c8e", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "i" - }, - { - "type": "UncategorizedText", - "element_id": "ba5ec51d07a4ac0e951608704431d59a", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": ")" - }, - { - "type": "NarrativeText", - "element_id": "74599fca46202613cccb12e97774b306", + "type": "Image", + "element_id": "84d160dc9075c76de6f6d6c3f2651fe3", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3 }, - "text": "E n o i t i b h n I" + "text": " Corrosion rate (mm/year) 24 48 72 96 120 144 168 
192 Exposure time" }, { "type": "Title", - "element_id": "de7d1b721a1e0632b7cf04edf5032c8e", + "element_id": "239bb77f5ec344ce5e614979b8c49742", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3 }, - "text": "i" + "text": "Exposure time" }, { - "type": "NarrativeText", - "element_id": "bbe120714b80df07396e808f98b3f354", + "type": "FigureCaption", + "element_id": "e5d46bc8ceb17f88e1cff33ecac97067", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3 }, - "text": "y c n e c i f f" - }, - { - "type": "UncategorizedText", - "element_id": "525fbe4b6760bd759bfeeae2ee487f12", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "(mm/year) 100 4 80 4 Efficiency (%) 1 _—__. —o— SS v- —a— 74 —~X_ Senn, —y— ~~. —6~ —__, ~ —o- ol, T T T T T T T 1" - }, - { - "type": "UncategorizedText", - "element_id": "0faf54c7569cac28ec5462f872384f7c", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "2.7" - }, - { - "type": "UncategorizedText", - "element_id": "a97b042d7bd59d92a46e8ab17f7dff73", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "1.8" - }, - { - "type": "UncategorizedText", - "element_id": "8139b33952401b3ee0e2ca84651cb9a1", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "0.9" - }, - { - "type": "UncategorizedText", - "element_id": "ad57366865126e55649ecb23ae1d4888", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "100" - }, - { - "type": "UncategorizedText", - "element_id": "4a44dc15364204a80fe80e9039455cc1", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "10" - }, - { - "type": "UncategorizedText", - "element_id": "f5ca38f748a1d6eaf726b8a42fb575c3", - "metadata": { - "data_source": {}, - 
"filetype": "application/pdf", - "page_number": 3 - }, - "text": "20" - }, - { - "type": "UncategorizedText", - "element_id": "624b60c58c9d8bfb6ff1886c2fd605d2", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "30" - }, - { - "type": "UncategorizedText", - "element_id": "d59eced1ded07f84c145592f65bdf854", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "40" - }, - { - "type": "UncategorizedText", - "element_id": "1a6562590ef19d1045d06c4055742d38", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "50" - }, - { - "type": "UncategorizedText", - "element_id": "ff5a1ae012afa5d4c889c50ad427aaf5", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "70" - }, - { - "type": "UncategorizedText", - "element_id": "39fa9ec190eee7b6f4dff1100d6343e1", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "60" - }, - { - "type": "UncategorizedText", - "element_id": "48449a14a4ff7d79bb7a1b6f3d488eba", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "80" - }, - { - "type": "UncategorizedText", - "element_id": "69f59c273b6e669ac32a6dd5e1b2cb63", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "90" - }, - { - "type": "UncategorizedText", - "element_id": "5feceb66ffc86f38d952786c6d696c79", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "0" - }, - { - "type": "UncategorizedText", - "element_id": "c2356069e9d1e79ca924378153cfbbfb", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "24" - }, - { - "type": "UncategorizedText", - "element_id": "98010bd9270f9b100b6214a21754fd33", - "metadata": { - 
"data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "48" - }, - { - "type": "UncategorizedText", - "element_id": "8722616204217eddb39e7df969e0698a", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "72" - }, - { - "type": "UncategorizedText", - "element_id": "7b1a278f5abe8e9da907fc9c29dfd432", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "96" - }, - { - "type": "Title", - "element_id": "239bb77f5ec344ce5e614979b8c49742", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "Exposure time" - }, - { - "type": "UncategorizedText", - "element_id": "2abaca4911e68fa9bfbf3482ee797fd5", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "120" - }, - { - "type": "UncategorizedText", - "element_id": "5ec1a0c99d428601ce42b407ae9c675e", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "144" - }, - { - "type": "UncategorizedText", - "element_id": "80c3cd40fa35f9088b8741bd8be6153d", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "168" - }, - { - "type": "UncategorizedText", - "element_id": "eb3be230bbd2844b1f5d8f2e4fab9ffb", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "192" - }, - { - "type": "Title", - "element_id": "d300d49efc4cd0982dd6bc3377759ae8", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "10g 8g 6g 4g 2g Control" - }, - { - "type": "UncategorizedText", - "element_id": "85b99d4e3d8e29e46e512f9cca7ba627", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "2g 4g 6g 8g 10g" - }, - { - "type": "UncategorizedText", - "element_id": 
"f5ca38f748a1d6eaf726b8a42fb575c3", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "20" - }, - { - "type": "UncategorizedText", - "element_id": "d59eced1ded07f84c145592f65bdf854", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "40" - }, - { - "type": "UncategorizedText", - "element_id": "39fa9ec190eee7b6f4dff1100d6343e1", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "60" - }, - { - "type": "UncategorizedText", - "element_id": "48449a14a4ff7d79bb7a1b6f3d488eba", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "80" - }, - { - "type": "UncategorizedText", - "element_id": "ad57366865126e55649ecb23ae1d4888", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "100" - }, - { - "type": "UncategorizedText", - "element_id": "2abaca4911e68fa9bfbf3482ee797fd5", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "120" - }, - { - "type": "UncategorizedText", - "element_id": "dbae772db29058a88f9bd830e957c695", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "140" - }, - { - "type": "UncategorizedText", - "element_id": "a512db2741cd20693e4b16f19891e72b", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "160" - }, - { - "type": "UncategorizedText", - "element_id": "7b69759630f869f2723875f873935fed", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "180" - }, - { - "type": "Title", - "element_id": "a955dcf1d740ce40d62415d9f16da436", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "Exposure Time (Hours)" - }, - { - "type": 
"NarrativeText", - "element_id": "950ca7babbae92e76df97f7ee57bc05c", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "Fig. 3. Inhibition efficiency versus exposure time for stainless steel immersed in 0.5 M H2SO4 solution in the presence of ES." - }, - { - "type": "NarrativeText", - "element_id": "83f15bc914c3bfceaa571de50ab77f11", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "number of inhibitor adsorbed on the surface of stainless steel at higher concentration, in order for the active sites of the stainless steel to be protected with the inhibitor molecules. Cathodic and anodic polarized potential are measured in the presence and absence of ES. Fig. 4 shows the cathodic and anodic polarization curves for stainless steel in 0.5 M H2SO4 solution at different ES concentrations. The electrochemical variables such as polarization resistance (PR), corrosion potential (Ecorr), cor- rosion current (icorr), anodic Tafel constant (ba), cathodic Tafel constant (bc) and corrosion rate (mm/ year) values are presented in Table 1. From the polarization curves and electrochemical parameter, icorr value decreased with the addition of inhibitor in 0.5 M H2SO4. Conversely, the icorr further decrease with an increase in inhibitor concentration indicating that the inhibition effects increase with an increase in the egg shell concentration. The process of egg shell inhibition could be attributed to the formation of egg shell powder adsorbed on stainless steel surface protecting corrosion of stainless steel in H2SO4 medium. The likely mechanism is the egg shell adsorption on stainless steel surface through the heteroatoms electron pair and the conjugated systems in egg shell molecular structure as shown in Fig. 1. 
When the concentration of inhibitor was increased from 2 to 10 g, the corrosion rate values drastically decreased this result show that waste egg shell powder is an effective corrosion inhibitor for stainless steel in H2SO4 solution. The shift in corrosion potential of stainless steel from Tafel curves and electrochemical data indicate that the inhibitor is a mixed-type corrosion inhibitor." - }, - { - "type": "UncategorizedText", - "element_id": "48f89b630677c2cbb70e2ba05bf7a363", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "454" - }, - { - "type": "NarrativeText", - "element_id": "9ca201e648ed74cfc838b6661f59addf", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "O. Sanni, A.P.I. Popoola / Data in Brief 22 (2019) 451–457" - }, - { - "type": "FigureCaption", - "element_id": "27b45633a0f31b9e01d179d70d7dc282", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "5 1 os = — 10; =o ° © —\" 205 i —~é é —ip a5 — Control -2 — & 2.5 T T T 0.0000001 + —-0.00001 0.001 O14 Current Density (A/cm2)" - }, - { - "type": "UncategorizedText", - "element_id": "f0e5c879f7d220552d8ad5b3503bd038", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "Fig. 4. Anodic and cathodic polarization curve of stainless steel in 0.5 M H2SO4 solution in the presence and absence of ES." - }, - { - "type": "UncategorizedText", - "element_id": "c1589916b4d51307d5d804bbf911ea17", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "Table 1 Potentiodynamic polarization data for stainless steel in the absence and presence of ES in 0.5 M H2SO4 solution." 
- }, - { - "type": "Table", - "element_id": "9270ab0a1b3ba26a16991abcd0b45dfe", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "Inhibitor be (V/dec) ba (V/dec) Ecorr (V) icorr (A/cm?) Polarization Corrosion concentration (g) resistance (Q) rate (mm/year) oO 0.0335 0.0409 0.0003 24.0910 2.8163 2 1.9460 0.0596 0.0002 121.440 1.5054 4 0.0163 0.2369 0.0001 42.121 0.9476 6 0.3233 0.0540 5.39E-05 373.180 0.4318 8 0.1240 0.0556 5.46E-05 305.650 0.3772 10 0.0382 0.0086 1.24E-05 246.080 0.0919" + "text": "Fig. 2. Corrosion rate versus exposure time for stainless steel immersed in 0.5 M H2SO4 solution in the absence and presence of ES." }, { - "type": "Title", - "element_id": "362d4a20958df0c88550b9e5d1f2ef5b", + "type": "UncategorizedText", + "element_id": "ad57366865126e55649ecb23ae1d4888", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 3 }, - "text": "Inhibitor concentration (g)" + "text": "100" }, { "type": "UncategorizedText", - "element_id": "9492908fadeab22ca81f18f2ba4f4f35", + "element_id": "57e2eb94df928d0cf17b2c0d41ae042e", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 3 }, - "text": "0 2 4 6 8 10" + "text": "100 4" }, { - "type": "Title", - "element_id": "bcf00b4904f5661d6baef52e7e09e9b1", + "type": "Image", + "element_id": "0616fd3aee2db0cdd1a1565987b925ae", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 3 }, - "text": "bc (V/dec)" + "text": " 80 4 Inhibition Efficiency (%) a Ss 1 _—__. —o— 4g SS v- —a— 6g 74 —~X_ Senn, —y— 8g ~~. 
—6~ 10g —__, ~ —o- 2g ol, T T T T T T T 1 20 40 60 80 100 120 140 160 180 Exposure Time 1e (Hours)" }, { - "type": "UncategorizedText", - "element_id": "12751f842ba5664e7ad255016dbe371b", + "type": "Title", + "element_id": "a955dcf1d740ce40d62415d9f16da436", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 3 }, - "text": "0.0335 1.9460 0.0163 0.3233 0.1240 0.0382" + "text": "Exposure Time (Hours)" }, { - "type": "Title", - "element_id": "12e486f4a9b3a1805bf7e95b5d01847b", + "type": "FigureCaption", + "element_id": "950ca7babbae92e76df97f7ee57bc05c", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 3 }, - "text": "ba (V/dec)" + "text": "Fig. 3. Inhibition efficiency versus exposure time for stainless steel immersed in 0.5 M H2SO4 solution in the presence of ES." }, { - "type": "UncategorizedText", - "element_id": "727d4758bcfadaaf5156b8682cd39810", + "type": "NarrativeText", + "element_id": "83f15bc914c3bfceaa571de50ab77f11", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 3 }, - "text": "0.0409 0.0596 0.2369 0.0540 0.0556 0.0086" + "text": "number of inhibitor adsorbed on the surface of stainless steel at higher concentration, in order for the active sites of the stainless steel to be protected with the inhibitor molecules. Cathodic and anodic polarized potential are measured in the presence and absence of ES. Fig. 4 shows the cathodic and anodic polarization curves for stainless steel in 0.5 M H2SO4 solution at different ES concentrations. The electrochemical variables such as polarization resistance (PR), corrosion potential (Ecorr), cor- rosion current (icorr), anodic Tafel constant (ba), cathodic Tafel constant (bc) and corrosion rate (mm/ year) values are presented in Table 1. From the polarization curves and electrochemical parameter, icorr value decreased with the addition of inhibitor in 0.5 M H2SO4. 
Conversely, the icorr further decrease with an increase in inhibitor concentration indicating that the inhibition effects increase with an increase in the egg shell concentration. The process of egg shell inhibition could be attributed to the formation of egg shell powder adsorbed on stainless steel surface protecting corrosion of stainless steel in H2SO4 medium. The likely mechanism is the egg shell adsorption on stainless steel surface through the heteroatoms electron pair and the conjugated systems in egg shell molecular structure as shown in Fig. 1. When the concentration of inhibitor was increased from 2 to 10 g, the corrosion rate values drastically decreased this result show that waste egg shell powder is an effective corrosion inhibitor for stainless steel in H2SO4 solution. The shift in corrosion potential of stainless steel from Tafel curves and electrochemical data indicate that the inhibitor is a mixed-type corrosion inhibitor." }, { - "type": "Title", - "element_id": "7bc31ed7ab5a625735657499f636c1f2", + "type": "Header", + "element_id": "8d9bcdac558e606c913189b6ce8db44c", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "Ecorr (V)" + "text": "454" }, { - "type": "UncategorizedText", - "element_id": "2a789110c863b30156d63234c8a51477", + "type": "NarrativeText", + "element_id": "8a54dcaa0e2720786903e26e84bd9e93", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "(cid:3) 0.9393 (cid:3) 0.8276 (cid:3) 0.8825 (cid:3) 0.8027 (cid:3) 0.5896 (cid:3) 0.5356" + "text": "O. Sanni, A.P.I. 
Popoola / Data in Brief 22 (2019) 451–457" }, { - "type": "Title", - "element_id": "6978574f5e6e70a2883ea5ea51aa34f7", + "type": "Image", + "element_id": "b5ee6af3d776b0bbd2e581a3ab2ab2e1", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "icorr (A/cm2)" + "text": "Potential (Vv)nm°in°}aryT T T0.00001 0.001 olCurrent Density (A/cm2)" }, { - "type": "UncategorizedText", - "element_id": "d71f426079cb8c2bb3d960ce1e23d290", + "type": "FigureCaption", + "element_id": "e8f34726e919c7e2f4d00f6fcf511ef8", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "0.0003 0.0002 0.0001 5.39E-05 5.46E-05 1.24E-05" + "text": "Fig. 4. Anodic and cathodic polarization curve of stainless steel in 0.5 M H2SO4 solution in the presence and absence of ES." }, { - "type": "Title", - "element_id": "7507a06cf675785949d6312f1776e444", + "type": "UncategorizedText", + "element_id": "c1589916b4d51307d5d804bbf911ea17", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "Polarization resistance (Ω)" + "text": "Table 1 Potentiodynamic polarization data for stainless steel in the absence and presence of ES in 0.5 M H2SO4 solution." 
}, { "type": "UncategorizedText", - "element_id": "1695e2ad2c62a337b135afbfc79ef69d", + "element_id": "9492908fadeab22ca81f18f2ba4f4f35", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "24.0910 121.440 42.121 373.180 305.650 246.080" + "text": "0 2 4 6 8 10" }, { "type": "Title", - "element_id": "6d9d421c5383a3abfc3ff6f15c0b16cc", + "element_id": "362d4a20958df0c88550b9e5d1f2ef5b", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "Corrosion rate (mm/year)" + "text": "Inhibitor concentration (g)" }, { - "type": "UncategorizedText", - "element_id": "48bbf8e8b874e0e1f32be15f6c07c11c", + "type": "Table", + "element_id": "9270ab0a1b3ba26a16991abcd0b45dfe", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "2.8163 1.5054 0.9476 0.4318 0.3772 0.0919" + "text": "Inhibitor be (V/dec) ba (V/dec) Ecorr (V) icorr (A/cm?) Polarization Corrosion concentration (g) resistance (Q) rate (mm/year) oO 0.0335 0.0409 0.0003 24.0910 2.8163 2 1.9460 0.0596 0.0002 121.440 1.5054 4 0.0163 0.2369 0.0001 42.121 0.9476 6 0.3233 0.0540 5.39E-05 373.180 0.4318 8 0.1240 0.0556 5.46E-05 305.650 0.3772 10 0.0382 0.0086 1.24E-05 246.080 0.0919" }, { - "type": "NarrativeText", - "element_id": "ef5851c1e7629b7329ac014d7fb9e9e1", + "type": "UncategorizedText", + "element_id": "12751f842ba5664e7ad255016dbe371b", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "The plot of inhibitor concentration over degree of surface coverage versus inhibitor concentration gives a straight line as shown in Fig. 5. The strong correlation reveals that egg shell adsorption on stainless surface in 0.5 M H2SO4 follow Langmuir adsorption isotherm. Figs. 6–8 show the SEM/EDX surface morphology analysis of stainless steel. Figs. 
7 and 8 are the SEM/EDX images of the stainless steel specimens without and with inhibitor after weight loss experiment in sulphuric acid medium. The stainless steel surface corrosion product layer in the absence of inhibitor was porous and as a result gives no corrosion protection. With the presence of ES, corrosion damage was minimized, with an evidence of ES present on the metal surface as shown in Fig. 8." + "text": "0.0335 1.9460 0.0163 0.3233 0.1240 0.0382" }, { - "type": "UncategorizedText", - "element_id": "4a166cad507ccd016e6ad2d8652111e5", + "type": "Title", + "element_id": "bcf00b4904f5661d6baef52e7e09e9b1", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "0 / C" + "text": "bc (V/dec)" }, { "type": "UncategorizedText", - "element_id": "6b51d431df5d7f141cbececcf79edf3d", + "element_id": "727d4758bcfadaaf5156b8682cd39810", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "12" + "text": "0.0409 0.0596 0.2369 0.0540 0.0556 0.0086" }, { - "type": "UncategorizedText", - "element_id": "4a44dc15364204a80fe80e9039455cc1", + "type": "Title", + "element_id": "12e486f4a9b3a1805bf7e95b5d01847b", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "10" + "text": "ba (V/dec)" }, { - "type": "UncategorizedText", - "element_id": "2c624232cdd221771294dfbb310aca00", + "type": "Title", + "element_id": "7bc31ed7ab5a625735657499f636c1f2", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "8" + "text": "Ecorr (V)" }, { "type": "UncategorizedText", - "element_id": "e7f6c011776e8db7cd330b54174fd76f", + "element_id": "2a789110c863b30156d63234c8a51477", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "6" + "text": "(cid:3) 0.9393 (cid:3) 0.8276 (cid:3) 0.8825 (cid:3) 0.8027 (cid:3) 0.5896 (cid:3) 0.5356" }, { "type": "UncategorizedText", - "element_id": 
"4b227777d4dd1fc61c6f884f48641d02", + "element_id": "d71f426079cb8c2bb3d960ce1e23d290", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "4" + "text": "0.0003 0.0002 0.0001 5.39E-05 5.46E-05 1.24E-05" }, { - "type": "UncategorizedText", - "element_id": "d4735e3a265e16eee03f59718b9b5d03", + "type": "Title", + "element_id": "6978574f5e6e70a2883ea5ea51aa34f7", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "2" + "text": "icorr (A/cm2)" }, { "type": "UncategorizedText", - "element_id": "a0dfa682f99b0794f40f195f9a7adfcd", + "element_id": "1695e2ad2c62a337b135afbfc79ef69d", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "—=—Cc/0 2+ T T T 1" + "text": "24.0910 121.440 42.121 373.180 305.650 246.080" }, { - "type": "UncategorizedText", - "element_id": "1797d9b8b07f302836186c20a19ebd0b", + "type": "Title", + "element_id": "7507a06cf675785949d6312f1776e444", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "C/0" + "text": "Polarization resistance (Ω)" }, { "type": "UncategorizedText", - "element_id": "d4735e3a265e16eee03f59718b9b5d03", + "element_id": "48bbf8e8b874e0e1f32be15f6c07c11c", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "2" + "text": "2.8163 1.5054 0.9476 0.4318 0.3772 0.0919" }, { - "type": "UncategorizedText", - "element_id": "4b227777d4dd1fc61c6f884f48641d02", + "type": "Title", + "element_id": "6d9d421c5383a3abfc3ff6f15c0b16cc", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "4" + "text": "Corrosion rate (mm/year)" }, { - "type": "UncategorizedText", - "element_id": "e7f6c011776e8db7cd330b54174fd76f", + "type": "NarrativeText", + "element_id": "ef5851c1e7629b7329ac014d7fb9e9e1", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "6" + "text": 
"The plot of inhibitor concentration over degree of surface coverage versus inhibitor concentration gives a straight line as shown in Fig. 5. The strong correlation reveals that egg shell adsorption on stainless surface in 0.5 M H2SO4 follow Langmuir adsorption isotherm. Figs. 6–8 show the SEM/EDX surface morphology analysis of stainless steel. Figs. 7 and 8 are the SEM/EDX images of the stainless steel specimens without and with inhibitor after weight loss experiment in sulphuric acid medium. The stainless steel surface corrosion product layer in the absence of inhibitor was porous and as a result gives no corrosion protection. With the presence of ES, corrosion damage was minimized, with an evidence of ES present on the metal surface as shown in Fig. 8." }, { "type": "UncategorizedText", - "element_id": "2c624232cdd221771294dfbb310aca00", + "element_id": "e2b6d7e2ab125149fa820500cedfffbb", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "8" + "text": "—=—Cc/0" }, { "type": "UncategorizedText", - "element_id": "4a44dc15364204a80fe80e9039455cc1", + "element_id": "1797d9b8b07f302836186c20a19ebd0b", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "10" + "text": "C/0" }, { - "type": "Title", - "element_id": "c74caf15453477bf544f86e069d90da7", + "type": "Image", + "element_id": "330ac6774a7bcf85ad0993abaab2a475", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "Concentration (g)" + "text": " 12 2+ T T T 1 2 4 6 8 10 Concentration (g)" }, { - "type": "NarrativeText", + "type": "FigureCaption", "element_id": "8e9636a780701abc4f16c3f890b8a83f", "metadata": { "data_source": {}, @@ -1430,8 +800,8 @@ "text": "Fig. 5. Langmuir adsorption isotherm of ES." 
}, { - "type": "NarrativeText", - "element_id": "9ca201e648ed74cfc838b6661f59addf", + "type": "Header", + "element_id": "8a54dcaa0e2720786903e26e84bd9e93", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1440,8 +810,8 @@ "text": "O. Sanni, A.P.I. Popoola / Data in Brief 22 (2019) 451–457" }, { - "type": "UncategorizedText", - "element_id": "f626051bc94422f26f4b774a2bca105e", + "type": "Header", + "element_id": "b5c1fe3f2fa0ef8280a53620dcb31175", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1450,37 +820,37 @@ "text": "455" }, { - "type": "FigureCaption", - "element_id": "273fb301b173075f79b2cbdab962e2ff", + "type": "Image", + "element_id": "caa364fead90039aae1f13d64dcb8b37", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5 }, - "text": "SEM HV: Q0KY WD: 14.89 rmrm ‘9EM MAO: 209 x Det: DOE Pectomsence In nanospact" + "text": "SEM HV: Q0KY WD: 14.89 rmrm‘DEM MAO: 209 x ‘Dor Pecforsence In nenospact" }, { - "type": "FigureCaption", - "element_id": "520d1da08c86ce165cd2843e2dc27f98", + "type": "Image", + "element_id": "a0463ca888a6f2c8c3ba40ba47be0f2f", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5 }, - "text": "SEMHV: 20.0KV WD: 15.54 mm EM ING: ACO x Dei: OSE" + "text": "gEOOwaeSemny. z00RV | WD: 1424 renn rtirint VEoa3 Tescan20 yin Fertormaros in nancepace|" }, { - "type": "FigureCaption", - "element_id": "d04d110c16a4ebc184fa130f09b8d423", + "type": "Image", + "element_id": "88301d6b47b17df03b78789b9890a6f1", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5 }, - "text": "Sem ny. 200 Rv" + "text": "°@¢Naafe«MgsSEM HY: 20.0KV 7 ETOP LU ULL UL OCT 0BEM IAAG: 400 x a" }, { - "type": "NarrativeText", + "type": "FigureCaption", "element_id": "ccc8ab2aeabd9a0f745b9f0f6fcbef6e", "metadata": { "data_source": {}, @@ -1490,7 +860,7 @@ "text": "Fig. 7. SEM/EDX image of stainless steel immersed in 0.5 M H2SO4 solution without inhibitor." 
}, { - "type": "NarrativeText", + "type": "FigureCaption", "element_id": "6121f41a05c15afa2efe50af3e838da4", "metadata": { "data_source": {}, @@ -1500,7 +870,7 @@ "text": "Fig. 6. SEM/EDX image of as-received stainless steel." }, { - "type": "NarrativeText", + "type": "FigureCaption", "element_id": "d8bc58d446376a881b51208b9a8ee7b7", "metadata": { "data_source": {}, @@ -1510,8 +880,8 @@ "text": "Fig. 8. SEM/EDX image of stainless steel immersed in 0.5 M H2SO4 solution with the presence of inhibitor." }, { - "type": "UncategorizedText", - "element_id": "b3a8e0e1f9ab1bfe3a36f231f676f78b", + "type": "Header", + "element_id": "cdfba543ee8ef7fdb3d8b587648cc22d", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1520,8 +890,8 @@ "text": "456" }, { - "type": "NarrativeText", - "element_id": "9ca201e648ed74cfc838b6661f59addf", + "type": "Header", + "element_id": "8a54dcaa0e2720786903e26e84bd9e93", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1530,8 +900,8 @@ "text": "O. Sanni, A.P.I. Popoola / Data in Brief 22 (2019) 451–457" }, { - "type": "ListItem", - "element_id": "a80826543c9e0d0e9f6c2108ae3c3f73", + "type": "Title", + "element_id": "e00efc537994ab576eaec5a387a5ebc0", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1540,8 +910,8 @@ "text": "2. 
Experimental design, materials and methods" }, { - "type": "Title", - "element_id": "90b8c00ff7a1b170a14695aa51629f14", + "type": "NarrativeText", + "element_id": "d277e2ba1e8cbda383b0e51703c281c8", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1551,7 +921,7 @@ }, { "type": "NarrativeText", - "element_id": "7c3b7c8c2993a59e71e009d051edd727", + "element_id": "c90848f07a922eff3615e5aa1ee78a2f", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1561,17 +931,7 @@ }, { "type": "FigureCaption", - "element_id": "060e14f01e484ba252e902cd5c6f94f9", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 6 - }, - "text": "ou H,;COCHNY OH" - }, - { - "type": "Title", - "element_id": "1dc2692eee9b01e9a960f80c4dabe07b", + "element_id": "c07eeb615f8b0f2d544348b7f0655301", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1580,8 +940,8 @@ "text": "Fig. 9. Chemical structure of egg shell powder." }, { - "type": "Title", - "element_id": "b4a533760fabf85f66294a0441dacd1e", + "type": "NarrativeText", + "element_id": "63584e8d8b4c14d1542778c155ee4b78", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1610,14 +970,14 @@ "text": "The corrosion rate (CR) was calculated using Eq. (1) [1–5]" }, { - "type": "Title", - "element_id": "cecb8b44c9af4b76e85155170c509729", + "type": "NarrativeText", + "element_id": "1cf628987e0d8ee743a4fd01f662cc01", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 6 }, - "text": "Corrosion rate CRð" + "text": ". 87.6W Corrosion rate(CR) = (ar" }, { "type": "UncategorizedText", @@ -1670,24 +1030,14 @@ "text": "where: W is weight loss in mg, A is specimen surface area, T is immersion period in hours and D is the specimen density. From the corrosion rate, the surface coverage (θ) and inhibition efficiencies (IE %) were determined using Eqs. 
(2) and (3) respectively" }, { - "type": "Title", - "element_id": "62127212535b62092159e4fe305c868d", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 6 - }, - "text": "θ ¼ CRo (cid:3) CR" - }, - { - "type": "Title", - "element_id": "5a6824cbd64b72c37057f7d1dbee2798", + "type": "Formula", + "element_id": "59664b2fe1b21e796c905c904f07faae", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 6 }, - "text": "CRo" + "text": "~ CRo" }, { "type": "Title", @@ -1700,54 +1050,14 @@ "text": "ð2Þ" }, { - "type": "Title", - "element_id": "c31b73fca4f97bb7e95a3d8634826d32", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 6 - }, - "text": "IE ð%Þ ¼ CRo (cid:3) CR" - }, - { - "type": "Title", - "element_id": "c13539d1568999137c4e0354795cd37b", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 6 - }, - "text": "CR" - }, - { - "type": "Title", - "element_id": "5a6824cbd64b72c37057f7d1dbee2798", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 6 - }, - "text": "CRo" - }, - { - "type": "Title", - "element_id": "2d711642b726b04401627ca9fbac32f5", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 6 - }, - "text": "x" - }, - { - "type": "UncategorizedText", - "element_id": "3a81feba075b8ca26d6f86f392ff06df", + "type": "Formula", + "element_id": "2ceed7a728acd831c0c4c14fc95a3db7", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 6 }, - "text": "100 1" + "text": "CRo=CR , 100 IE (0) = CR" }, { "type": "Title", @@ -1761,7 +1071,7 @@ }, { "type": "NarrativeText", - "element_id": "118f0531277e022b44f152b0bf2dee7c", + "element_id": "4e14cf7db9d9e827482861e7576a1d07", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1770,8 +1080,8 @@ "text": "where: CRo and CR are the corrosion rate in absence and presence of inhibitor 
respectively." }, { - "type": "Title", - "element_id": "6aa7f759e077aa037614e7f42897f09a", + "type": "NarrativeText", + "element_id": "5dda1fad7e503afe6240d736d50bbe7a", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1800,8 +1110,8 @@ "text": "O. Sanni, A.P.I. Popoola / Data in Brief 22 (2019) 451–457" }, { - "type": "UncategorizedText", - "element_id": "353767b239099863e13ca954e20a66c9", + "type": "Header", + "element_id": "b2dc92f9e9858319664f918c69457257", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1811,7 +1121,7 @@ }, { "type": "NarrativeText", - "element_id": "24dcddab57a1cab7266a3c6b536ad2ff", + "element_id": "01f3f73499621b0a04142f29982336c1", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1821,7 +1131,7 @@ }, { "type": "Title", - "element_id": "c9015d53b90846454375a2fdf2829c66", + "element_id": "9619869f5960ea0375b649dd8cc388a5", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1831,27 +1141,17 @@ }, { "type": "NarrativeText", - "element_id": "63cd602e78daef9ac25a20bbab27ecbc", + "element_id": "dbfead4a6bc5e94c6d8f7de9666b6f30", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 7 }, - "text": "This work was supported by the National Research Foundation of South Africa and the Tshwane" + "text": "This work was supported by the National Research Foundation of South Africa and the Tshwane University of Technology Pretoria South Africa." }, { "type": "Title", - "element_id": "287fb148184f12ff62e9b0207567dac7", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "University of Technology Pretoria South Africa." 
- }, - { - "type": "NarrativeText", - "element_id": "d202816913e482abce90d70d88f202c3", + "element_id": "81db7fab0806640b0cbbac862671704f", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1861,23 +1161,13 @@ }, { "type": "NarrativeText", - "element_id": "d434a0e19d0d34e92936b9566e1ebb45", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "Transparency document associated with this article can be found in the online version at https://doi." - }, - { - "type": "UncategorizedText", - "element_id": "2ca250dde10d732278a9fa586a97e40a", + "element_id": "eaf72c6c69d317c502026ecf01d28b09", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 7 }, - "text": "org/10.1016/j.dib.2018.11.134." + "text": "Transparency document associated with this article can be found in the online version at https://doi. org/10.1016/j.dib.2018.11.134." }, { "type": "Title", @@ -1890,68 +1180,38 @@ "text": "References" }, { - "type": "NarrativeText", - "element_id": "d844a31ead19b2e2fae786d2a5495072", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "[1] O. Sanni, A.P.I. Popoola, O.S.I. Fayomi, Enhanced corrosion resistance of stainless steel type 316 in sulphuric acid solution" - }, - { - "type": "NarrativeText", - "element_id": "d0be94eaaf9c0f43bc51381f031e1381", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "using eco-friendly waste product, Results Phys. 9 (2018) 225–230." - }, - { - "type": "NarrativeText", - "element_id": "7e9cfcc1c32c353e319aae7d9be537bd", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "[2] O. Sanni, A.P.I. Popoola, A. 
Kolesnikov, Constitutive modeling for prediction of optimal process parameters in corrosion" - }, - { - "type": "NarrativeText", - "element_id": "c00e8be0806aa2ded72da0ef746a4291", + "type": "ListItem", + "element_id": "e275b10ccd88f5d2dbf9f2b2432eb64f", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 7 }, - "text": "inhibition of austenitic stainless steel (Type 316)/acidic medium, Mater. Res. Express. 5 (10) (2018) 1–15." + "text": "[1] 0. Sanni, A.P.I. Popoola, O.S.I. Fayomi, Enhanced corrosion resistance of stainless steel type 316 in sulphuric acid solution using eco-friendly waste product, Results Phys. 9 (2018) 225-230." }, { - "type": "NarrativeText", - "element_id": "1d76a4bb6ba7984cea4548ab574beb8f", + "type": "ListItem", + "element_id": "5068dd4538c596c1d123fd612bdb99e3", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 7 }, - "text": "[3] O. Sanni, A.P.I. Popoola, O.S.I. Fayomi, The inhibitive study of egg shell powder on UNS N08904 austenitic stainless steel" + "text": "[2] O. Sanni, A.P.I. Popoola, A. Kolesnikov, Constitutive modeling for prediction of optimal process parameters in corrosion inhibition of austenitic stainless steel (Type 316)/acidic medium, Mater. Res. Express. 5 (10) (2018) 1-15." }, { - "type": "NarrativeText", - "element_id": "ffd9e4babdf76600a881851ebbf35d3f", + "type": "ListItem", + "element_id": "76eb86296cfb136b12d4606217bd3ae3", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 7 }, - "text": "corrosion in chloride solution, Def. Technol. 14 (2018) 463–468." + "text": "[3] O. Sanni, A.P.I. Popoola, O.S.I. Fayomi, The inhibitive study of egg shell powder on UNS N08904 austenitic stainless steel corrosion in chloride solution, Def. Technol. 14 (2018) 463-468." 
}, { - "type": "NarrativeText", - "element_id": "dd7f4838500dd709556225fa3f6b7339", + "type": "ListItem", + "element_id": "a3b65d4f88d6909004419ec92682d14a", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1960,7 +1220,7 @@ "text": "[4] O. Sanni, A.P.I. Popoola, O.S.I. Fayomi, C.A. Loto, A comparative study of inhibitive effect of waste product on stainless steel corrosion in sodium chloride/sulfuric acid environments, Metallogr. Microstruct. Anal. (2018) 1–17. https://doi.org/10.1007/ s13632-018-0495-5." }, { - "type": "NarrativeText", + "type": "ListItem", "element_id": "3cd4caf23cd72a06fbf01b16df13ec1f", "metadata": { "data_source": {}, diff --git a/test_unstructured_ingest/expected-structured-output/biomed-api/75/29/main.PMC6312793.pdf.json b/test_unstructured_ingest/expected-structured-output/biomed-api/75/29/main.PMC6312793.pdf.json index 59ec34c634..d470025d1e 100644 --- a/test_unstructured_ingest/expected-structured-output/biomed-api/75/29/main.PMC6312793.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/biomed-api/75/29/main.PMC6312793.pdf.json @@ -1,7 +1,7 @@ [ { - "type": "UncategorizedText", - "element_id": "cfb3400e6eb0487eeb704674d40bf85c", + "type": "Header", + "element_id": "0af8327dc6c8a1694bd0fc75da243db4", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -11,7 +11,7 @@ }, { "type": "NarrativeText", - "element_id": "b0658ce9dccc0acba9a472c2bb992cc9", + "element_id": "869adddb184177031536477262e0dde0", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -21,7 +21,7 @@ }, { "type": "Title", - "element_id": "f2fe9c33b7e8535efebf7c20ebce297c", + "element_id": "e6fa42b5b4d85001b900e47c050b645b", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -41,7 +41,7 @@ }, { "type": "Title", - "element_id": "0ccb3a9876bbc64a1ca09fa40c4f844d", + "element_id": "ac01687ab870e4bb6e7313db4654928a", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -70,7 +70,7 @@ 
"text": "(eee" }, { - "type": "NarrativeText", + "type": "Title", "element_id": "adf50fc70e660740d796f43a2ba5f500", "metadata": { "data_source": {}, @@ -80,8 +80,8 @@ "text": "Sarang Kulkarni a,b,c,n, Mohan Krishnamoorthy d,e, Abhiram Ranade f, Andreas T. Ernst c, Rahul Patil b" }, { - "type": "NarrativeText", - "element_id": "dcedfc380a2be599bf69af84d49d4803", + "type": "UncategorizedText", + "element_id": "cd24a5d2989d27bd46eede4ea54cb41e", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -89,6 +89,16 @@ }, "text": "a IITB-Monash Research Academy, IIT Bombay, Powai, Mumbai 400076, India b SJM School of Management, IIT Bombay, Powai, Mumbai 400076, India c School of Mathematical Sciences, Monash University, Clayton, VIC 3800, Australia d Department of Mechanical and Aerospace Engineering, Monash University, Clayton, VIC 3800, Australia e School of Information Technology and Electrical Engineering, The University of Queensland, QLD 4072, Australia f Department of Computer Science and Engineering, IIT Bombay, Powai, Mumbai 400076, India" }, + { + "type": "Title", + "element_id": "3d71760ba4f1cc95873ee36178f97d82", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "ARTICLE INFO" + }, { "type": "NarrativeText", "element_id": "fbd221e3c1f82c8601661213b98b0962", @@ -110,8 +120,8 @@ "text": "a b s t r a c t" }, { - "type": "UncategorizedText", - "element_id": "ed0a4666ce85e6310a0984f37e0e98f8", + "type": "NarrativeText", + "element_id": "32133fc9f028473fb3d3d2ca24382c28", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -150,14 +160,14 @@ "text": "DOI of original article: https://doi.org/10.1016/j.trb.2018.11.007 n Corresponding author at: IITB-Monash Research Academy, IIT Bombay, Powai, Mumbai 400076, India." 
}, { - "type": "Title", - "element_id": "5810d7d862f5f5d65e257a3ed9b102ac", + "type": "ListItem", + "element_id": "7373e1d1cb305b02bf37dc138ba774c4", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 1 }, - "text": "E-mail address: sarangkulkarni@iitb.ac.in (S. Kulkarni)." + "text": "Corresponding author at: IITB-Monash Research Academy, IIT Bombay, Powai, Mumbai 400076, E-mail address: sarangkulkarni@iitb.ac.in (S. Kulkarni)." }, { "type": "NarrativeText", @@ -171,7 +181,7 @@ }, { "type": "NarrativeText", - "element_id": "0a1b09ff562f4d063703cbf021ee297f", + "element_id": "e326e74f4607af7d370e049bc5d9e66a", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -180,8 +190,8 @@ "text": "S. Kulkarni et al. / Data in Brief 22 (2019) 484–487" }, { - "type": "UncategorizedText", - "element_id": "5844a72aee9269a68da28cae55c706d8", + "type": "Header", + "element_id": "28b33efedc139452525a280e548c029b", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -191,7 +201,7 @@ }, { "type": "Title", - "element_id": "5af2c5326780fc58a48ca40c6b47bee5", + "element_id": "39826c423283dfd91f1dbd34664ce038", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -199,16 +209,6 @@ }, "text": "Specifications table" }, - { - "type": "NarrativeText", - "element_id": "5c3978ebc42ea4f11240c221ac3be1cf", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "Subject area Operations research More specific subject area Vehicle scheduling Type of data How data were acquired" - }, { "type": "Title", "element_id": "41e0fa358cefcadbb2633ec45ff2d129", @@ -230,174 +230,134 @@ "text": "Experimental features Data source location Data accessibility Related research article" }, { - "type": "ListItem", - "element_id": "b97bb84430abd87625f9a82f95423073", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "Tables, text files Artificially 
generated by a C þ þ program on Intels Xeons CPU E5– 2670 v2 with Linux operating system. Raw Sixty randomly generated instances of the MDVSP with the number of depots in (8, 12, 16) and the number of trips in (1500, 2000, 2500, 3000) Randomly generated instances IITB-Monash Research Academy, IIT Bombay, Powai, Mumbai, India. Data can be downloaded from https://orlib.uqcloud.net/ Kulkarni, S., Krishnamoorthy, M., Ranade, A., Ernst, A.T. and Patil, R., 2018. A new formulation and a column generation-based heuristic for the multiple depot vehicle scheduling problem. Transportation Research Part B: Methodological, 118, pp. 457–487 [3]." - }, - { - "type": "Title", - "element_id": "596eda178f8c5adefbae7cfe1bec78c3", + "type": "NarrativeText", + "element_id": "5c3978ebc42ea4f11240c221ac3be1cf", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "Value of the data" + "text": "Subject area Operations research More specific subject area Vehicle scheduling Type of data How data were acquired" }, { - "type": "NarrativeText", - "element_id": "f2fdefc49840022ffb3a88bd4a3512d0", + "type": "Table", + "element_id": "765958cb90f3061bda61fe2f973b2acb", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "(cid:2) The dataset contains 60 different problem instances of the MDVSP that can be used to evaluate the" + "text": "Subject area Operations research More specific subject area Vehicle scheduling Type of data Tables, text files How data were acquired Artificially generated by a C++ program on Intel\" Xeon” CPU E5- 2670 v2 with Linux operating system. Data format Raw Experimental factors Sixty randomly generated instances of the MDVSP with the number of depots in (8, 12, 16) and the number of trips in (1500, 2000, 2500, 3000) Experimental features Randomly generated instances Data source location IITB-Monash Research Academy, IIT Bombay, Powai, Mumbai, India. 
Data accessibility Data can be downloaded from https://orlib.uqcloud.net/ Related research article Kulkarni, S., Krishnamoorthy, M., Ranade, A., Ernst, A.T. and Patil, R., 2018. A new formulation and a column generation-based heuristic for the multiple depot vehicle scheduling problem. Transportation Research Part B: Methodological, 118, pp. 457-487 [3]." }, { "type": "NarrativeText", - "element_id": "7c8bc2811f71480b433eb6fee2a3bb33", + "element_id": "eed804f27c782a8a3643b5d5379099d4", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "(cid:2) The data provide all the information that is required to model the MDVSP by using the existing" + "text": "Tables, text files Artificially generated by a C þ þ program on Intels Xeons CPU E5– 2670 v2 with Linux operating system. Raw Sixty randomly generated instances of the MDVSP with the number of depots in (8, 12, 16) and the number of trips in (1500, 2000, 2500, 3000) Randomly generated instances IITB-Monash Research Academy, IIT Bombay, Powai, Mumbai, India. Data can be downloaded from https://orlib.uqcloud.net/ Kulkarni, S., Krishnamoorthy, M., Ranade, A., Ernst, A.T. and Patil, R., 2018. A new formulation and a column generation-based heuristic for the multiple depot vehicle scheduling problem. Transportation Research Part B: Methodological, 118, pp. 457–487 [3]." }, { "type": "Title", - "element_id": "bd7d750cb9f652c80c17a264072b8858", + "element_id": "e63f0ed399f0537c9ffeadfcae3baed6", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "performance of the algorithms for the MDVSP." + "text": "Value of the data" }, { "type": "NarrativeText", - "element_id": "e69dab6e2bc16d11cfd2d80a804d89fb", + "element_id": "f2fdefc49840022ffb3a88bd4a3512d0", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "(cid:2) All the problem instances are available for use without any restrictions. 
(cid:2) The benchmark solutions and solution time for the problem instances are presented in [3] and can" + "text": "(cid:2) The dataset contains 60 different problem instances of the MDVSP that can be used to evaluate the" }, { - "type": "Title", - "element_id": "68d39f7bcfe99749cc221fa901314626", + "type": "ListItem", + "element_id": "407d8a9e0bef6d906ec672c5b59a787f", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "mathematical formulations." + "text": "The dataset contains 60 different problem instances of the MDVSP that can be used to evaluate performance of the algorithms for the MDVSP." }, { - "type": "NarrativeText", - "element_id": "1c1d6b35ac0925a35ea3bb4d018e675f", + "type": "ListItem", + "element_id": "aaedb0d8a48db639a022b216035c56de", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "(cid:2) The dataset includes a program that can generate similar problem instances of different sizes." + "text": "© The data provide all the information that is required to model the MDVSP by using the existing mathematical formulations." }, { "type": "NarrativeText", - "element_id": "24d7f2ed4386a169639b93a5bf03fd79", + "element_id": "e69dab6e2bc16d11cfd2d80a804d89fb", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "be used for the comparison." + "text": "(cid:2) All the problem instances are available for use without any restrictions. (cid:2) The benchmark solutions and solution time for the problem instances are presented in [3] and can" }, { "type": "ListItem", - "element_id": "c2b2b778d53cc9a1cb4dc340476bc5aa", + "element_id": "5d3c15437243e1c067415182c2314622", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "1. Data" + "text": "The benchmark solutions and solution time for the problem instances are presented in [3] and be used for the comparison." 
}, { "type": "NarrativeText", - "element_id": "41ce7670e476aaf9a595bc28c13dbba0", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "The dataset contains 60 different problem instances of the multiple depot vehicle scheduling pro- blem (MDVSP). Each problem instance is provided in a separate file. Each file is named as ‘RN-m-n-k.dat’, where ‘m’, ‘n’, and ‘k’ denote the number of depots, the number of trips, and the instance number" - }, - { - "type": "NarrativeText", - "element_id": "a18c70d23b71c51ddfe33311232c241c", + "element_id": "1c1d6b35ac0925a35ea3bb4d018e675f", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "‘RN-8-1500-01.dat’, is the first problem instance with 8 depots and 1500 trips. For the number of depots, m, we used three values, 8, 12, and 16. The four values for the number of trips, n, are 1500, 2000, 2500, and 3000. For each size, (m,n), five instances are provided. The dataset can be downloaded from https://orlib.uqcloud.net." + "text": "(cid:2) The dataset includes a program that can generate similar problem instances of different sizes." }, { "type": "Title", - "element_id": "10c22bcf4c768b515be4e94bcafc71bf", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "for" - }, - { - "type": "UncategorizedText", - "element_id": "aea66a7c89c6de4d3e3ed6c1ada31104", + "element_id": "1c3f3de4e65aae5bd147f84779712a65", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "the size," - }, - { - "type": "UncategorizedText", - "element_id": "e0feab8a8888b2955af1cc1a2acff883", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "‘ðm; nÞ’," + "text": "1. 
Data" }, { - "type": "UncategorizedText", - "element_id": "0b113c91aaaf031e5d7b74747e1b4153", + "type": "NarrativeText", + "element_id": "07732da32c53fed3ffd5342c61ab643b", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "respectively. For example," + "text": "The dataset contains 60 different problem instances of the multiple depot vehicle scheduling pro- blem (MDVSP). Each problem instance is provided in a separate file. Each file is named as ‘RN-m-n-k.dat’, where ‘m’, ‘n’, and ‘k’ denote the number of depots, the number of trips, and the instance number ‘RN-8–1500-01.dat’, for is the first problem instance with 8 depots and 1500 trips. For the number of depots, m, we used three values, 8, 12, and 16. The four values for the number of trips, n, are 1500, 2000, 2500, and 3000. For each size, ðm; nÞ, five instances are provided. The dataset can be downloaded from https://orlib.uqcloud.net." }, { - "type": "UncategorizedText", - "element_id": "6dd3e9101394a1fbacb451c4c9ba03b9", + "type": "NarrativeText", + "element_id": "d1e8a672b8efb9e58dcf4a40204c1687", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "the problem instance," + "text": "For each tripie 1,2,...,n,a start time, ft}, an end time, ff, a start location, i, and an end location, i, and" }, { "type": "NarrativeText", @@ -420,47 +380,7 @@ "text": "Þ," }, { - "type": "Title", - "element_id": "5a15b4000add06e52b66591cd8cac950", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "i , an end time, te" - }, - { - "type": "Title", - "element_id": "7798ae4daad9264de38e67c98f2bd624", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "i , a start location, ls" - }, - { - "type": "UncategorizedText", - "element_id": "801a0d00a5b76dbd0f039368ee45eda3", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - 
"page_number": 2 - }, - "text": "i , and an end location, le i ," - }, - { - "type": "Title", - "element_id": "6201111b83a0cb5b0922cb37cc442b9a", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "and" - }, - { - "type": "NarrativeText", + "type": "ListItem", "element_id": "dcb60b2d7218e86946c2235aad0b6008", "metadata": { "data_source": {}, @@ -480,8 +400,8 @@ "text": "All times are in minutes and integers. The planning duration is from 5 a.m. to around midnight. Each instance has two classes of trips, short trips and long trips, with 40% short trips and 60% long trips. The duration of a short trip is less than a total of 45 min and the travel time between the start" }, { - "type": "UncategorizedText", - "element_id": "86b700fab5db37977a73700b53a0654b", + "type": "Header", + "element_id": "8d0736d21edd4e194e5db02347e129c7", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -490,8 +410,8 @@ "text": "486" }, { - "type": "NarrativeText", - "element_id": "0a1b09ff562f4d063703cbf021ee297f", + "type": "Header", + "element_id": "e326e74f4607af7d370e049bc5d9e66a", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -501,7 +421,7 @@ }, { "type": "NarrativeText", - "element_id": "ab861dc146a84a52e48a75be2ba3f190", + "element_id": "9f77f0db3a785a5bb491fb79fe54cfa0", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -511,93 +431,13 @@ }, { "type": "NarrativeText", - "element_id": "faee1001fc912565a74ea2d69fa0d689", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "travel empty from —¢). Aschedule is given by the sequence in which a vehicle can cover the trips. The MDVSP is to determine the minimum number of schedules to cover all trips that minimizes total time in waiting and empty travel. 
The following requirements must be satisfied:" - }, - { - "type": "NarrativeText", - "element_id": "a18dff87ecdbfa5d5d8a1ed56f7ce734", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "A trip j can be covered after trip i by the same vehicle, if ts j" - }, - { - "type": "NarrativeText", - "element_id": "3e549e73bba49a63f20841b5821cfda9", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "i to ls" - }, - { - "type": "NarrativeText", - "element_id": "43dad32a26a446c5a2c74f3f2328b849", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": ". If le i ls le i j , otherwise, the vehicle may require waiting at le i for the duration of ðts" - }, - { - "type": "Title", - "element_id": "3feb623147ddb3265b5968ce2efb8f6b", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "Z te" - }, - { - "type": "NarrativeText", - "element_id": "5201e1037409ea15055e320409a9f5eb", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "i þδ" - }, - { - "type": "Title", - "element_id": "189f40034be7a199f1fa9891668ee3ab", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "j" - }, - { - "type": "Title", - "element_id": "a10959d132f2b0d3723ae6b8b77f86b7", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "a ls" - }, - { - "type": "Title", - "element_id": "4137b01e139589b7a1d3b3fc4da031d8", + "element_id": "694b9c582265698bf49806b056c64adc", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3 }, - "text": "must" + "text": "j , the vehicle must travel empty from le j (cid:3)te i Þ. A schedule is given by the sequence in which a vehicle can cover the trips. 
The MDVSP is to determine the minimum number of schedules to cover all trips that minimizes total time in waiting and empty travel. The following requirements must be satisfied:" }, { "type": "ListItem", @@ -610,24 +450,14 @@ "text": "1. Each schedule should start and end at the same depot. 2. Each trip should be covered by only one vehicle. 3. The number of schedules that start from a depot should not exceed the number of vehicles at" }, { - "type": "Title", - "element_id": "e46a5a30f05d06e82d8b7d10448de683", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "the depot." - }, - { - "type": "NarrativeText", - "element_id": "e731dc92fddc0512e142bfb2bed62bbf", + "type": "ListItem", + "element_id": "3f2b8351a07eef2caa1918b4b21d05af", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3 }, - "text": "The dataset also includes a program ‘GenerateInstance.cpp’ that can be used to generate new instances. The program takes three inputs, the number of depots ðmÞ, the number of trips ðnÞ, and the number of instances for each size ðm; nÞ." + "text": "The number of schedules that start from a depot should not exceed the number of vehicles the depot." }, { "type": "NarrativeText", @@ -641,7 +471,7 @@ }, { "type": "NarrativeText", - "element_id": "928fa0dcad70f173bc989ee5715375c5", + "element_id": "149eebcec86a1b9a43b93af13952870b", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -650,44 +480,14 @@ "text": "The description of the file for each problem instance is presented in Table 2. The first line in the file provides the number of depots ðmÞ, the number of trips, ðnÞ, and the number of locations ðlÞ, in the problem instance. The next n lines present the information for n trips. Each line corresponds to a trip, i A 1; …; n g, and provides the start location, the start time, the end location, and the end time of trip i. 
The next l lines present the travel times between any two locations, i; jA 1; …; l" }, { - "type": "Title", - "element_id": "252f10c83610ebca1a059c0bae8255eb", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "f" - }, - { - "type": "UncategorizedText", - "element_id": "89507815c6b4a6f31e6d3da7fca6b561", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "(cid:1)" - }, - { - "type": "UncategorizedText", - "element_id": "33a2b57b388470db1cb13defbe73dc18", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "(cid:3)" - }, - { - "type": "UncategorizedText", - "element_id": "cdb4ee2aea69cc6a83331bbe96dc2caa", + "type": "NarrativeText", + "element_id": "e731dc92fddc0512e142bfb2bed62bbf", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3 }, - "text": "." + "text": "The dataset also includes a program ‘GenerateInstance.cpp’ that can be used to generate new instances. The program takes three inputs, the number of depots ðmÞ, the number of trips ðnÞ, and the number of instances for each size ðm; nÞ." }, { "type": "UncategorizedText", @@ -700,14 +500,14 @@ "text": "Table 1 Average number of locations, times, vehicles and empty travels for each instance size." 
}, { - "type": "Table", - "element_id": "1d8fd023cd0978f7a6500815d2ad0ef6", + "type": "UncategorizedText", + "element_id": "6d1f07a97479928ee102d525dd11d2d7", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3 }, - "text": "Instance size (m, n) Average number of Locations Times Vehicles Possible empty travels (8, 1500) 568.40 975.20 652.20 668,279.40 (8, 2000) 672.80 1048.00 857.20 1,195,844.80 (8, 2500) 923.40 1078.00 1082.40 1,866,175.20 (8, 3000) 977.00 1113.20 1272.80 2,705,617.00 (12, 1500) 566.00 994.00 642.00 674,191.00 (12, 2000) 732.60 1040.60 861.20 1,199,659.80 (12, 2500) 875.00 1081.00 1096.00 1,878,745.20 (12, 3000) 1119.60 1107.40 1286.20 2,711,180.40 (16, 1500) 581.80 985.40 667.80 673,585.80 (16, 2000) 778.00 1040.60 872.40 1,200,560.80 (16, 2500) 879.00 1083.20 1076.40 1,879,387.00 ) (16, 3000 1087.20 1101.60 1284.60 2,684,983.60" + "text": "(8, 1500) (8, 2000) (8, 2500) (8, 3000) (12, 1500) (12, 2000) (12, 2500) (12, 3000) (16, 1500) (16, 2000) (16, 2500) (16, 3000)" }, { "type": "Title", @@ -720,24 +520,24 @@ "text": "Instance size (m, n)" }, { - "type": "UncategorizedText", - "element_id": "6d1f07a97479928ee102d525dd11d2d7", + "type": "Table", + "element_id": "1d8fd023cd0978f7a6500815d2ad0ef6", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3 }, - "text": "(8, 1500) (8, 2000) (8, 2500) (8, 3000) (12, 1500) (12, 2000) (12, 2500) (12, 3000) (16, 1500) (16, 2000) (16, 2500) (16, 3000)" + "text": "Instance size (m, n) Average number of Locations Times Vehicles Possible empty travels (8, 1500) 568.40 975.20 652.20 668,279.40 (8, 2000) 672.80 1048.00 857.20 1,195,844.80 (8, 2500) 923.40 1078.00 1082.40 1,866,175.20 (8, 3000) 977.00 1113.20 1272.80 2,705,617.00 (12, 1500) 566.00 994.00 642.00 674,191.00 (12, 2000) 732.60 1040.60 861.20 1,199,659.80 (12, 2500) 875.00 1081.00 1096.00 1,878,745.20 (12, 3000) 1119.60 1107.40 1286.20 2,711,180.40 (16, 1500) 581.80 985.40 667.80 673,585.80 (16, 
2000) 778.00 1040.60 872.40 1,200,560.80 (16, 2500) 879.00 1083.20 1076.40 1,879,387.00 ) (16, 3000 1087.20 1101.60 1284.60 2,684,983.60" }, { - "type": "Title", - "element_id": "47a68d3aa70030f2e7886e3f1cb07c69", + "type": "UncategorizedText", + "element_id": "1cb85e5f94671526c0cf38dc533f87e0", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3 }, - "text": "Average number of" + "text": "568.40 672.80 923.40 977.00 566.00 732.60 875.00 1119.60 581.80 778.00 879.00 1087.20" }, { "type": "Title", @@ -750,14 +550,14 @@ "text": "Locations" }, { - "type": "UncategorizedText", - "element_id": "1cb85e5f94671526c0cf38dc533f87e0", + "type": "Title", + "element_id": "47a68d3aa70030f2e7886e3f1cb07c69", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3 }, - "text": "568.40 672.80 923.40 977.00 566.00 732.60 875.00 1119.60 581.80 778.00 879.00 1087.20" + "text": "Average number of" }, { "type": "Title", @@ -800,28 +600,28 @@ "text": "652.20 857.20 1082.40 1272.80 642.00 861.20 1096.00 1286.20 667.80 872.40 1076.40 1284.60" }, { - "type": "Title", - "element_id": "68ec9a56bde1cd8ea67340bf9cb829cb", + "type": "UncategorizedText", + "element_id": "4a30645cb68832ec26e551345d9cff0a", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3 }, - "text": "Possible empty travels" + "text": "668,279.40 1,195,844.80 1,866,175.20 2,705,617.00 674,191.00 1,199,659.80 1,878,745.20 2,711,180.40 673,585.80 1,200,560.80 1,879,387.00 2,684,983.60" }, { - "type": "UncategorizedText", - "element_id": "4a30645cb68832ec26e551345d9cff0a", + "type": "Title", + "element_id": "68ec9a56bde1cd8ea67340bf9cb829cb", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3 }, - "text": "668,279.40 1,195,844.80 1,866,175.20 2,705,617.00 674,191.00 1,199,659.80 1,878,745.20 2,711,180.40 673,585.80 1,200,560.80 1,879,387.00 2,684,983.60" + "text": "Possible empty travels" }, { - "type": "NarrativeText", 
- "element_id": "0a1b09ff562f4d063703cbf021ee297f", + "type": "Header", + "element_id": "e326e74f4607af7d370e049bc5d9e66a", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -830,8 +630,8 @@ "text": "S. Kulkarni et al. / Data in Brief 22 (2019) 484–487" }, { - "type": "UncategorizedText", - "element_id": "9b19f9ab816598a0809e4afd5d60800f", + "type": "Header", + "element_id": "dd1252fa6e5f6c3f43669c9cc95952e7", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -850,94 +650,94 @@ "text": "Table 2 Description of file format for each problem instance." }, { - "type": "Title", - "element_id": "151e509ce97fe40eecae3822c78adcf5", + "type": "UncategorizedText", + "element_id": "05f82fa1685502a356c0894aa45b404d", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "Number of lines" + "text": "1 1 n" }, { "type": "Title", - "element_id": "0d42fdb9458af19413eee0a1227f415c", + "element_id": "acac86c0e609ca906f632b0e2dacccb2", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "Number of columns in each line" + "text": "l" }, { "type": "Title", - "element_id": "526e0087cc3f254d9f86f6c7d8e23d95", + "element_id": "151e509ce97fe40eecae3822c78adcf5", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "Description" + "text": "Number of lines" }, { - "type": "UncategorizedText", - "element_id": "05f82fa1685502a356c0894aa45b404d", + "type": "Table", + "element_id": "e33daf2e73d705ed4b27cd4e8fee5f5f", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "1 1 n" + "text": "Number of Number of columns in Description lines each line 1 3 The number of depots, the number of trips, and the number of locations. 1 m The number of vehicles rg at each depot d. n 4 One line for each trip, i= 1,2, ...,n. 
Each line provides the start location [?, the start time ¢%, the end location [F and the end time ¢¢ for the corresponding trip. I I Each element, 6j, where i,j ¢ 1,2, ...,1, refers to the travel time between location i and location j." }, { - "type": "Title", - "element_id": "acac86c0e609ca906f632b0e2dacccb2", + "type": "UncategorizedText", + "element_id": "ea661ceae235dff310398cd2e921e9d8", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "l" + "text": "3 m 4" }, { - "type": "UncategorizedText", - "element_id": "ea661ceae235dff310398cd2e921e9d8", + "type": "Title", + "element_id": "acac86c0e609ca906f632b0e2dacccb2", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "3 m 4" + "text": "l" }, { "type": "Title", - "element_id": "acac86c0e609ca906f632b0e2dacccb2", + "element_id": "0d42fdb9458af19413eee0a1227f415c", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "l" + "text": "Number of columns in each line" }, { - "type": "NarrativeText", - "element_id": "78f6ff03dfac8dfb7f319de1e369590d", + "type": "Title", + "element_id": "526e0087cc3f254d9f86f6c7d8e23d95", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "The number of depots, the number of trips, and the number of locations. The number of vehicles rg at each depot d. One line for each trip, i= 1,2, ...,n. Each line provides the start location and the end time ¢¢ for the corresponding trip. Each element, 6j, where i,j ¢ 1,2, ...,1, refers to the travel time between location i and location j." 
+ "text": "Description" }, { - "type": "Title", - "element_id": "336074805fc853987abe6f7fe3ad97a6", + "type": "NarrativeText", + "element_id": "d2660f6e66916959c5de8a941bfa89c7", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "time" + "text": "The number of depots, the number of trips, and the number of locations. The number of vehicles rd at each depot d. One line for each trip, i ¼ 1; 2; …; n. Each line provides the start location ls time ts i and the end time te i for the corresponding trip. Each element, δij; where i; j A 1; 2; …; l, refers to the travel time between location i and location j." }, { "type": "Title", @@ -960,8 +760,8 @@ "text": "i , the start" }, { - "type": "ListItem", - "element_id": "764eef872135149aaf95224bab69c844", + "type": "Title", + "element_id": "5b0294965f25f778012e27476e7ec042", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -990,7 +790,7 @@ "text": "Our dataset provides start/end location and time of trips as well as the travel time between any two locations. The location and time information is required to model the MDVSP on a time-space network. The feasible connections and the cost of connections between the trips can be obtained as discussed in [3]. Thus, the dataset has all the information that is required to model the MDVSP on the time-space network (see [2]) as well as the connection-network (see [5]). The benchmark solutions for all the problem instances are presented in [3]." }, { - "type": "NarrativeText", + "type": "Title", "element_id": "81db7fab0806640b0cbbac862671704f", "metadata": { "data_source": {}, @@ -1001,23 +801,13 @@ }, { "type": "NarrativeText", - "element_id": "d434a0e19d0d34e92936b9566e1ebb45", + "element_id": "8f0264ba00616d29c2648dc51f24b439", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "Transparency document associated with this article can be found in the online version at https://doi." 
- }, - { - "type": "UncategorizedText", - "element_id": "fa783fbedd3cbd108b99d04da7fb7e8b", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "org/10.1016/j.dib.2018.12.055." + "text": "Transparency document associated with this article can be found in the online version at https://doi. org/10.1016/j.dib.2018.12.055." }, { "type": "Title", @@ -1030,103 +820,53 @@ "text": "References" }, { - "type": "NarrativeText", - "element_id": "5a1d84f7d74fc4ceeacb634d524cc041", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "[1] G. Carpaneto, M. Dell'Amico, M. Fischetti, P. Toth, A branch and bound algorithm for the multiple depot vehicle scheduling" - }, - { - "type": "UncategorizedText", - "element_id": "bec40b25a277a08de3415e33284fc76d", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "problem, Networks 19 (5) (1989) 531–548." - }, - { - "type": "NarrativeText", - "element_id": "19dee0a4e8fd073350e234b4352b8af6", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "[2] N. Kliewer, T. Mellouli, L. Suhl, A time–space network based exact optimization model for multi-depot bus scheduling, Eur." - }, - { - "type": "UncategorizedText", - "element_id": "5f5ca82752a3220998c06ea0c44eb80e", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "J. Oper. Res. 175 (3) (2006) 1616–1627." - }, - { - "type": "UncategorizedText", - "element_id": "64cd13c78330953bd999d37dacbeaf0e", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "[3] S. Kulkarni, M. Krishnamoorthy, A. Ranade, A.T. Ernst, R. 
Patil, A new formulation and a column generation-based heuristic" - }, - { - "type": "NarrativeText", - "element_id": "16c341408703257ff517dcc76140e2c0", + "type": "ListItem", + "element_id": "6e1b1affc6fddc7c465dff0416c8a234", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "[4] A.S. Pepin, G. Desaulniers, A. Hertz, D. Huisman, A comparison of five heuristics for the multiple depot vehicle scheduling" + "text": "[1] G. Carpaneto, M. Dell'Amico, M. Fischetti, P. Toth, A branch and bound algorithm for the multiple depot vehicle scheduling problem, Networks 19 (5) (1989) 531-548." }, { - "type": "NarrativeText", - "element_id": "c4f2c64b5f38feaa921647abceebaec8", + "type": "ListItem", + "element_id": "be401eb5b247632c2f3966e4c37dd8ae", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "for the multiple depot vehicle scheduling problem, Transp. Res. Part B Methodol. 118 (2018) 457–487." + "text": "[2] N. Kliewer, T. Mellouli, L. Suhl, A time-space network based exact optimization model for multi-depot bus scheduling, Eur. J. Oper. Res. 175 (3) (2006) 1616-1627." }, { - "type": "UncategorizedText", - "element_id": "aa252076bc877d1ba2b95aa13b73ff72", + "type": "ListItem", + "element_id": "dd8920331ab639dbe3fd39605c0d583f", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "problem, J. Sched. 12 (1) (2009) 17." + "text": "[3] S. Kulkarni, M. Krishnamoorthy, A. Ranade, A.T. Ernst, R. Patil, A new formulation and a column generation-based heuristic for the multiple depot vehicle scheduling problem, Transp. Res. Part B Methodol. 118 (2018) 457-487." }, { - "type": "UncategorizedText", - "element_id": "2e00441177bee9377583470218bea299", + "type": "ListItem", + "element_id": "33edf93e6f8900c4bccbff43de487158", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "[5] C.C. Ribeiro, F. 
Soumis, A column generation approach to the multiple-depot vehicle scheduling problem, Oper. Res. 42 (1)" + "text": "[4] A.S. Pepin, G. Desaulniers, A. Hertz, D. Huisman, A comparison of five heuristics for the multiple depot vehicle scheduling problem, J. Sched. 12 (1) (2009) 17." }, { - "type": "UncategorizedText", - "element_id": "4b1b8c9df00f25e26176a85d84c8c927", + "type": "ListItem", + "element_id": "ec1963edde66d2c57c5ff9f05b5829c8", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "(1994) 41–52." + "text": "[5] C.C. Ribeiro, F. Soumis, A column generation approach to the multiple-depot vehicle scheduling problem, Oper. Res. 42 (1) (1994) 41-52." } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/biomed-path/07/07/sbaa031.073.PMC7234218.pdf.json b/test_unstructured_ingest/expected-structured-output/biomed-path/07/07/sbaa031.073.PMC7234218.pdf.json index c96928b601..ea0709f203 100644 --- a/test_unstructured_ingest/expected-structured-output/biomed-path/07/07/sbaa031.073.PMC7234218.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/biomed-path/07/07/sbaa031.073.PMC7234218.pdf.json @@ -1,7 +1,7 @@ [ { - "type": "UncategorizedText", - "element_id": "055b9fd1463ee2c4481b4eb9e20d4b0f", + "type": "Header", + "element_id": "cda1ae2f061dbdafb3374e6411d3823e", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -10,8 +10,8 @@ "text": "S32" }, { - "type": "Title", - "element_id": "b8b976f4707d2af116239c70acf8f2be", + "type": "Header", + "element_id": "d7106f2241a37dc4e61314f45da1ff5b", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -21,7 +21,7 @@ }, { "type": "NarrativeText", - "element_id": "d16d8a1280ba2acf52f98e9d3c9c2301", + "element_id": "f7573da2765829e5fcbc8eed02057106", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -40,7 +40,7 @@ "text": "Discussion: Our data confirm previous findings on reduced slow 
wave density in FEP, and expand them to acute subjects, before any treatment is prescribed. This is in line with available data on diffuse abnormalities of cortico-cortical and cortico-thalamic networks in these patients. Interestingly, our data also offer preliminary evidence that this deficit is specific for SCZ, as it appears to differentiate patients who developed SCZ from those with other diagnoses at follow-up. Given the traveling properties of slow waves, future research should establish their potential as markers of connectivity in SCZ." }, { - "type": "Title", + "type": "NarrativeText", "element_id": "c02ccab64d2a356a96f5394a2b92fa0b", "metadata": { "data_source": {}, @@ -60,38 +60,38 @@ "text": "S6. SLEEP ENDOPHENOTYPES OF SCHIZOPHRENIA: A HIGH-DENSITY EEG STUDY IN DRUG-NAÏVE, FIRST EPISODE PSYCHOSIS PATIENTS" }, { - "type": "UncategorizedText", - "element_id": "e97f1cf1c49f397732e68cf1efb2355e", + "type": "NarrativeText", + "element_id": "d981d6dfaa8794c0bb733db0965b2831", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 1 }, - "text": "Anna Castelnovo1, Cecilia Casetta2, Francesco Donati3, Renata del Giudice3, Caroline Zangani3, Simone Sarasso3, Armando D’Agostino*3 1Faculty of Biomedical Sciences, Università della Svizzera Italiana, Switzerland; 2Institute of Psychiatry, Psychology and Neuroscience, King’s College London, England; 3Università degli Studi di Milano, Italy" + "text": "Amedeo Minichino*1, Beata Godlewska1, Philip Cowen1, Philip Burnet1, Belinda Lennox1 1University of Oxford" }, { - "type": "NarrativeText", - "element_id": "1252f8d8921acac5f706e4402e504a75", + "type": "UncategorizedText", + "element_id": "e97f1cf1c49f397732e68cf1efb2355e", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 1 }, - "text": "Background: Slow waves, the hallmark of the deep nonrapid eye move- ment sleep electroencephalogram (EEG), are critical for restorative sleep and brain plasticity. 
They arise from the synchronous depolarization and hyperpolarization of millions of cortical neurons and their proper gen- eration and propagation relies upon the integrity of widespread cortico- thalamic networks. Slow wave abnormalities have been reported in patient with Schizophrenia, although with partially contradictory results, probably related to antipsychotic and sedative medications. Recently, their presence and delineation, have been convincingly shown in first-episode psychosis patients (FEP). However, clear evidence of this biomarker at the onset of the disease, prior to any psychopharmacological intervention, remains limited. Moreover, no attempt has been made to elucidate the prognostic meaning of this finding. Methods: We collected whole night sleep high–density electroencephalog- raphy recordings (64-channel BrainAmp, Brain Products GmbH, Gilching, Germany) in 20 drug-naive FEP patients and 20 healthy control subjects (HC). Several clinical psychometric scales as well as neurocognitive tests were administered to all subjects in order to better define psychopatholog- ical status and vulnerability. EEG slow wave activity (SWA, spectral power between 1 and 4 Hz) and several slow wave parameters were computed at each electrode location, including density and amplitude, at each electrode location. Along with a group analysis between FEP and HC, a subgroup analysis was also computed between patients who showed a progression of symptoms to full-blown Schizophrenia (SCZ, n = 10) over the next 12-month follow-up and those who did not (OTH, n = 10). Results: Sleep macro-architecture was globally preserved in FEP patients. SWA (1–4 Hz) was lower in FEP compared to HC but this difference didn’t reach statistical significance. Slow wave density was decreased in FEP compared to HC, with a significance that survived multiple comparison correction over a large fronto-central cluster. Mean amplitude was pre- served. 
At the subgroup analysis, these results were largely driven by the subgroup of patients with a confirmed diagnosis of SCZ at a 12-month fol- low-up. Indeed, no difference could be found between OTH and HC, while a strong significance was still evident between SCZ and HC." + "text": "Anna Castelnovo1, Cecilia Casetta2, Francesco Donati3, Renata del Giudice3, Caroline Zangani3, Simone Sarasso3, Armando D’Agostino*3 1Faculty of Biomedical Sciences, Università della Svizzera Italiana, Switzerland; 2Institute of Psychiatry, Psychology and Neuroscience, King’s College London, England; 3Università degli Studi di Milano, Italy" }, { "type": "NarrativeText", - "element_id": "d981d6dfaa8794c0bb733db0965b2831", + "element_id": "1252f8d8921acac5f706e4402e504a75", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 1 }, - "text": "Amedeo Minichino*1, Beata Godlewska1, Philip Cowen1, Philip Burnet1, Belinda Lennox1 1University of Oxford" + "text": "Background: Slow waves, the hallmark of the deep nonrapid eye move- ment sleep electroencephalogram (EEG), are critical for restorative sleep and brain plasticity. They arise from the synchronous depolarization and hyperpolarization of millions of cortical neurons and their proper gen- eration and propagation relies upon the integrity of widespread cortico- thalamic networks. Slow wave abnormalities have been reported in patient with Schizophrenia, although with partially contradictory results, probably related to antipsychotic and sedative medications. Recently, their presence and delineation, have been convincingly shown in first-episode psychosis patients (FEP). However, clear evidence of this biomarker at the onset of the disease, prior to any psychopharmacological intervention, remains limited. Moreover, no attempt has been made to elucidate the prognostic meaning of this finding. 
Methods: We collected whole night sleep high–density electroencephalog- raphy recordings (64-channel BrainAmp, Brain Products GmbH, Gilching, Germany) in 20 drug-naive FEP patients and 20 healthy control subjects (HC). Several clinical psychometric scales as well as neurocognitive tests were administered to all subjects in order to better define psychopatholog- ical status and vulnerability. EEG slow wave activity (SWA, spectral power between 1 and 4 Hz) and several slow wave parameters were computed at each electrode location, including density and amplitude, at each electrode location. Along with a group analysis between FEP and HC, a subgroup analysis was also computed between patients who showed a progression of symptoms to full-blown Schizophrenia (SCZ, n = 10) over the next 12-month follow-up and those who did not (OTH, n = 10). Results: Sleep macro-architecture was globally preserved in FEP patients. SWA (1–4 Hz) was lower in FEP compared to HC but this difference didn’t reach statistical significance. Slow wave density was decreased in FEP compared to HC, with a significance that survived multiple comparison correction over a large fronto-central cluster. Mean amplitude was pre- served. At the subgroup analysis, these results were largely driven by the subgroup of patients with a confirmed diagnosis of SCZ at a 12-month fol- low-up. Indeed, no difference could be found between OTH and HC, while a strong significance was still evident between SCZ and HC." }, { "type": "NarrativeText", - "element_id": "6164e852cb79f9408e833e350240ac5c", + "element_id": "9e7cc386b1093b082bccf936861747aa", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -100,7 +100,7 @@ "text": "Background: Meta-analytic evidence showed increased levels of periph- eral endocannabinoid metabolites in psychotic illness. 
Alterations in the endocannabinoid system are believed to compromise glutamate and do- pamine transmission, which play a central role in pathophysiological models of psychosis. I will present preliminary data from an ongoing high-field proton magnetic resonance spectroscopy (MRS) study aimed at investigating the association between peripheral levels of endocannabinoid system metabolites and central glutamate metabolism in individuals at their first non-affective psychotic episode (NA-FEP) and healthy controls. Methods: We expect to recruit 17 NA-FEP and 20 healthy controls by January 2020. Currently, we recruited 12 NA-FEP and 18 healthy controls from two different research facilities (Imperial College London and University of Oxford) as part of a cross-sectional study. Participants un- derwent MRS scanning at 7-T with voxels placed in right dorsolateral prefrontal cortex (right-DLPFC), anterior cingulate cortex (ACC), and oc- cipital cortex. Neuro-metabolites will be calculated using the unsuppressed water signal as reference. Endocannabinoid metabolites were quantified from serum samples, collected during the same imaging session. Results: Analyses are ongoing. Based on previous evidence, expected findings are: (i) reduced glutamate levels in the ACC and right-DLPFC of NA-FEP compared to controls; (ii) increased peripheral endocannabinoid metabolites in NA-FEP compared to controls; and (iii) inverse association between peripheral endocannabinoid metabolites and glutamate levels in ACC and right-DLPFC in NA-FEP Discussion: This study will help clarifying the contribution of peripheral endocannabinoid system to central brain mechanisms of key relevance for psychotic illness. It will also add further evidence on the limited literature on high-resolution characterisation of brain metabolites in early psychosis. 
Strengths of the study include: (i) use of high-field MRS, which allows the estimation of glutamate-related compounds at higher precision than at lower field strength; (ii) reduced heterogeneity of the clinical sample (only male and NA-FEP). Limitations: small sample size and cross-sectional design." }, { - "type": "Title", + "type": "NarrativeText", "element_id": "80abb04ec613b1d325ce6b8d0bb3349d", "metadata": { "data_source": {}, @@ -120,8 +120,8 @@ "text": "Camila Loureiro*1, Corsi-Zuelli Fabiana1, Fachim Helene Aparecida1, Shuhama Rosana1, Menezes Paulo Rossi1, Dalton Caroline F2," }, { - "type": "Title", - "element_id": "3aa954bd1e29835edef83b7cd04e9769", + "type": "NarrativeText", + "element_id": "117f7774fd093a60d964cc5b461f3e22", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -131,7 +131,7 @@ }, { "type": "Title", - "element_id": "574d62523bf0c0a56967c26c82840550", + "element_id": "44b59a545030365cd1ad225ed05ff22d", "metadata": { "data_source": {}, "filetype": "application/pdf", diff --git a/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-0.json b/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-0.json index 1b2c921b80..3734bfb66a 100644 --- a/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-0.json +++ b/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-0.json @@ -5,6 +5,7 @@ "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/sample-delta-lake-data/deltatable/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-0.parquet", + "version": 264934223616864047145159629306912568989, "date_created": "2023-08-16 13:30:38.644000", "date_modified": "2023-08-16 13:30:38.604000" }, diff --git a/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-1.json 
b/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-1.json index 0f3e9b3812..202391c129 100644 --- a/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-1.json +++ b/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-1.json @@ -5,6 +5,7 @@ "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/sample-delta-lake-data/deltatable/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-1.parquet", + "version": 139732878514171884135017505553329458078, "date_created": "2023-08-16 13:30:38.644000", "date_modified": "2023-08-16 13:30:38.629000" }, diff --git a/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-2.json b/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-2.json index fc8e77b1ea..e4004e0515 100644 --- a/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-2.json +++ b/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-2.json @@ -5,6 +5,7 @@ "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/sample-delta-lake-data/deltatable/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-2.parquet", + "version": 94569544647555135566266174719335103474, "date_created": "2023-08-16 13:30:38.644000", "date_modified": "2023-08-16 13:30:38.634000" }, diff --git a/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-3.json b/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-3.json index c30c964986..644a82922c 100644 --- a/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-3.json +++ 
b/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-3.json @@ -5,6 +5,7 @@ "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/sample-delta-lake-data/deltatable/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-3.parquet", + "version": 153924277850028657610430472976884166368, "date_created": "2023-08-16 13:30:38.644000", "date_modified": "2023-08-16 13:30:38.609000" }, diff --git a/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-4.json b/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-4.json index a89ae23fb0..3c3059e5c5 100644 --- a/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-4.json +++ b/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-4.json @@ -5,6 +5,7 @@ "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/sample-delta-lake-data/deltatable/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-4.parquet", + "version": 106461216032689499284003671440554259965, "date_created": "2023-08-16 13:30:38.644000", "date_modified": "2023-08-16 13:30:38.599000" }, diff --git a/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-5.json b/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-5.json index 58aaf20908..37d6988151 100644 --- a/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-5.json +++ b/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-5.json @@ -5,6 +5,7 @@ "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/sample-delta-lake-data/deltatable/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-5.parquet", + "version": 164150003651878262139646734756859067992, "date_created": 
"2023-08-16 13:30:38.644000", "date_modified": "2023-08-16 13:30:38.614000" }, diff --git a/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-6.json b/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-6.json index 313dd687ca..34bb7d179f 100644 --- a/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-6.json +++ b/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-6.json @@ -5,6 +5,7 @@ "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/sample-delta-lake-data/deltatable/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-6.parquet", + "version": 117019847084687446803154205344125897829, "date_created": "2023-08-16 13:30:38.644000", "date_modified": "2023-08-16 13:30:38.619000" }, diff --git a/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-7.json b/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-7.json index f63c3badd0..9046006c97 100644 --- a/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-7.json +++ b/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-7.json @@ -5,6 +5,7 @@ "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/sample-delta-lake-data/deltatable/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-7.parquet", + "version": 93578343538662480706683120160579695806, "date_created": "2023-08-16 13:30:38.644000", "date_modified": "2023-08-16 13:30:38.624000" }, diff --git a/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-8.json b/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-8.json index 05fd088f3b..59f11ba85a 100644 --- 
a/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-8.json +++ b/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-8.json @@ -5,6 +5,7 @@ "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/sample-delta-lake-data/deltatable/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-8.parquet", + "version": 329407810704817028643559273505069222621, "date_created": "2023-08-16 13:30:38.644000", "date_modified": "2023-08-16 13:30:38.644000" }, diff --git a/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-9.json b/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-9.json index 3604829e13..82acb28ae2 100644 --- a/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-9.json +++ b/test_unstructured_ingest/expected-structured-output/delta-table/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-9.json @@ -5,6 +5,7 @@ "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/sample-delta-lake-data/deltatable/0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-9.parquet", + "version": 127086160869624650753647884727730407942, "date_created": "2023-08-16 13:30:38.644000", "date_modified": "2023-08-16 13:30:38.639000" }, diff --git a/test_unstructured_ingest/expected-structured-output/jira-diff/JCTP2/10010.json b/test_unstructured_ingest/expected-structured-output/jira-diff/JCTP2/10010.json index 371718d0b3..e6cb0c425c 100644 --- a/test_unstructured_ingest/expected-structured-output/jira-diff/JCTP2/10010.json +++ b/test_unstructured_ingest/expected-structured-output/jira-diff/JCTP2/10010.json @@ -10,7 +10,7 @@ "issue_key": "JCTP2-8" }, "date_created": "2023-08-22T11:35:48.407+0000", - "date_modified": "2023-08-29T11:46:18.193+0000" + "date_modified": "2023-09-29T05:55:11.066+0000" }, "filetype": "text/plain", "languages": [ @@ -30,7 +30,7 @@ 
"issue_key": "JCTP2-8" }, "date_created": "2023-08-22T11:35:48.407+0000", - "date_modified": "2023-08-29T11:46:18.193+0000" + "date_modified": "2023-09-29T05:55:11.066+0000" }, "filetype": "text/plain", "languages": [ @@ -50,7 +50,7 @@ "issue_key": "JCTP2-8" }, "date_created": "2023-08-22T11:35:48.407+0000", - "date_modified": "2023-08-29T11:46:18.193+0000" + "date_modified": "2023-09-29T05:55:11.066+0000" }, "filetype": "text/plain", "languages": [ @@ -70,7 +70,7 @@ "issue_key": "JCTP2-8" }, "date_created": "2023-08-22T11:35:48.407+0000", - "date_modified": "2023-08-29T11:46:18.193+0000" + "date_modified": "2023-09-29T05:55:11.066+0000" }, "filetype": "text/plain", "languages": [ @@ -90,7 +90,7 @@ "issue_key": "JCTP2-8" }, "date_created": "2023-08-22T11:35:48.407+0000", - "date_modified": "2023-08-29T11:46:18.193+0000" + "date_modified": "2023-09-29T05:55:11.066+0000" }, "filetype": "text/plain", "languages": [ @@ -110,7 +110,7 @@ "issue_key": "JCTP2-8" }, "date_created": "2023-08-22T11:35:48.407+0000", - "date_modified": "2023-08-29T11:46:18.193+0000" + "date_modified": "2023-09-29T05:55:11.066+0000" }, "filetype": "text/plain", "languages": [ @@ -130,7 +130,7 @@ "issue_key": "JCTP2-8" }, "date_created": "2023-08-22T11:35:48.407+0000", - "date_modified": "2023-08-29T11:46:18.193+0000" + "date_modified": "2023-09-29T05:55:11.066+0000" }, "filetype": "text/plain", "languages": [ @@ -150,7 +150,7 @@ "issue_key": "JCTP2-8" }, "date_created": "2023-08-22T11:35:48.407+0000", - "date_modified": "2023-08-29T11:46:18.193+0000" + "date_modified": "2023-09-29T05:55:11.066+0000" }, "filetype": "text/plain", "languages": [ @@ -170,7 +170,7 @@ "issue_key": "JCTP2-8" }, "date_created": "2023-08-22T11:35:48.407+0000", - "date_modified": "2023-08-29T11:46:18.193+0000" + "date_modified": "2023-09-29T05:55:11.066+0000" }, "filetype": "text/plain", "languages": [ @@ -190,7 +190,7 @@ "issue_key": "JCTP2-8" }, "date_created": "2023-08-22T11:35:48.407+0000", - "date_modified": 
"2023-08-29T11:46:18.193+0000" + "date_modified": "2023-09-29T05:55:11.066+0000" }, "filetype": "text/plain", "languages": [ @@ -210,7 +210,7 @@ "issue_key": "JCTP2-8" }, "date_created": "2023-08-22T11:35:48.407+0000", - "date_modified": "2023-08-29T11:46:18.193+0000" + "date_modified": "2023-09-29T05:55:11.066+0000" }, "filetype": "text/plain", "languages": [ diff --git a/test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper.pdf.json b/test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper.pdf.json index 6f3354a254..a05bf96dd0 100644 --- a/test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper.pdf.json @@ -30,44 +30,14 @@ "text": "1 2 0 2" }, { - "type": "Title", - "element_id": "2e26dc2c4d8d6e4e53865d5697d3a983", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 1 - }, - "text": "n u J" - }, - { - "type": "UncategorizedText", - "element_id": "f71998fe363b9c29116c80b5eecf33a2", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 1 - }, - "text": "1 2" - }, - { - "type": "UncategorizedText", - "element_id": "cfae0d4248f7142f7b17f826cd7a5192", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 1 - }, - "text": "]" - }, - { - "type": "Title", - "element_id": "19d05c4115a6b94b3b470e7c10e29698", + "type": "Header", + "element_id": "f03c6d91abe08ae952f1122ce62bb508", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 1 }, - "text": "V C . s c [" + "text": "2103.15348v2 [cs.CV] 21 Jun" }, { "type": "UncategorizedText", @@ -80,7 +50,7 @@ "text": "2 v 8 4 3 5 1 . 
3 0 1 2 : v i X r a" }, { - "type": "ListItem", + "type": "NarrativeText", "element_id": "4fcc5b6364213b1efa9272bdce4f9fcd", "metadata": { "data_source": {}, @@ -109,16 +79,6 @@ }, "text": "Keywords: Document Image Analysis · Deep Learning · Layout Analysis · Character Recognition · Open Source library · Toolkit." }, - { - "type": "UncategorizedText", - "element_id": "6b86b273ff34fce19d6b804eff5a3f57", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 1 - }, - "text": "1" - }, { "type": "Title", "element_id": "3fa53fc0dab8ef96d05d8fd4c7e41b49", @@ -139,25 +99,15 @@ }, "text": "Deep Learning(DL)-based approaches are the state-of-the-art for a wide range of document image analysis (DIA) tasks including document image classification [11," }, - { - "type": "UncategorizedText", - "element_id": "d4735e3a265e16eee03f59718b9b5d03", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "2" - }, { "type": "Title", - "element_id": "22364b7a1d2b35282b360d61ae08e2b9", + "element_id": "0119810584ee0b01e4d14dfd8c250bf2", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "Z. Shen et al." + "text": "2 Z. Shen et al." }, { "type": "NarrativeText", @@ -171,13 +121,13 @@ }, { "type": "NarrativeText", - "element_id": "1f0f5df7c23d4f8e8de4de3085abd7d8", + "element_id": "836e6ef5cecc9a73356c0d5bee181829", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "The library implements simple and intuitive Python APIs without sacrificing generalizability and versatility, and can be easily installed via pip. Its convenient functions for handling document image data can be seamlessly integrated with existing DIA pipelines. With detailed documentations and carefully curated tutorials, we hope this tool will benefit a variety of end-users, and will lead to advances in applications in both industry and academic research." 
+ "text": "LayoutParser provides a unified toolkit to support DL-based document image analysis and processing. To address the aforementioned challenges, LayoutParser is built with the following components:" }, { "type": "NarrativeText", @@ -191,83 +141,73 @@ }, { "type": "NarrativeText", - "element_id": "836e6ef5cecc9a73356c0d5bee181829", + "element_id": "1f0f5df7c23d4f8e8de4de3085abd7d8", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "LayoutParser provides a unified toolkit to support DL-based document image analysis and processing. To address the aforementioned challenges, LayoutParser is built with the following components:" + "text": "The library implements simple and intuitive Python APIs without sacrificing generalizability and versatility, and can be easily installed via pip. Its convenient functions for handling document image data can be seamlessly integrated with existing DIA pipelines. With detailed documentations and carefully curated tutorials, we hope this tool will benefit a variety of end-users, and will lead to advances in applications in both industry and academic research." }, { "type": "ListItem", - "element_id": "074b2bd4ba1bf0caf3dbf1973217416a", + "element_id": "18b1855acfb386ae6e6a253da566e93b", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "1. An off-the-shelf toolkit for applying DL models for layout detection, character" + "text": "4. A DL model hub and community platform for the easy sharing, distribu- tion, and discussion of DIA models and pipelines, to promote reusability, reproducibility, and extensibility (Section 4)" }, { "type": "ListItem", - "element_id": "569ce8891b02bc38f50a0cde0039e951", + "element_id": "22b127e6d05ce12ad9b9170909c64bbc", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "2. A rich repository of pre-trained neural network models (Model Zoo) that" + "text": "1. 
An off-the-shelf toolkit for applying DL models for recognition, and other DIA tasks (Section Bp ayout det ection, character" }, { "type": "ListItem", - "element_id": "18dcbc2839f9783d2c91cbce75d3e685", + "element_id": "569ce8891b02bc38f50a0cde0039e951", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "3. Comprehensive tools for efficient document image data annotation and model" + "text": "2. A rich repository of pre-trained neural network models (Model Zoo) that" }, { "type": "ListItem", - "element_id": "efe6ba3afae54e3c7a05d81583543296", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "4. A DL model hub and community platform for the easy sharing, distribu- tion, and discussion of DIA models and pipelines, to promote reusability, reproducibility, and extensibility (Section 4)" - }, - { - "type": "Title", - "element_id": "c7f4b9a2c7b93fdcc32112de7d9563ba", + "element_id": "18dcbc2839f9783d2c91cbce75d3e685", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "recognition, and other DIA tasks (Section 3)" + "text": "3. 
Comprehensive tools for efficient document image data annotation and model" }, { - "type": "Title", - "element_id": "50f59772d4134ececeaf37069d480784", + "type": "ListItem", + "element_id": "e4b1d076c9e9c84a45bd11fcf816bddf", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "underlies the off-the-shelf usage" + "text": "Comprehensive tools for efficient document image tuning to support different levels of customization ata annotation and model" }, { - "type": "NarrativeText", - "element_id": "9a576fe6eb4355cdf1e772cf462a9eb7", + "type": "ListItem", + "element_id": "90deab7b4ea81483c3431cebb1621c61", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "tuning to support different levels of customization" + "text": "A rich repository of pre-trained neural network models (Model Zoo) underlies the off-the-shelf usage" }, { "type": "NarrativeText", @@ -280,8 +220,8 @@ "text": "LayoutParser is well aligned with recent efforts for improving DL model reusability in other disciplines like natural language processing [8, 34] and com- puter vision [35], but with a focus on unique challenges in DIA. 
We show LayoutParser can be applied in sophisticated and large-scale digitization projects" }, { - "type": "Title", - "element_id": "69c327f77af9a7259f0febf0dffa7e1a", + "type": "Header", + "element_id": "4c2478cf439baab6ace34761eda527d9", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -290,14 +230,14 @@ "text": "LayoutParser: A Unified Toolkit for DL-Based DIA" }, { - "type": "UncategorizedText", - "element_id": "4e07408562bedb8b60ce05c1decfe3ad", + "type": "NarrativeText", + "element_id": "74a7758f83612467af8eea9d20e4a6f7", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3 }, - "text": "3" + "text": "that require precision, efficiency, and robustness, as well as simple and light- weight document processing tasks focusing on efficacy and flexibility (Section 5). LayoutParser is being actively maintained, and support for more deep learning models and novel methods in text-based layout analysis methods [37, 34] is planned." }, { "type": "NarrativeText", @@ -309,16 +249,6 @@ }, "text": "The rest of the paper is organized as follows. Section 2 provides an overview of related work. The core LayoutParser library, DL Model Zoo, and customized model training are described in Section 3, and the DL model hub and commu- nity platform are detailed in Section 4. Section 5 shows two examples of how LayoutParser can be used in practical DIA projects, and Section 6 concludes." }, - { - "type": "NarrativeText", - "element_id": "74a7758f83612467af8eea9d20e4a6f7", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "that require precision, efficiency, and robustness, as well as simple and light- weight document processing tasks focusing on efficacy and flexibility (Section 5). LayoutParser is being actively maintained, and support for more deep learning models and novel methods in text-based layout analysis methods [37, 34] is planned." 
- }, { "type": "Title", "element_id": "1513104c7bf6cd40223a7cc23798378f", @@ -360,8 +290,8 @@ "text": "Recent years have also seen numerous efforts to create libraries for promoting reproducibility and reusability in the field of DL. Libraries like Dectectron2 [35]," }, { - "type": "ListItem", - "element_id": "bbde5bc98ffe50bc4557c848cb1a0473", + "type": "NarrativeText", + "element_id": "77ddbbd89513c49479cd4dad3261d07d", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -370,34 +300,24 @@ "text": "6 The number shown is obtained by specifying the search type as ‘code’. 7 https://ocr-d.de/en/about 8 https://github.com/BobLd/DocumentLayoutAnalysis 9 https://github.com/leonlulu/DeepLayout 10 https://github.com/hpanwar08/detectron2 11 https://github.com/JaidedAI/EasyOCR 12 https://github.com/PaddlePaddle/PaddleOCR" }, { - "type": "UncategorizedText", - "element_id": "4b227777d4dd1fc61c6f884f48641d02", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "4" - }, - { - "type": "NarrativeText", - "element_id": "3993b330c2b3b86513c3edbcd33afc91", + "type": "ListItem", + "element_id": "90b6d90b1496cbc35cb08e310e03d063", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "Z. Shen et al." + "text": "Shen et al. ~ N n" }, { - "type": "FigureCaption", + "type": "Image", "element_id": "812dcaaec927a84d57af36e20adb5ded", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "Efficient Data Annotation Model Customization Document Images Community Platform ‘a >) ¥ DIA Model Hub i .) Customized Model Training] == | Layout Detection Models | ——= DIA Pipeline Sharing ~ OCR Module = { Layout Data stuctue ) = (storage Visualization VY" + "text": " Efficient Data Annotation Model Customization Document Images Community Platform ‘a >) ¥ DIA Model Hub i .) 
Customized Model Training] == | Layout Detection Models | ——= DIA Pipeline Sharing ~ OCR Module = { Layout Data stuctue ) = (storage Visualization VY " }, { "type": "NarrativeText", @@ -450,7 +370,7 @@ "text": "At the core of LayoutParser is an off-the-shelf toolkit that streamlines DL- based document image analysis. Five components support a simple interface with comprehensive functionalities: 1) The layout detection models enable using pre-trained or self-trained DL models for layout detection with just four lines of code. 2) The detected layout information is stored in carefully engineered" }, { - "type": "NarrativeText", + "type": "Header", "element_id": "4c2478cf439baab6ace34761eda527d9", "metadata": { "data_source": {}, @@ -460,25 +380,14 @@ "text": "LayoutParser: A Unified Toolkit for DL-Based DIA" }, { - "type": "UncategorizedText", - "element_id": "ef2d127de37b942baad06145e54b0c61", + "type": "FigureCaption", + "element_id": "b51f99cb953082a922ba43c09d4492b3", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5 }, - "text": "5" - }, - { - "type": "Table", - "element_id": "34923b77ca76e1808956ade5e766f7c2", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 5, - "text_as_html": "
    Dataset| Base Model'|Large ModelNotes
    PubLayNet B8]|F/MMLayouts of modern scientific documents
    M-Layouts of scanned modern magazines and scientific reports
    F-Layouts of scanned US newspapers from the 20th century
    TableBankFFnd business document. Table region on modern scientific
    HJDatasetF/M-Layouts of history Japanese documents
    " - }, - "text": "Dataset | Base Model'| Large Model | Notes PubLayNet B8]| F/M M Layouts of modern scientific documents PRImA M - nned modern magazines and scientific reports Newspapei F - canned US newspapers from the 20th century TableBank F F Table region on modern scientific and business document HJDataset F/M - Layouts of history Japanese documents" + "text": "Table 1: Current layout detection models in the LayoutParser model zoo" }, { "type": "NarrativeText", @@ -491,24 +400,25 @@ "text": "PubLayNet [38] PRImA [3] Newspaper [17] TableBank [18] HJDataset [31]" }, { - "type": "NarrativeText", - "element_id": "b51f99cb953082a922ba43c09d4492b3", + "type": "Title", + "element_id": "4411e525721e7dd801755882fd2361b2", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5 }, - "text": "Table 1: Current layout detection models in the LayoutParser model zoo" + "text": "Dataset" }, { - "type": "Title", - "element_id": "4411e525721e7dd801755882fd2361b2", + "type": "Table", + "element_id": "34923b77ca76e1808956ade5e766f7c2", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "text_as_html": "
    Dataset| Base Model'| Large Model| Notes
    PubLayNet B8]|F/MMLayouts of modern scientific documents
    PRImAM-Layouts of scanned modern magazines and scientific reports
    NewspaperF-Layouts of scanned US newspapers from the 20th century
    TableBankFFTable region on modern scientific and business document
    HJDatasetF/M-Layouts of history Japanese documents
    " }, - "text": "Dataset" + "text": "Dataset | Base Model'| Large Model | Notes PubLayNet B8]| F/M M Layouts of modern scientific documents PRImA M - nned modern magazines and scientific reports Newspapei F - canned US newspapers from the 20th century TableBank F F Table region on modern scientific and business document HJDataset F/M - Layouts of history Japanese documents" }, { "type": "Title", @@ -551,7 +461,7 @@ "text": "Layouts of modern scientific documents Layouts of scanned modern magazines and scientific reports Layouts of scanned US newspapers from the 20th century Table region on modern scientific and business document Layouts of history Japanese documents" }, { - "type": "NarrativeText", + "type": "Footer", "element_id": "c24bcb2cf98d6226bd805b6f99d3b61a", "metadata": { "data_source": {}, @@ -560,26 +470,6 @@ }, "text": "1 For each dataset, we train several models of different sizes for different needs (the trade-off between accuracy vs. computational cost). For “base model” and “large model”, we refer to using the ResNet 50 or ResNet 101 backbones [13], respectively. One can train models of different architectures, like Faster R-CNN [28] (F) and Mask R-CNN [12] (M). For example, an F in the Large Model column indicates it has a Faster R-CNN model trained using the ResNet 101 backbone. The platform is maintained and a number of additions will be made to the model zoo in coming months." }, - { - "type": "NarrativeText", - "element_id": "11dff8778699e76422be6b86c9eaa62a", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 5 - }, - "text": "In LayoutParser, a layout model takes a document image as an input and generates a list of rectangular boxes for the target content regions. Different from traditional methods, it relies on deep convolutional neural networks rather than manually curated rules to identify content regions. 
It is formulated as an object detection problem and state-of-the-art models like Faster R-CNN [28] and Mask R-CNN [12] are used. This yields prediction results of high accuracy and makes it possible to build a concise, generalized interface for layout detection. LayoutParser, built upon Detectron2 [35], provides a minimal API that can perform layout detection with only four lines of code in Python:" - }, - { - "type": "NarrativeText", - "element_id": "9fb9573af5bf767f81cdaf2cf1a72cd9", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 5 - }, - "text": "layout data structures, which are optimized for efficiency and versatility. 3) When necessary, users can employ existing or customized OCR models via the unified API provided in the OCR module. 4) LayoutParser comes with a set of utility functions for the visualization and storage of the layout data. 5) LayoutParser is also highly customizable, via its integration with functions for layout data annotation and model training. We now provide detailed descriptions for each component." - }, { "type": "Title", "element_id": "9f26ca353a2c130a2e32f457d71c1350", @@ -592,33 +482,33 @@ }, { "type": "NarrativeText", - "element_id": "65f9f864775ddef6f9895c53e16c50d4", + "element_id": "9fb9573af5bf767f81cdaf2cf1a72cd9", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5 }, - "text": "1 import layoutparser as lp 2 image = cv2 . imread ( \" image_file \" ) # load images 3 model = lp . De t e c tro n2 Lay outM odel (" + "text": "layout data structures, which are optimized for efficiency and versatility. 3) When necessary, users can employ existing or customized OCR models via the unified API provided in the OCR module. 4) LayoutParser comes with a set of utility functions for the visualization and storage of the layout data. 5) LayoutParser is also highly customizable, via its integration with functions for layout data annotation and model training. 
We now provide detailed descriptions for each component." }, { - "type": "Title", - "element_id": "61b33f079528d200f91471f41645cdc6", + "type": "NarrativeText", + "element_id": "11dff8778699e76422be6b86c9eaa62a", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5 }, - "text": "4 5 layout = model . detect ( image )" + "text": "In LayoutParser, a layout model takes a document image as an input and generates a list of rectangular boxes for the target content regions. Different from traditional methods, it relies on deep convolutional neural networks rather than manually curated rules to identify content regions. It is formulated as an object detection problem and state-of-the-art models like Faster R-CNN [28] and Mask R-CNN [12] are used. This yields prediction results of high accuracy and makes it possible to build a concise, generalized interface for layout detection. LayoutParser, built upon Detectron2 [35], provides a minimal API that can perform layout detection with only four lines of code in Python:" }, { "type": "NarrativeText", - "element_id": "6cd3a9e132c1264a05ec11a2df6b8066", + "element_id": "e416e69991bf6a4b338df18ebdb6e712", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5 }, - "text": "\" lp :// PubLayNet / f as t er _ r c nn _ R _ 50 _ F P N_ 3 x / config \" )" + "text": "import layoutparser as lp image = cv2.imread(\"image_file\") # load images model = lp.Detectron2LayoutModel ( \"1p://PubLayNet/faster_rcnn_R_50_FPN_3x/config\") layout = model.detect (image)" }, { "type": "NarrativeText", @@ -630,45 +520,35 @@ }, "text": "LayoutParser provides a wealth of pre-trained model weights using various datasets covering different languages, time periods, and document types. Due to domain shift [7], the prediction performance can notably drop when models are ap- plied to target samples that are significantly different from the training dataset. 
As document structures and layouts vary greatly in different domains, it is important to select models trained on a dataset similar to the test samples. A semantic syntax is used for initializing the model weights in LayoutParser, using both the dataset name and model name lp:///." }, - { - "type": "UncategorizedText", - "element_id": "e7f6c011776e8db7cd330b54174fd76f", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 6 - }, - "text": "6" - }, { "type": "NarrativeText", - "element_id": "3993b330c2b3b86513c3edbcd33afc91", + "element_id": "5c44994a44f74b706d8a5e74cd753a8b", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 6 }, - "text": "Z. Shen et al." + "text": "6 Z. Shen et al." }, { - "type": "FigureCaption", - "element_id": "185e67615d123b35d38ea72e0cdb6d99", + "type": "Image", + "element_id": "2f498bdd91739a7083490999507420a5", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 6 }, - "text": "- ° . 3 a a 4 a 3 oo er ‘ 2 § 8 a 8 3 3 ‘ £ 4 A g a 9 ‘ 3 ¥ Coordinate g 4 5 3 + § 3 H Extra Features [O=\") [Bo] eaing i Text | | Type | | ower ° & a ¢ o [ coordinatel textblock1, 3 3 ’ g Q 3 , textblock2 , layoutl ] 4 q ® A list of the layout elements Ff" + "text": "33§3 fectange vada8883 Coordinate83 +*Block | [Block | [Read8 Extra features Tet | [Tye | [oder[ coordinatel textblock1 |» , see383 , textblock2 , layout] ]4A list of the layout elementsThe same transformation and operation APIs" }, { - "type": "NarrativeText", - "element_id": "cafae07120d714f0822e89865adf62da", + "type": "Title", + "element_id": "acd4f4584a990134d927e19b6d7e5f88", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 6 }, - "text": "Fig. 2: The relationship between the three types of layout data structures. 
Coordinate supports three kinds of variation; TextBlock consists of the co- ordinate information and extra features like block text, types, and reading orders; a Layout object is a list of all possible layout elements, including other Layout objects. They all support the same set of transformation and operation APIs for maximum flexibility." + "text": "3.2 Layout Data Structures" }, { "type": "NarrativeText", @@ -681,28 +561,28 @@ "text": "Shown in Table 1, LayoutParser currently hosts 9 pre-trained models trained on 5 different datasets. Description of the training dataset is provided alongside with the trained models such that users can quickly identify the most suitable models for their tasks. Additionally, when such a model is not readily available, LayoutParser also supports training customized layout models and community sharing of the models (detailed in Section 3.5)." }, { - "type": "Title", - "element_id": "acd4f4584a990134d927e19b6d7e5f88", + "type": "NarrativeText", + "element_id": "fb271c99cdcfca1001a1a7d56425c5b4", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 6 }, - "text": "3.2 Layout Data Structures" + "text": "A critical feature of LayoutParser is the implementation of a series of data structures and operations that can be used to efficiently process and manipulate the layout elements. In document image analysis pipelines, various post-processing on the layout analysis model outputs is usually required to obtain the final outputs. Traditionally, this requires exporting DL model outputs and then loading the results into other pipelines. All model outputs from LayoutParser will be stored in carefully engineered data types optimized for further processing, which makes it possible to build an end-to-end document digitization pipeline within LayoutParser. There are three key components in the data structure, namely the Coordinate system, the TextBlock, and the Layout. 
They provide different levels of abstraction for the layout data, and a set of APIs are supported for transformations or operations on these classes." }, { - "type": "NarrativeText", - "element_id": "fb271c99cdcfca1001a1a7d56425c5b4", + "type": "FigureCaption", + "element_id": "cafae07120d714f0822e89865adf62da", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 6 }, - "text": "A critical feature of LayoutParser is the implementation of a series of data structures and operations that can be used to efficiently process and manipulate the layout elements. In document image analysis pipelines, various post-processing on the layout analysis model outputs is usually required to obtain the final outputs. Traditionally, this requires exporting DL model outputs and then loading the results into other pipelines. All model outputs from LayoutParser will be stored in carefully engineered data types optimized for further processing, which makes it possible to build an end-to-end document digitization pipeline within LayoutParser. There are three key components in the data structure, namely the Coordinate system, the TextBlock, and the Layout. They provide different levels of abstraction for the layout data, and a set of APIs are supported for transformations or operations on these classes." + "text": "Fig. 2: The relationship between the three types of layout data structures. Coordinate supports three kinds of variation; TextBlock consists of the co- ordinate information and extra features like block text, types, and reading orders; a Layout object is a list of all possible layout elements, including other Layout objects. They all support the same set of transformation and operation APIs for maximum flexibility." 
}, { "type": "Title", - "element_id": "69c327f77af9a7259f0febf0dffa7e1a", + "element_id": "4c2478cf439baab6ace34761eda527d9", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -711,57 +591,47 @@ "text": "LayoutParser: A Unified Toolkit for DL-Based DIA" }, { - "type": "UncategorizedText", - "element_id": "7902699be42c8a8e46fbbb4501726517", + "type": "Title", + "element_id": "89c6cd1d893f782ea68d75737e3393fd", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 7 }, - "text": "7" + "text": "3.3 OCR" }, { "type": "NarrativeText", - "element_id": "e284bd66511cfa064681253e7ac57a9a", + "element_id": "f2a3e5fbb983d9132dddecc381ed6e0b", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 7 }, - "text": "LayoutParser provides a unified interface for existing OCR tools. Though there are many OCR tools available, they are usually configured differently with distinct APIs or protocols for using them. It can be inefficient to add new OCR tools into an existing pipeline, and difficult to make direct comparisons among the available tools to find the best option for a particular project. To this end, LayoutParser builds a series of wrappers among existing OCR engines, and provides nearly the same syntax for using them. It supports a plug-and-play style of using OCR engines, making it effortless to switch, evaluate, and compare different OCR modules:" + "text": "Coordinates are the cornerstones for storing layout information. Currently, three types of Coordinate data structures are provided in LayoutParser, shown in Figure 2. Interval and Rectangle are the most common data types and support specifying 1D or 2D regions within a document. They are parameterized with 2 and 4 parameters. 
A Quadrilateral class is also implemented to support a more generalized representation of rectangular regions when the document is skewed or distorted, where the 4 corner points can be specified and a total of 8 degrees of freedom are supported. A wide collection of transformations like shift, pad, and scale, and operations like intersect, union, and is_in, are supported for these classes. Notably, it is common to separate a segment of the image and analyze it individually. LayoutParser provides full support for this scenario via image cropping operations crop_image and coordinate transformations like relative_to and condition_on that transform coordinates to and from their relative representations. We refer readers to Table 2 for a more detailed description of these operations13." }, { "type": "NarrativeText", - "element_id": "eec800eef6e395c21feacd729868dd18", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "Based on Coordinates, we implement the TextBlock class that stores both the positional and extra features of individual layout elements. It also supports specifying the reading orders via setting the parent field to the index of the parent object. A Layout class is built that takes in a list of TextBlocks and supports processing the elements in batch. Layout can also be nested to support hierarchical layout structures. They support the same operations and transformations as the Coordinate classes, minimizing both learning and deployment effort." - }, - { - "type": "Title", - "element_id": "89c6cd1d893f782ea68d75737e3393fd", + "element_id": "e284bd66511cfa064681253e7ac57a9a", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 7 }, - "text": "3.3 OCR" + "text": "LayoutParser provides a unified interface for existing OCR tools. Though there are many OCR tools available, they are usually configured differently with distinct APIs or protocols for using them. 
It can be inefficient to add new OCR tools into an existing pipeline, and difficult to make direct comparisons among the available tools to find the best option for a particular project. To this end, LayoutParser builds a series of wrappers among existing OCR engines, and provides nearly the same syntax for using them. It supports a plug-and-play style of using OCR engines, making it effortless to switch, evaluate, and compare different OCR modules:" }, { "type": "NarrativeText", - "element_id": "f2a3e5fbb983d9132dddecc381ed6e0b", + "element_id": "eec800eef6e395c21feacd729868dd18", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 7 }, - "text": "Coordinates are the cornerstones for storing layout information. Currently, three types of Coordinate data structures are provided in LayoutParser, shown in Figure 2. Interval and Rectangle are the most common data types and support specifying 1D or 2D regions within a document. They are parameterized with 2 and 4 parameters. A Quadrilateral class is also implemented to support a more generalized representation of rectangular regions when the document is skewed or distorted, where the 4 corner points can be specified and a total of 8 degrees of freedom are supported. A wide collection of transformations like shift, pad, and scale, and operations like intersect, union, and is_in, are supported for these classes. Notably, it is common to separate a segment of the image and analyze it individually. LayoutParser provides full support for this scenario via image cropping operations crop_image and coordinate transformations like relative_to and condition_on that transform coordinates to and from their relative representations. We refer readers to Table 2 for a more detailed description of these operations13." + "text": "Based on Coordinates, we implement the TextBlock class that stores both the positional and extra features of individual layout elements. 
It also supports specifying the reading orders via setting the parent field to the index of the parent object. A Layout class is built that takes in a list of TextBlocks and supports processing the elements in batch. Layout can also be nested to support hierarchical layout structures. They support the same operations and transformations as the Coordinate classes, minimizing both learning and deployment effort." }, { - "type": "ListItem", + "type": "NarrativeText", "element_id": "55ab2654fa8c2c01de322b52f4fad508", "metadata": { "data_source": {}, @@ -792,7 +662,7 @@ }, { "type": "NarrativeText", - "element_id": "8bcb4c948fda07d2fdbf7d582983b93e", + "element_id": "9a44827ec5ebbf51ad441ff9927c6e83", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -801,17 +671,7 @@ "text": "13 This is also available in the LayoutParser documentation pages." }, { - "type": "UncategorizedText", - "element_id": "2c624232cdd221771294dfbb310aca00", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "8" - }, - { - "type": "NarrativeText", + "type": "ListItem", "element_id": "3993b330c2b3b86513c3edbcd33afc91", "metadata": { "data_source": {}, @@ -831,115 +691,115 @@ "text": "Table 2: All operations supported by the layout elements. The same APIs are supported across different layout element classes including Coordinate types, TextBlock and Layout." }, { - "type": "Table", - "element_id": "f81d4915b54758e0d4d52af3566bb813", + "type": "Title", + "element_id": "abf4059c5c98ff5bbd0dde9f8c2b7c75", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8, - "text_as_html": "
    Operation NameDescription
    block.pad(top, bottom,right,left)| Enlarge the current block according to the input
    block.scale(fx, fy)Scale the current block given the ratio ; in x and y direction
    . block.shift(dx, dy)Move the current block with the shift : : a distances in x and y direction
    block1.is_in(block2)Whether block] is inside of block2
    . block1. intersect (block2)Return the intersection region of block1 and block2. . . . Coordinate type to be determined based on the inputs.
    . block1.union(block2)Return the union region of block1 and block2. . . . Coordinate type to be determined based on the inputs.
    . block1.relative_to(block2)Convert the absolute coordinates of block to ' ' relative coordinates to block2
    . block1.condition_on(block2)Calculate the absolute coordinates of blockl given . the canvas block2’s absolute coordinates
    block. crop_image (image)Obtain the image segments in the block region
    " + "page_number": 8 }, - "text": "Operation Name Description block.pad(top, bottom, right, left) Enlarge the current block according to the input block.scale(fx, fy) Scale the current block given the ratio ion in x and y di block.shift(dx, dy) Move the current block with the shift distances in x and y direction block1.is_in(block2) Whether block] is inside of block2 ; Return the intersection region of block and block2. block1. intersect (block2) . . . Coordinate type to be determined based on the inputs. ; Return the union region of block1 and block2. block1.union(block2) . . . Coordinate type to be determined based on the inputs. Convert the absolute coordinates of block to block1.relative_to(block2) ' ' relative coordinates to block2 . Calculate the absolute coordinates of block1 given block1.condition_on(block2) . the canvas block2’s absolute coordinates block. crop_image (image) Obtain the image segments in the block region" + "text": "Operation Name" }, { "type": "Title", - "element_id": "2092f29df87c3cfd32244b325faaba33", + "element_id": "526e0087cc3f254d9f86f6c7d8e23d95", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 8 }, - "text": "block1.condition on(block2)" + "text": "Description" }, { "type": "Title", - "element_id": "aac9bbf1c375a005651b5d2929778d3b", + "element_id": "8dcb74f5ee2eabd0d8e966d46bcdf3be", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 8 }, - "text": "block1.relative to(block2)" + "text": "block.scale(fx, fy)" }, { "type": "Title", - "element_id": "505791f52a5741b58f5dd02836da7b31", + "element_id": "1c1464d6a8f85d78202f67293ee7ac42", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 8 }, - "text": "block1.union(block2)" + "text": "block.shift(dx, dy)" }, { "type": "Title", - "element_id": "39fca1b21a889218bd84127a4d7f27c5", + "element_id": "acfa5090fbb8986000a92d84d41d8140", "metadata": { "data_source": {}, "filetype": "application/pdf", 
"page_number": 8 }, - "text": "block1.intersect(block2)" + "text": "block1.is in(block2)" }, { "type": "Title", - "element_id": "1c1464d6a8f85d78202f67293ee7ac42", + "element_id": "505791f52a5741b58f5dd02836da7b31", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 8 }, - "text": "block.shift(dx, dy)" + "text": "block1.union(block2)" }, { "type": "Title", - "element_id": "acfa5090fbb8986000a92d84d41d8140", + "element_id": "39fca1b21a889218bd84127a4d7f27c5", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 8 }, - "text": "block1.is in(block2)" + "text": "block1.intersect(block2)" }, { "type": "Title", - "element_id": "8dcb74f5ee2eabd0d8e966d46bcdf3be", + "element_id": "aac9bbf1c375a005651b5d2929778d3b", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 8 }, - "text": "block.scale(fx, fy)" + "text": "block1.relative to(block2)" }, { - "type": "NarrativeText", - "element_id": "f60c4482bfe6a1b0eb9095bb8cf21e64", + "type": "Title", + "element_id": "2092f29df87c3cfd32244b325faaba33", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 8 }, - "text": "block.pad(top, bottom, right, left) Enlarge the current block according to the input" + "text": "block1.condition on(block2)" }, { - "type": "Title", - "element_id": "abf4059c5c98ff5bbd0dde9f8c2b7c75", + "type": "Table", + "element_id": "f73e2a20abbf1180916a4b29b15e3b32", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "text_as_html": "
    block.pad(top, bottom,right,left)Enlarge the current block according to the input
    block.scale(fx, fy)Scale the current block given the ratio in x and y direction
    block.shift(dx, dy)Move the current block with the shift distances in x and y direction
    block1.is_in(block2)Whether block] is inside of block2
    block1. intersect (block2)Return the intersection region of blockl and block2. Coordinate type to be determined based on the inputs.
    block1.union(block2)Return the union region of blockl and block2. Coordinate type to be determined based on the inputs.
    block1.relative_to(block2)Convert the absolute coordinates of block to relative coordinates to block2
    block1.condition_on(block2) block. crop_image (image)Calculate the absolute coordinates of blockl given the canvas block2’s absolute coordinates Obtain the image segments in the block region
    " }, - "text": "Operation Name" + "text": "Operation Name Description block.pad(top, bottom, right, left) Enlarge the current block according to the input block.scale(fx, fy) Scale the current block given the ratio ion in x and y di block.shift(dx, dy) Move the current block with the shift distances in x and y direction block1.is_in(block2) Whether block] is inside of block2 ; Return the intersection region of block and block2. block1. intersect (block2) . . . Coordinate type to be determined based on the inputs. ; Return the union region of block1 and block2. block1.union(block2) . . . Coordinate type to be determined based on the inputs. Convert the absolute coordinates of block to block1.relative_to(block2) ' ' relative coordinates to block2 . Calculate the absolute coordinates of block1 given block1.condition_on(block2) . the canvas block2’s absolute coordinates block. (image) Obtain the in the block" }, { - "type": "Title", - "element_id": "7d52bf6c2abc8aebeda26c2400f00ddd", + "type": "NarrativeText", + "element_id": "f60c4482bfe6a1b0eb9095bb8cf21e64", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 8 }, - "text": "block.crop image(image)" + "text": "block.pad(top, bottom, right, left) Enlarge the current block according to the input" }, { "type": "NarrativeText", @@ -952,84 +812,84 @@ "text": "Whether block1 is inside of block2" }, { - "type": "Title", - "element_id": "fdf3d6c91387c02a0cdaa1ff6b3c67c5", + "type": "UncategorizedText", + "element_id": "a270fb0a45b9ed73f992f73dbf0b9a3f", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 8 }, - "text": "Obtain the image segments in the block region" + "text": "Move the current block with the shift distances in x and y direction" }, { "type": "NarrativeText", - "element_id": "401c342fc214105b4a45dba74c62cae0", + "element_id": "494d23eb529015f662df16e6da39f810", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 8 }, - 
"text": "Return the intersection region of block1 and block2. Coordinate type to be determined based on the inputs." + "text": "Scale the current block given the ratio in x and y direction" }, { "type": "NarrativeText", - "element_id": "494d23eb529015f662df16e6da39f810", + "element_id": "d3b069f9dcc24bfac92a6de9e26f2501", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 8 }, - "text": "Scale the current block given the ratio in x and y direction" + "text": "Convert the absolute coordinates of block1 to relative coordinates to block2" }, { "type": "NarrativeText", - "element_id": "ec0a5482fa70f4d98212b6b3a748003a", + "element_id": "bb15ecc186d598c93a1cffa30e9e1b6e", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 8 }, - "text": "Return the union region of block1 and block2. Coordinate type to be determined based on the inputs." + "text": "Calculate the absolute coordinates of block1 given the canvas block2’s absolute coordinates" }, { "type": "NarrativeText", - "element_id": "d3b069f9dcc24bfac92a6de9e26f2501", + "element_id": "401c342fc214105b4a45dba74c62cae0", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 8 }, - "text": "Convert the absolute coordinates of block1 to relative coordinates to block2" + "text": "Return the intersection region of block1 and block2. Coordinate type to be determined based on the inputs." }, { - "type": "Title", - "element_id": "526e0087cc3f254d9f86f6c7d8e23d95", + "type": "NarrativeText", + "element_id": "ec0a5482fa70f4d98212b6b3a748003a", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 8 }, - "text": "Description" + "text": "Return the union region of block1 and block2. Coordinate type to be determined based on the inputs." 
}, { - "type": "NarrativeText", - "element_id": "bb15ecc186d598c93a1cffa30e9e1b6e", + "type": "Title", + "element_id": "7d52bf6c2abc8aebeda26c2400f00ddd", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 8 }, - "text": "Calculate the absolute coordinates of block1 given the canvas block2’s absolute coordinates" + "text": "block.crop image(image)" }, { - "type": "UncategorizedText", - "element_id": "a270fb0a45b9ed73f992f73dbf0b9a3f", + "type": "Title", + "element_id": "fdf3d6c91387c02a0cdaa1ff6b3c67c5", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 8 }, - "text": "Move the current block with the shift distances in x and y direction" + "text": "Obtain the image segments in the block region" }, { "type": "Title", @@ -1082,7 +942,7 @@ "text": "14 https://altoxml.github.io" }, { - "type": "NarrativeText", + "type": "ListItem", "element_id": "4c2478cf439baab6ace34761eda527d9", "metadata": { "data_source": {}, @@ -1092,24 +952,14 @@ "text": "LayoutParser: A Unified Toolkit for DL-Based DIA" }, { - "type": "UncategorizedText", - "element_id": "19581e27de7ced00ff1ce50b2047e7a5", + "type": "Image", + "element_id": "6df6057f894a166cf24fd34f64267f09", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 9 }, - "text": "9" - }, - { - "type": "FigureCaption", - "element_id": "975d6cb141cb0a0313375630ae063fa8", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 9 - }, - "text": "x09 Burpunog uayor Aeydsiq 1 vondo 10g Guypunog usyoy apir:z uondo Mode I: Showing Layout on the Original Image Mode Il: Drawing OCR'd Text at the Correspoding Position" + "text": "a ESStee eaeoooMode I: Showing Layout on the Original ImageMode Il: Drawing OCR'd Text at the Correspoding Position10g Bpunog vayoy feyds1q :1 vondo‘xog Burpunog vay apiH z word" }, { "type": "NarrativeText", @@ -1152,34 +1002,24 @@ "text": "After the training dataset is curated, LayoutParser supports 
different modes for training the layout models. Fine-tuning can be used for training models on a small newly-labeled dataset by initializing the model with existing pre-trained weights. Training from scratch can be helpful when the source dataset and target are significantly different and a large training set is available. However, as suggested in Studer et al.’s work[33], loading pre-trained weights on large-scale datasets like ImageNet [5], even from totally different domains, can still boost model performance. Through the integrated API provided by LayoutParser, users can easily compare model performances on the benchmark datasets." }, { - "type": "FigureCaption", - "element_id": "2680b3c7a55754a3ba2738cb3d9d5e8b", + "type": "ListItem", + "element_id": "9bf176adca2cfa747e7f0255bfc3594a", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10 }, - "text": "et Intra-column reading order Token Categories tie (Adress 2) tee (NE sumber Variable HEE company type Column Categories (J tite we) adaress —_ (7) section Header by ‘e * Column reading order a a (a) Illustration of the original Japanese Maximum Allowed Height BRE B>e EER eR (b) Illustration of the recreated document with dense text structure for better OCR performance" + "text": "10 Z. Shen et al." }, { - "type": "UncategorizedText", - "element_id": "4a44dc15364204a80fe80e9039455cc1", + "type": "Image", + "element_id": "cd0055b04f6049e9d9bf49a4f309f7e9", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10 }, - "text": "10" - }, - { - "type": "NarrativeText", - "element_id": "3993b330c2b3b86513c3edbcd33afc91", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 10 - }, - "text": "Z. Shen et al." 
+ "text": "Text‘Token CategoriestieAddress(Numberig:3pio Bupeas uwunjog(a) Illustration of the original Japanese document with detected layout elements highlighted in colored boxesColumn CategoriesCRE) OR REKER te setPikes enceee+41ybiay pamoyy wnwrxey(b) Illustration of the recreated document with dense text structure for better OCR performance" }, { "type": "NarrativeText", @@ -1243,7 +1083,7 @@ }, { "type": "Title", - "element_id": "69c327f77af9a7259f0febf0dffa7e1a", + "element_id": "4c2478cf439baab6ace34761eda527d9", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1251,16 +1091,6 @@ }, "text": "LayoutParser: A Unified Toolkit for DL-Based DIA" }, - { - "type": "UncategorizedText", - "element_id": "4fc82b26aecb47d2868c4efbe3581732", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 11 - }, - "text": "11" - }, { "type": "NarrativeText", "element_id": "5cdbcea58a81d8f7de9a4fa841107be1", @@ -1283,7 +1113,7 @@ }, { "type": "NarrativeText", - "element_id": "59e46c1089fd1f2c58bba66545420ad6", + "element_id": "fa19ab2536cbbb48c09de29fdebd52bd", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1302,18 +1132,18 @@ "text": "The digitization of historical documents can unlock valuable data that can shed light on many important social, economic, and historical questions. Yet due to scan noises, page wearing, and the prevalence of complicated layout structures, ob- taining a structured representation of historical document scans is often extremely complicated. In this example, LayoutParser was used to develop a comprehensive pipeline, shown in Figure 5, to gener- ate high-quality structured data from historical Japanese firm financial ta- bles with complicated layouts. The pipeline applies two layout models to identify different levels of document structures and two customized OCR engines for optimized character recog- nition accuracy." 
}, { - "type": "FigureCaption", - "element_id": "b33b2bc3b9c416673c7f74c6a00c49d8", + "type": "Image", + "element_id": "d32d5d93079c0053b7ef655185e47bb4", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 11 }, - "text": "(spe peepee, ‘Active Learning Layout Annotate Layout Dataset | + ‘Annotation Toolkit ¥ a Deep Leaming Layout Model Training & Inference, ¥ ; Handy Data Structures & Post-processing El Apis for Layout Det a LAR ror tye eats) 4 Text Recognition | <—— Default ane Customized ¥ ee Layout Structure Visualization & Export | <—— | visualization & Storage The Japanese Document Helpful LayoutParser Digitization Pipeline Modules" + "text": "Annotate Layout Dataset(spe peepee,Active Learning LayoutAnnotation Toolkit4Layout Detection<—Deep Learning LayoutModel Training & Inference,4Post-processin Handy Data Structures &pl 9 APIs for Layout DataText Recognition Default and Customized: r OCR ModelsVisualization & Export |], bayou StructureVisualization & StorageThe Japanese DocumentDigitization PipelineHelpful LayoutParserModules" }, { "type": "NarrativeText", - "element_id": "39ed00ce33ad04a4542357a1f912aed8", + "element_id": "3cbd8234ac0c6d29feb24e6202144aa8", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1323,43 +1153,33 @@ }, { "type": "NarrativeText", - "element_id": "4005fd5e1a8a65c8e989071255cd7386", + "element_id": "de8f09a4156ca73defac521bb354a297", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 11 }, - "text": "15 A document page consists of eight rows like this. For simplicity we skip the row" + "text": "& document page consists of eight rows like this. For simplicity we skip the row segmentation discussion and refer readers to the source code when available." 
}, { "type": "Title", - "element_id": "5d0786de7b188a10caffb32c951327a2", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 11 - }, - "text": "segmentation discussion and refer readers to the source code when available." - }, - { - "type": "UncategorizedText", - "element_id": "6b51d431df5d7f141cbececcf79edf3d", + "element_id": "de2a222ad7b9cf1e5e5432f53c15996d", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 12 }, - "text": "12" + "text": "12 Z. Shen et al." }, { - "type": "Title", - "element_id": "22364b7a1d2b35282b360d61ae08e2b9", + "type": "NarrativeText", + "element_id": "7174760d4c8d9b7b13da3918015312dc", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 12 }, - "text": "Z. Shen et al." + "text": "To decipher the complicated layout" }, { "type": "NarrativeText", @@ -1373,23 +1193,23 @@ }, { "type": "NarrativeText", - "element_id": "164904dc2ff256763b3e64f1b56a784e", + "element_id": "888b9c9ec4431146d744bc6f39e16fd0", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 12 }, - "text": "To decipher the complicated layout" + "text": "A combination of character recognition methods is developed to tackle the unique challenges in this document. In our experiments, we found that irregular spacing between the tokens led to a low character recognition recall rate, whereas existing OCR models tend to perform better on densely-arranged texts. To overcome this challenge, we create a document reorganization algorithm that rearranges the text based on the token bounding boxes detected in the layout analysis step. Figure 4 (b) illustrates the generated image of dense text, which is sent to the OCR APIs as a whole to reduce the transaction costs. The flexible coordinate system in LayoutParser is used to transform the OCR results relative to their original positions on the page." 
}, { "type": "NarrativeText", - "element_id": "888b9c9ec4431146d744bc6f39e16fd0", + "element_id": "07be9fda679b805e67cf5e563eada033", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 12 }, - "text": "A combination of character recognition methods is developed to tackle the unique challenges in this document. In our experiments, we found that irregular spacing between the tokens led to a low character recognition recall rate, whereas existing OCR models tend to perform better on densely-arranged texts. To overcome this challenge, we create a document reorganization algorithm that rearranges the text based on the token bounding boxes detected in the layout analysis step. Figure 4 (b) illustrates the generated image of dense text, which is sent to the OCR APIs as a whole to reduce the transaction costs. The flexible coordinate system in LayoutParser is used to transform the OCR results relative to their original positions on the page." + "text": "Additionally, it is common for historical documents to use unique fonts with different glyphs, which significantly degrades the accuracy of OCR models trained on modern texts. In this document, a special flat font is used for printing numbers and could not be detected by off-the-shelf OCR engines. Using the highly flexible functionalities from LayoutParser, a pipeline approach is constructed that achieves a high recognition accuracy with minimal effort. As the characters have unique visual structures and are usually clustered together, we train the layout model to identify number regions with a dedicated category. Subsequently, LayoutParser crops images within these regions, and identifies characters within them using a self-trained OCR model based on a CNN-RNN [6]. The model detects a total of 15 possible categories, and achieves a 0.98 Jaccard score16 and a 0.17 average Levinstein distances17 for token prediction on the test set." 
}, { "type": "NarrativeText", @@ -1412,48 +1232,28 @@ "text": "16 This measures the overlap between the detected and ground-truth characters, and" }, { - "type": "NarrativeText", - "element_id": "07be9fda679b805e67cf5e563eada033", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 12 - }, - "text": "Additionally, it is common for historical documents to use unique fonts with different glyphs, which significantly degrades the accuracy of OCR models trained on modern texts. In this document, a special flat font is used for printing numbers and could not be detected by off-the-shelf OCR engines. Using the highly flexible functionalities from LayoutParser, a pipeline approach is constructed that achieves a high recognition accuracy with minimal effort. As the characters have unique visual structures and are usually clustered together, we train the layout model to identify number regions with a dedicated category. Subsequently, LayoutParser crops images within these regions, and identifies characters within them using a self-trained OCR model based on a CNN-RNN [6]. The model detects a total of 15 possible categories, and achieves a 0.98 Jaccard score16 and a 0.17 average Levinstein distances17 for token prediction on the test set." - }, - { - "type": "NarrativeText", - "element_id": "5b6b4f6a5766bdb4f09f0a0387a3a373", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 12 - }, - "text": "the maximum is 1." - }, - { - "type": "NarrativeText", - "element_id": "48033291e6d72fefde1a56827e6dacfb", + "type": "ListItem", + "element_id": "e67f07837a2a4c207b21a168c4f0aa6c", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 12 }, - "text": "17 This measures the number of edits from the ground-truth text to the predicted text," + "text": "This measures the overlap between the detected and ground-truth characters, the maximum is 1." 
}, { - "type": "NarrativeText", - "element_id": "5737ba23368c5333b0c39f7e8e474d03", + "type": "ListItem", + "element_id": "f06c47bb49334c82c636ac2d1fe9ec4e", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 12 }, - "text": "and lower is better." + "text": "'7 This measures the number of edits from the ground-truth text to the predicted text, and lower is better." }, { - "type": "Title", - "element_id": "69c327f77af9a7259f0febf0dffa7e1a", + "type": "ListItem", + "element_id": "4c2478cf439baab6ace34761eda527d9", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1462,27 +1262,17 @@ "text": "LayoutParser: A Unified Toolkit for DL-Based DIA" }, { - "type": "UncategorizedText", - "element_id": "3fdba35f04dc8c462986c992bcf87554", + "type": "Image", + "element_id": "f58d47bde7ebddd81c4a678c918a8f1b", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 13 }, - "text": "13" + "text": "(2) Partial table atthe bottom (&) Full page table (6) Partial table at the top (d) Mis-detected tet line" }, { "type": "FigureCaption", - "element_id": "7d42bb6af1404a95a6e8870d5c4d07bf", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 13 - }, - "text": "(@) Partial table at the bottom (&) Full page table (6) Partial table at the top (d) Mis-detected tet line" - }, - { - "type": "NarrativeText", "element_id": "1a2b9e59d53ac38ee6affb3ffcda6b8c", "metadata": { "data_source": {}, @@ -1493,7 +1283,7 @@ }, { "type": "Title", - "element_id": "555b11646d1541685d37f9a18773dd74", + "element_id": "76c98240da7b06b4b3fcf8109edbbaba", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1532,28 +1322,18 @@ "text": "18 https://github.com/atlanhq/camelot, https://github.com/tabulapdf/tabula" }, { - "type": "UncategorizedText", - "element_id": "8527a891e224136950ff32ca212b45bc", + "type": "ListItem", + "element_id": "91e724833d5794abbd5fd6ad6c54aa9f", "metadata": { "data_source": 
{}, "filetype": "application/pdf", "page_number": 14 }, - "text": "14" - }, - { - "type": "NarrativeText", - "element_id": "3993b330c2b3b86513c3edbcd33afc91", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 14 - }, - "text": "Z. Shen et al." + "text": "14 Z. Shen et al." }, { "type": "Title", - "element_id": "35f7d23fd70cfc85a80573db030804ad", + "element_id": "a2a71736439cbc5e1445bddd40712b9b", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1592,8 +1372,8 @@ "text": "References" }, { - "type": "UncategorizedText", - "element_id": "b5bf13691648f2be7e686436513a7366", + "type": "ListItem", + "element_id": "f7e8d95a8f2b84a4461e037b0a7b9704", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1602,8 +1382,8 @@ "text": "[1] Abadi, M., Agarwal, A., Barham, P., Brevdo, E., Chen, Z., Citro, C., Corrado, G.S., Davis, A., Dean, J., Devin, M., Ghemawat, S., Goodfellow, I., Harp, A., Irving, G., Isard, M., Jia, Y., Jozefowicz, R., Kaiser, L., Kudlur, M., Levenberg, J., Man´e, D., Monga, R., Moore, S., Murray, D., Olah, C., Schuster, M., Shlens, J., Steiner, B., Sutskever, I., Talwar, K., Tucker, P., Vanhoucke, V., Vasudevan, V., Vi´egas, F., Vinyals, O., Warden, P., Wattenberg, M., Wicke, M., Yu, Y., Zheng, X.: TensorFlow: Large-scale machine learning on heterogeneous systems (2015), https://www.tensorflow.org/, software available from tensorflow.org" }, { - "type": "NarrativeText", - "element_id": "098ca0ae774b51e7eba5dbe98641da88", + "type": "ListItem", + "element_id": "24862433f743a0910da62ec3fb4f537c", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1612,8 +1392,8 @@ "text": "[2] Alberti, M., Pondenkandath, V., W¨ursch, M., Ingold, R., Liwicki, M.: Deepdiva: a highly-functional python framework for reproducible experiments. In: 2018 16th International Conference on Frontiers in Handwriting Recognition (ICFHR). pp. 423–428. 
IEEE (2018)" }, { - "type": "NarrativeText", - "element_id": "0054c11c9691968349806c35f6aa5f0f", + "type": "ListItem", + "element_id": "79a1f55a3945eb6304697ec72847ed35", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1622,8 +1402,8 @@ "text": "[3] Antonacopoulos, A., Bridson, D., Papadopoulos, C., Pletschacher, S.: A realistic dataset for performance evaluation of document layout analysis. In: 2009 10th International Conference on Document Analysis and Recognition. pp. 296–300. IEEE (2009)" }, { - "type": "NarrativeText", - "element_id": "607a64b13da109e96c62ecaedce91c4f", + "type": "ListItem", + "element_id": "cafb24e03d3f74ce81ba82312af7bfc2", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1632,18 +1412,18 @@ "text": "[4] Baek, Y., Lee, B., Han, D., Yun, S., Lee, H.: Character region awareness for text detection. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. pp. 9365–9374 (2019)" }, { - "type": "UncategorizedText", - "element_id": "9409d20f2ee25336c2566bda8d8bb83c", + "type": "ListItem", + "element_id": "49df59253e226989981b7fc9628ecd40", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 14 }, - "text": "[5] Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: A Large-Scale" + "text": "ot Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: A Large-Scale Hierarchical Image Database. In: CVPRO9 (2009)" }, { - "type": "NarrativeText", - "element_id": "ad1bf75fc53d123c878f8254f9304c9f", + "type": "ListItem", + "element_id": "b000578a41ffcc554faac04609d2f4e1", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1652,7 +1432,7 @@ "text": "[6] Deng, Y., Kanervisto, A., Ling, J., Rush, A.M.: Image-to-markup generation with coarse-to-fine attention. In: International Conference on Machine Learning. pp. 980–989. 
PMLR (2017)" }, { - "type": "NarrativeText", + "type": "ListItem", "element_id": "c6e835fe03323406543926cc0f5a94de", "metadata": { "data_source": {}, @@ -1662,18 +1442,8 @@ "text": "[7] Ganin, Y., Lempitsky, V.: Unsupervised domain adaptation by backpropagation. In: International conference on machine learning. pp. 1180–1189. PMLR (2015)" }, { - "type": "NarrativeText", - "element_id": "44c5093519506610b07942b24d966d77", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 14 - }, - "text": "Hierarchical Image Database. In: CVPR09 (2009)" - }, - { - "type": "Title", - "element_id": "69c327f77af9a7259f0febf0dffa7e1a", + "type": "ListItem", + "element_id": "4c2478cf439baab6ace34761eda527d9", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1682,78 +1452,68 @@ "text": "LayoutParser: A Unified Toolkit for DL-Based DIA" }, { - "type": "UncategorizedText", - "element_id": "e629fa6598d732768f7c726b4b621285", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 15 - }, - "text": "15" - }, - { - "type": "Title", - "element_id": "9b9688203e9cdea89ded788342be4032", + "type": "NarrativeText", + "element_id": "068bf90a7743f50c4a00d4827035e42f", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 15 }, - "text": "[14] Kay, A.: Tesseract: An open-source optical character recognition engine. Linux J." + "text": "[11] Harley, A.W., Ufkes, A., Derpanis, K.G.: Evaluation of deep convolutional nets for document image classification and retrieval. In: 2015 13th International Conference on Document Analysis and Recognition (ICDAR). pp. 991–995. IEEE (2015) [12] He, K., Gkioxari, G., Doll´ar, P., Girshick, R.: Mask r-cnn. 
In: Proceedings of the" }, { - "type": "NarrativeText", - "element_id": "62b12089ccbd0d2dd2f6c292cfa6a6fb", + "type": "ListItem", + "element_id": "c8f5863d94cc9b9d77f153c6d1b0015a", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 15 }, - "text": "[20] Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 3431–3440 (2015)" + "text": "[10] Graves, A., Fern´andez, S., Gomez, F., Schmidhuber, J.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd international conference on Machine learning. pp. 369–376 (2006)" }, { - "type": "NarrativeText", - "element_id": "890eb2d0b6b7dbf00a5e0a4ad2f82107", + "type": "ListItem", + "element_id": "60fbf9d2525b5a22588082da96a41ff8", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 15 }, - "text": "[19] Lin, T.Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll´ar, P., Zitnick, C.L.: Microsoft coco: Common objects in context. In: European conference on computer vision. pp. 740–755. Springer (2014)" + "text": "He, K., Gkioxari, G., Dollar, P., Girshick, R.: Mask r-cnn. In: Proceedings of the IEEE international conference on computer vision. pp. 2961-2969 (2017)" }, { "type": "NarrativeText", - "element_id": "be647bda3f1ca1b63554ef22d1313a43", + "element_id": "2f103adde52e35a8853cbb476720a6ef", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 15 }, - "text": "[18] Li, M., Cui, L., Huang, S., Wei, F., Zhou, M., Li, Z.: Tablebank: Table benchmark for image-based table detection and recognition. arXiv preprint arXiv:1903.01949 (2019)" + "text": "[8] Gardner, M., Grus, J., Neumann, M., Tafjord, O., Dasigi, P., Liu, N., Peters, M., Schmitz, M., Zettlemoyer, L.: Allennlp: A deep semantic natural language processing platform. 
arXiv preprint arXiv:1803.07640 (2018) (cid:32)Lukasz Garncarek, Powalski, R., Stanis(cid:32)lawek, T., Topolski, B., Halama, P., Grali´nski, F.: Lambert: Layout-aware (language) modeling using bert for in- formation extraction (2020)" }, { - "type": "NarrativeText", - "element_id": "09cfad31b28b1315b0bc7bd219136057", + "type": "ListItem", + "element_id": "7ceaba2290e3f9c5f3754032ce4d5663", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 15 }, - "text": "[17] Lee, B.C.G., Mears, J., Jakeway, E., Ferriter, M., Adams, C., Yarasavage, N., Thomas, D., Zwaard, K., Weld, D.S.: The Newspaper Navigator Dataset: Extracting Headlines and Visual Content from 16 Million Historic Newspaper Pages in Chronicling America, p. 3055–3062. Association for Computing Machinery, New York, NY, USA (2020), https://doi.org/10.1145/3340531.3412767" + "text": "[13] He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 770–778 (2016)" }, { - "type": "NarrativeText", - "element_id": "80498c312fd32cb744e5953dfef18604", + "type": "ListItem", + "element_id": "a772a029ff3b22f4dca5f7df3fe1897b", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 15 }, - "text": "[16] Lee, B.C., Weld, D.S.: Newspaper navigator: Open faceted search for 1.5 million images. In: Adjunct Publication of the 33rd Annual ACM Sym- posium on User Interface Software and Technology. p. 120–122. UIST ’20 Adjunct, Association for Computing Machinery, New York, NY, USA (2020). https://doi.org/10.1145/3379350.3416143, https://doi-org.offcampus. lib.washington.edu/10.1145/3379350.3416143" + "text": "Kay, A.: Tesseract: An open-source optical character recognition engine. Linux J. 
2007(159), 2 (Jul 2007)" }, { - "type": "NarrativeText", - "element_id": "3e0b97d540b7b43ad61292a89a58137f", + "type": "ListItem", + "element_id": "1f1a0fac1bae95f076ea34c955551632", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1762,154 +1522,154 @@ "text": "[15] Lamiroy, B., Lopresti, D.: An open architecture for end-to-end document analysis benchmarking. In: 2011 International Conference on Document Analysis and Recognition. pp. 42–47. IEEE (2011)" }, { - "type": "NarrativeText", - "element_id": "f7cfa7ca2e7175d8bdba9c0cb26a7c98", + "type": "ListItem", + "element_id": "0aabfb2a8e358618179ec2e1d322e519", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 15 }, - "text": "[21] Neudecker, C., Schlarb, S., Dogan, Z.M., Missier, P., Sufi, S., Williams, A., Wolsten- croft, K.: An experimental workflow development platform for historical document digitisation and analysis. In: Proceedings of the 2011 workshop on historical document imaging and processing. pp. 161–168 (2011)" + "text": "[16] Lee, B.C., Weld, D.S.: Newspaper navigator: Open faceted search for 1.5 million images. In: Adjunct Publication of the 33rd Annual ACM Sym- posium on User Interface Software and Technology. p. 120–122. UIST ’20 Adjunct, Association for Computing Machinery, New York, NY, USA (2020). https://doi.org/10.1145/3379350.3416143, https://doi-org.offcampus. lib.washington.edu/10.1145/3379350.3416143" }, { - "type": "NarrativeText", - "element_id": "aae12b8f70e03a3e35015ebda5974ebe", + "type": "ListItem", + "element_id": "df18427a8013b4df36e8ac4e2ee5da3a", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 15 }, - "text": "[22] Oliveira, S.A., Seguin, B., Kaplan, F.: dhsegment: A generic deep-learning approach for document segmentation. In: 2018 16th International Conference on Frontiers in Handwriting Recognition (ICFHR). pp. 7–12. 
IEEE (2018)" + "text": "[17] Lee, B.C.G., Mears, J., Jakeway, E., Ferriter, M., Adams, C., Yarasavage, N., Thomas, D., Zwaard, K., Weld, D.S.: The Newspaper Navigator Dataset: Extracting Headlines and Visual Content from 16 Million Historic Newspaper Pages in Chronicling America, p. 3055–3062. Association for Computing Machinery, New York, NY, USA (2020), https://doi.org/10.1145/3340531.3412767" }, { - "type": "NarrativeText", - "element_id": "068bf90a7743f50c4a00d4827035e42f", + "type": "ListItem", + "element_id": "257e7b8aef89c41e03bf837ea517885e", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 15 }, - "text": "[11] Harley, A.W., Ufkes, A., Derpanis, K.G.: Evaluation of deep convolutional nets for document image classification and retrieval. In: 2015 13th International Conference on Document Analysis and Recognition (ICDAR). pp. 991–995. IEEE (2015) [12] He, K., Gkioxari, G., Doll´ar, P., Girshick, R.: Mask r-cnn. In: Proceedings of the" + "text": "[18] Li, M., Cui, L., Huang, S., Wei, F., Zhou, M., Li, Z.: Tablebank: Table benchmark for image-based table detection and recognition. arXiv preprint arXiv:1903.01949 (2019)" }, { - "type": "NarrativeText", - "element_id": "813cac1316043d454f3c928740435736", + "type": "ListItem", + "element_id": "00c7abdd98fedd1746994d16ca44d45f", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 15 }, - "text": "[10] Graves, A., Fern´andez, S., Gomez, F., Schmidhuber, J.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd international conference on Machine learning. pp. 369–376 (2006)" + "text": "[19] Lin, T.Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll´ar, P., Zitnick, C.L.: Microsoft coco: Common objects in context. In: European conference on computer vision. pp. 740–755. 
Springer (2014)" }, { - "type": "NarrativeText", - "element_id": "124b6b55da69fccc1c06568bda34f63c", + "type": "ListItem", + "element_id": "7a0afd734c99f6b076dc58b2e57cfec6", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 15 }, - "text": "[13] He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 770–778 (2016)" + "text": "[20] Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 3431–3440 (2015)" }, { - "type": "UncategorizedText", - "element_id": "16390873ae6b6a173fc894a873bab022", + "type": "ListItem", + "element_id": "00d6ff1b3fb21f8a608f3b6269df56be", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 15 }, - "text": "[9]" + "text": "[21] Neudecker, C., Schlarb, S., Dogan, Z.M., Missier, P., Sufi, S., Williams, A., Wolsten- croft, K.: An experimental workflow development platform for historical document digitisation and analysis. In: Proceedings of the 2011 workshop on historical document imaging and processing. pp. 161–168 (2011)" }, { - "type": "NarrativeText", - "element_id": "2f103adde52e35a8853cbb476720a6ef", + "type": "ListItem", + "element_id": "deecdfacbce71dd1425fd54010b2fad1", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 15 }, - "text": "[8] Gardner, M., Grus, J., Neumann, M., Tafjord, O., Dasigi, P., Liu, N., Peters, M., Schmitz, M., Zettlemoyer, L.: Allennlp: A deep semantic natural language processing platform. 
arXiv preprint arXiv:1803.07640 (2018) (cid:32)Lukasz Garncarek, Powalski, R., Stanis(cid:32)lawek, T., Topolski, B., Halama, P., Grali´nski, F.: Lambert: Layout-aware (language) modeling using bert for in- formation extraction (2020)" + "text": "[22] Oliveira, S.A., Seguin, B., Kaplan, F.: dhsegment: A generic deep-learning approach for document segmentation. In: 2018 16th International Conference on Frontiers in Handwriting Recognition (ICFHR). pp. 7–12. IEEE (2018)" }, { - "type": "UncategorizedText", - "element_id": "e90f44c0e10f9acb4d8f4c5895846d1e", + "type": "ListItem", + "element_id": "5c44994a44f74b706d8a5e74cd753a8b", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 15 + "page_number": 16 }, - "text": "2007(159), 2 (Jul 2007)" + "text": "6 Z. Shen et al." }, { - "type": "Title", - "element_id": "4d54eb351d8fc3bfbbf7286aa15eabe3", + "type": "ListItem", + "element_id": "c9d8f6434425015c72f94fb212bba28f", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 15 + "page_number": 16 }, - "text": "IEEE international conference on computer vision. pp. 2961–2969 (2017)" + "text": "[27] Qasim, S.R., Mahmood, H., Shafait, F.: Rethinking table recognition using graph neural networks. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 142–147. IEEE (2019)" }, { - "type": "UncategorizedText", - "element_id": "b17ef6d19c7a5b1ee83b907c595526dc", + "type": "ListItem", + "element_id": "9c3e13a0e9738b846289bff06952da3b", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 16 }, - "text": "16" + "text": "[28] Ren, S., He, K., Girshick, R., Sun, J.: Faster r-cnn: Towards real-time object detection with region proposal networks. In: Advances in neural information processing systems. pp. 
91–99 (2015)" }, { - "type": "Title", - "element_id": "21d399ba787aabbf69a8ca861cbcc4a3", + "type": "ListItem", + "element_id": "bd680d8baa57cc15337de2e0c299d121", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 16 }, - "text": "[38] Zhong, X., Tang, J., Yepes, A.J.: Publaynet:" + "text": "[31] Shen, Z., Zhang, K., Dell, M.: A large dataset of historical japanese documents with complex layouts. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops. pp. 548–549 (2020)" }, { - "type": "NarrativeText", - "element_id": "219033258f3fff3de33bed379610c8f3", + "type": "UncategorizedText", + "element_id": "b66713d3f2d1689f9174e1cb87429eed", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 16 }, - "text": "[23] Paszke, A., Gross, S., Chintala, S., Chanan, G., Yang, E., DeVito, Z., Lin, Z., Desmaison, A., Antiga, L., Lerer, A.: Automatic differentiation in pytorch (2017) [24] Paszke, A., Gross, S., Massa, F., Lerer, A., Bradbury, J., Chanan, G., Killeen, T., Lin, Z., Gimelshein, N., Antiga, L., et al.: Pytorch: An imperative style, high-performance deep learning library. arXiv preprint arXiv:1912.01703 (2019) [25] Pletschacher, S., Antonacopoulos, A.: The page (page analysis and ground-truth elements) format framework. In: 2010 20th International Conference on Pattern Recognition. pp. 257–260. IEEE (2010)" + "text": "[32] Shen, Z., Zhao, J., Dell, M., Yu, Y., Li, W.: Olala: Object-level active learning" }, { - "type": "NarrativeText", - "element_id": "285ce5849d6fd9036e5d16724c024ab9", + "type": "UncategorizedText", + "element_id": "10a3ff59f6157f21733e659a41031f83", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 16 }, - "text": "[26] Prasad, D., Gadpal, A., Kapadni, K., Visave, M., Sultanpure, K.: Cascadetabnet: An approach for end to end table detection and structure recognition from image- based documents. 
In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops. pp. 572–573 (2020)" + "text": "[37] Xu, Y., Li, M., Cui, L., Huang, S., Wei, F., Zhou, M.: Layoutlm: Pre-training of" }, { "type": "NarrativeText", - "element_id": "1abcfa28cce9b0f5194dec0d534f28e5", + "element_id": "219033258f3fff3de33bed379610c8f3", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 16 }, - "text": "[27] Qasim, S.R., Mahmood, H., Shafait, F.: Rethinking table recognition using graph neural networks. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 142–147. IEEE (2019)" + "text": "[23] Paszke, A., Gross, S., Chintala, S., Chanan, G., Yang, E., DeVito, Z., Lin, Z., Desmaison, A., Antiga, L., Lerer, A.: Automatic differentiation in pytorch (2017) [24] Paszke, A., Gross, S., Massa, F., Lerer, A., Bradbury, J., Chanan, G., Killeen, T., Lin, Z., Gimelshein, N., Antiga, L., et al.: Pytorch: An imperative style, high-performance deep learning library. arXiv preprint arXiv:1912.01703 (2019) [25] Pletschacher, S., Antonacopoulos, A.: The page (page analysis and ground-truth elements) format framework. In: 2010 20th International Conference on Pattern Recognition. pp. 257–260. IEEE (2010)" }, { - "type": "NarrativeText", - "element_id": "f7c67eae65521c3a753337d08c5a7cc3", + "type": "ListItem", + "element_id": "21d151e4c182a1f441c3486d2f79afc0", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 16 }, - "text": "[28] Ren, S., He, K., Girshick, R., Sun, J.: Faster r-cnn: Towards real-time object detection with region proposal networks. In: Advances in neural information processing systems. pp. 91–99 (2015)" + "text": "[26] Prasad, D., Gadpal, A., Kapadni, K., Visave, M., Sultanpure, K.: Cascadetabnet: An approach for end to end table detection and structure recognition from image- based documents. 
In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops. pp. 572–573 (2020)" }, { "type": "NarrativeText", @@ -1922,28 +1682,8 @@ "text": "[29] Scarselli, F., Gori, M., Tsoi, A.C., Hagenbuchner, M., Monfardini, G.: The graph neural network model. IEEE transactions on neural networks 20(1), 61–80 (2008) [30] Schreiber, S., Agne, S., Wolf, I., Dengel, A., Ahmed, S.: Deepdesrt: Deep learning for detection and structure recognition of tables in document images. In: 2017 14th IAPR international conference on document analysis and recognition (ICDAR). vol. 1, pp. 1162–1167. IEEE (2017)" }, { - "type": "NarrativeText", - "element_id": "4f43b2e563a35ae0208a8626f7e3280e", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 16 - }, - "text": "[31] Shen, Z., Zhang, K., Dell, M.: A large dataset of historical japanese documents with complex layouts. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops. pp. 548–549 (2020)" - }, - { - "type": "UncategorizedText", - "element_id": "b66713d3f2d1689f9174e1cb87429eed", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 16 - }, - "text": "[32] Shen, Z., Zhao, J., Dell, M., Yu, Y., Li, W.: Olala: Object-level active learning" - }, - { - "type": "NarrativeText", - "element_id": "da6733a53c75743361e9edcc1d36a20c", + "type": "ListItem", + "element_id": "4c8ddc159ec208bb7f454603fcd7c4bd", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1962,8 +1702,8 @@ "text": "[34] Wolf, T., Debut, L., Sanh, V., Chaumond, J., Delangue, C., Moi, A., Cistac, P., Rault, T., Louf, R., Funtowicz, M., et al.: Huggingface’s transformers: State-of- the-art natural language processing. arXiv preprint arXiv:1910.03771 (2019) [35] Wu, Y., Kirillov, A., Massa, F., Lo, W.Y., Girshick, R.: Detectron2. 
https://" }, { - "type": "NarrativeText", - "element_id": "d207e2724a17741e3ae1986d63cb5636", + "type": "ListItem", + "element_id": "6c94dd219ce339c358163833e20d099e", "metadata": { "data_source": {}, "filetype": "application/pdf", @@ -1972,83 +1712,53 @@ "text": "[36] Xu, Y., Xu, Y., Lv, T., Cui, L., Wei, F., Wang, G., Lu, Y., Florencio, D., Zhang, C., Che, W., et al.: Layoutlmv2: Multi-modal pre-training for visually-rich document understanding. arXiv preprint arXiv:2012.14740 (2020)" }, { - "type": "UncategorizedText", - "element_id": "10a3ff59f6157f21733e659a41031f83", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 16 - }, - "text": "[37] Xu, Y., Li, M., Cui, L., Huang, S., Wei, F., Zhou, M.: Layoutlm: Pre-training of" - }, - { - "type": "Title", - "element_id": "462753569cb801c6f858759742a93793", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 16 - }, - "text": "ument Analysis and Recognition (ICDAR). pp. 1015–1022. https://doi.org/10.1109/ICDAR.2019.00166" - }, - { - "type": "Title", - "element_id": "2625b6830768eac986cfee208c0270de", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 16 - }, - "text": "text and layout for document image understanding (2019)" - }, - { - "type": "NarrativeText", - "element_id": "9dce913bddaa63724f5de64e539b7016", + "type": "ListItem", + "element_id": "94ce48002d0ae80dc04f26a5dd2e8f11", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 16 }, - "text": "based layout annotation. 
arXiv preprint arXiv:2010.01762 (2020)" + "text": "Xu, Y., Li, M., Cui, L., Huang, S., Wei, F., Zhou, M.: Layoutlm: Pre-training of text and layout for document image understanding (2019)" }, { - "type": "Title", - "element_id": "93d261a89a8422fb8d166e6cdf95d8f6", + "type": "ListItem", + "element_id": "5657166191992144b2b06f2bd05ffabf", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 16 }, - "text": "github.com/facebookresearch/detectron2 (2019)" + "text": "github. com/facebookresearch/detectron2) (2019) Wu, Y., Kirillov, A., Massa, F., Lo, W.Y., Girshick, R.: Detectron2." }, { - "type": "NarrativeText", - "element_id": "3993b330c2b3b86513c3edbcd33afc91", + "type": "ListItem", + "element_id": "c1780f7a01a76540c5eb5cecf1a2270d", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 16 }, - "text": "Z. Shen et al." + "text": "Shen, Z., Zhao, J., Dell, M., Yu, Y., Li, W.: Olala: Object-level active learning based layout annotation. arXiv preprint arXiv:2010.01762 (2020)" }, { "type": "Title", - "element_id": "c7fc0ade487926854bb602bca85fad60", + "element_id": "21d399ba787aabbf69a8ca861cbcc4a3", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 16 }, - "text": "layout analysis." + "text": "[38] Zhong, X., Tang, J., Yepes, A.J.: Publaynet:" }, { - "type": "UncategorizedText", - "element_id": "96c49c3fbbb585f8062778e9a404b00f", + "type": "ListItem", + "element_id": "435e423f8ca655521a6fe38e8e0a3e1d", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 16 }, - "text": "largest dataset ever for doc- In: 2019 International Conference on Document IEEE (Sep 2019)." + "text": "Zhong, X., Tang, J., Yepes, A.J.: Publaynet: largest dataset ever for doc- ument layout analysis. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 1015-1022. IEEE (Sep 2019). 
https: //doi.org/10.1109/ICDAR.2019.00166" } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf.json b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf.json index 1032755b81..8b6febb9d7 100644 --- a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf.json @@ -5,7 +5,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Core Skills for Biomedical Data Scientists" }, @@ -15,7 +16,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Maryam Zaringhalam, PhD, AAAS Science & Technology Policy Fellow" }, @@ -25,7 +27,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Lisa Federer, MLIS, Data Science Training Coordinator" }, @@ -35,7 +38,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Michael F. 
Huerta, PhD, Associate Director of NLM for Program Development and NLM Coordinator of Data Science and Open Science Initiatives" }, @@ -45,7 +49,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Executive Summary" }, @@ -55,7 +60,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "This report provides recommendations for a minimal set of core skills for biomedical data scientists based on analysis that draws on opinions of data scientists, curricula for existing biomedical data science programs, and requirements for biomedical data science jobs. Suggested high-level core skills include:" }, @@ -65,7 +71,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "1. General biomedical subject matter knowledge: biomedical data scientists should have a general working knowledge of the principles of biology, bioinformatics, and basic clinical science;" }, @@ -75,7 +82,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "2. Programming language expertise: biomedical data scientists should be fluent in at" }, @@ -85,7 +93,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "least one programming language (typically R and/or Python);" }, @@ -95,7 +104,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "3. 
Predictive analytics, modeling, and machine learning: while a range of statistical methods may be useful, predictive analytics, modeling, and machine learning emerged as especially important skills in biomedical data science;" }, @@ -105,7 +115,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "4. Team science and scientific communication: “soft” skills, like the ability to work well on teams and communicate effectively in both verbal and written venues, may be as important as the more technical skills typically associated with data science." }, @@ -115,7 +126,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "5. Responsible data stewardship: a successful data scientist must be able to implement best practices for data management and stewardship, as well as conduct research in an ethical manner that maintains data security and privacy." }, @@ -125,7 +137,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "The report further details specific skills and expertise relevant to biomedical data scientists." }, @@ -135,7 +148,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Motivation" }, @@ -145,7 +159,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Training a biomedical data science (BDS) workforce is a central theme in NLM’s Strategic Plan for the coming decade. That commitment is echoed in the NIH-wide Big Data to Knowledge (BD2K) initiative, which invested $61 million between FY2014 and FY2017 in training programs for the development and use of biomedical big data science methods and tools. 
In line with" }, @@ -155,7 +170,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "Core Skills for Biomedical Data Scientists _____________________________________________________________________________________________" }, @@ -165,7 +181,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "this commitment, a recent report to the NLM Director recommended working across NIH to identify and develop core skills required of a biomedical data scientist to consistency across the cohort of NIH-trained data scientists. This report provides a set of recommended core skills based on analysis of current BD2K-funded training programs, biomedical data science job ads, and practicing members of the current data science workforce." }, @@ -175,7 +192,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "Methodology" }, @@ -185,7 +203,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "The Workforce Excellence team took a three-pronged approach to identifying core skills required of a biomedical data scientist (BDS), drawing from:" }, @@ -195,7 +214,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "a) Responses to a 2017 Kaggle1 survey2 of over 16,000 self-identified data scientists working across many industries. Analysis of the Kaggle survey responses from the current data science workforce provided insights into the current generation of data scientists, including how they were trained and what programming and analysis skills they use." 
}, @@ -205,7 +225,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "b) Data science skills taught in BD2K-funded training programs. A qualitative content analysis was applied to the descriptions of required courses offered under the 12 BD2K-funded training programs. Each course was coded using qualitative data analysis software, with each skill that was present in the description counted once. The coding schema of data science-related skills was inductively developed and was organized into four major categories: (1) statistics and math skills; (2) computer science; (3) subject knowledge; (4) general skills, like communication and teamwork. The coding schema is detailed in Appendix A." }, @@ -215,7 +236,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "c) Desired skills identified from data science-related job ads. 59 job ads from government (8.5%), academia (42.4%), industry (33.9%), and the nonprofit sector (15.3%) were sampled from websites like Glassdoor, Linkedin, and Ziprecruiter. The content analysis methodology and coding schema utilized in analyzing the training programs were applied to the job descriptions. Because many job ads mentioned the same skill more than once, each occurrence of the skill was coded, therefore weighting important skills that were mentioned multiple times in a single ad." }, @@ -225,7 +247,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "Analysis of the above data provided insights into the current state of biomedical data science training, as well as a view into data science-related skills likely to be needed to prepare the BDS workforce to succeed in the future. Together, these analyses informed recommendations for core skills necessary for a competitive biomedical data scientist." 
}, @@ -235,7 +258,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "1 Kaggle is an online community for data scientists, serving as a platform for collaboration, competition, and learning: http://kaggle.com 2 In August 2017, Kaggle conducted an industry-wide survey to gain a clearer picture of the state of data science and machine learning. A standard set of questions were asked of all respondents, with more specific questions related to work for employed data scientists and questions related to learning for data scientists in training. Methodology and results: https://www.kaggle.com/kaggle/kaggle-survey-2017" }, @@ -245,7 +269,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "2" } diff --git a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/65/11/main.PMC6312790.pdf.json b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/65/11/main.PMC6312790.pdf.json index 8fe11dc463..06c384a72c 100644 --- a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/65/11/main.PMC6312790.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/65/11/main.PMC6312790.pdf.json @@ -5,7 +5,14 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [ + { + "text": "DatainBrief22 ( 2019 ) 451 –", + "url": "https://doi.org/10.1016/j.dib.2018.11.134", + "start_index": 0 + } + ] }, "text": "Data in Brief 22 (2019) 451–457" }, @@ -15,7 +22,14 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [ + { + "text": "", + "url": "www.sciencedirect.com/science/journal/23523409", + "start_index": -1 + } + ] }, "text": "Contents lists available at ScienceDirect" }, @@ -25,7 +39,8 @@ 
"metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Data in Brief" }, @@ -35,7 +50,14 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [ + { + "text": "www . elsevier . com / locate /", + "url": "www.elsevier.com/locate/dib", + "start_index": 18 + } + ] }, "text": "journal homepage: www.elsevier.com/locate/dib" }, @@ -45,7 +67,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Data Article" }, @@ -55,7 +78,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Data on environmental sustainable corrosion inhibitor for stainless steel in aggressive environment" }, @@ -65,7 +89,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Omotayo Sanni n, Abimbola Patricia I. 
Popoola" }, @@ -75,7 +100,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Department of Chemical, Metallurgical and Materials Engineering, Tshwane University of Technology, Pretoria, South Africa" }, @@ -85,7 +111,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "a r t i c l e i n f o" }, @@ -95,7 +122,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "a b s t r a c t" }, @@ -105,7 +133,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Article history: Received 31 August 2018 Received in revised form 17 November 2018 Accepted 27 November 2018 Available online 30 November 2018" }, @@ -115,7 +144,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Keywords: Corrosion Stainless steel Inhibitor Sulphuric acid" }, @@ -125,7 +155,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "This data article contains data related to the research article entitled “enhanced corrosion resistance of stainless steel Type 316 in sulphuric acid solution using eco-friendly waste product” (Sanni et al., 2018). In this data article, a comprehensive effect of waste product and optimized process parameter of the inhibitor in 0.5 M H2SO4 solution was presented using weight loss and potentiody- the inhibitor namic polarization techniques. The presence of (egg shell powder) influenced corrosion resistance of stainless steel. 
Inhibition efficiency value of 94.74% was recorded as a result of inhibition of the steel by the ionized molecules of the inhibiting compound of the egg shell powder influencing the redox mechan- ism reactions responsible for corrosion and surface deterioration." }, @@ -135,7 +166,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "& 2018 Published by Elsevier Inc. This is an open access article under the CC BY-NC-ND license (http://creativecommons.org/licenses/by-nc-nd/4.0/)." }, @@ -145,7 +177,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Specification table" }, @@ -155,29 +188,32 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Subject area More specific subject area Surface science and engineering Type of data" }, { "type": "Title", - "element_id": "b27e559f6c00d2bde61efba5db252e31", + "element_id": "1064dcef42380cfdb90c668aa3a670a3", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, - "text": "Materials engineering" + "text": "Table and figure" }, { "type": "Title", - "element_id": "1064dcef42380cfdb90c668aa3a670a3", + "element_id": "b27e559f6c00d2bde61efba5db252e31", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, - "text": "Table and figure" + "text": "Materials engineering" }, { "type": "Title", @@ -185,7 +221,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "n Corresponding author. tayo.sanni@yahoo.com; SanniO@tut.ac.za" }, @@ -195,7 +232,14 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [ + { + "text": "tayo . 
sanni @ yahoo . com", + "url": "mailto:tayo.sanni@yahoo.com", + "start_index": 16 + } + ] }, "text": "E-mail address: tayo.sanni@yahoo.com (O. Sanni)." }, @@ -205,7 +249,24 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [ + { + "text": "https", + "url": "https://doi.org/10.1016/j.dib.2018.11.134", + "start_index": 0 + }, + { + "text": "https", + "url": "https://doi.org/10.1016/j.dib.2018.11.134", + "start_index": 0 + }, + { + "text": "https :// doi . org / 10 . 1016 / j . dib . 2018 . 11 .", + "url": "https://doi.org/10.1016/j.dib.2018.11.134", + "start_index": 0 + } + ] }, "text": "https://doi.org/10.1016/j.dib.2018.11.134 2352-3409/& 2018 Published by Elsevier Inc. This is an open access article under the CC BY-NC-ND license (http://creativecommons.org/licenses/by-nc-nd/4.0/)." }, @@ -215,7 +276,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "452" }, @@ -225,7 +287,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "O. Sanni, A.P.I. 
Popoola / Data in Brief 22 (2019) 451–457" }, @@ -235,7 +298,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "How data were acquired" }, @@ -245,7 +309,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "Data format Experimental factors" }, @@ -255,7 +320,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "Experimental features Data source location" }, @@ -265,7 +331,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "Accessibility Related research article" }, @@ -275,7 +342,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "The cleaned and weighed specimen was suspended in beakers con- taining 0.5 M H2SO4 solution of different concentrations of egg shell powder. The pre-weighed stainless steel samples were retrieved from the test solutions after every 24 h, cleaned appropriately, dried and reweighed. Raw, analyzed The difference between the weight at a given time and the initial weight of the specimen was taken as the weight loss, which was used to calculate the corrosion rate and inhibition efficiency. Inhibitor concentration, exposure time Department of Chemical, Metallurgical and Materials Engineering, Tshwane University of Technology, Pretoria, South Africa Data are available within this article O. Sanni, A. P. I. Popoola, and O. S. I. Fayomi, Enhanced corrosion resistance of stainless steel type 316 in sulphuric acid solution using eco-friendly waste product, Results in Physics, 9 (2018) 225–230." 
}, @@ -285,7 +353,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "Value of the data" }, @@ -295,7 +364,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "(cid:1) Data presented here provide optimum conditions of waste material as inhibitor for stainless steel Type 316 in 0.5 M H2SO4 medium. The given data describe the inhibitive performance of eco-friendly egg shell powder on austenitic stainless steel Type 316 corrosion in sulphuric acid environment." }, @@ -305,7 +375,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "(cid:1) The data obtained for the inhibition of waste product (egg shell powder) on stainless steel Type 316 can be used as basis in determining the inhibitive performance of the same inhibitor in other environments." }, @@ -315,7 +386,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "(cid:1) The data can be used to examine the relationship between the process variable as it affect the" }, @@ -325,7 +397,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "nature of inhibition of metals." }, @@ -335,7 +408,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "1. Data" }, @@ -345,17 +419,30 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "The results of the experiment are presented in this session. 
The results obtained from weight loss method for stainless steel Type 316 immersed in 0.5 M H2SO4 solution in the absence and presence of different concentrations of egg shell powder (ES) are presented in Figs. 1–3 respectively. It can be seen clearly from these Figures that the efficiency of egg shell powder increase with the inhibitor con- centration, The increase in its efficiency could be as a result of increase in the constituent molecule" }, + { + "type": "UncategorizedText", + "element_id": "624b60c58c9d8bfb6ff1886c2fd605d2", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 2, + "links": [] + }, + "text": "30" + }, { "type": "Title", "element_id": "e28e0dc941accc8694040c63091b580c", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": ") g m" }, @@ -365,7 +452,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "(" }, @@ -375,7 +463,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "s s o" }, @@ -385,7 +474,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "l" }, @@ -395,7 +485,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "t h g e W" }, @@ -405,27 +496,19 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "i" }, - { - "type": "UncategorizedText", - "element_id": "624b60c58c9d8bfb6ff1886c2fd605d2", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "30" - }, { "type": "UncategorizedText", "element_id": "f5ca38f748a1d6eaf726b8a42fb575c3", "metadata": { "data_source": {}, "filetype": 
"application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "20" }, @@ -435,7 +518,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "10g 8g 6g 4g 2g Control" }, @@ -445,7 +529,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "10" }, @@ -455,7 +540,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "48" }, @@ -465,7 +551,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "96" }, @@ -475,7 +562,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "144" }, @@ -485,7 +573,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "192" }, @@ -495,7 +584,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "Exposure Time (Hours)" }, @@ -505,7 +595,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "Fig. 1. Weight loss versus exposure time for stainless steel presence of ES." }, @@ -515,7 +606,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "immersed in 0.5 M H2SO4 solution in the absence and" }, @@ -525,7 +617,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "O. Sanni, A.P.I. 
Popoola / Data in Brief 22 (2019) 451–457" }, @@ -535,7 +628,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "453" }, @@ -545,7 +639,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "2.7" }, @@ -555,7 +650,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": ") r a e y / m m" }, @@ -565,7 +661,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "( e t a r n o s o r r o C" }, @@ -575,7 +672,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "i" }, @@ -585,7 +683,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "1.8" }, @@ -595,7 +694,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "0.9" }, @@ -605,7 +705,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "10g 8g 6g 4g 2g Control" }, @@ -615,7 +716,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "24" }, @@ -625,7 +727,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "48" }, @@ -635,7 +738,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "72" }, @@ -645,7 +749,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "96" }, @@ -655,7 +760,8 @@ 
"metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "120" }, @@ -665,7 +771,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "144" }, @@ -675,7 +782,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "168" }, @@ -685,7 +793,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "192" }, @@ -695,7 +804,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Exposure time" }, @@ -705,7 +815,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Fig. 2. Corrosion rate versus exposure time for stainless steel immersed in 0.5 M H2SO4 solution in the absence and presence of ES." 
}, @@ -715,7 +826,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "100" }, @@ -725,7 +837,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "90" }, @@ -735,7 +848,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": ")" }, @@ -745,7 +859,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "%" }, @@ -755,7 +870,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "(" }, @@ -765,7 +881,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "y c n e c i f f" }, @@ -775,7 +892,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "i" }, @@ -785,7 +903,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "E n o i t i b h n I" }, @@ -795,7 +914,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "i" }, @@ -805,7 +925,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "80" }, @@ -815,7 +936,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "70" }, @@ -825,7 +947,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "60" }, @@ -835,7 +958,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - 
"page_number": 3 + "page_number": 3, + "links": [] }, "text": "50" }, @@ -845,7 +969,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "40" }, @@ -855,7 +980,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "30" }, @@ -865,7 +991,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "2g 4g 6g 8g 10g" }, @@ -875,7 +1002,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "20" }, @@ -885,7 +1013,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "10" }, @@ -895,7 +1024,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "0" }, @@ -905,7 +1035,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "20" }, @@ -915,7 +1046,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "40" }, @@ -925,7 +1057,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "60" }, @@ -935,7 +1068,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "80" }, @@ -945,7 +1079,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "100" }, @@ -955,7 +1090,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "120" }, @@ -965,7 +1101,8 @@ "metadata": { 
"data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "140" }, @@ -975,7 +1112,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "160" }, @@ -985,7 +1123,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "180" }, @@ -995,7 +1134,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Exposure Time (Hours)" }, @@ -1005,7 +1145,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Fig. 3. Inhibition efficiency versus exposure time for stainless steel immersed in 0.5 M H2SO4 solution in the presence of ES." }, @@ -1015,7 +1156,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "number of inhibitor adsorbed on the surface of stainless steel at higher concentration, in order for the active sites of the stainless steel to be protected with the inhibitor molecules. Cathodic and anodic polarized potential are measured in the presence and absence of ES. Fig. 4 shows the cathodic and anodic polarization curves for stainless steel in 0.5 M H2SO4 solution at different ES concentrations. The electrochemical variables such as polarization resistance (PR), corrosion potential (Ecorr), cor- rosion current (icorr), anodic Tafel constant (ba), cathodic Tafel constant (bc) and corrosion rate (mm/ year) values are presented in Table 1. From the polarization curves and electrochemical parameter, icorr value decreased with the addition of inhibitor in 0.5 M H2SO4. 
Conversely, the icorr further decrease with an increase in inhibitor concentration indicating that the inhibition effects increase with an increase in the egg shell concentration. The process of egg shell inhibition could be attributed to the formation of egg shell powder adsorbed on stainless steel surface protecting corrosion of stainless steel in H2SO4 medium. The likely mechanism is the egg shell adsorption on stainless steel surface through the heteroatoms electron pair and the conjugated systems in egg shell molecular structure as shown in Fig. 1. When the concentration of inhibitor was increased from 2 to 10 g, the corrosion rate values drastically decreased this result show that waste egg shell powder is an effective corrosion inhibitor for stainless steel in H2SO4 solution. The shift in corrosion potential of stainless steel from Tafel curves and electrochemical data indicate that the inhibitor is a mixed-type corrosion inhibitor." }, @@ -1025,7 +1167,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "454" }, @@ -1035,7 +1178,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "O. Sanni, A.P.I. Popoola / Data in Brief 22 (2019) 451–457" }, @@ -1045,7 +1189,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "Fig. 4. Anodic and cathodic polarization curve of stainless steel in 0.5 M H2SO4 solution in the presence and absence of ES." }, @@ -1055,7 +1200,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "Table 1 Potentiodynamic polarization data for stainless steel in the absence and presence of ES in 0.5 M H2SO4 solution." 
}, @@ -1065,7 +1211,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "Inhibitor concentration (g)" }, @@ -1075,7 +1222,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "bc (V/dec)" }, @@ -1085,7 +1233,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "ba (V/dec)" }, @@ -1095,7 +1244,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "Ecorr (V)" }, @@ -1105,7 +1255,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "icorr (A/cm2)" }, @@ -1115,7 +1266,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "Polarization resistance (Ω)" }, @@ -1125,7 +1277,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "Corrosion rate (mm/year)" }, @@ -1135,7 +1288,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "0 2 4 6 8 10" }, @@ -1145,7 +1299,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "0.0335 1.9460 0.0163 0.3233 0.1240 0.0382" }, @@ -1155,7 +1310,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "0.0409 0.0596 0.2369 0.0540 0.0556 0.0086" }, @@ -1165,7 +1321,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "(cid:3) 0.9393 (cid:3) 0.8276 (cid:3) 0.8825 (cid:3) 0.8027 (cid:3) 
0.5896 (cid:3) 0.5356" }, @@ -1175,7 +1332,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "0.0003 0.0002 0.0001 5.39E-05 5.46E-05 1.24E-05" }, @@ -1185,7 +1343,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "24.0910 121.440 42.121 373.180 305.650 246.080" }, @@ -1195,7 +1354,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "2.8163 1.5054 0.9476 0.4318 0.3772 0.0919" }, @@ -1205,7 +1365,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "The plot of inhibitor concentration over degree of surface coverage versus inhibitor concentration gives a straight line as shown in Fig. 5. The strong correlation reveals that egg shell adsorption on stainless surface in 0.5 M H2SO4 follow Langmuir adsorption isotherm. Figs. 6–8 show the SEM/EDX surface morphology analysis of stainless steel. Figs. 7 and 8 are the SEM/EDX images of the stainless steel specimens without and with inhibitor after weight loss experiment in sulphuric acid medium. The stainless steel surface corrosion product layer in the absence of inhibitor was porous and as a result gives no corrosion protection. With the presence of ES, corrosion damage was minimized, with an evidence of ES present on the metal surface as shown in Fig. 8." 
}, @@ -1215,7 +1376,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "12" }, @@ -1225,7 +1387,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "C/0" }, @@ -1235,7 +1398,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "10" }, @@ -1245,7 +1409,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "8" }, @@ -1255,7 +1420,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "0 / C" }, @@ -1265,7 +1431,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "6" }, @@ -1275,7 +1442,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "4" }, @@ -1285,7 +1453,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "2" }, @@ -1295,7 +1464,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "2" }, @@ -1305,7 +1475,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "4" }, @@ -1315,7 +1486,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "6" }, @@ -1325,7 +1497,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "8" }, @@ -1335,7 +1508,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 
+ "page_number": 4, + "links": [] }, "text": "10" }, @@ -1345,7 +1519,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "Concentration (g)" }, @@ -1355,7 +1530,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "Fig. 5. Langmuir adsorption isotherm of ES." }, @@ -1365,7 +1541,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "O. Sanni, A.P.I. Popoola / Data in Brief 22 (2019) 451–457" }, @@ -1375,7 +1552,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "455" }, @@ -1385,7 +1563,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "Fig. 6. SEM/EDX image of as-received stainless steel." }, @@ -1395,7 +1574,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "Fig. 7. SEM/EDX image of stainless steel immersed in 0.5 M H2SO4 solution without inhibitor." }, @@ -1405,7 +1585,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "Fig. 8. SEM/EDX image of stainless steel immersed in 0.5 M H2SO4 solution with the presence of inhibitor." }, @@ -1415,7 +1596,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "456" }, @@ -1425,7 +1607,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "O. Sanni, A.P.I. 
Popoola / Data in Brief 22 (2019) 451–457" }, @@ -1435,7 +1618,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "2. Experimental design, materials and methods" }, @@ -1445,7 +1629,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "2.1. Material" }, @@ -1455,7 +1640,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "Austenitic stainless steel Type 316 was used in this study with chemical composition reported in [1,2]. The chemicals used were of annular grade. The inhibitor concentrations are in the range of 2, 4, 6, 8 and 10 g [3–5]. The structural formula of egg shell powder is shown in Fig. 9." }, @@ -1465,7 +1651,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "Fig. 9. Chemical structure of egg shell powder." }, @@ -1475,7 +1662,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "2.2. Weight loss method" }, @@ -1485,7 +1673,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "This physical measurement was carried out in order to provide direct result on how the corrosive environment affects the test sample. The cleaned and weighed specimen was suspended in beakers with the aid of glass hooks and rods with the test solution of ES at different concentration (2, 4, 6, 8 and 10 g). The pre-weighed specimen was retrieved from the test solution after every 24 h, cleaned, dried and reweighed. The difference between the weight at a given time and the initial weight of the specimen was taken as the weight loss which was used to calculate corrosion rate and inhibition efficiency." 
}, @@ -1495,7 +1684,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "The corrosion rate (CR) was calculated using Eq. (1) [1–5]" }, @@ -1505,7 +1695,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "Corrosion rate CRð" }, @@ -1515,7 +1706,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "(cid:1) Þ ¼ 87:6W DAT" }, @@ -1525,7 +1717,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "(cid:3)" }, @@ -1535,7 +1728,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "ð1Þ" }, @@ -1545,7 +1739,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "where: W is weight loss in mg, A is specimen surface area, T is immersion period in hours and D is the specimen density. From the corrosion rate, the surface coverage (θ) and inhibition efficiencies (IE %) were determined using Eqs. 
(2) and (3) respectively" }, @@ -1555,7 +1750,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "θ ¼ CRo (cid:3) CR" }, @@ -1565,7 +1761,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "CRo" }, @@ -1575,7 +1772,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "ð2Þ" }, @@ -1585,7 +1783,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "IE ð%Þ ¼ CRo (cid:3) CR" }, @@ -1595,7 +1794,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "CRo" }, @@ -1605,7 +1805,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "x" }, @@ -1615,7 +1816,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "100 1" }, @@ -1625,7 +1827,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "ð3Þ" }, @@ -1635,7 +1838,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "where: CRo and CR are the corrosion rate in absence and presence of inhibitor respectively." }, @@ -1645,7 +1849,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "2.3. 
Potentiodynamic polarization method" }, @@ -1655,7 +1860,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "The potentiodynamic polarization method was performed on the prepared test samples immersed in 0.5 M H2SO4 solution in the presence and absence of different ES concentrations. A three electrode system was used; stainless steel Type 316 plate as working electrode with an exposed area of 1.0 cm2, platinum rod as counter electrode and silver chloride electrode as reference electrode. The electrode was polished, degreased in acetone and thoroughly rinsed with distilled water before the experiment. Current density against applied potential was plotted. The slope of the linear part in anodic and cathodic plots gives anodic and cathodic constants according to the Stern–Geary equation, and the" }, @@ -1665,7 +1871,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "O. Sanni, A.P.I. Popoola / Data in Brief 22 (2019) 451–457" }, @@ -1675,7 +1882,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "457" }, @@ -1685,7 +1893,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "steps of the linear polarization plot are substituted to get corrosion current. Nova software was used with linear polarization resistance (LPR) and the current was set to 10 mA (maximum) and 10 nA (minimum). LSV staircase parameter start potential (cid:3) 1.5 v, step potential 0.001 m/s and stop potential of þ1.5 v set was used in this study." 
}, @@ -1695,7 +1904,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Acknowledgements" }, @@ -1705,7 +1915,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "This work was supported by the National Research Foundation of South Africa and the Tshwane" }, @@ -1715,7 +1926,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "University of Technology Pretoria South Africa." }, @@ -1725,7 +1937,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Transparency document. Supporting information" }, @@ -1735,7 +1948,14 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [ + { + "text": ":// doi", + "url": "https://doi.org/10.1016/j.dib.2018.11.134", + "start_index": 94 + } + ] }, "text": "Transparency document associated with this article can be found in the online version at https://doi." }, @@ -1745,7 +1965,14 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [ + { + "text": "org / 10 . 1016 / j . dib . 2018 . 11 . 134", + "url": "https://doi.org/10.1016/j.dib.2018.11.134", + "start_index": 0 + } + ] }, "text": "org/10.1016/j.dib.2018.11.134." }, @@ -1755,7 +1982,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "References" }, @@ -1765,7 +1993,14 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [ + { + "text": "O . Sanni , A . P . I . Popoola , O . S . I . 
Fayomi ,", + "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref1", + "start_index": 4 + } + ] }, "text": "[1] O. Sanni, A.P.I. Popoola, O.S.I. Fayomi, Enhanced corrosion resistance of stainless steel type 316 in sulphuric acid solution" }, @@ -1775,7 +2010,19 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [ + { + "text": "usingeco - friendlywasteproduct , ResultsPhys . 9 ( 2018 ) 225 –", + "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref1", + "start_index": 0 + }, + { + "text": "usingeco - friendlywasteproduct , ResultsPhys . 9 ( 2018 ) 225 – 230", + "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref1", + "start_index": 0 + } + ] }, "text": "using eco-friendly waste product, Results Phys. 9 (2018) 225–230." }, @@ -1785,7 +2032,14 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [ + { + "text": "O . Sanni , A . P . I . Popoola , A . Kolesnikov ,", + "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref2", + "start_index": 4 + } + ] }, "text": "[2] O. Sanni, A.P.I. Popoola, A. Kolesnikov, Constitutive modeling for prediction of optimal process parameters in corrosion" }, @@ -1795,7 +2049,19 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [ + { + "text": "inhibitionofausteniticstainlesssteel ( Type316 )/ acidicmedium , Mater . Res . Express . 5 ( 10 )( 2018 ) 1 –", + "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref2", + "start_index": 0 + }, + { + "text": "inhibitionofausteniticstainlesssteel ( Type316 )/ acidicmedium , Mater . Res . Express . 5 ( 10 )( 2018 ) 1 – 15", + "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref2", + "start_index": 0 + } + ] }, "text": "inhibition of austenitic stainless steel (Type 316)/acidic medium, Mater. Res. Express. 5 (10) (2018) 1–15." 
}, @@ -1805,7 +2071,14 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [ + { + "text": "O . Sanni , A . P . I . Popoola , O . S . I . Fayomi ,", + "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref3", + "start_index": 4 + } + ] }, "text": "[3] O. Sanni, A.P.I. Popoola, O.S.I. Fayomi, The inhibitive study of egg shell powder on UNS N08904 austenitic stainless steel" }, @@ -1815,7 +2088,19 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [ + { + "text": "corrosioninchloridesolution , Def . Technol . 14 ( 2018 ) 463 –", + "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref3", + "start_index": 0 + }, + { + "text": "corrosioninchloridesolution , Def . Technol . 14 ( 2018 ) 463 – 468", + "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref3", + "start_index": 0 + } + ] }, "text": "corrosion in chloride solution, Def. Technol. 14 (2018) 463–468." }, @@ -1825,7 +2110,29 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [ + { + "text": "https", + "url": "https://doi.org/10.1007/s13632-018-0495-5", + "start_index": 233 + }, + { + "text": "https", + "url": "https://doi.org/10.1007/s13632-018-0495-5", + "start_index": 233 + }, + { + "text": "https :// doi . org / 10 . 1007", + "url": "https://doi.org/10.1007/s13632-018-0495-5", + "start_index": 233 + }, + { + "text": "s13632 - 018 - 0495 - 5", + "url": "https://doi.org/10.1007/s13632-018-0495-5", + "start_index": 258 + } + ] }, "text": "[4] O. Sanni, A.P.I. Popoola, O.S.I. Fayomi, C.A. Loto, A comparative study of inhibitive effect of waste product on stainless steel corrosion in sodium chloride/sulfuric acid environments, Metallogr. Microstruct. Anal. (2018) 1–17. https://doi.org/10.1007/ s13632-018-0495-5." 
}, @@ -1835,7 +2142,14 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [ + { + "text": "https :// doi . org / 10 . 7449 / 2018 / MST _ 2018 _ 254 _ 261", + "url": "https://doi.org/10.7449/2018/MST_2018_254_261", + "start_index": 202 + } + ] }, "text": "[5] O. Sanni, A.P.I. Popoola, O.S.I. Fayomi, Inhibition of engineering material in sulphuric acid solution using waste product, Contributed Papers from Materials Science and Technology (MS&T18), 2018. 〈https://doi.org/10.7449/2018/MST_2018_254_261〉." } diff --git a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/75/29/main.PMC6312793.pdf.json b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/75/29/main.PMC6312793.pdf.json index c6ebb46fe7..abcea312b5 100644 --- a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/75/29/main.PMC6312793.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/75/29/main.PMC6312793.pdf.json @@ -5,7 +5,14 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [ + { + "text": "DatainBrief22 ( 2019 ) 484 –", + "url": "https://doi.org/10.1016/j.dib.2018.12.055", + "start_index": 0 + } + ] }, "text": "Data in Brief 22 (2019) 484–487" }, @@ -15,7 +22,14 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [ + { + "text": "", + "url": "www.sciencedirect.com/science/journal/23523409", + "start_index": -1 + } + ] }, "text": "Contents lists available at ScienceDirect" }, @@ -25,7 +39,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Data in Brief" }, @@ -35,7 +50,14 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [ + { 
+ "text": "www . elsevier . com / locate /", + "url": "www.elsevier.com/locate/dib", + "start_index": 18 + } + ] }, "text": "journal homepage: www.elsevier.com/locate/dib" }, @@ -45,7 +67,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Data Article" }, @@ -55,7 +78,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "A benchmark dataset for the multiple depot vehicle scheduling problem" }, @@ -65,7 +89,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Sarang Kulkarni a,b,c,n, Mohan Krishnamoorthy d,e, Abhiram Ranade f, Andreas T. Ernst c, Rahul Patil b" }, @@ -75,7 +100,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "a IITB-Monash Research Academy, IIT Bombay, Powai, Mumbai 400076, India b SJM School of Management, IIT Bombay, Powai, Mumbai 400076, India c School of Mathematical Sciences, Monash University, Clayton, VIC 3800, Australia d Department of Mechanical and Aerospace Engineering, Monash University, Clayton, VIC 3800, Australia e School of Information Technology and Electrical Engineering, The University of Queensland, QLD 4072, Australia f Department of Computer Science and Engineering, IIT Bombay, Powai, Mumbai 400076, India" }, @@ -85,7 +111,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "a r t i c l e i n f o" }, @@ -95,7 +122,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "a b s t r a c t" }, @@ -105,7 +133,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Article 
history: Received 21 November 2018 Received in revised form 13 December 2018 Accepted 15 December 2018 Available online 18 December 2018" }, @@ -115,7 +144,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "This data article presents a description of a benchmark dataset for the multiple depot vehicle scheduling problem (MDVSP). The MDVSP is to assign vehicles from different depots to timetabled trips to minimize the total cost of empty travel and waiting. The dataset has been developed to evaluate the heuristics of the MDVSP that are presented in “A new formulation and a column generation-based heuristic for the multiple depot vehicle sche- duling problem” (Kulkarni et al., 2018). The dataset contains 60 problem instances of varying size. Researchers can use the dataset to evaluate the future algorithms for the MDVSP and compare the performance with the existing algorithms. The dataset includes a program that can be used to generate new problem instances of the MDVSP." }, @@ -125,7 +155,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "& 2018 Published by Elsevier Inc. This is an open access article under the CC BY-NC-ND license (http://creativecommons.org/licenses/by-nc-nd/4.0/)." }, @@ -135,7 +166,24 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [ + { + "text": "https :// doi . org / 10 . 1016 / j . trb . 2018 . 11 . nCorrespondingauthorat", + "url": "http://dx.doi.org/10.1016/j.trb.2018.11.007", + "start_index": 25 + }, + { + "text": "https", + "url": "http://dx.doi.org/10.1016/j.trb.2018.11.007", + "start_index": 25 + }, + { + "text": "https :// doi . org / 10 . 1016 / j . trb . 2018 . 
11 .", + "url": "http://dx.doi.org/10.1016/j.trb.2018.11.007", + "start_index": 25 + } + ] }, "text": "DOI of original article: https://doi.org/10.1016/j.trb.2018.11.007 n Corresponding author at: IITB-Monash Research Academy, IIT Bombay, Powai, Mumbai 400076, India." }, @@ -145,7 +193,14 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [ + { + "text": "sarangkulkarni @ iitb . ac . in", + "url": "mailto:sarangkulkarni@iitb.ac.in", + "start_index": 16 + } + ] }, "text": "E-mail address: sarangkulkarni@iitb.ac.in (S. Kulkarni)." }, @@ -155,7 +210,24 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [ + { + "text": "https", + "url": "https://doi.org/10.1016/j.dib.2018.12.055", + "start_index": 0 + }, + { + "text": "https", + "url": "https://doi.org/10.1016/j.dib.2018.12.055", + "start_index": 0 + }, + { + "text": "https :// doi . org / 10 . 1016 / j . dib . 2018 . 12 .", + "url": "https://doi.org/10.1016/j.dib.2018.12.055", + "start_index": 0 + } + ] }, "text": "https://doi.org/10.1016/j.dib.2018.12.055 2352-3409/& 2018 Published by Elsevier Inc. This is an open access article under the CC BY-NC-ND license (http://creativecommons.org/licenses/by-nc-nd/4.0/)." }, @@ -165,7 +237,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "S. Kulkarni et al. 
/ Data in Brief 22 (2019) 484–487" }, @@ -175,7 +248,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "485" }, @@ -185,39 +259,43 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "Specifications table" }, { - "type": "NarrativeText", - "element_id": "5c3978ebc42ea4f11240c221ac3be1cf", + "type": "Title", + "element_id": "41e0fa358cefcadbb2633ec45ff2d129", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, - "text": "Subject area Operations research More specific subject area Vehicle scheduling Type of data How data were acquired" + "text": "Data format Experimental factors" }, { "type": "Title", - "element_id": "41e0fa358cefcadbb2633ec45ff2d129", + "element_id": "27d70c97431a2bec06d0a89368489dfb", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, - "text": "Data format Experimental factors" + "text": "Experimental features Data source location Data accessibility Related research article" }, { - "type": "Title", - "element_id": "27d70c97431a2bec06d0a89368489dfb", + "type": "NarrativeText", + "element_id": "5c3978ebc42ea4f11240c221ac3be1cf", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, - "text": "Experimental features Data source location Data accessibility Related research article" + "text": "Subject area Operations research More specific subject area Vehicle scheduling Type of data How data were acquired" }, { "type": "NarrativeText", @@ -225,7 +303,14 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [ + { + "text": ".,", + "url": "https://orlib.uqcloud.net/", + "start_index": 444 + } + ] }, "text": "Tables, text files 
Artificially generated by a C þ þ program on Intels Xeons CPU E5– 2670 v2 with Linux operating system. Raw Sixty randomly generated instances of the MDVSP with the number of depots in (8, 12, 16) and the number of trips in (1500, 2000, 2500, 3000) Randomly generated instances IITB-Monash Research Academy, IIT Bombay, Powai, Mumbai, India. Data can be downloaded from https://orlib.uqcloud.net/ Kulkarni, S., Krishnamoorthy, M., Ranade, A., Ernst, A.T. and Patil, R., 2018. A new formulation and a column generation-based heuristic for the multiple depot vehicle scheduling problem. Transportation Research Part B: Methodological, 118, pp. 457–487 [3]." }, @@ -235,7 +320,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "Value of the data" }, @@ -245,39 +331,32 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "(cid:2) The dataset contains 60 different problem instances of the MDVSP that can be used to evaluate the" }, - { - "type": "NarrativeText", - "element_id": "7c8bc2811f71480b433eb6fee2a3bb33", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "(cid:2) The data provide all the information that is required to model the MDVSP by using the existing" - }, { "type": "Title", "element_id": "bd7d750cb9f652c80c17a264072b8858", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "performance of the algorithms for the MDVSP." }, { "type": "NarrativeText", - "element_id": "e69dab6e2bc16d11cfd2d80a804d89fb", + "element_id": "7c8bc2811f71480b433eb6fee2a3bb33", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, - "text": "(cid:2) All the problem instances are available for use without any restrictions. 
(cid:2) The benchmark solutions and solution time for the problem instances are presented in [3] and can" + "text": "(cid:2) The data provide all the information that is required to model the MDVSP by using the existing" }, { "type": "Title", @@ -285,19 +364,21 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "mathematical formulations." }, { "type": "NarrativeText", - "element_id": "1c1d6b35ac0925a35ea3bb4d018e675f", + "element_id": "e69dab6e2bc16d11cfd2d80a804d89fb", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, - "text": "(cid:2) The dataset includes a program that can generate similar problem instances of different sizes." + "text": "(cid:2) All the problem instances are available for use without any restrictions. (cid:2) The benchmark solutions and solution time for the problem instances are presented in [3] and can" }, { "type": "NarrativeText", @@ -305,17 +386,30 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "be used for the comparison." }, + { + "type": "NarrativeText", + "element_id": "1c1d6b35ac0925a35ea3bb4d018e675f", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 2, + "links": [] + }, + "text": "(cid:2) The dataset includes a program that can generate similar problem instances of different sizes." + }, { "type": "ListItem", "element_id": "c2b2b778d53cc9a1cb4dc340476bc5aa", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "1. Data" }, @@ -325,7 +419,14 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [ + { + "text": "2500 , and3000 . size , ðm ; nÞ , fiveinstancesareprovided . Thedatasetcanbedownloadedfromhttps :// orlib . uqcloud . 
net", + "url": "https://orlib.uqcloud.net", + "start_index": 509 + } + ] }, "text": "The dataset contains 60 different problem instances of the multiple depot vehicle scheduling pro- blem (MDVSP). Each problem instance is provided in a separate file. Each file is named as ‘RN-m-n-k.dat’, where ‘m’, ‘n’, and ‘k’ denote the number of depots, the number of trips, and the instance number ‘RN-8–1500-01.dat’, for is the first problem instance with 8 depots and 1500 trips. For the number of depots, m, we used three values, 8, 12, and 16. The four values for the number of trips, n, are 1500, 2000, 2500, and 3000. For each size, ðm; nÞ, five instances are provided. The dataset can be downloaded from https://orlib.uqcloud.net." }, @@ -335,7 +436,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "the size," }, @@ -345,7 +447,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "‘ðm; nÞ’," }, @@ -355,7 +458,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "respectively. 
For example," }, @@ -365,7 +469,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "the problem instance," }, @@ -375,7 +480,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "For each problem instance, the following information is provided: The number of depots mð The number of trips ðnÞ, The number of locations ðlÞ, The number of vehicles at each depot, For each trip i A 1; 2; …; n, a start time, ts" }, @@ -385,7 +491,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "Þ," }, @@ -395,7 +502,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "i , an end time, te" }, @@ -405,7 +513,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "i , a start location, ls" }, @@ -415,7 +524,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "i , and an end location, le i ," }, @@ -425,7 +535,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "and" }, @@ -435,7 +546,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "(cid:2) The travel time, δij, between any two locations i; j A 1; …; l." }, @@ -445,27 +557,19 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "All times are in minutes and integers. The planning duration is from 5 a.m. to around midnight. 
Each instance has two classes of trips, short trips and long trips, with 40% short trips and 60% long trips. The duration of a short trip is less than a total of 45 min and the travel time between the start" }, - { - "type": "UncategorizedText", - "element_id": "86b700fab5db37977a73700b53a0654b", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "486" - }, { "type": "NarrativeText", "element_id": "0a1b09ff562f4d063703cbf021ee297f", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "S. Kulkarni et al. / Data in Brief 22 (2019) 484–487" }, @@ -475,7 +579,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "and end location of the trip. A long trip is about 3–5 h in duration and has the same start and end location. For all instances, m r l and the locations 1; …; m correspond to depots, while the remaining locations only appear as trip start and end locations." }, @@ -485,7 +590,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "j , the vehicle must travel empty from le j (cid:3)te i Þ. A schedule is given by the sequence in which a vehicle can cover the trips. The MDVSP is to determine the minimum number of schedules to cover all trips that minimizes total time in waiting and empty travel. 
The following requirements must be satisfied:" }, @@ -495,7 +601,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "A trip j can be covered after trip i by the same vehicle, if ts j" }, @@ -505,7 +612,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "i to ls" }, @@ -515,7 +623,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": ". If le i ls le i j , otherwise, the vehicle may require waiting at le i for the duration of ðts" }, @@ -525,7 +634,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Z te" }, @@ -535,7 +645,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "i þδ" }, @@ -545,7 +656,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "j" }, @@ -555,7 +667,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "a ls" }, @@ -565,29 +678,32 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "1. Each schedule should start and end at the same depot. 2. Each trip should be covered by only one vehicle. 3. The number of schedules that start from a depot should not exceed the number of vehicles at the depot." 
}, { - "type": "NarrativeText", - "element_id": "4fa30384f002f9a1d85b03ebdb0c8143", + "type": "ListItem", + "element_id": "2d6b506bd58a7dd7bbf1c8599ef630c8", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, - "text": "A sufficient number of vehicles are provided to maintain the feasibility of an instance. For each instance size ðm; nÞ, Table 1 provides the average of the number of locations, the number of times, the number of vehicles, and the number of possible empty travels, over five instances. The number of locations includes m distinct locations for depots and the number of locations at which various trips start or end. The number of times includes the start and the end time of the planning horizon and the start/end times for the trips. The number of vehicles is the total number of vehicles from all the depots. The number of possible empty travels is the number of possible connections between trips that require a vehicle travelling empty between two consecutive trips in a schedule." + "text": "1. Each schedule should start and end at the same depot. 2. Each trip should be covered by only one vehicle. 3. The number of schedules that start from a depot should not exceed the number of vehicles at the depot." }, { "type": "NarrativeText", - "element_id": "928fa0dcad70f173bc989ee5715375c5", + "element_id": "4fa30384f002f9a1d85b03ebdb0c8143", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, - "text": "The description of the file for each problem instance is presented in Table 2. The first line in the file provides the number of depots ðmÞ, the number of trips, ðnÞ, and the number of locations ðlÞ, in the problem instance. The next n lines present the information for n trips. Each line corresponds to a trip, i A 1; …; n g, and provides the start location, the start time, the end location, and the end time of trip i. 
The next l lines present the travel times between any two locations, i; jA 1; …; l" + "text": "A sufficient number of vehicles are provided to maintain the feasibility of an instance. For each instance size ðm; nÞ, Table 1 provides the average of the number of locations, the number of times, the number of vehicles, and the number of possible empty travels, over five instances. The number of locations includes m distinct locations for depots and the number of locations at which various trips start or end. The number of times includes the start and the end time of the planning horizon and the start/end times for the trips. The number of vehicles is the total number of vehicles from all the depots. The number of possible empty travels is the number of possible connections between trips that require a vehicle travelling empty between two consecutive trips in a schedule." }, { "type": "Title", @@ -595,17 +711,30 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "f" }, + { + "type": "NarrativeText", + "element_id": "928fa0dcad70f173bc989ee5715375c5", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 3, + "links": [] + }, + "text": "The description of the file for each problem instance is presented in Table 2. The first line in the file provides the number of depots ðmÞ, the number of trips, ðnÞ, and the number of locations ðlÞ, in the problem instance. The next n lines present the information for n trips. Each line corresponds to a trip, i A 1; …; n g, and provides the start location, the start time, the end location, and the end time of trip i. 
The next l lines present the travel times between any two locations, i; jA 1; …; l" + }, { "type": "UncategorizedText", "element_id": "89507815c6b4a6f31e6d3da7fca6b561", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "(cid:1)" }, @@ -615,7 +744,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "(cid:3)" }, @@ -625,7 +755,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "." }, @@ -635,7 +766,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "The dataset also includes a program ‘GenerateInstance.cpp’ that can be used to generate new instances. The program takes three inputs, the number of depots ðmÞ, the number of trips ðnÞ, and the number of instances for each size ðm; nÞ." }, @@ -645,7 +777,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Table 1 Average number of locations, times, vehicles and empty travels for each instance size." 
}, @@ -655,7 +788,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Instance size (m, n)" }, @@ -665,7 +799,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Average number of" }, @@ -675,7 +810,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Locations" }, @@ -685,7 +821,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Times" }, @@ -695,7 +832,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Vehicles" }, @@ -705,7 +843,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Possible empty travels" }, @@ -715,7 +854,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "(8, 1500) (8, 2000) (8, 2500) (8, 3000) (12, 1500) (12, 2000) (12, 2500) (12, 3000) (16, 1500) (16, 2000) (16, 2500) (16, 3000)" }, @@ -725,7 +865,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "568.40 672.80 923.40 977.00 566.00 732.60 875.00 1119.60 581.80 778.00 879.00 1087.20" }, @@ -735,7 +876,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "975.20 1048.00 1078.00 1113.20 994.00 1040.60 1081.00 1107.40 985.40 1040.60 1083.20 1101.60" }, @@ -745,7 +887,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "652.20 857.20 1082.40 1272.80 642.00 861.20 1096.00 1286.20 667.80 872.40 
1076.40 1284.60" }, @@ -755,7 +898,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "668,279.40 1,195,844.80 1,866,175.20 2,705,617.00 674,191.00 1,199,659.80 1,878,745.20 2,711,180.40 673,585.80 1,200,560.80 1,879,387.00 2,684,983.60" }, @@ -765,7 +909,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "S. Kulkarni et al. / Data in Brief 22 (2019) 484–487" }, @@ -775,7 +920,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "487" }, @@ -785,7 +931,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "Table 2 Description of file format for each problem instance." }, @@ -795,7 +942,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "Number of lines" }, @@ -805,7 +953,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "Number of columns in each line" }, @@ -815,7 +964,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "Description" }, @@ -825,7 +975,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "1 1 n" }, @@ -835,7 +986,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "l" }, @@ -845,7 +997,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "3 m 4" }, @@ -855,7 +1008,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + 
"page_number": 4, + "links": [] }, "text": "l" }, @@ -865,7 +1019,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "The number of depots, the number of trips, and the number of locations. The number of vehicles rd at each depot d. One line for each trip, i ¼ 1; 2; …; n. Each line provides the start location ls time ts i and the end time te i for the corresponding trip. Each element, δij; where i; j A 1; 2; …; l, refers to the travel time between location i and location j." }, @@ -875,7 +1030,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "i , the end location le" }, @@ -885,7 +1041,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "i , the start" }, @@ -895,7 +1052,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "2. Experimental design, materials, and methods" }, @@ -905,7 +1063,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "The procedure presented by Carpaneto et al. in [1] is used to generate the problem instances. The same procedure has been used by Pepin et al. in [4] to generate the benchmark dataset of the MDVSP. A detailed description of the procedure is presented in [3]." }, @@ -915,7 +1074,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "Our dataset provides start/end location and time of trips as well as the travel time between any two locations. The location and time information is required to model the MDVSP on a time-space network. The feasible connections and the cost of connections between the trips can be obtained as discussed in [3]. 
Thus, the dataset has all the information that is required to model the MDVSP on the time-space network (see [2]) as well as the connection-network (see [5]). The benchmark solutions for all the problem instances are presented in [3]." }, @@ -925,7 +1085,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "Transparency document. Supporting information" }, @@ -935,7 +1096,14 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [ + { + "text": ":// doi", + "url": "https://doi.org/10.1016/j.dib.2018.12.055", + "start_index": 94 + } + ] }, "text": "Transparency document associated with this article can be found in the online version at https://doi." }, @@ -945,7 +1113,14 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [ + { + "text": "org / 10 . 1016 / j . dib . 2018 . 12 . 055", + "url": "https://doi.org/10.1016/j.dib.2018.12.055", + "start_index": 0 + } + ] }, "text": "org/10.1016/j.dib.2018.12.055." }, @@ -955,7 +1130,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "References" }, @@ -965,29 +1141,55 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [ + { + "text": "G . Carpaneto , M . Dell ' Amico , M . Fischetti , P . Toth ,", + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref1", + "start_index": 4 + } + ] }, "text": "[1] G. Carpaneto, M. Dell'Amico, M. Fischetti, P. 
Toth, A branch and bound algorithm for the multiple depot vehicle scheduling" }, { - "type": "NarrativeText", - "element_id": "19dee0a4e8fd073350e234b4352b8af6", + "type": "UncategorizedText", + "element_id": "bec40b25a277a08de3415e33284fc76d", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [ + { + "text": "problem , Networks19 ( 5 )( 1989 ) 531 –", + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref1", + "start_index": 0 + }, + { + "text": "problem , Networks19 ( 5 )( 1989 ) 531 – 548", + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref1", + "start_index": 0 + } + ] }, - "text": "[2] N. Kliewer, T. Mellouli, L. Suhl, A time–space network based exact optimization model for multi-depot bus scheduling, Eur." + "text": "problem, Networks 19 (5) (1989) 531–548." }, { - "type": "UncategorizedText", - "element_id": "bec40b25a277a08de3415e33284fc76d", + "type": "NarrativeText", + "element_id": "19dee0a4e8fd073350e234b4352b8af6", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [ + { + "text": "N . Kliewer , T . Mellouli , L . Suhl , Atime – spacenetworkbasedexactoptimizationmodelformulti - depotbusscheduling , Eur", + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref2", + "start_index": 4 + } + ] }, - "text": "problem, Networks 19 (5) (1989) 531–548." + "text": "[2] N. Kliewer, T. Mellouli, L. Suhl, A time–space network based exact optimization model for multi-depot bus scheduling, Eur." }, { "type": "UncategorizedText", @@ -995,7 +1197,19 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [ + { + "text": "J . Oper . Res . 175 ( 3 )( 2006 ) 1616 –", + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref2", + "start_index": 0 + }, + { + "text": "J . Oper . Res . 
175 ( 3 )( 2006 ) 1616 – 1627", + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref2", + "start_index": 0 + } + ] }, "text": "J. Oper. Res. 175 (3) (2006) 1616–1627." }, @@ -1005,7 +1219,14 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [ + { + "text": "S . Kulkarni , M . Krishnamoorthy , A . Ranade , A . T . Ernst , R . Patil , Anewformulationandacolumngeneration -", + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref3", + "start_index": 4 + } + ] }, "text": "[3] S. Kulkarni, M. Krishnamoorthy, A. Ranade, A.T. Ernst, R. Patil, A new formulation and a column generation-based heuristic" }, @@ -1015,7 +1236,19 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [ + { + "text": "forthemultipledepotvehicleschedulingproblem , Transp . Res . PartBMethodol . 118 ( 2018 ) 457 –", + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref3", + "start_index": 0 + }, + { + "text": "forthemultipledepotvehicleschedulingproblem , Transp . Res . PartBMethodol . 118 ( 2018 ) 457 – 487", + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref3", + "start_index": 0 + } + ] }, "text": "for the multiple depot vehicle scheduling problem, Transp. Res. Part B Methodol. 118 (2018) 457–487." }, @@ -1025,7 +1258,14 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [ + { + "text": "A . S . Pepin , G . Desaulniers , A . Hertz , D . Huisman ,", + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref4", + "start_index": 4 + } + ] }, "text": "[4] A.S. Pepin, G. Desaulniers, A. Hertz, D. Huisman, A comparison of five heuristics for the multiple depot vehicle scheduling" }, @@ -1035,7 +1275,14 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [ + { + "text": "problem , J . Sched . 
12 ( 1 )( 2009 ) 17", + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref4", + "start_index": 0 + } + ] }, "text": "problem, J. Sched. 12 (1) (2009) 17." }, @@ -1045,7 +1292,14 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [ + { + "text": "C . C . Ribeiro , F . Soumis , Acolumngenerationapproachtothemultiple - depotvehicleschedulingproblem , Oper . Res . 42 ( 1", + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref5", + "start_index": 4 + } + ] }, "text": "[5] C.C. Ribeiro, F. Soumis, A column generation approach to the multiple-depot vehicle scheduling problem, Oper. Res. 42 (1)" }, @@ -1055,7 +1309,19 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [ + { + "text": "( 1994 ) 41 –", + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref5", + "start_index": 0 + }, + { + "text": "( 1994 ) 41 – 52", + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref5", + "start_index": 0 + } + ] }, "text": "(1994) 41–52." 
} diff --git a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-path/07/07/sbaa031.073.PMC7234218.pdf.json b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-path/07/07/sbaa031.073.PMC7234218.pdf.json index 2f4b6f5b34..29af05f0b2 100644 --- a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-path/07/07/sbaa031.073.PMC7234218.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-path/07/07/sbaa031.073.PMC7234218.pdf.json @@ -5,7 +5,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "S32" }, @@ -15,7 +16,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Poster Session I" }, @@ -25,7 +27,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "ns; 40 mg/day=3.6%, p<0.05; 80 mg/day=4.9%, p<0.01; 120 mg/day=9.3%, p<0.001, PM dosing group: 20 mg/day=-0.4%, ns; 40 mg/day=2.8%, p<0.05: 80 mg/day=0.2%, ns; 160 mg/day=5.8%, p<0.05). There was no clear dose-dependent trend associated with nausea and RD was similar between AM and PM dosing group (AM dosing group: 20 mg/ day=0.2% ns; 40 mg/day=3.8%, p<0.05; 80 mg/day=3.8%, ns; 120 mg/ day=6.6%, ns, PM dosing group: 20 mg/day=-1.6%, ns; 40 mg/day=-1.7%, ns; 80 mg/day=5.5%, p<0.01; 160 mg/day=2.8%, ns). Discussion: The risk of adverse events in the treatment of schizophrenia with lurasidone can vary depending on the timing of administration. In particular, for akathisia and somnolence, the incidence risks were reduced when lurasidone was administered in PM. Unlike with AM administration, the dose-dependence in the risks of these adverse events were not observed in lurasidone PM administration. 
The timing of lurasidone administration could be considered in effort to minimize potential adverse events." }, @@ -35,7 +38,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Discussion: Our data confirm previous findings on reduced slow wave density in FEP, and expand them to acute subjects, before any treatment is prescribed. This is in line with available data on diffuse abnormalities of cortico-cortical and cortico-thalamic networks in these patients. Interestingly, our data also offer preliminary evidence that this deficit is specific for SCZ, as it appears to differentiate patients who developed SCZ from those with other diagnoses at follow-up. Given the traveling properties of slow waves, future research should establish their potential as markers of connectivity in SCZ." }, @@ -45,7 +49,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "S7. INVESTIGATING THE LINK BETWEEN THE PERIPHERAL ENDOCANNABINOID SYSTEM AND CENTRAL GLUTAMATERGIC NEUROTRANSMISSION IN EARLY PSYCHOSIS: A 7T-MRS STUDY" }, @@ -55,39 +60,43 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "S6. 
SLEEP ENDOPHENOTYPES OF SCHIZOPHRENIA: A HIGH-DENSITY EEG STUDY IN DRUG-NAÏVE, FIRST EPISODE PSYCHOSIS PATIENTS" }, { "type": "UncategorizedText", - "element_id": "e97f1cf1c49f397732e68cf1efb2355e", + "element_id": "5ce0f6dc16582eaf81312c412e99ebb9", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, - "text": "Anna Castelnovo1, Cecilia Casetta2, Francesco Donati3, Renata del Giudice3, Caroline Zangani3, Simone Sarasso3, Armando D’Agostino*3 1Faculty of Biomedical Sciences, Università della Svizzera Italiana, Switzerland; 2Institute of Psychiatry, Psychology and Neuroscience, King’s College London, England; 3Università degli Studi di Milano, Italy" + "text": "Amedeo Minichino*1, Beata Godlewska1, Philip Cowen1, Philip Burnet1, Belinda Lennox1 1University of Oxford" }, { - "type": "NarrativeText", - "element_id": "1252f8d8921acac5f706e4402e504a75", + "type": "UncategorizedText", + "element_id": "e97f1cf1c49f397732e68cf1efb2355e", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, - "text": "Background: Slow waves, the hallmark of the deep nonrapid eye move- ment sleep electroencephalogram (EEG), are critical for restorative sleep and brain plasticity. They arise from the synchronous depolarization and hyperpolarization of millions of cortical neurons and their proper gen- eration and propagation relies upon the integrity of widespread cortico- thalamic networks. Slow wave abnormalities have been reported in patient with Schizophrenia, although with partially contradictory results, probably related to antipsychotic and sedative medications. Recently, their presence and delineation, have been convincingly shown in first-episode psychosis patients (FEP). However, clear evidence of this biomarker at the onset of the disease, prior to any psychopharmacological intervention, remains limited. 
Moreover, no attempt has been made to elucidate the prognostic meaning of this finding. Methods: We collected whole night sleep high–density electroencephalog- raphy recordings (64-channel BrainAmp, Brain Products GmbH, Gilching, Germany) in 20 drug-naive FEP patients and 20 healthy control subjects (HC). Several clinical psychometric scales as well as neurocognitive tests were administered to all subjects in order to better define psychopatholog- ical status and vulnerability. EEG slow wave activity (SWA, spectral power between 1 and 4 Hz) and several slow wave parameters were computed at each electrode location, including density and amplitude, at each electrode location. Along with a group analysis between FEP and HC, a subgroup analysis was also computed between patients who showed a progression of symptoms to full-blown Schizophrenia (SCZ, n = 10) over the next 12-month follow-up and those who did not (OTH, n = 10). Results: Sleep macro-architecture was globally preserved in FEP patients. SWA (1–4 Hz) was lower in FEP compared to HC but this difference didn’t reach statistical significance. Slow wave density was decreased in FEP compared to HC, with a significance that survived multiple comparison correction over a large fronto-central cluster. Mean amplitude was pre- served. At the subgroup analysis, these results were largely driven by the subgroup of patients with a confirmed diagnosis of SCZ at a 12-month fol- low-up. Indeed, no difference could be found between OTH and HC, while a strong significance was still evident between SCZ and HC." 
+ "text": "Anna Castelnovo1, Cecilia Casetta2, Francesco Donati3, Renata del Giudice3, Caroline Zangani3, Simone Sarasso3, Armando D’Agostino*3 1Faculty of Biomedical Sciences, Università della Svizzera Italiana, Switzerland; 2Institute of Psychiatry, Psychology and Neuroscience, King’s College London, England; 3Università degli Studi di Milano, Italy" }, { - "type": "UncategorizedText", - "element_id": "5ce0f6dc16582eaf81312c412e99ebb9", + "type": "NarrativeText", + "element_id": "1252f8d8921acac5f706e4402e504a75", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, - "text": "Amedeo Minichino*1, Beata Godlewska1, Philip Cowen1, Philip Burnet1, Belinda Lennox1 1University of Oxford" + "text": "Background: Slow waves, the hallmark of the deep nonrapid eye move- ment sleep electroencephalogram (EEG), are critical for restorative sleep and brain plasticity. They arise from the synchronous depolarization and hyperpolarization of millions of cortical neurons and their proper gen- eration and propagation relies upon the integrity of widespread cortico- thalamic networks. Slow wave abnormalities have been reported in patient with Schizophrenia, although with partially contradictory results, probably related to antipsychotic and sedative medications. Recently, their presence and delineation, have been convincingly shown in first-episode psychosis patients (FEP). However, clear evidence of this biomarker at the onset of the disease, prior to any psychopharmacological intervention, remains limited. Moreover, no attempt has been made to elucidate the prognostic meaning of this finding. Methods: We collected whole night sleep high–density electroencephalog- raphy recordings (64-channel BrainAmp, Brain Products GmbH, Gilching, Germany) in 20 drug-naive FEP patients and 20 healthy control subjects (HC). 
Several clinical psychometric scales as well as neurocognitive tests were administered to all subjects in order to better define psychopatholog- ical status and vulnerability. EEG slow wave activity (SWA, spectral power between 1 and 4 Hz) and several slow wave parameters were computed at each electrode location, including density and amplitude, at each electrode location. Along with a group analysis between FEP and HC, a subgroup analysis was also computed between patients who showed a progression of symptoms to full-blown Schizophrenia (SCZ, n = 10) over the next 12-month follow-up and those who did not (OTH, n = 10). Results: Sleep macro-architecture was globally preserved in FEP patients. SWA (1–4 Hz) was lower in FEP compared to HC but this difference didn’t reach statistical significance. Slow wave density was decreased in FEP compared to HC, with a significance that survived multiple comparison correction over a large fronto-central cluster. Mean amplitude was pre- served. At the subgroup analysis, these results were largely driven by the subgroup of patients with a confirmed diagnosis of SCZ at a 12-month fol- low-up. Indeed, no difference could be found between OTH and HC, while a strong significance was still evident between SCZ and HC." }, { "type": "NarrativeText", @@ -95,7 +104,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Background: Meta-analytic evidence showed increased levels of periph- eral endocannabinoid metabolites in psychotic illness. Alterations in the endocannabinoid system are believed to compromise glutamate and do- pamine transmission, which play a central role in pathophysiological models of psychosis. 
I will present preliminary data from an ongoing high-field proton magnetic resonance spectroscopy (MRS) study aimed at investigating the association between peripheral levels of endocannabinoid system metabolites and central glutamate metabolism in individuals at their first non-affective psychotic episode (NA-FEP) and healthy controls. Methods: We expect to recruit 17 NA-FEP and 20 healthy controls by January 2020. Currently, we recruited 12 NA-FEP and 18 healthy controls from two different research facilities (Imperial College London and University of Oxford) as part of a cross-sectional study. Participants un- derwent MRS scanning at 7-T with voxels placed in right dorsolateral prefrontal cortex (right-DLPFC), anterior cingulate cortex (ACC), and oc- cipital cortex. Neuro-metabolites will be calculated using the unsuppressed water signal as reference. Endocannabinoid metabolites were quantified from serum samples, collected during the same imaging session. Results: Analyses are ongoing. Based on previous evidence, expected findings are: (i) reduced glutamate levels in the ACC and right-DLPFC of NA-FEP compared to controls; (ii) increased peripheral endocannabinoid metabolites in NA-FEP compared to controls; and (iii) inverse association between peripheral endocannabinoid metabolites and glutamate levels in ACC and right-DLPFC in NA-FEP Discussion: This study will help clarifying the contribution of peripheral endocannabinoid system to central brain mechanisms of key relevance for psychotic illness. It will also add further evidence on the limited literature on high-resolution characterisation of brain metabolites in early psychosis. Strengths of the study include: (i) use of high-field MRS, which allows the estimation of glutamate-related compounds at higher precision than at lower field strength; (ii) reduced heterogeneity of the clinical sample (only male and NA-FEP). Limitations: small sample size and cross-sectional design." 
}, @@ -105,7 +115,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "S8. GRIN1 PROMOTER METHYLATION CHANGES IN BLOOD OF EARLY-ONSET PSYCHOTIC PATIENTS AND UNAFFECTED SIBLINGS WITH CHILDHOOD TRAUMA" }, @@ -115,7 +126,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Camila Loureiro*1, Corsi-Zuelli Fabiana1, Fachim Helene Aparecida1, Shuhama Rosana1, Menezes Paulo Rossi1, Dalton Caroline F2," }, @@ -125,7 +137,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "AQ3" }, @@ -135,7 +148,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "SIRS 2020 Abstracts" } diff --git a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/2023-Jan-economic-outlook.pdf.json b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/2023-Jan-economic-outlook.pdf.json index 9b293a695e..b9c9aa49b9 100644 --- a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/2023-Jan-economic-outlook.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/2023-Jan-economic-outlook.pdf.json @@ -5,7 +5,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "INTERNATIONAL MONETARY FUND" }, @@ -15,7 +16,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "WORLD ECONOMIC OUTLOOK UPDATE Inflation Peaking amid Low Growth" }, @@ -25,39 +27,43 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "2023 JAN" 
}, { - "type": "Title", - "element_id": "85e4ff3addb38328ecc08ec49759def7", + "type": "ListItem", + "element_id": "c4e0168ffab999611a92e8ebd8fe48a9", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, - "text": "Inflation Peaking amid Low Growth" + "text": "The balance of risks remains tilted to the downside, but adverse risks have moderated since the October 2022 WEO. On the upside, a stronger boost from pent-up demand in numerous economies or a faster fall in inflation are plausible. On the downside, severe health outcomes in China could hold back the recovery, Russia’s war in Ukraine could escalate, and tighter global financing conditions could worsen debt distress. Financial markets could also suddenly reprice in response to adverse inflation news, while further geopolitical fragmentation could hamper economic progress." }, { "type": "ListItem", - "element_id": "f1d5f4ed63a14db581e985bf15416cdd", + "element_id": "c4e0168ffab999611a92e8ebd8fe48a9", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, - "text": "Global growth is projected to fall from an estimated 3.4 percent in 2022 to 2.9 percent in 2023, then rise to 3.1 percent in 2024. The forecast for 2023 is 0.2 percentage point higher than predicted in the October 2022 World Economic Outlook (WEO) but below the historical (2000–19) average of 3.8 percent. The rise in central bank rates to fight inflation and Russia’s war in Ukraine continue to weigh on economic activity. The rapid spread of COVID-19 in China dampened growth in 2022, but the recent reopening has paved the way for a faster-than-expected recovery. Global inflation is expected to fall from 8.8 percent in 2022 to 6.6 percent in 2023 and 4.3 percent in 2024, still above pre-pandemic (2017–19) levels of about 3.5 percent." 
+ "text": "The balance of risks remains tilted to the downside, but adverse risks have moderated since the October 2022 WEO. On the upside, a stronger boost from pent-up demand in numerous economies or a faster fall in inflation are plausible. On the downside, severe health outcomes in China could hold back the recovery, Russia’s war in Ukraine could escalate, and tighter global financing conditions could worsen debt distress. Financial markets could also suddenly reprice in response to adverse inflation news, while further geopolitical fragmentation could hamper economic progress." }, { "type": "ListItem", - "element_id": "c4e0168ffab999611a92e8ebd8fe48a9", + "element_id": "5e9b501fc056965a744f6598d022f31d", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, - "text": "The balance of risks remains tilted to the downside, but adverse risks have moderated since the October 2022 WEO. On the upside, a stronger boost from pent-up demand in numerous economies or a faster fall in inflation are plausible. On the downside, severe health outcomes in China could hold back the recovery, Russia’s war in Ukraine could escalate, and tighter global financing conditions could worsen debt distress. Financial markets could also suddenly reprice in response to adverse inflation news, while further geopolitical fragmentation could hamper economic progress." + "text": "In most economies, amid the cost-of-living crisis, the priority remains achieving sustained disinflation. With tighter monetary conditions and lower growth potentially affecting financial and debt stability, it is necessary to deploy macroprudential tools and strengthen debt restructuring frameworks. Accelerating COVID-19 vaccinations in China would safeguard the recovery, with positive cross-border spillovers. 
Fiscal support should be better targeted at those most affected by elevated food and energy prices, and broad-based fiscal relief measures should be withdrawn. Stronger multilateral cooperation is essential to preserve the gains from the rules-based multilateral system and to mitigate climate change by limiting emissions and raising green investment." }, { "type": "ListItem", @@ -65,7 +71,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "In most economies, amid the cost-of-living crisis, the priority remains achieving sustained disinflation. With tighter monetary conditions and lower growth potentially affecting financial and debt stability, it is necessary to deploy macroprudential tools and strengthen debt restructuring frameworks. Accelerating COVID-19 vaccinations in China would safeguard the recovery, with positive cross-border spillovers. Fiscal support should be better targeted at those most affected by elevated food and energy prices, and broad-based fiscal relief measures should be withdrawn. Stronger multilateral cooperation is essential to preserve the gains from the rules-based multilateral system and to mitigate climate change by limiting emissions and raising green investment." }, @@ -75,7 +82,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "The global fight against inflation, Russia’s war in Ukraine, and a resurgence of COVID-19 in China weighed on global economic activity in 2022, and the first two factors will continue to do so in 2023." 
}, @@ -85,7 +93,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "Despite these headwinds, real GDP was surprisingly strong in the third quarter of 2022 in numerous economies, including the United States, the euro area, and major emerging market and developing economies. The sources of these surprises were in many cases domestic: stronger-than-expected private consumption and investment amid tight labor markets and greater-than-anticipated fiscal support. Households spent more to satisfy pent-up demand, particularly on services, partly by drawing down their stock of savings as economies reopened. Business investment rose to meet demand. On the supply side, easing bottlenecks and declining transportation costs reduced pressures on input prices and allowed for a rebound in previously constrained sectors, such as motor vehicles. Energy markets have adjusted faster than expected to the shock from Russia’s invasion of Ukraine." }, @@ -95,7 +104,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "In the fourth quarter of 2022, however, this uptick is estimated to have faded in most—though not all––major economies. US growth remains stronger than expected, with consumers continuing to spend from their stock of savings (the personal saving rate is at its lowest in more than 60 years, except for July 2005), unemployment near historic lows, and plentiful job opportunities. But elsewhere, high-frequency activity indicators (such as business and consumer sentiment, purchasing manager surveys, and mobility indicators) generally point to a slowdown." }, @@ -105,7 +115,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "COVID-19 deepens China’s slowdown. 
Economic activity in China slowed in the fourth quarter amid multiple large COVID-19 outbreaks in Beijing and other densely populated localities. Renewed lockdowns accompanied the outbreaks until the relaxation of COVID-19 restrictions in November and December, which paved the way for a full reopening. Real estate investment continued to contract, and developer restructuring is proceeding slowly, amid the lingering property market crisis. Developers have yet to deliver on a large backlog of presold housing, and downward pressure is building on house prices (so far limited by home price floors). The authorities have responded with additional monetary and fiscal policy easing, new vaccination targets for the elderly, and steps to support the completion of unfinished real estate projects. However, consumer and business sentiment remained subdued in late 2022. China’s slowdown has reduced global trade growth and international commodity prices." }, @@ -115,27 +126,19 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Monetary policy starts to bite. Signs are apparent that monetary policy tightening is starting to cool demand and inflation, but the full impact is unlikely to be realized before 2024. Global headline inflation appears to have peaked in the third quarter of 2022 (Figure 1). Prices of fuel and nonfuel commodities have declined, lowering headline inflation, notably in the United States, the euro area, and Latin America. But underlying (core) inflation has not yet peaked in most economies and remains well above pre-pandemic levels. It has persisted amid second-round effects from earlier cost shocks and tight labor markets with robust wage growth as consumer demand has remained resilient. Medium-term inflation expectations generally remain anchored, but some gauges are up. 
These developments have caused central banks to raise rates faster than expected, especially in the United States and the euro area, and to signal that rates will stay elevated for longer. Core inflation is declining in some economies that have completed their tightening cycle—such as Brazil. Financial markets are displaying high sensitivity to inflation news, with equity markets rising following recent releases of lower inflation data in anticipation of interest rate cuts (Box 1), despite central banks’ communicating their resolve to tighten policy further. With the peak in US headline inflation and an acceleration in rate hikes by several non-US central banks, the dollar has weakened since September but remains significantly stronger than a year ago." }, - { - "type": "Title", - "element_id": "0cce65035ca66e9be782c845ddd606e2", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "Figure 1. Twin Peaks? Headline and Core Inflation (Percent, year over year)" - }, { "type": "UncategorizedText", "element_id": "808caaef5b114d874a25b7fec21b5516", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "18 16 14 12 10 8 6 4 2 0 –2" }, @@ -145,29 +148,32 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "–2" }, { - "type": "NarrativeText", - "element_id": "e26dceaba57a5f670d91ac170e8706d1", + "type": "UncategorizedText", + "element_id": "c2c7be4534a60790d1d18451c91dc138", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, - "text": "Sources: Haver Analytics; and IMF staff calculations. Note: The figure shows the developments in headline and core inflation across 18 advanced economies and 17 emerging market and developing economies. 
Core inflation is the change in prices for goods and services, but excluding those for food and energy (or the closest available measure). For the euro area (and other European countries for which the data are available), energy, food, alcohol, and tobacco are excluded. The gray bands depict the 10th to 90th percentiles of inflation across economies." + "text": "16 14 12 10 8 6 4 2 0" }, { "type": "UncategorizedText", - "element_id": "c2c7be4534a60790d1d18451c91dc138", + "element_id": "c7c72889cb49cf43d9bd1f892db1be2c", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, - "text": "16 14 12 10 8 6 4 2 0" + "text": "Jan. 2019" }, { "type": "UncategorizedText", @@ -175,19 +181,32 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Jan. 2019" }, { - "type": "UncategorizedText", - "element_id": "c7c72889cb49cf43d9bd1f892db1be2c", + "type": "Title", + "element_id": "0cce65035ca66e9be782c845ddd606e2", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, - "text": "Jan. 2019" + "text": "Figure 1. Twin Peaks? Headline and Core Inflation (Percent, year over year)" + }, + { + "type": "NarrativeText", + "element_id": "e26dceaba57a5f670d91ac170e8706d1", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 3, + "links": [] + }, + "text": "Sources: Haver Analytics; and IMF staff calculations. Note: The figure shows the developments in headline and core inflation across 18 advanced economies and 17 emerging market and developing economies. Core inflation is the change in prices for goods and services, but excluding those for food and energy (or the closest available measure). For the euro area (and other European countries for which the data are available), energy, food, alcohol, and tobacco are excluded. 
The gray bands depict the 10th to 90th percentiles of inflation across economies." }, { "type": "ListItem", @@ -195,7 +214,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "1. Headline Inflation" }, @@ -205,7 +225,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "2. Core Inflation" }, @@ -215,7 +236,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Median country Brazil" }, @@ -225,7 +247,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Jul. 19" }, @@ -235,7 +258,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Jul. 19" }, @@ -245,7 +269,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Jan. 20" }, @@ -255,7 +280,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Jan. 20" }, @@ -265,7 +291,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Jul. 20" }, @@ -275,7 +302,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Jul. 20" }, @@ -285,7 +313,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "United States" }, @@ -295,7 +324,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Jan. 
21" }, @@ -305,7 +335,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Jan. 21" }, @@ -315,7 +346,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Jul. 21" }, @@ -325,7 +357,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Jul. 21" }, @@ -335,7 +368,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Jan. 22" }, @@ -345,7 +379,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Jan. 22" }, @@ -355,7 +390,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Euro area" }, @@ -365,7 +401,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Jul. 22" }, @@ -375,7 +412,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Jul. 22" }, @@ -385,7 +423,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Nov. 22" }, @@ -395,7 +434,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Nov. 22" }, @@ -405,7 +445,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Winter comes to Europe. European economic growth in 2022 was more resilient than expected in the face of the large negative terms-of-trade shock from the war in Ukraine. 
This resilience––which is" }, @@ -415,7 +456,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "visible in consumption and investment data for the third quarter––partly reflects government support of about 1.2 percent of European Union GDP (net budgetary cost) to households and firms hit by the energy crisis, as well as dynamism from economies reopening. Gas prices have declined by more than expected amid higher non-Russian pipeline and liquefied natural gas flows, compression of demand for gas, and a warmer-than-usual winter. However, the boost from reopening appears to be fading. High-frequency indicators for the fourth quarter suggest that the manufacturing and services sectors are contracting. Consumer confidence and business sentiment have worsened. With inflation at about 10 percent or above in several euro area countries and the United Kingdom, household budgets remain stretched. The accelerated pace of rate increases by the Bank of England and the European Central Bank is tightening financial conditions and cooling demand in the housing sector and beyond." }, @@ -425,7 +467,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "Global growth, estimated at 3.4 percent in 2022, is projected to fall to 2.9 percent in 2023 before rising to 3.1 percent in 2024 (Table 1). Compared with the October forecast, the estimate for 2022 and the forecast for 2023 are both higher by about 0.2 percentage point, reflecting positive surprises and greater-than-expected resilience in numerous economies. Negative growth in global GDP or global GDP per capita—which often happens when there is a global recession—is not expected. Nevertheless, global growth projected for 2023 and 2024 is below the historical (2000–19) annual average of 3.8 percent." 
}, @@ -435,7 +478,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "The forecast of low growth in 2023 reflects the rise in central bank rates to fight inflation–– especially in advanced economies––as well as the war in Ukraine. The decline in growth in 2023 from 2022 is driven by advanced economies; in emerging market and developing economies, growth is estimated to have bottomed out in 2022. Growth is expected to pick up in China with the full reopening in 2023. The expected pickup in 2024 in both groups of economies reflects gradual recovery from the effects of the war in Ukraine and subsiding inflation. Following the path of global demand, world trade growth is expected to decline in 2023 to 2.4 percent, despite an easing of supply bottlenecks, before rising to 3.4 percent in 2024." }, @@ -445,7 +489,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "These forecasts are based on a number of assumptions, including on fuel and nonfuel commodity prices, which have generally been revised down since October, and on interest rates, which have been revised up. In 2023, oil prices are projected to fall by about 16 percent, while nonfuel commodity prices are expected to fall by, on average, 6.3 percent. Global interest rate assumptions are revised up, reflecting intensified actual and signaled policy tightening by major central banks since October." }, @@ -455,7 +500,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "For advanced economies, growth is projected to decline sharply from 2.7 percent in 2022 to 1.2 percent in 2023 before rising to 1.4 percent in 2024, with a downward revision of 0.2 percentage point for 2024. About 90 percent of advanced economies are projected to see a decline in growth in 2023." 
}, @@ -465,7 +511,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "" }, @@ -475,39 +522,32 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "In the United States, growth is projected to fall from 2.0 percent in 2022 to 1.4 percent in 2023 and 1.0 percent in 2024. With growth rebounding in the second half of 2024, growth in 2024 will be faster than in 2023 on a fourth-quarter-over-fourth-quarter basis, as in most advanced" }, - { - "type": "NarrativeText", - "element_id": "70f05b9620aa1b7236058898e7e59192", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 5 - }, - "text": "economies. There is a 0.4 percentage point upward revision for annual growth in 2023, reflecting carryover effects from domestic demand resilience in 2022, but a 0.2 percentage point downward revision of growth in 2024 due to the steeper path of Federal Reserve rate hikes, to a peak of about 5.1 percent in 2023." - }, { "type": "ListItem", - "element_id": "fd6c549473e196512c076844988f465c", + "element_id": "3be6554964c172468cceaee89294f59d", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, - "text": "Growth in the euro area is projected to bottom out at 0.7 percent in 2023 before rising to 1.6 percent in 2024. The 0.2 percentage point upward revision to the forecast for 2023 reflects the effects of faster rate hikes by the European Central Bank and eroding real incomes, offset by the carryover from the 2022 outturn, lower wholesale energy prices, and additional announcements of fiscal purchasing power support in the form of energy price controls and cash transfers." 
+ "text": "Growth in the United Kingdom is projected to be –0.6 percent in 2023, a 0.9 percentage point downward revision from October, reflecting tighter fiscal and monetary policies and financial conditions and still-high energy retail prices weighing on household budgets." }, { "type": "ListItem", - "element_id": "3be6554964c172468cceaee89294f59d", + "element_id": "b24771387a5318eeda21adaa49629186", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, - "text": "Growth in the United Kingdom is projected to be –0.6 percent in 2023, a 0.9 percentage point downward revision from October, reflecting tighter fiscal and monetary policies and financial conditions and still-high energy retail prices weighing on household budgets." + "text": "Growth in Japan is projected to rise to 1.8 percent in 2023, with continued monetary and fiscal policy support. High corporate profits from a depreciated yen and earlier delays in implementing previous projects will support business investment. In 2024, growth is expected to decline to 0.9 percent as the effects of past stimulus dissipate." }, { "type": "ListItem", @@ -515,7 +555,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "Growth in Japan is projected to rise to 1.8 percent in 2023, with continued monetary and fiscal policy support. High corporate profits from a depreciated yen and earlier delays in implementing previous projects will support business investment. In 2024, growth is expected to decline to 0.9 percent as the effects of past stimulus dissipate." 
}, @@ -525,7 +566,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "For emerging market and developing economies, growth is projected to rise modestly, from 3.9 percent in 2022 to 4.0 percent in 2023 and 4.2 percent in 2024, with an upward revision of 0.3 percentage point for 2023 and a downward revision of 0.1 percentage point for 2024. About half of emerging market and developing economies have lower growth in 2023 than in 2022." }, @@ -535,19 +577,21 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "Growth in emerging and developing Asia is expected to rise in 2023 and 2024 to 5.3 percent and 5.2 percent, respectively, after the deeper-than-expected slowdown in 2022 to 4.3 percent attributable to China’s economy. China’s real GDP slowdown in the fourth quarter of 2022 implies a 0.2 percentage point downgrade for 2022 growth to 3.0 percent—the first time in more than 40 years with China’s growth below the global average. Growth in China is projected to rise to 5.2 percent in 2023, reflecting rapidly improving mobility, and to fall to 4.5 percent in 2024 before settling at below 4 percent over the medium term amid declining business dynamism and slow progress on structural reforms. Growth in India is set to decline from 6.8 percent in 2022 to 6.1 percent in 2023 before picking up to 6.8 percent in 2024, with resilient domestic demand despite external headwinds. Growth in the ASEAN-5 countries (Indonesia, Malaysia, Philippines, Singapore, Thailand) is similarly projected to slow to 4.3 percent in 2023 and then pick up to 4.7 percent in 2024." 
}, { "type": "ListItem", - "element_id": "afde979c99a73646915fe253c85c5a9c", + "element_id": "2ba41350ae3c684802f0e2b785c2d11b", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, - "text": "Growth in emerging and developing Europe is projected to have bottomed out in 2022 at 0.7 percent and, since the October forecast, has been revised up for 2023 by 0.9 percentage point to 1.5 percent. This reflects a smaller economic contraction in Russia in 2022 (estimated at –2.2 percent compared with a predicted –3.4 percent) followed by modestly positive growth in 2023. At the current oil price cap level of the Group of Seven, Russian crude oil export volumes are not expected to be significantly affected, with Russian trade continuing to be redirected from sanctioning to non-sanctioning countries. In Latin America and the Caribbean, growth is projected to decline from 3.9 percent in 2022 to 1.8 percent in 2023, with an upward revision for 2023 of 0.1 percentage point since October. The forecast revision reflects upgrades of 0.2 percentage point for Brazil and 0.5 percentage point for Mexico due to unexpected domestic demand resilience, higher-than-expected growth in" + "text": "Growth in emerging and developing Asia is expected to rise in 2023 and 2024 to 5.3 percent and 5.2 percent, respectively, after the deeper-than-expected slowdown in 2022 to 4.3 percent attributable to China’s economy. China’s real GDP slowdown in the fourth quarter of 2022 implies a 0.2 percentage point downgrade for 2022 growth to 3.0 percent—the first time in more than 40 years with China’s growth below the global average. Growth in China is projected to rise to 5.2 percent in 2023, reflecting rapidly improving mobility, and to fall to 4.5 percent in 2024 before settling at below 4 percent over the medium term amid declining business dynamism and slow progress on structural reforms. 
Growth in India is set to decline from 6.8 percent in 2022 to 6.1 percent in 2023 before picking up to 6.8 percent in 2024, with resilient domestic demand despite external headwinds. Growth in the ASEAN-5 countries (Indonesia, Malaysia, Philippines, Singapore, Thailand) is similarly projected to slow to 4.3 percent in 2023 and then pick up to 4.7 percent in 2024." }, { "type": "ListItem", @@ -555,39 +599,54 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "" }, + { + "type": "ListItem", + "element_id": "afde979c99a73646915fe253c85c5a9c", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 5, + "links": [] + }, + "text": "Growth in emerging and developing Europe is projected to have bottomed out in 2022 at 0.7 percent and, since the October forecast, has been revised up for 2023 by 0.9 percentage point to 1.5 percent. This reflects a smaller economic contraction in Russia in 2022 (estimated at –2.2 percent compared with a predicted –3.4 percent) followed by modestly positive growth in 2023. At the current oil price cap level of the Group of Seven, Russian crude oil export volumes are not expected to be significantly affected, with Russian trade continuing to be redirected from sanctioning to non-sanctioning countries. In Latin America and the Caribbean, growth is projected to decline from 3.9 percent in 2022 to 1.8 percent in 2023, with an upward revision for 2023 of 0.1 percentage point since October. 
The forecast revision reflects upgrades of 0.2 percentage point for Brazil and 0.5 percentage point for Mexico due to unexpected domestic demand resilience, higher-than-expected growth in" + }, { "type": "NarrativeText", "element_id": "e7a8e30d6d49ffbca56f87cd6883c9a0", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "major trading partner economies, and in Brazil, greater-than-expected fiscal support. Growth in the region is projected to rise to 2.1 percent in 2024, although with a downward revision of 0.3 percentage point, reflecting tighter financial conditions, lower prices of exported commodities, and downward revisions to trading partner growth." }, { "type": "ListItem", - "element_id": "25e2f1dc031b5421b8a234945098e58b", + "element_id": "e3b0c44298fc1c149afbf4c8996fb924", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, - "text": "Growth in the Middle East and Central Asia is projected to decline from 5.3 percent in 2022 to 3.2 percent in 2023, with a downward revision of 0.4 percentage point since October, mainly attributable to a steeper-than-expected growth slowdown in Saudi Arabia, from 8.7 percent in 2022 (which was stronger than expected by 1.1 percentage points) to 2.6 percent in 2023, with a negative revision of 1.1 percentage points. The downgrade for 2023 reflects mainly lower oil production in line with an agreement through OPEC+ (Organization of the Petroleum Exporting Countries, including Russia and other non-OPEC oil exporters), while non-oil growth is expected to remain robust. In sub-Saharan Africa, growth is projected to remain moderate at 3.8 percent in 2023 amid prolonged fallout from the COVID-19 pandemic, although with a modest upward revision since October, before picking up to 4.1 percent in 2024. 
The small upward revision for 2023 (0.1 percentage point) reflects Nigeria’s rising growth in 2023 due to measures to address insecurity issues in the oil sector. In South Africa, by contrast, after a COVID-19 reopening rebound in 2022, projected growth more than halves in 2023, to 1.2 percent, reflecting weaker external demand, power shortages, and structural constraints." + "text": "" }, { "type": "ListItem", - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", + "element_id": "25e2f1dc031b5421b8a234945098e58b", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, - "text": "" + "text": "Growth in the Middle East and Central Asia is projected to decline from 5.3 percent in 2022 to 3.2 percent in 2023, with a downward revision of 0.4 percentage point since October, mainly attributable to a steeper-than-expected growth slowdown in Saudi Arabia, from 8.7 percent in 2022 (which was stronger than expected by 1.1 percentage points) to 2.6 percent in 2023, with a negative revision of 1.1 percentage points. The downgrade for 2023 reflects mainly lower oil production in line with an agreement through OPEC+ (Organization of the Petroleum Exporting Countries, including Russia and other non-OPEC oil exporters), while non-oil growth is expected to remain robust. In sub-Saharan Africa, growth is projected to remain moderate at 3.8 percent in 2023 amid prolonged fallout from the COVID-19 pandemic, although with a modest upward revision since October, before picking up to 4.1 percent in 2024. The small upward revision for 2023 (0.1 percentage point) reflects Nigeria’s rising growth in 2023 due to measures to address insecurity issues in the oil sector. In South Africa, by contrast, after a COVID-19 reopening rebound in 2022, projected growth more than halves in 2023, to 1.2 percent, reflecting weaker external demand, power shortages, and structural constraints." 
}, { "type": "NarrativeText", @@ -595,7 +654,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "About 84 percent of countries are expected to have lower headline (consumer price index) inflation in 2023 than in 2022. Global inflation is set to fall from 8.8 percent in 2022 (annual average) to 6.6 percent in 2023 and 4.3 percent in 2024––above pre-pandemic (2017–19) levels of about 3.5 percent. The projected disinflation partly reflects declining international fuel and nonfuel commodity prices due to weaker global demand. It also reflects the cooling effects of monetary policy tightening on underlying (core) inflation, which globally is expected to decline from 6.9 percent in the fourth quarter of 2022 (year over year) to 4.5 percent by the fourth quarter of 2023. Still, disinflation will take time: by 2024, projected annual average headline and core inflation will, respectively, still be above pre-pandemic levels in 82 percent and 86 percent of economies." }, @@ -605,7 +665,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "In advanced economies, annual average inflation is projected to decline from 7.3 percent in 2022 to 4.6 percent in 2023 and 2.6 percent in 2024––above target in several cases. In emerging market and developing economies, projected annual inflation declines from 9.9 percent in 2022 to 8.1 percent in 2023 and 5.5 percent in 2024, above the 4.9 percent pre-pandemic (2017–19) average. In low-income developing countries, inflation is projected to moderate from 14.2 percent in 2022 to 8.6 percent in 2024––still high, but close to the pre-pandemic average." 
}, @@ -615,27 +676,19 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "The balance of risks to the global outlook remains tilted to the downside, with scope for lower growth and higher inflation, but adverse risks have moderated since the October 2022 World Economic Outlook." }, - { - "type": "UncategorizedText", - "element_id": "8f81c653cbf1334344d3063cb9f4de04", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "Table 1. Overview of the World Economic Outlook Projections (Percent change, unless noted otherwise)" - }, { "type": "Title", "element_id": "d11a1c04bd3a9891350b4bd94104df58", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Year over Year" }, @@ -645,7 +698,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Difference from October 2022" }, @@ -655,7 +709,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Q4 over Q4 2/" }, @@ -665,7 +720,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "2021" }, @@ -675,7 +731,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Estimate 2022" }, @@ -685,7 +742,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Projections 2023" }, @@ -695,7 +753,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "2024" }, @@ -705,7 +764,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + 
"page_number": 7, + "links": [] }, "text": "WEO Projections 1/" }, @@ -715,7 +775,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "2023" }, @@ -725,7 +786,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "2024" }, @@ -735,7 +797,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Estimate 2022" }, @@ -745,7 +808,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Projections 2023" }, @@ -755,7 +819,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "2024" }, @@ -765,7 +830,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "World Output" }, @@ -775,7 +841,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "6.2" }, @@ -785,7 +852,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "3.4" }, @@ -795,7 +863,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "2.9" }, @@ -805,7 +874,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "3.1" }, @@ -815,7 +885,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "0.2" }, @@ -825,7 +896,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "–0.1" }, @@ -835,7 
+907,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "1.9" }, @@ -845,7 +918,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "3.2" }, @@ -855,7 +929,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "3.0" }, @@ -865,7 +940,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Advanced Economies United States Euro Area" }, @@ -875,7 +951,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Germany France Italy Spain" }, @@ -885,7 +962,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Japan United Kingdom Canada Other Advanced Economies 3/" }, @@ -895,7 +973,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "5.4 5.9 5.3 2.6 6.8 6.7 5.5 2.1 7.6 5.0 5.3" }, @@ -905,7 +984,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "2.7 2.0 3.5 1.9 2.6 3.9 5.2 1.4 4.1 3.5 2.8" }, @@ -915,7 +995,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "1.2 1.4 0.7 0.1 0.7 0.6 1.1 1.8 –0.6 1.5 2.0" }, @@ -925,7 +1006,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "1.4 1.0 1.6 1.4 1.6 0.9 2.4 0.9 0.9 1.5 2.4" }, @@ -935,7 +1017,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "0.1 0.4 0.2 
0.4 0.0 0.8 –0.1 0.2 –0.9 0.0 –0.3" }, @@ -945,7 +1028,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "–0.2 –0.2 –0.2 –0.1 0.0 –0.4 –0.2 –0.4 0.3 –0.1 –0.2" }, @@ -955,7 +1039,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "1.3 0.7 1.9 1.4 0.5 2.1 2.1 1.7 0.4 2.3 1.4" }, @@ -965,7 +1050,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "1.1 1.0 0.5 0.0 0.9 0.1 1.3 1.0 –0.5 1.2 2.1" }, @@ -975,7 +1061,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "1.6 1.3 2.1 2.3 1.8 1.0 2.8 1.0 1.8 1.9 2.2" }, @@ -985,7 +1072,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Emerging Market and Developing Economies Emerging and Developing Asia" }, @@ -995,7 +1083,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "China India 4/" }, @@ -1005,7 +1094,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Emerging and Developing Europe" }, @@ -1015,7 +1105,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Russia" }, @@ -1025,29 +1116,32 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Latin America and the Caribbean" }, { "type": "Title", - "element_id": "24af2841400373443d80b6c91180918b", + "element_id": "e30a554d7d1cbf308651f8c267ad6872", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + 
"links": [] }, - "text": "Middle East and Central Asia" + "text": "Brazil Mexico" }, { "type": "Title", - "element_id": "e30a554d7d1cbf308651f8c267ad6872", + "element_id": "24af2841400373443d80b6c91180918b", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, - "text": "Brazil Mexico" + "text": "Middle East and Central Asia" }, { "type": "Title", @@ -1055,7 +1149,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Saudi Arabia Sub-Saharan Africa" }, @@ -1065,7 +1160,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Nigeria South Africa" }, @@ -1075,7 +1171,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "6.7 7.4 8.4 8.7 6.9 4.7 7.0 5.0 4.7 4.5 3.2 4.7 3.6 4.9" }, @@ -1085,7 +1182,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "3.9 4.3 3.0 6.8 0.7 –2.2 3.9 3.1 3.1 5.3 8.7 3.8 3.0 2.6" }, @@ -1095,7 +1193,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "4.0 5.3 5.2 6.1 1.5 0.3 1.8 1.2 1.7 3.2 2.6 3.8 3.2 1.2" }, @@ -1105,7 +1204,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "4.2 5.2 4.5 6.8 2.6 2.1 2.1 1.5 1.6 3.7 3.4 4.1 2.9 1.3" }, @@ -1115,7 +1215,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "0.3 0.4 0.8 0.0 0.9 2.6 0.1 0.2 0.5 –0.4 –1.1 0.1 0.2 0.1" }, @@ -1125,7 +1226,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "–0.1 0.0 
0.0 0.0 0.1 0.6 –0.3 –0.4 –0.2 0.2 0.5 0.0 0.0 0.0" }, @@ -1135,7 +1237,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "2.5 3.4 2.9 4.3 –2.0 –4.1 2.6 2.8 3.7 . . . 4.6 . . . 2.6 3.0" }, @@ -1145,7 +1248,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "5.0 6.2 5.9 7.0 3.5 1.0 1.9 0.8 1.1 . . . 2.7 . . . 3.1 0.5" }, @@ -1155,7 +1259,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "4.1 4.9 4.1 7.1 2.8 2.0 1.9 2.2 1.9 . . . 3.5 . . . 2.9 1.8" }, @@ -1165,7 +1270,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Memorandum World Growth Based on Market Exchange Rates European Union ASEAN-5 5/ Middle East and North Africa Emerging Market and Middle-Income Economies Low-Income Developing Countries" }, @@ -1175,7 +1281,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "6.0 5.5 3.8 4.1 7.0 4.1" }, @@ -1185,7 +1292,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "3.1 3.7 5.2 5.4 3.8 4.9" }, @@ -1195,7 +1303,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "2.4 0.7 4.3 3.2 4.0 4.9" }, @@ -1205,7 +1314,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "2.5 1.8 4.7 3.5 4.1 5.6" }, @@ -1215,7 +1325,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "0.3 0.0 –0.2 –0.4 0.4 0.0" }, @@ -1225,7 +1336,8 @@ "metadata": { "data_source": {}, 
"filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "–0.1 –0.3 –0.2 0.2 0.0 0.1" }, @@ -1235,7 +1347,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "1.7 1.8 3.7 . . . 2.5 . . ." }, @@ -1245,7 +1358,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "2.5 1.2 5.7 . . . 5.0 . . ." }, @@ -1255,7 +1369,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "2.5 2.0 4.0 . . . 4.1 . . ." }, @@ -1265,7 +1380,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "World Trade Volume (goods and services) 6/ Advanced Economies Emerging Market and Developing Economies" }, @@ -1275,7 +1391,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "10.4 9.4 12.1" }, @@ -1285,7 +1402,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "5.4 6.6 3.4" }, @@ -1295,7 +1413,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "2.4 2.3 2.6" }, @@ -1305,7 +1424,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "3.4 2.7 4.6" }, @@ -1315,7 +1435,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "–0.1 0.0 –0.3" }, @@ -1325,7 +1446,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "–0.3 –0.4 0.0" }, @@ -1335,7 +1457,8 @@ "metadata": { "data_source": {}, 
"filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": ". . . . . . . . ." }, @@ -1345,7 +1468,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": ". . . . . . . . ." }, @@ -1355,7 +1479,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": ". . . . . . . . ." }, @@ -1365,7 +1490,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Commodity Prices Oil 7/ Nonfuel (average based on world commodity import weights)" }, @@ -1375,7 +1501,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "65.8 26.4" }, @@ -1385,7 +1512,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "39.8 7.0" }, @@ -1395,7 +1523,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "–16.2 –6.3" }, @@ -1405,7 +1534,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "–7.1 –0.4" }, @@ -1415,7 +1545,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "–3.3 –0.1" }, @@ -1425,7 +1556,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "–0.9 0.3" }, @@ -1435,7 +1567,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "11.2 –2.0" }, @@ -1445,7 +1578,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, 
"text": "–9.8 1.4" }, @@ -1455,7 +1589,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "–5.9 –0.2" }, @@ -1465,7 +1600,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "World Consumer Prices 8/ Advanced Economies 9/ Emerging Market and Developing Economies 8/" }, @@ -1475,7 +1611,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "4.7 3.1 5.9" }, @@ -1485,7 +1622,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "8.8 7.3 9.9" }, @@ -1495,7 +1633,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "6.6 4.6 8.1" }, @@ -1505,7 +1644,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "4.3 2.6 5.5" }, @@ -1515,7 +1655,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "0.1 0.2 0.0" }, @@ -1525,7 +1666,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "0.2 0.2 0.2" }, @@ -1535,7 +1677,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "9.2 7.8 10.4" }, @@ -1545,7 +1688,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "5.0 3.1 6.6" }, @@ -1555,7 +1699,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "3.5 2.3 4.5" }, @@ -1565,7 +1710,8 @@ "metadata": { "data_source": {}, 
"filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Note: Real effective exchange rates are assumed to remain constant at the levels prevailing during October 26, 2022--November 23, 2022. Economies are listed on the basis of economic size. The aggregated quarterly data are seasonally adjusted. WEO = World Economic Outlook. 1/ Difference based on rounded figures for the current and October 2022 WEO forecasts. Countries whose forecasts have been updated relative to October 2022 WEO forecasts account for approximately 90 percent of world GDP measured at purchasing-power-parity weights. 2/ For World Output (Emerging Market and Developing Economies), the quarterly estimates and projections account for approximately 90 percent (80 percent) of annual world (emerging market and developing economies') output at purchasing-power-parity weights. 3/ Excludes the Group of Seven (Canada, France, Germany, Italy, Japan, United Kingdom, United States) and euro area countries. 4/ For India, data and projections are presented on a fiscal year basis, with FY 2022/23 (starting in April 2022) shown in the 2022 column. India's growth projections are 5.4 percent in 2023 and 6.8 percent in 2024 based on calendar year. 5/ Indonesia, Malaysia, Philippines, Singapore, Thailand. 6/ Simple average of growth rates for export and import volumes (goods and services). 7/ Simple average of prices of UK Brent, Dubai Fateh, and West Texas Intermediate crude oil. The average assumed price of oil in US dollars a barrel, based on futures markets (as of November 29, 2022), is $81.13 in 2023 and $75.36 in 2024. 8/ Excludes Venezuela. 9/ The inflation rate for the euro area is 5.7% in 2023 and 3.3% in 2024, that for Japan is 2.8% in 2023 and 2.0% in 2024, and that for the United States is 4.0% in 2023 and 2.2% in 2024." 
}, @@ -1575,7 +1721,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Upside risks—Plausible upside risks include more favorable surprises to domestic spending—as in the third quarter of 2022—which, however, would increase inflation further. At the same time, there is room for an upside scenario with lower-than-expected inflation and less monetary tightening:" }, @@ -1585,49 +1732,54 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Pent-up demand boost: Fueled by the stock of excess private savings from the pandemic fiscal support and, in many cases, still-tight labor markets and solid wage growth, pent-up demand remains an upside risk to the growth outlook. In some advanced economies, recent data show that households are still on net adding to their stock of excess savings (as in some euro area countries and the United Kingdom) or have ample savings left (as in the United States). This leaves scope for a further boost to consumption—particularly of services, including tourism." }, { - "type": "NarrativeText", - "element_id": "d379a79a55cecddeed62b21eb6a0ff00", + "type": "ListItem", + "element_id": "cf20f95904c591b6ac4ccd5d43fa8a98", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 7, + "links": [] }, - "text": "However, the boost to demand could stoke core inflation, leading to even tighter monetary policies and a stronger-than-expected slowdown later on. Pent-up demand could also fuel a stronger rebound in China." + "text": "Pent-up demand boost: Fueled by the stock of excess private savings from the pandemic fiscal support and, in many cases, still-tight labor markets and solid wage growth, pent-up demand remains an upside risk to the growth outlook. 
In some advanced economies, recent data show that households are still on net adding to their stock of excess savings (as in some euro area countries and the United Kingdom) or have ample savings left (as in the United States). This leaves scope for a further boost to consumption—particularly of services, including tourism." }, { "type": "ListItem", - "element_id": "2bbe57e6c291db638d3fcddca9e0199a", + "element_id": "90a90e12a4c6b8b74d3c8d20a76f22dc", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, - "text": "Faster disinflation: An easing in labor market pressures in some advanced economies due to falling vacancies could cool wage inflation without necessarily increasing unemployment. A sharp fall in the prices of goods, as consumers shift back to services, could further push down inflation. Such developments could imply a “softer” landing with less monetary tightening." + "text": "China’s recovery stalling: Amid still-low population immunity levels and insufficient hospital capacity, especially outside the major urban areas, significant health consequences could hamper the recovery. A deepening crisis in the real estate market remains a major source of vulnerability, with risks of widespread defaults by developers and resulting financial sector instability. Spillovers to the rest of the world would operate primarily through lower demand and potentially renewed supply chain problems." 
}, { - "type": "NarrativeText", - "element_id": "a2f806b25a06969405637298b4c85139", + "type": "ListItem", + "element_id": "90a90e12a4c6b8b74d3c8d20a76f22dc", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, - "text": "Downside risks—Numerous downside risks continue to weigh on the global outlook, lowering growth while, in a number of cases, adding further to inflation:" + "text": "China’s recovery stalling: Amid still-low population immunity levels and insufficient hospital capacity, especially outside the major urban areas, significant health consequences could hamper the recovery. A deepening crisis in the real estate market remains a major source of vulnerability, with risks of widespread defaults by developers and resulting financial sector instability. Spillovers to the rest of the world would operate primarily through lower demand and potentially renewed supply chain problems." }, { "type": "ListItem", - "element_id": "90a90e12a4c6b8b74d3c8d20a76f22dc", + "element_id": "42ac57e394bf7c98d908745cefce0b80", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, - "text": "China’s recovery stalling: Amid still-low population immunity levels and insufficient hospital capacity, especially outside the major urban areas, significant health consequences could hamper the recovery. A deepening crisis in the real estate market remains a major source of vulnerability, with risks of widespread defaults by developers and resulting financial sector instability. Spillovers to the rest of the world would operate primarily through lower demand and potentially renewed supply chain problems." + "text": "War in Ukraine escalating: An escalation of the war in Ukraine remains a major source of vulnerability, particularly for Europe and lower-income countries. 
Europe is facing lower-than- anticipated gas prices, having stored enough gas to make shortages unlikely this winter. However, refilling storage with much-diminished Russian flows will be challenging ahead of next winter, particularly if it is a very cold one and China’s energy demand picks up, causing price spikes. A possible increase in food prices from a failed extension of the Black Sea grain initiative would put further pressure on lower-income countries that are experiencing food insecurity and have limited budgetary room to cushion the impact on households and businesses. With elevated food and fuel prices, social unrest may increase." }, { "type": "ListItem", @@ -1635,29 +1787,32 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "War in Ukraine escalating: An escalation of the war in Ukraine remains a major source of vulnerability, particularly for Europe and lower-income countries. Europe is facing lower-than- anticipated gas prices, having stored enough gas to make shortages unlikely this winter. However, refilling storage with much-diminished Russian flows will be challenging ahead of next winter, particularly if it is a very cold one and China’s energy demand picks up, causing price spikes. A possible increase in food prices from a failed extension of the Black Sea grain initiative would put further pressure on lower-income countries that are experiencing food insecurity and have limited budgetary room to cushion the impact on households and businesses. With elevated food and fuel prices, social unrest may increase." 
}, { "type": "ListItem", - "element_id": "2d14934d52ff357c52e9ae1c38f7390e", + "element_id": "e3b0c44298fc1c149afbf4c8996fb924", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, - "text": "Debt distress: Since October, sovereign spreads for emerging market and developing economies have modestly declined on the back of an easing in global financial conditions (Box 1) and dollar depreciation. About 15 percent of low-income countries are estimated to be in debt distress, with an additional 45 percent at high risk of debt distress and about 25 percent of emerging market economies also at high risk. The combination of high debt levels from the pandemic, lower growth, and higher borrowing costs exacerbates the vulnerability of these economies, especially those with significant near-term dollar financing needs. Inflation persisting: Persistent labor market tightness could translate into stronger-than-expected wage growth. Higher-than-expected oil, gas, and food prices from the war in Ukraine or from a faster rebound in China’s growth could again raise headline inflation and pass through into underlying inflation. Such developments could cause inflation expectations to de-anchor and require an even tighter monetary policy." + "text": "" }, { "type": "ListItem", - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", + "element_id": "2d14934d52ff357c52e9ae1c38f7390e", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, - "text": "" + "text": "Debt distress: Since October, sovereign spreads for emerging market and developing economies have modestly declined on the back of an easing in global financial conditions (Box 1) and dollar depreciation. 
About 15 percent of low-income countries are estimated to be in debt distress, with an additional 45 percent at high risk of debt distress and about 25 percent of emerging market economies also at high risk. The combination of high debt levels from the pandemic, lower growth, and higher borrowing costs exacerbates the vulnerability of these economies, especially those with significant near-term dollar financing needs. Inflation persisting: Persistent labor market tightness could translate into stronger-than-expected wage growth. Higher-than-expected oil, gas, and food prices from the war in Ukraine or from a faster rebound in China’s growth could again raise headline inflation and pass through into underlying inflation. Such developments could cause inflation expectations to de-anchor and require an even tighter monetary policy." }, { "type": "ListItem", @@ -1665,7 +1820,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "Sudden financial market repricing: A premature easing in financial conditions in response to lower headline inflation data could complicate anti-inflation policies and necessitate additional monetary tightening. For the same reason, unfavorable inflation data releases could trigger sudden repricing of assets and increase volatility in financial markets. Such movements could strain liquidity and the functioning of critical markets, with ripple effects on the real economy." }, @@ -1675,7 +1831,19 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] + }, + "text": "Geopolitical fragmentation: The war in Ukraine and the related international sanctions aimed at  pressuring Russia to end hostilities are splitting the world economy into blocs and reinforcing earlier geopolitical tensions, such as those associated with the US-China trade dispute." 
+ }, + { + "type": "ListItem", + "element_id": "75bd22ee0ba778cc3a616ed0a9b42292", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 8, + "links": [] }, "text": "Geopolitical fragmentation: The war in Ukraine and the related international sanctions aimed at  pressuring Russia to end hostilities are splitting the world economy into blocs and reinforcing earlier geopolitical tensions, such as those associated with the US-China trade dispute." }, @@ -1685,7 +1853,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "Fragmentation could intensify—with more restrictions on cross-border movements of capital, workers, and international payments—and could hamper multilateral cooperation on providing global public goods.1 The costs of such fragmentation are especially high in the short term, as replacing disrupted cross-border flows takes time." }, @@ -1695,7 +1864,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "Securing global disinflation: For most economies, the priority remains achieving a sustained reduction in inflation toward target levels. Raising real policy rates and keeping them above their neutral levels until underlying inflation is clearly declining would ward off risks of inflation expectations de- anchoring. Clear central bank communication and appropriate reactions to shifts in the data will help keep inflation expectations anchored and lessen wage and price pressures. Central banks’ balance sheets will need to be unwound carefully, amid market liquidity risks. Gradual and steady fiscal tightening would contribute to cooling demand and limit the burden on monetary policy in the fight against inflation. In countries where output remains below potential and inflation is in check, maintaining monetary and fiscal accommodation may be appropriate." 
}, @@ -1705,7 +1875,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "Containing the reemergence of COVID-19: Addressing the ongoing pandemic requires coordinated efforts to boost vaccination and medicine access in countries where coverage remains low as well as the deployment of pandemic preparedness measures—including a global push toward sequencing and sharing data. In China, focusing vaccination efforts on vulnerable groups and maintaining sufficiently high coverage of boosters and antiviral medicines would minimize the risks of severe health outcomes and safeguard the recovery, with favorable cross-border spillovers." }, @@ -1715,7 +1886,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "Ensuring financial stability: Depending on country circumstances, macroprudential tools can be used to tackle pockets of elevated financial sector vulnerabilities. Monitoring housing sector developments and conducting stress tests in economies where house prices have increased significantly over the past few years are warranted. In China, central government action to resolve the property crisis and reduce the risk of spillovers to financial stability and growth is a priority, including by strengthening temporary mechanisms to protect presale homebuyers from the risk of non-delivery and by restructuring troubled developers. Globally, financial sector regulations introduced after the global financial crisis have contributed to the resilience of banking sectors throughout the pandemic, but there is a need to address data and supervisory gaps in the less-regulated nonbank financial sector, where risks may have built up inconspicuously. Recent turmoil in the crypto space also highlights the urgent need to introduce common standards and reinforce oversight of crypto assets." 
}, @@ -1725,7 +1897,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "Restoring debt sustainability: Lower growth and higher borrowing costs have raised public debt ratios in several economies. Where debt is unsustainable, implementing restructuring or reprofiling early on as part of a package of reforms (including fiscal consolidation and growth-enhancing supply-side reforms) can avert the need for more disruptive adjustment later." }, @@ -1735,7 +1908,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "Supporting the vulnerable: The surge in global energy and food prices triggered a cost-of-living crisis. Governments acted swiftly with support to households and firms, which helped cushion effects on growth and at times limited the pass-through from energy prices to headline inflation through price" }, @@ -1745,49 +1919,43 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "1 See “Geo-Economic Fragmentation and the Future of Multilateralism,” IMF Staff Discussion Note 2023/001." }, { - "type": "NarrativeText", - "element_id": "1344e770221822b381fb428d9390a446", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 10 - }, - "text": "controls. The temporary and broad-based measures are becoming increasingly costly and should be withdrawn and replaced by targeted approaches. Preserving the energy price signal will encourage a reduction in energy consumption and limit the risks of shortages. Targeting can be achieved through social safety nets such as cash transfers to eligible households based on income or demographics or by transfers through electricity companies based on past energy consumption. 
Subsidies should be temporary and offset by revenue-generating measures, including one-time solidarity taxes on high- income households and companies, where appropriate." - }, - { - "type": "NarrativeText", - "element_id": "5f63f2b3388c5c9f2ab22f4136d4196d", + "type": "ListItem", + "element_id": "bd7674df887463bc9f05c8030a151dea", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, - "text": "Reinforcing supply: Supply-side policies could address the key structural factors impeding growth— including market power, rent seeking, rigid regulation and planning, and inefficient education—and could help build resilience, reduce bottlenecks, and alleviate price pressures. A concerted push for investment along the supply chain of green energy technologies would bolster energy security and help advance progress on the green transition." + "text": "Restraining the pandemic: Global coordination is needed to resolve bottlenecks in the global distribution of vaccines and treatments. Public support for the development of new vaccine technologies and the design of systematic responses to future epidemics also remains essential.  Addressing debt distress: Progress has been made for countries that requested debt treatment under the Group of Twenty’s Common Framework initiative, and more will be needed to strengthen it. It is also necessary to agree on mechanisms to resolve debt in a broader set of economies, including middle-income countries that are not eligible under the Common Framework. Non– Paris Club and private creditors have a crucial role to play in ensuring coordinated, effective, and timely debt resolution processes." 
}, { - "type": "NarrativeText", - "element_id": "c64f29a38dae74989484539db014364f", + "type": "ListItem", + "element_id": "bd7674df887463bc9f05c8030a151dea", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, - "text": "Strengthening multilateral cooperation—Urgent action is needed to limit the risks stemming from geopolitical fragmentation and to ensure cooperation on fundamental areas of common interest:" + "text": "Restraining the pandemic: Global coordination is needed to resolve bottlenecks in the global distribution of vaccines and treatments. Public support for the development of new vaccine technologies and the design of systematic responses to future epidemics also remains essential.  Addressing debt distress: Progress has been made for countries that requested debt treatment under the Group of Twenty’s Common Framework initiative, and more will be needed to strengthen it. It is also necessary to agree on mechanisms to resolve debt in a broader set of economies, including middle-income countries that are not eligible under the Common Framework. Non– Paris Club and private creditors have a crucial role to play in ensuring coordinated, effective, and timely debt resolution processes." }, { "type": "ListItem", - "element_id": "bd7674df887463bc9f05c8030a151dea", + "element_id": "af6eef18ec41f4980c1a4cbb5b7d4fec", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, - "text": "Restraining the pandemic: Global coordination is needed to resolve bottlenecks in the global distribution of vaccines and treatments. Public support for the development of new vaccine technologies and the design of systematic responses to future epidemics also remains essential. 
 Addressing debt distress: Progress has been made for countries that requested debt treatment under the Group of Twenty’s Common Framework initiative, and more will be needed to strengthen it. It is also necessary to agree on mechanisms to resolve debt in a broader set of economies, including middle-income countries that are not eligible under the Common Framework. Non– Paris Club and private creditors have a crucial role to play in ensuring coordinated, effective, and timely debt resolution processes." + "text": "Strengthening global trade: Strengthening the global trading system would address risks associated with trade fragmentation. This can be achieved by rolling back restrictions on food exports and other essential items such as medicine, upgrading World Trade Organization (WTO) rules in critical areas such as agricultural and industrial subsidies, concluding and implementing new WTO-based agreements, and fully restoring the WTO dispute settlement system." }, { "type": "ListItem", @@ -1795,7 +1963,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "Strengthening global trade: Strengthening the global trading system would address risks associated with trade fragmentation. This can be achieved by rolling back restrictions on food exports and other essential items such as medicine, upgrading World Trade Organization (WTO) rules in critical areas such as agricultural and industrial subsidies, concluding and implementing new WTO-based agreements, and fully restoring the WTO dispute settlement system." 
}, @@ -1805,7 +1974,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "Using the global financial safety net: With the cascading of shocks to the global economy, using the global financial safety net to its fullest extent is appropriate, including by proactively utilizing the IMF’s precautionary financial arrangements and channeling aid from the international community to low-income countries facing shocks." }, @@ -1815,29 +1985,32 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "Speeding the green transition: To meet governments’ climate change goals, it is necessary to swiftly implement credible mitigation policies. International coordination on carbon pricing or equivalent policies would facilitate faster decarbonization. Global cooperation is needed to build resilience to climate shocks, including through aid to vulnerable countries." }, { - "type": "NarrativeText", - "element_id": "14187a5be9e3a125267bfe10e6c67fae", + "type": "ListItem", + "element_id": "089c5759e7030e34a3b537d9e20bcd13", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 10, + "links": [] }, - "text": "Overall, financial stability risks remain elevated as investors reassess their inflation and monetary policy outlook. Global financial conditions have eased somewhat since the October 2022 Global Financial Stability Report, driven largely by changing market expectations regarding the interest rate cycle (Figure 1.1). While the expected peak in policy rates—the terminal rate—has risen, markets now also expect the subsequent fall in rates will be significantly faster, and further, than what was forecast in October (Figure 1.2). As a result, global bond yields have recently declined, corporate spreads have tightened, and equity markets have rebounded. 
That said, central banks are likely to continue to tighten monetary policy to fight inflation, and concerns that this restrictive stance could tip the economy into a recession have increased in major advanced economies." + "text": "Speeding the green transition: To meet governments’ climate change goals, it is necessary to swiftly implement credible mitigation policies. International coordination on carbon pricing or equivalent policies would facilitate faster decarbonization. Global cooperation is needed to build resilience to climate shocks, including through aid to vulnerable countries." }, { "type": "NarrativeText", - "element_id": "e118be83abfed92b8969eca98bb4d53b", + "element_id": "14187a5be9e3a125267bfe10e6c67fae", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, - "text": "Slowing aggregate demand and weaker-than-expected inflation prints in some major advanced economies have prompted investors’ anticipation of a further reduction in the pace of future policy rate hikes. Corporate earnings forecasts have been cut due to headwinds from slowing demand, and margins have contracted across most regions. In addition, survey-based probabilities of recession have been increasing, particularly in the United States and Europe. However, upside risks to the inflation outlook remain. Despite the recent moderation in headline inflation, core inflation remains stubbornly high across most regions, labor markets are still tight, energy prices remain pressured by Russia’s ongoing war in Ukraine, and supply chain disruptions may reappear. To keep these risks in check, financial conditions will likely need to tighten further. If not, central banks may need to increase policy rates even more in order to achieve their inflation objectives." + "text": "Overall, financial stability risks remain elevated as investors reassess their inflation and monetary policy outlook. 
Global financial conditions have eased somewhat since the October 2022 Global Financial Stability Report, driven largely by changing market expectations regarding the interest rate cycle (Figure 1.1). While the expected peak in policy rates—the terminal rate—has risen, markets now also expect the subsequent fall in rates will be significantly faster, and further, than what was forecast in October (Figure 1.2). As a result, global bond yields have recently declined, corporate spreads have tightened, and equity markets have rebounded. That said, central banks are likely to continue to tighten monetary policy to fight inflation, and concerns that this restrictive stance could tip the economy into a recession have increased in major advanced economies." }, { "type": "Title", @@ -1845,7 +2018,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "Figure 1.1. Global Financial Conditions: Selected Regions (Standard deviations from mean)" }, @@ -1855,7 +2029,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "7" }, @@ -1865,7 +2040,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "6" }, @@ -1875,7 +2051,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "5" }, @@ -1885,7 +2062,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "4" }, @@ -1895,7 +2073,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "United States Euro area China Other AEs Other EMs" }, @@ -1905,7 +2084,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, 
+ "links": [] }, "text": "October 2022 GFSR" }, @@ -1915,7 +2095,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "3" }, @@ -1925,7 +2106,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "2" }, @@ -1935,7 +2117,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "1" }, @@ -1945,7 +2128,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "0" }, @@ -1955,7 +2139,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "–1" }, @@ -1965,7 +2150,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "–2" }, @@ -1975,7 +2161,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "–3" }, @@ -1985,7 +2172,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "2006 08 08" }, @@ -1995,7 +2183,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "06" }, @@ -2005,7 +2194,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "10 10" }, @@ -2015,7 +2205,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "12 12" }, @@ -2025,7 +2216,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "14 16 14" }, @@ -2035,7 
+2227,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "16" }, @@ -2045,7 +2238,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "18 18" }, @@ -2055,7 +2249,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "20 22 22" }, @@ -2065,7 +2260,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "20" }, @@ -2075,17 +2271,96 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "Sources: Bloomberg Finance L.P.; Haver Analytics; national data sources; and IMF staff calculations. Note: AEs = advanced economies; EMs = emerging markets. GFSR = Global Financial Stability Report." }, + { + "type": "NarrativeText", + "element_id": "e118be83abfed92b8969eca98bb4d53b", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 11, + "links": [] + }, + "text": "Slowing aggregate demand and weaker-than-expected inflation prints in some major advanced economies have prompted investors’ anticipation of a further reduction in the pace of future policy rate hikes. Corporate earnings forecasts have been cut due to headwinds from slowing demand, and margins have contracted across most regions. In addition, survey-based probabilities of recession have been increasing, particularly in the United States and Europe. However, upside risks to the inflation outlook remain. Despite the recent moderation in headline inflation, core inflation remains stubbornly high across most regions, labor markets are still tight, energy prices remain pressured by Russia’s ongoing war in Ukraine, and supply chain disruptions may reappear. 
To keep these risks in check, financial conditions will likely need to tighten further. If not, central banks may need to increase policy rates even more in order to achieve their inflation objectives." + }, + { + "type": "UncategorizedText", + "element_id": "e7f6c011776e8db7cd330b54174fd76f", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 11, + "links": [] + }, + "text": "6" + }, + { + "type": "UncategorizedText", + "element_id": "ef2d127de37b942baad06145e54b0c61", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 11, + "links": [] + }, + "text": "5" + }, + { + "type": "UncategorizedText", + "element_id": "4b227777d4dd1fc61c6f884f48641d02", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 11, + "links": [] + }, + "text": "4" + }, + { + "type": "UncategorizedText", + "element_id": "4e07408562bedb8b60ce05c1decfe3ad", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 11, + "links": [] + }, + "text": "3" + }, + { + "type": "UncategorizedText", + "element_id": "d4735e3a265e16eee03f59718b9b5d03", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 11, + "links": [] + }, + "text": "2" + }, + { + "type": "UncategorizedText", + "element_id": "6b86b273ff34fce19d6b804eff5a3f57", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 11, + "links": [] + }, + "text": "1" + }, { "type": "Title", "element_id": "6ef230728534d871e5126e2a55e12b26", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "Figure 1.2. 
Market-Implied Expectations of Policy Rates (Percent)" }, @@ -2095,7 +2370,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "Latest" }, @@ -2105,27 +2381,19 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "October 2022 GFSR" }, - { - "type": "UncategorizedText", - "element_id": "e7f6c011776e8db7cd330b54174fd76f", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 11 - }, - "text": "6" - }, { "type": "ListItem", "element_id": "7d4f55875c970d850a152ba1d5ba02a5", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "1. United States" }, @@ -2135,7 +2403,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "2. Euro area" }, @@ -2145,7 +2414,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "5" }, @@ -2155,7 +2425,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "4" }, @@ -2165,7 +2436,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "3" }, @@ -2175,7 +2447,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "2" }, @@ -2185,7 +2458,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "1" }, @@ -2195,7 +2469,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "Oct. 
22" }, @@ -2205,7 +2480,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "Apr. 23" }, @@ -2215,7 +2491,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "Oct. 23" }, @@ -2225,7 +2502,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "Dec. 24" }, @@ -2235,7 +2513,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "Dec. 26" }, @@ -2245,7 +2524,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "Oct. 22" }, @@ -2255,7 +2535,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "Apr. 23" }, @@ -2265,7 +2546,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "Oct. 23" }, @@ -2275,7 +2557,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "Dec. 24" }, @@ -2285,67 +2568,19 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "Dec. 
26" }, - { - "type": "UncategorizedText", - "element_id": "ef2d127de37b942baad06145e54b0c61", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 11 - }, - "text": "5" - }, - { - "type": "UncategorizedText", - "element_id": "4b227777d4dd1fc61c6f884f48641d02", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 11 - }, - "text": "4" - }, - { - "type": "UncategorizedText", - "element_id": "4e07408562bedb8b60ce05c1decfe3ad", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 11 - }, - "text": "3" - }, - { - "type": "UncategorizedText", - "element_id": "d4735e3a265e16eee03f59718b9b5d03", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 11 - }, - "text": "2" - }, - { - "type": "UncategorizedText", - "element_id": "6b86b273ff34fce19d6b804eff5a3f57", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 11 - }, - "text": "1" - }, { "type": "NarrativeText", "element_id": "da431b9817da923cc48a538c4b3b8ade", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "Given the tension between rising recession risks and monetary policy uncertainty, markets have seen significant volatility. While many central banks in advanced economies have stepped down the size of hikes, they have also explicitly stated they will need to keep rates higher, for a longer period of time, to tamp down inflation. Risk assets could face significant declines if earnings retrench further or if investors reassess their outlook for monetary policy given central bank communications. Globally, the partial reversal of the dollar rally has contributed to recent easing due to improved risk appetite, and some emerging market central banks have paused tightening amid tentative signs that inflation may have peaked." 
}, @@ -2355,7 +2590,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "Sources: Bloomberg Finance L.P.; and IMF staff calculations. Note: GFSR = Global Financial Stability Report." }, @@ -2365,7 +2601,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "Financial market volatility is expected to remain elevated and could be exacerbated by poor market liquidity. For some asset classes (such as US Treasuries), liquidity has deteriorated to the March 2020 lows of the COVID-19 pandemic. With the process of central bank balance sheet reduction (quantitative tightening) underway, market liquidity is expected to remain challenging." }, @@ -2375,7 +2612,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 11 + "page_number": 11, + "links": [] }, "text": "WEO Update © 2023 • ISBN: 979-8-40023-224-4" } diff --git a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/Silent-Giant-(1).pdf.json b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/Silent-Giant-(1).pdf.json index 2f6c16233d..2751529948 100644 --- a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/Silent-Giant-(1).pdf.json +++ b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/Silent-Giant-(1).pdf.json @@ -5,7 +5,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "The Silent Giant" }, @@ -15,7 +16,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "The need for nuclear in a clean energy system" }, @@ -25,7 +27,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + 
"page_number": 3, + "links": [] }, "text": "Executive Summary" }, @@ -35,7 +38,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "In a world centred on short-term fixes, many of the traits that make nuclear energy a key player in the transition to a sustainable world are not properly valued and often taken for granted. Reflecting on the popular discourse in the world of energy politics it would seem that renewables, and renewables alone, will be responsible for, and capable of, delivering a zero-carbon energy system – and that it is just a matter of time." }, @@ -45,7 +49,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "The reality today is that both global carbon dioxide emissions and fossil fuel use are still on the rise. This does not only make the battle against climate change much harder, but also results in hundreds of thousands of pollution deaths every year." }, @@ -55,7 +60,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Energy is the essential agent for promoting human development, and global demand is projected to increase significantly in the coming decades. Securing access to modern and affordable energy is essential for lifting people out of poverty, and for promoting energy independence and economic growth." }, @@ -65,7 +71,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Nuclear energy is a proven solution with a long and well-established track record. Nuclear reactors – a grand total of 445 in 30 countries – are the low-carbon backbone of electricity systems, operating in the background, day in and day out, often out of sight and out of mind. 
Capable of generating immense amounts of clean power, they are the silent giants upon which we rely daily." }, @@ -75,7 +82,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Nuclear energy has shown – be it in France or Sweden – that it has the potential to be the catalyst for delivering sustainable energy transitions, long before climate change was on the agenda. The use of nuclear energy is the fast track to a high-powered and clean energy system, which not only delivers a healthier environment and an affordable supply of electricity, but also strengthens energy security and helps mitigate climate change." }, @@ -85,7 +93,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "The global nuclear industry, led by World Nuclear Association, is ready to take on the challenge. As part of the Harmony Programme, we have set a target to build an additional 1000GWe of reactors across the world before 2050, bringing the global share of electricity production of nuclear to 25%." 
}, @@ -95,7 +104,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "In order to realise the full potential of nuclear energy we have identified three key areas where actions are required:" }, @@ -105,7 +115,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "The need to create a level playing field that values reliability and energy security" }, @@ -115,7 +126,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "The need for harmony in the nuclear regulatory environment" }, @@ -125,7 +137,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "The need for a holistic safety paradigm for the whole electricity system." }, @@ -135,7 +148,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "1" }, @@ -145,7 +159,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "The drivers for a clean energy system" }, @@ -155,7 +170,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "Electricity is central to modern life – it powers our daily lives, as well as our dreams and ambitions. Demand has grown steadily for more than 100 years, and will continue to do so as many parts of the world continue to develop, and electrification takes a central role in efforts to decarbonize (Figure 1). 
With nearly a billion people around the world still living in the dark, without access to electricity, humanity has a responsibility to learn from the past - everyone has the right to enjoy a modern lifestyle in a way that does not cause harm to people or the planet." }, @@ -165,7 +181,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "45,000" }, @@ -175,7 +192,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": " Marine" }, @@ -185,7 +203,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "40,000" }, @@ -195,7 +214,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": " CSP" }, @@ -205,7 +225,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "35,000" }, @@ -215,7 +236,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": " Solar PV" }, @@ -225,7 +247,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": " Geothermal" }, @@ -235,7 +258,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "30,000" }, @@ -245,7 +269,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": " Wind" }, @@ -255,7 +280,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "h W T" }, @@ -265,7 +291,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + 
"page_number": 4, + "links": [] }, "text": "25,000" }, @@ -275,7 +302,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": " Bioenergy" }, @@ -285,7 +313,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "20,000" }, @@ -295,7 +324,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": " Hydro" }, @@ -305,7 +335,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": " Nuclear" }, @@ -315,7 +346,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "15,000" }, @@ -325,7 +357,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": " Gas" }, @@ -335,7 +368,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "10,000" }, @@ -345,7 +379,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": " Oil" }, @@ -355,7 +390,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "5,000" }, @@ -365,7 +401,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": " Coal" }, @@ -375,7 +412,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "0" }, @@ -385,7 +423,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "2000" }, @@ -395,7 +434,8 @@ 
"metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "2010" }, @@ -405,7 +445,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "2020" }, @@ -415,7 +456,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "2030" }, @@ -425,7 +467,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "2040" }, @@ -435,7 +478,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "Figure 1. IEA projected electricity production and sources to 2040 i" }, @@ -445,7 +489,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "The challenge before us, however, goes far beyond just electricity – we will need to find ways to decarbonize all parts of the economy, and we need solutions that are sustainable in the long-term. That means changing the way we heat our homes and power our industrial processes, as well as ensuring that the way we travel, export our products and ship our food moves away from fossil fuels." }, @@ -455,7 +500,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "Despite the very considerable efforts to decarbonize the economy and the countless billions spent, our world remains heavily addicted to fossil fuels. The trend is clear – instead of reducing our dependence on fossil fuels, we are increasing it (Figure 2). As a direct result, greenhouse gas emissions continue to rise when they need to drastically fall." 
}, @@ -465,7 +511,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "30,000,000" }, @@ -475,7 +522,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": " High-carbon  Low-carbon" }, @@ -485,7 +533,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "25,000,000" }, @@ -495,7 +544,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "20,000,000" }, @@ -505,7 +555,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "h W G" }, @@ -515,7 +566,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "15,000,000" }, @@ -525,7 +577,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "10,000,000" }, @@ -535,7 +588,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "5,000,000" }, @@ -545,7 +599,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "0" }, @@ -555,7 +610,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "1990" }, @@ -565,7 +621,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "1995" }, @@ -575,7 +632,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "2000" }, @@ -585,7 +643,8 @@ "metadata": { 
"data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "2005" }, @@ -595,7 +654,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "2010" }, @@ -605,7 +665,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "2015" }, @@ -615,7 +676,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "Figure 2. Worldwide electricity generation by fuel (1990-2016)ii" }, @@ -625,7 +687,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "2" }, @@ -635,7 +698,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "We need to deliver a worldwide transformation that is socially, economically and environmentally sustainable. We need a system that is affordable – no one should have to choose between heating their home, and essentials like eating – as well as helping to alleviate poverty, and ensure the realization of human potential globally. We need a power source that can not only help us mitigate the effects of climate change and environmental degradation, but can also help bring the enormous benefits of reliable electricity supply to the corners of the world that do not have access to it." }, @@ -645,7 +709,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "Nuclear energy is already making a major contribution. By using nuclear energy rather than fossil fuels, we currently avoid the emission of more than 2500 million tonnes of carbon dioxide every year. 
To put that into perspective, it is the equivalent of removing about 400 million cars from the world’s roads." }, @@ -655,7 +720,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "Modern society is dependent on the steady supply of electricity, every day of the year – regardless of weather, season or time of day – and nuclear energy is particularly well-suited to providing this service. Given that the majority of baseload supply is fossil-based, an increase in the use of nuclear energy would result in a rapid decarbonization of the electricity system. The International Energy Agency’s (IEA) recent report III on nuclear energy highlighted the importance of dependable baseload electricity generators and the need to properly value and compensate them for the electricity security and reliability services they provide." }, @@ -665,7 +731,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "3" }, @@ -675,7 +742,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "Despite impressive recent growth, the stark reality is that renewables alone will not be able to resolve our dependence on fossil fuels. Clearly, the sun does not always shine, and the wind does not always blow, and this is compounded by the fact that many times these periods coincide with when electricity demand is at its highest, but renewables can be complementary to nuclear energy. Storage solutions, such as batteries, will not be able to power our societies for days or weeks when the weather is not favourable. Natural gas is currently the most used solution for the intermittency problem, which only serves to reinforce our economy’s dependence of fossil fuels, and severely undermines the apparently ‘green credentials’ of many renewables." 
}, @@ -685,7 +753,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "Moving to a sustainable future" }, @@ -695,7 +764,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "The Intergovernmental Panel on Climate Change (IPCC) special report on Global Warming of 1.5°C iv examined a large number of different scenarios for limiting global warming to 1.5°C. Of those scenarios which would achieve the 1.5°C target, the mean increase in nuclear energy’s contribution to electricity production was 2.5 times higher compared to today. However, the ‘middle-of-the-road’ scenario – in which social, economic, and technological trends follow current patterns and would not require major changes to, for example, diet and travel habits – sees the need for nuclear increase by five times globally by 2050." }, @@ -705,7 +775,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "The IEA has concluded that without an expanded contribution from nuclear energy, the already huge challenge of achieving emissions reductions will become drastically harder and more costly. In their latest report on nuclear energy v, published in 2019, they also conclude that not using nuclear would have negative implications for energy security and result in higher costs for the consumers. The IEA recommends policy reforms to ‘… ensure competition on a level playing field’ and that the ‘… focus should be on designing electricity markets in a way that values the clean energy and energy security attributes of low-carbon technologies, including nuclear power.’ Such reforms should also ensure that reliability of electricity production is properly valued and compensated." 
}, @@ -715,7 +786,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "As part of the Harmony Programme, the world’s nuclear industry has identified three key policy areas for action to unlock the true potential of nuclear energy - the need for a level playing field, the harmonization of regulations and the establishment of an effective safety paradigm." }, @@ -725,7 +797,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "In regard to the need for a level playing field, we see that many of the world’s electricity markets operate in an unsustainable fashion, dominated by short-term thinking. Electricity supply which is affordable, reliable and available 24/7 generates broad societal benefits, and as seen in Figure 3, nuclear is one of the most affordable electricity sources." }, @@ -735,7 +808,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "4" }, @@ -745,7 +819,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "300" }, @@ -755,7 +830,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "250" }, @@ -765,7 +841,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "200" }, @@ -775,7 +852,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "h W M / $" }, @@ -785,7 +863,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "150" }, @@ -795,7 +874,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 
7 + "page_number": 7, + "links": [] }, "text": "100" }, @@ -805,7 +885,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "50" }, @@ -815,7 +896,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "0" }, @@ -825,7 +907,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "C o m" }, @@ -835,7 +918,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "m ercial Photovoltaic" }, @@ -845,7 +929,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "O nshore Wind" }, @@ -855,7 +940,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Offshore Wind" }, @@ -865,7 +951,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "N uclear" }, @@ -875,7 +962,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "C C G T" }, @@ -885,7 +973,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "C oal" }, @@ -895,7 +984,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Figure 3. 
Comparative cost projections for main electricity generators vi" }, @@ -905,7 +995,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "However, markets fail to give due credit to electricity generators, such as nuclear energy, that are able to meet these societal demands. This has resulted in situations where nuclear energy has struggled to compete with energy sources that have been subsidized, do not pay the hidden costs brought on by their intermittency (e.g. costly backup provisions and investments in the grid), or do not have to take responsibility for using our common atmosphere as a dumping ground." }, @@ -915,7 +1006,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Additionally, electricity markets fail to recognize the relative costs of different forms of electricity generation. Whilst the nuclear industry takes responsibility for its lifecycle costs (including decommissioning and waste management), other electricity generators do not. Fossil fuel generators are rarely required to pay the price in line with the environmental and health damage that their emissions cause, whilst the cost of wind and solar does not include the disposal of the sometimes toxic materials at the end of their life." }, @@ -925,7 +1017,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "In regard to the need to harmonize regulations, multiple regulatory barriers stemming from diverse national licensing processes and safety requirements currently limit global nuclear trade and investment. A lack of international standardization places unnecessary regulatory burdens on nuclear activities and causes delays in the licensing of new designs, hindering innovation." 
}, @@ -935,7 +1028,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "The International Atomic Energy Agency (IAEA) has highlighted the importance of addressing this issue, concluding that the lack of regulatory harmony ‘…causes many drawbacks for the entire nuclear industry, including developers, vendors, operators and even regulators themselves…This results in increased costs and reduced predictability in project execution’. vii It is therefore crucial that we harmonize the regulatory process to address these weaknesses, and avoid unnecessary duplication and inconsistencies." }, @@ -945,7 +1039,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "5" }, @@ -955,7 +1050,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "In regard to the need for a holistic safety paradigm for the whole electricity system, we need to consider safety from a societal perspective, something the current energy system fails to do. The health, environmental and safety benefits of nuclear energy are not sufficiently understood and valued when compared with other electricity sources. Nuclear energy remains the safest form of electricity generation (Figure 4). Additionally, the use of nuclear consistently prevents many tens of thousands of deaths (mainly resulting from air pollution) every year by avoiding the use of coal - lifesaving measures which must be better recognised and valued." 
}, @@ -965,7 +1061,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "140" }, @@ -975,169 +1072,186 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "r a e y" }, { "type": "Title", - "element_id": "f83714d89302473e0e4f5399bd50e7a9", + "element_id": "3f79bb7b435b05321651daefd374cdc6", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, - "text": "W T" + "text": "e" }, { - "type": "Title", - "element_id": "3f79bb7b435b05321651daefd374cdc6", + "type": "UncategorizedText", + "element_id": "2abaca4911e68fa9bfbf3482ee797fd5", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, - "text": "e" + "text": "120" }, { - "type": "NarrativeText", - "element_id": "f9bb49945b60897227abdd75b5f8d39b", + "type": "UncategorizedText", + "element_id": "ad57366865126e55649ecb23ae1d4888", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, - "text": "r e p s e i t i l" + "text": "100" }, { - "type": "Title", - "element_id": "1fb2ec4fc8fc547c0de86ba79ba651e5", + "type": "UncategorizedText", + "element_id": "2abaca4911e68fa9bfbf3482ee797fd5", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, - "text": "a t a F" + "text": "120" }, { "type": "UncategorizedText", - "element_id": "2abaca4911e68fa9bfbf3482ee797fd5", + "element_id": "b725d20650649a5221675144bab5946e", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, - "text": "120" + "text": "99.5" }, { - "type": "UncategorizedText", - "element_id": "ad57366865126e55649ecb23ae1d4888", + "type": "Title", + "element_id": 
"f83714d89302473e0e4f5399bd50e7a9", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, - "text": "100" + "text": "W T" }, { - "type": "UncategorizedText", - "element_id": "48449a14a4ff7d79bb7a1b6f3d488eba", + "type": "NarrativeText", + "element_id": "f9bb49945b60897227abdd75b5f8d39b", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, - "text": "80" + "text": "r e p s e i t i l" }, { - "type": "UncategorizedText", - "element_id": "39fa9ec190eee7b6f4dff1100d6343e1", + "type": "Title", + "element_id": "1fb2ec4fc8fc547c0de86ba79ba651e5", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, - "text": "60" + "text": "a t a F" }, { "type": "UncategorizedText", - "element_id": "d59eced1ded07f84c145592f65bdf854", + "element_id": "48449a14a4ff7d79bb7a1b6f3d488eba", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, - "text": "40" + "text": "80" }, { "type": "UncategorizedText", - "element_id": "f5ca38f748a1d6eaf726b8a42fb575c3", + "element_id": "39fa9ec190eee7b6f4dff1100d6343e1", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, - "text": "20" + "text": "60" }, { "type": "UncategorizedText", - "element_id": "5feceb66ffc86f38d952786c6d696c79", + "element_id": "d59eced1ded07f84c145592f65bdf854", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, - "text": "0" + "text": "40" }, { "type": "UncategorizedText", - "element_id": "2abaca4911e68fa9bfbf3482ee797fd5", + "element_id": "ce3201efc2e495241a85e4fc84575f50", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, - "text": "120" + "text": "71.9" }, { - 
"type": "Title", - "element_id": "6c25ebfc9ffd2510c4c41d4bd5cb7ea9", + "type": "UncategorizedText", + "element_id": "f5ca38f748a1d6eaf726b8a42fb575c3", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, - "text": "C oal" + "text": "20" }, { "type": "UncategorizedText", - "element_id": "b725d20650649a5221675144bab5946e", + "element_id": "5feceb66ffc86f38d952786c6d696c79", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, - "text": "99.5" + "text": "0" }, { "type": "Title", - "element_id": "2378bdd2cf4f491cf401e6b215cbb4fd", + "element_id": "6c25ebfc9ffd2510c4c41d4bd5cb7ea9", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, - "text": "Oil" + "text": "C oal" }, { - "type": "UncategorizedText", - "element_id": "ce3201efc2e495241a85e4fc84575f50", + "type": "Title", + "element_id": "2378bdd2cf4f491cf401e6b215cbb4fd", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, - "text": "71.9" + "text": "Oil" }, { "type": "Title", @@ -1145,7 +1259,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "N atural gas" }, @@ -1155,7 +1270,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "8.5" }, @@ -1165,7 +1281,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "1.78" }, @@ -1175,7 +1292,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "Offshore wind" }, @@ -1185,7 +1303,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": 
[] }, "text": "(U K)" }, @@ -1195,7 +1314,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "O nshore wind (G erm any)" }, @@ -1205,7 +1325,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "0.245" }, @@ -1215,7 +1336,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "S olar P V" }, @@ -1225,7 +1347,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "<0.01" }, @@ -1235,7 +1358,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "N uclear*" }, @@ -1245,7 +1369,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "Figure 4. Comparison of number of fatalities due to electricity generation viii" }, @@ -1255,7 +1380,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "Nuclear for a sustainable tomorrow" }, @@ -1265,7 +1391,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "Nuclear energy is already making a significant contribution to providing the world with clean and abundant electricity, and has a proven track record of being a reliable workhorse around the world. Countries like France, Sweden and Switzerland have proven that it is possible to divorce economic growth from an increase in damaging emissions and over the timescales required to effectively challenge climate change and environmental degradation (Figures 5 and 6). 
Nuclear can ensure that fast-growing populations achieve rising standards of living – without having to sacrifice the planet or their own well-being." }, @@ -1275,7 +1402,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "100" }, @@ -1285,7 +1413,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "90" }, @@ -1295,7 +1424,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": " Coal" }, @@ -1305,7 +1435,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": " Gas/Oil" }, @@ -1315,7 +1446,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "80" }, @@ -1325,7 +1457,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": " Biofuels/Waste" }, @@ -1335,7 +1468,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "70" }, @@ -1345,7 +1479,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": " Wind/Solar" }, @@ -1355,7 +1490,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "60" }, @@ -1365,7 +1501,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": " Hydro" }, @@ -1375,7 +1512,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": " Nuclear" }, @@ -1385,7 +1523,8 @@ "metadata": { "data_source": {}, 
"filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "%" }, @@ -1395,7 +1534,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "50" }, @@ -1405,7 +1545,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "40" }, @@ -1415,7 +1556,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "30" }, @@ -1425,7 +1567,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "20" }, @@ -1435,7 +1578,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "10" }, @@ -1445,7 +1589,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "0" }, @@ -1455,7 +1600,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "France" }, @@ -1465,7 +1611,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "Sweden" }, @@ -1475,7 +1622,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "Switzerland" }, @@ -1485,7 +1633,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "Figure 5. 
The importance of nuclear in ensuring clean energy systems in France, Sweden and Switzerland ix" }, @@ -1495,7 +1644,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "6" }, @@ -1505,7 +1655,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "600" }, @@ -1515,7 +1666,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "500" }, @@ -1525,7 +1677,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": " Non-hydro" }, @@ -1535,59 +1688,65 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "ren. & waste" }, { - "type": "Title", - "element_id": "563a2980d46c81119e1d7d952b375a41", + "type": "UncategorizedText", + "element_id": "26d228663f13a88592a12d16cf9587ca", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, - "text": "h W T" + "text": "400" }, { - "type": "UncategorizedText", - "element_id": "26d228663f13a88592a12d16cf9587ca", + "type": "Title", + "element_id": "f35457739b3bd74c61625c986c844726", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, - "text": "400" + "text": " Nuclear" }, { - "type": "UncategorizedText", - "element_id": "983bd614bb5afece5ab3b6023f71147c", + "type": "Title", + "element_id": "f6e172956a9472fa43f9a895f99c2836", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, - "text": "300" + "text": " Natural gas" }, { "type": "Title", - "element_id": "f35457739b3bd74c61625c986c844726", + "element_id": "563a2980d46c81119e1d7d952b375a41", "metadata": { 
"data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, - "text": " Nuclear" + "text": "h W T" }, { - "type": "Title", - "element_id": "f6e172956a9472fa43f9a895f99c2836", + "type": "UncategorizedText", + "element_id": "983bd614bb5afece5ab3b6023f71147c", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, - "text": " Natural gas" + "text": "300" }, { "type": "Title", @@ -1595,7 +1754,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": " Hydro" }, @@ -1605,7 +1765,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "200" }, @@ -1615,7 +1776,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": " Oil" }, @@ -1625,7 +1787,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": " Coal" }, @@ -1635,7 +1798,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "100" }, @@ -1645,7 +1809,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "0" }, @@ -1655,7 +1820,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "1974" }, @@ -1665,7 +1831,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "1980 1985 1990 1995 2000 2005 2010" }, @@ -1675,7 +1842,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "2017" }, @@ -1685,7 +1853,8 @@ "metadata": { 
"data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "Figure 6. The lasting decarbonization of French electricity and nuclear’s ability to meet growing demand x" }, @@ -1695,7 +1864,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "The incredible energy density of uranium means that just a few kilos is all that is required to provide one person with enough power for a lifetime. Uranium is abundant and can be found in many parts of the world, as well as in seawater. Furthermore, spent nuclear fuel is well managed and can in most cases be recycled to produce even more power. By using nuclear energy, countries are able to take charge of their own destinies by decreasing their reliance on imported energy – enhanced independence and security in uncertain times." }, @@ -1705,7 +1875,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "One fuel pellet contains as much energy as a tonne of coal" }, @@ -1715,7 +1886,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "Unlike other power sources, nuclear energy helps us reduce our total footprint, going beyond just the environment. When accounting for factors such as cost (e.g. fuel and construction costs), carbon (lifecycle greenhouse gas emissions), water and land footprints, nuclear is far ahead of all other energy generators." }, @@ -1725,7 +1897,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "Nuclear energy offers a multitude of services beyond just electricity. With nuclear, we can decarbonize the way we heat our homes, provide process heat for industry, and ensure access to clean water. 
As freshwater supplies come under increasing pressure worldwide, nuclear reactors can provide desalination, ensuring a reliable flow of fresh drinking water in areas where it is scarce." }, @@ -1735,7 +1908,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "7" }, @@ -1745,7 +1919,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "Nuclear energy can be relied upon to power the new mobility revolution taking place. Every day, we use almost 20 million barrels of oil to power our vehicles. By swapping to an electric or hydrogen-powered transport fleet – all powered by the atom – we are able to address one of the key challenges to a sustainable economy." }, @@ -1755,7 +1930,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "We cannot afford to wait – we need every part of the puzzle to contribute towards solving some of the greatest challenges faced by humankind in a very long time. The impacts of climate change will hit the poorest and most vulnerable first, and failing to act will have significant humanitarian consequences." }, @@ -1765,7 +1941,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "Nuclear power is the silent giant of today’s energy system – it runs quietly in the background, capable of delivering immense amounts of power, regardless of weather or season, allowing us to focus on everything else in life. It is a technology that is available now, and can be expanded quickly across the world to help us solve some of the most defining challenges we face. Nuclear energy holds the potential to herald a new, cleaner and truly sustainable world – enabling us to pass on a cleaner planet to our children." 
}, @@ -1775,7 +1952,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "References" }, @@ -1785,7 +1963,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "i" }, @@ -1795,7 +1974,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "ii" }, @@ -1805,7 +1985,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "iii" }, @@ -1815,7 +1996,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "iv" }, @@ -1825,7 +2007,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "v" }, @@ -1835,7 +2018,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "vi" }, @@ -1845,7 +2029,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "vii" }, @@ -1855,7 +2040,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "International Energy Agency (2018), World Energy Outlook 2018. Data accessed from https://www.iea.org/weo/ – Based on the New Policies Scenario, which incorporates existing energy policies as well as an assessment of the results likely to stem from the implementation of announced policy intentions – with visual modification by World Nuclear Association. International Energy Agency (n.d.), Statistics. 
Accessed from: https://www.iea.org/statistics/?country=WORLD&year=2016&category=Electricity&indicator=ElecGenByFuel&mode =chart&dataTable=ELECTRICITYANDHEAT – with visual modifications by World Nuclear Association. International Energy Agency (2019), Nuclear Power in a Clean Energy System. Accessed from: https://www.iea.org/ publications/nuclear/ Intergovernmental Panel on Climate Change (2018), Special Report on Global Warming of 1.5 °C. Accessed from: https://www.ipcc.ch/sr15/ International Energy Agency (2019), Nuclear Power in a Clean Energy System. Accessed from: https://www.iea.org/ publications/nuclear/ International Energy Agency & OECD Nuclear Energy Agency (2015), Projected Costs of generating Electricity – 2015 Edition. Accessed from: https://www.oecd-nea.org/ndd/pubs/2015/7057-proj-costs-electricity-2015.pdf International Atomic Energy Agency (2015), Technical challenges in the application and licensing of digital instrumentation and control systems in nuclear power plants. Accessed from: https://www-pub.iaea.org/MTCD/ Publications/PDF/P1695_web.pdf" }, @@ -1865,7 +2051,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "viii Paul-Scherrer Institute. Data for nuclear accidents modified to reflect UNSCEAR findings/recommendations (2012)" }, @@ -1875,7 +2062,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "ix" }, @@ -1885,7 +2073,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "x" }, @@ -1895,7 +2084,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "and NRC SOARCA study 2015 International Energy Agency (2018), Electricity Information 2018 https://webstore.iea.org/electricity-information-2018-overview Ibid." 
}, @@ -1905,7 +2095,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "Photo credits: Front cover: Mike Baird; page 2: Vattenfall; page 4: Getty Images; page 5: Adobe Stock; page 6: Rosatom; page 8: Dean Calma, IAEA; page 10: Kazatomprom; page 11: EDF." }, @@ -1915,7 +2106,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "8" }, @@ -1925,7 +2117,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 12 + "page_number": 12, + "links": [] }, "text": "World Nuclear Association Tower House 10 Southampton Street London WC2E 7HA United Kingdom" }, @@ -1935,7 +2128,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 12 + "page_number": 12, + "links": [] }, "text": "+44 (0)20 7451 1520 www.world-nuclear.org info@world-nuclear.org" }, @@ -1945,7 +2139,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 12 + "page_number": 12, + "links": [] }, "text": "World Nuclear Association is the international organization that represents the global nuclear industry. Its mission is to promote a wider understanding of nuclear energy among key international influencers by producing authoritative information, developing common industry positions, and contributing to the energy debate." }, @@ -1955,7 +2150,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 12 + "page_number": 12, + "links": [] }, "text": "The Silent Giant © 2019 World Nuclear Association. 
Registered in England and Wales, company number 01215741" } diff --git a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/recalibrating-risk-report.pdf.json b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/recalibrating-risk-report.pdf.json index 6879697f80..a30000b49a 100644 --- a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/recalibrating-risk-report.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/recalibrating-risk-report.pdf.json @@ -5,7 +5,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Recalibrating risk" }, @@ -15,7 +16,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 1 + "page_number": 1, + "links": [] }, "text": "Putting nuclear risk in context and perspective" }, @@ -25,7 +27,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "© 2021 World Nuclear Association" }, @@ -35,7 +38,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 2 + "page_number": 2, + "links": [] }, "text": "Registered in England and Wales, company number 01215741. This report represents the views of individual experts, but does not necessarily represent those of any of the World Nuclear Association’s individual member organizations." 
}, @@ -45,7 +49,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Executive Summary" }, @@ -55,7 +60,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Nuclear energy is crucial to meeting the world’s ever-increasing demand for energy, thanks to its ability to supply affordable, reliable, and sustainable electricity and heat. Despite the many benefits of nuclear energy, its deployment is hindered in some parts of the world due to long-standing misconceptions about its risks. Even with its safety record – unmatched by any other energy source – the perception of nuclear power as uniquely dangerous endures." }, @@ -65,7 +71,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "This is reflected in the regulatory burden placed on the nuclear industry, which is geared towards an “as low as possible” approach, demanding radiation levels to be far below the levels where health effects have been observed (and in many cases below natural background radiation). This has resulted in higher costs, without delivering any additional health benefits, and has resulted in policymakers choosing other, more risky energy sources. More often than not, those alternative energy sources have been fossil fuels, greatly exacerbating the well-known risks posed by air pollution and climate change." }, @@ -75,7 +82,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Expanding the use of nuclear energy is essential for solving some of the biggest challenges facing humanity. 
Nuclear power has already played a major role in avoiding the emission of air pollutants and greenhouse gases, a role that will have to be greatly expanded in the future to ensure global energy supplies are decarbonized by 2050. Nuclear energy will also play a major part in ensuring that the transition to a low-carbon future is done in an equitable fashion, providing people across the world with a high-powered and sustainable future." }, @@ -85,7 +93,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "In order to fully unlock the potential of the atom, it is crucial that the gap between perceived and actual risks is addressed. The window of opportunity to act on climate change and other global challenges is closing fast – we must not delay increasing the contribution of nuclear energy on the grounds of myths and misconceptions." }, @@ -95,7 +104,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "Therefore, World Nuclear Association calls upon policymakers and regulators to adopt an all-hazards approach, where different risks associated with energy producing technologies are placed in perspective and the appropriate context, and examined in line with the latest scientific evidence. Policymakers and regulators must ensure that their decisions regarding radiation protection do not create greater risks elsewhere. This include the recalibration of existing regulations regarding nuclear power and radiation, weighing the cost of regulatory measures against the societal benefits provided by nuclear energy." 
}, @@ -105,7 +115,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 3 + "page_number": 3, + "links": [] }, "text": "1" }, @@ -115,7 +126,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "Perceived versus actual risk" }, @@ -125,7 +137,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "It is widely accepted that humans have skewed perceptions of risks, and the way we respond to them is shaped by these perceptions, rather than the actual threats posed. Approximately 1.35 millioni people die every year because of traffic accidents, in comparison with 257 aviation fatalities in 2019ii, yet more people are nervous about flying, fearing a rare deadly crash, than being in a fatal traffic accident. These numbers tell a powerful and well-established story: evaluations of risk are largely the result of emotions, rather than logic or facts. Although it is hard to recognize and accept that our perceptions may mislead us and curtail effective decision making, this is a well-established characteristic of humanity." }, @@ -135,39 +148,43 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "Nuclear energy and the risk of radiation is one of the most extreme cases in which perceived and actual risks have diverged. The fear of radiation, whilst pre- dating the Second World War, was firmly established by the debate on the potential impacts of low-dose radiation from the fallout from nuclear weapons testing in the early years of the Cold War. Radiation in many ways became linked with the mental imagery of nuclear war, playing an important role in increasing public concern about radiation and its health effects. 
There is a well-established discrepancy between fact-based risk assessments and public perception of different risks. This is very much the case with nuclear power, and this is clearly highlighted in Figure 1, with laypersons ranking nuclear power as the highest risk out of 30 activities and technologies, with experts ranking nuclear as 20th. In many ways, popular culture’s depiction of radiation has played a role in ensuring that this discrepancy has remained, be it Godzilla, The Incredible Hulk, or The Simpsons, which regularly plays on the notion of radiation from nuclear power plants causing three-eyed fish, something that has been firmly rejected as unscientific." }, { "type": "Title", - "element_id": "4d7c9c95f808a09f6b0bcfe8b255e537", + "element_id": "d977fff4c69c437aa4a44a5c5f4bf02e", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, - "text": "Figure 1. Ordering of perceived risks for 30 activities and technologies1,iii" + "text": "Rank Order Laypersons" }, { "type": "Title", - "element_id": "d977fff4c69c437aa4a44a5c5f4bf02e", + "element_id": "4d7c9c95f808a09f6b0bcfe8b255e537", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, - "text": "Rank Order Laypersons" + "text": "Figure 1. 
Ordering of perceived risks for 30 activities and technologies1,iii" }, { "type": "UncategorizedText", - "element_id": "4523540f1504cd17100c4835e85b7eef", + "element_id": "624b60c58c9d8bfb6ff1886c2fd605d2", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, - "text": "17" + "text": "30" }, { "type": "UncategorizedText", @@ -175,39 +192,43 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "22" }, { "type": "UncategorizedText", - "element_id": "624b60c58c9d8bfb6ff1886c2fd605d2", + "element_id": "4523540f1504cd17100c4835e85b7eef", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, - "text": "30" + "text": "17" }, { "type": "UncategorizedText", - "element_id": "d1429f8178a04f7fc73a66edf10ab8b5", + "element_id": "6b86b273ff34fce19d6b804eff5a3f57", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, - "text": "" + "text": "1" }, { "type": "UncategorizedText", - "element_id": "4b227777d4dd1fc61c6f884f48641d02", + "element_id": "d1429f8178a04f7fc73a66edf10ab8b5", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, - "text": "4" + "text": "" }, { "type": "UncategorizedText", @@ -215,39 +236,43 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "" }, { "type": "UncategorizedText", - "element_id": "d1429f8178a04f7fc73a66edf10ab8b5", + "element_id": "4b227777d4dd1fc61c6f884f48641d02", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, - "text": "" + "text": "4" }, { "type": "UncategorizedText", - "element_id": "d4735e3a265e16eee03f59718b9b5d03", + "element_id": 
"d1429f8178a04f7fc73a66edf10ab8b5", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, - "text": "2" + "text": "" }, { "type": "UncategorizedText", - "element_id": "6b86b273ff34fce19d6b804eff5a3f57", + "element_id": "d4735e3a265e16eee03f59718b9b5d03", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, - "text": "1" + "text": "2" }, { "type": "UncategorizedText", @@ -255,79 +280,87 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "3" }, { "type": "Title", - "element_id": "1656c455012b016fbac5eac0a38397bd", + "element_id": "eda8f72476c539920d2c0e3515ba4b07", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, - "text": "Electric power (non-nuclear)" + "text": "Smoking" }, { "type": "Title", - "element_id": "602d25f25cca4ebb709f8b48f54d99d9", + "element_id": "2f3122790ccc9e095abe1b5ceedddf88", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, - "text": "Motor vehicles" + "text": "X-rays" }, { "type": "Title", - "element_id": "eda8f72476c539920d2c0e3515ba4b07", + "element_id": "ed3861e631428b9b77e2bdc0384d2cbe", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, - "text": "Smoking" + "text": "Vaccinations" }, { "type": "Title", - "element_id": "2f3122790ccc9e095abe1b5ceedddf88", + "element_id": "602d25f25cca4ebb709f8b48f54d99d9", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, - "text": "X-rays" + "text": "Motor vehicles" }, { "type": "Title", - "element_id": "ed3861e631428b9b77e2bdc0384d2cbe", + "element_id": "82a60569029ed9032f1b08891e8524c2", "metadata": { "data_source": {}, 
"filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, - "text": "Vaccinations" + "text": "Nuclear power" }, { "type": "Title", - "element_id": "82a60569029ed9032f1b08891e8524c2", + "element_id": "f8e3740e358309bd0570d4f3ca141793", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, - "text": "Nuclear power" + "text": "Handguns" }, { "type": "Title", - "element_id": "f8e3740e358309bd0570d4f3ca141793", + "element_id": "1656c455012b016fbac5eac0a38397bd", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, - "text": "Handguns" + "text": "Electric power (non-nuclear)" }, { "type": "Title", @@ -335,7 +368,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "Experts" }, @@ -345,7 +379,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "25" }, @@ -355,7 +390,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "20" }, @@ -365,29 +401,32 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "" }, { "type": "UncategorizedText", - "element_id": "7902699be42c8a8e46fbbb4501726517", + "element_id": "4b227777d4dd1fc61c6f884f48641d02", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, - "text": "7" + "text": "4" }, { "type": "UncategorizedText", - "element_id": "d4735e3a265e16eee03f59718b9b5d03", + "element_id": "7902699be42c8a8e46fbbb4501726517", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, - "text": "2" + "text": "7" }, { "type": "UncategorizedText", @@ 
-395,39 +434,43 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "" }, { "type": "UncategorizedText", - "element_id": "4b227777d4dd1fc61c6f884f48641d02", + "element_id": "19581e27de7ced00ff1ce50b2047e7a5", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, - "text": "4" + "text": "9" }, { "type": "UncategorizedText", - "element_id": "6b86b273ff34fce19d6b804eff5a3f57", + "element_id": "d4735e3a265e16eee03f59718b9b5d03", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, - "text": "1" + "text": "2" }, { "type": "UncategorizedText", - "element_id": "19581e27de7ced00ff1ce50b2047e7a5", + "element_id": "6b86b273ff34fce19d6b804eff5a3f57", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, - "text": "9" + "text": "1" }, { "type": "UncategorizedText", @@ -435,7 +478,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "" }, @@ -445,7 +489,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "In reality, radiation is a natural part of life; indeed, we are all exposed to radiation every day, on average receiving 2-3 millisieverts (mSv) per year. Most of this radiation is naturally occurring, with radon gas from the ground being the main source of exposure. The nuclear industry is responsible for a very small part of radiation exposure to the public, as seen in Figure 2. To put this into perspective, eating 10 bananas or two Brazil nuts results in the same radiation dose as living nearby a nuclear power plant for a year. 
Humans are also naturally radioactive, and the radiation dose from sleeping next to someone else each night for a year is ten times higher than the exposure from living nearby a nuclear power plant for the same time span." }, @@ -455,7 +500,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "In fact, scientific consensus is that when it comes to preventing exposure to radiation, nuclear power is much better than other electricity generators. A 2016 reportiii from the United Nations Scientific Committee on the Effects of Atomic Radiation (UNSCEAR) found that coal-generated electricity is responsible for more than half of the total global radiation exposure arising from electricity generation, while nuclear power contributed less than a fifth. Coal miners received high occupational exposure and workers in solar and wind farms received the highest occupational exposure associated with plant construction for the same amount of installed capacity." }, @@ -465,7 +511,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "1 The original study was published in 1978, but its findings have been confirmed by numerous studies since." 
}, @@ -475,7 +522,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4 + "page_number": 4, + "links": [] }, "text": "2" }, @@ -485,7 +533,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "Natural" }, @@ -495,7 +544,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "Artificial" }, @@ -505,7 +555,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": " 48% Radon  14% Buildings & soil  12% Food & water  10% Cosmic  4% Thoron" }, @@ -515,7 +566,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": " 11% Medicine  0.4%  0.4% Miscellaneous  0.2% Occupational  0.04% Nuclear discharges" }, @@ -525,7 +577,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "Fallout" }, @@ -535,7 +588,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "Figure 2. Global average exposure from different sources of radiation" }, @@ -545,7 +599,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "Fossil fuels – currently accounting for around 81% of total energy supplyiv – cause significant levels of emissions in terms of both greenhouse gases and air pollutants. Despite the serious and ongoing health and environmental harms caused by air pollution, it is often considered to be an inevitable consequence of economic development. Air pollution’s contribution to the burden of disease is profound, with an estimated 8.7 million people dying worldwide prematurely in 2018 alonev,vi. 
Despite this, it fails to induce the same fears and anxieties in people as nuclear energy does." }, @@ -555,7 +610,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "In terms of accidents, hydropower is the deadliest electricity generator, mostly due to collapsing dams and the consequences of flooding. The Banqiao Dam failure in 1975 led to at least 26,000 people drowning, and as many as 150,000 deaths resulting from the secondary effects of the accident. In comparison, radiation exposure following Chernobyl caused 54 deaths2, while no casualties due to radiation are likely to occur from the accident at Fukushima Daiichi." }, @@ -565,7 +621,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "25" }, @@ -575,7 +632,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "24.6" }, @@ -585,7 +643,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "20" }, @@ -595,7 +654,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "18.4" }, @@ -605,39 +665,43 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "r a e y" }, { "type": "Title", - "element_id": "f83714d89302473e0e4f5399bd50e7a9", + "element_id": "3f79bb7b435b05321651daefd374cdc6", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, - "text": "W T" + "text": "e" }, { - "type": "Title", - "element_id": "3f79bb7b435b05321651daefd374cdc6", + "type": "UncategorizedText", + "element_id": "e629fa6598d732768f7c726b4b621285", "metadata": { "data_source": {}, "filetype": 
"application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, - "text": "e" + "text": "15" }, { - "type": "UncategorizedText", - "element_id": "e629fa6598d732768f7c726b4b621285", + "type": "Title", + "element_id": "f83714d89302473e0e4f5399bd50e7a9", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, - "text": "15" + "text": "W T" }, { "type": "NarrativeText", @@ -645,29 +709,32 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "r e p s e i t i l" }, { - "type": "Title", - "element_id": "1fb2ec4fc8fc547c0de86ba79ba651e5", + "type": "UncategorizedText", + "element_id": "4a44dc15364204a80fe80e9039455cc1", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, - "text": "a t a F" + "text": "10" }, { - "type": "UncategorizedText", - "element_id": "4a44dc15364204a80fe80e9039455cc1", + "type": "Title", + "element_id": "1fb2ec4fc8fc547c0de86ba79ba651e5", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, - "text": "10" + "text": "a t a F" }, { "type": "UncategorizedText", @@ -675,7 +742,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "5" }, @@ -685,7 +753,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "4.6" }, @@ -695,7 +764,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "2.8" }, @@ -705,7 +775,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "0" }, @@ -715,7 +786,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - 
"page_number": 5 + "page_number": 5, + "links": [] }, "text": "C oal" }, @@ -725,7 +797,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "Oil" }, @@ -735,7 +808,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "Bio m ass" }, @@ -745,7 +819,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "N atural gas" }, @@ -755,7 +830,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "0.07" }, @@ -765,7 +841,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "Wind" }, @@ -775,7 +852,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "0.04" }, @@ -785,7 +863,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "H ydropo w er" }, @@ -795,7 +874,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "0.02" }, @@ -805,7 +885,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "S olar" }, @@ -815,7 +896,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "0.01" }, @@ -825,7 +907,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "N uclear" }, @@ -835,7 +918,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "Figure 3. 
Comparison of number of fatalities due to electricity generation, including accidents and air pollution3" }, @@ -845,7 +929,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "Contrary to perceptions, nuclear is an incredibly safe source of energy (see Figure 3 for a comparison). What is also clear is that the continued use of alternative energy sources in preference to nuclear energy – in particular fossil fuels – poses a far greater risk to public health by significantly contributing to climate change and air pollution." }, @@ -855,7 +940,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "2 Including 28 firefighters that were exposed to lethal amounts of radiation during the accident night, and 15 fatal cases of thyroid cancer. 3 Sources drawn upon: Markandya, A., & Wilkinson, P. (2007), Sovacool et al. (2016). Data for nuclear accidents modified to reflect the" }, @@ -865,7 +951,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "2012 UNSCEAR report and the 2015 US NRC SOARCA study." }, @@ -875,7 +962,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5 + "page_number": 5, + "links": [] }, "text": "3" }, @@ -885,7 +973,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "The low-dose question" }, @@ -895,7 +984,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "Since the 1950s, the Linear No-Threshold (LNT) theory has been used to inform regulatory decisions, positing that any dose of radiation, regardless of the amount or the duration over which it is received, poses a risk. 
Assuming that LNT is correct, we should expect to see that people living in areas of the world where background doses are higher (e.g. India, Iran and northern Europe) have a higher incidence of cancer. However, despite people living in areas of the world where radiation doses are naturally higher than those that would be received in parts of the evacuation zones around Chernobyl and Fukushima Daiichi, there is no evidence that these populations exhibit any negative health effects. Living nearby a nuclear power plant on average exposes the local population to 0.00009mSv/year, which according to LNT would increase the risk of developing cancer by 0.00000045%. After Chernobyl, the average dose to those evacuated was 30mSv, which would theoretically increase the risk of cancer at some point in their lifetime by 0.15% (on top of the average baseline lifetime risk of cancer, which is 39.5% in the USviii, 50% in the UKix)." }, @@ -905,7 +995,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "Since the 1980s, there has been considerable scientific debate as to whether the LNT theory is valid, following scientific breakthroughs within, for example, radiobiology and medicine. Indeed, the Chernobyl accident helped illuminate some of the issues associated with LNT. Multiplication of the low doses after the accident (many far too low to be of any health concern) with large populations – using the assumptions made by LNT – led to a large number of predicted cancer deaths, which have not, and likely will not materialize. This practice has been heavily criticized for being inappropriate in making risk assessments by UNSCEAR, the International Commission on Radiation Protection and a large number of independent scientists." 
}, @@ -915,7 +1006,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "Determining the precise risk (or lack thereof) of the extremely small radiation doses associated with the routine operations of nuclear power plants, the disposal of nuclear waste or even extremely rare nuclear accidents is a purely academic exercise, that tries to determine whether the risk is extremely low, too small to detect, or non- existent. The risks of low-level radiation pale in comparison to other societal risks such as obesity, smoking, and air pollution." }, @@ -925,7 +1017,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "By looking at radiation risks in isolation, we prolong the over-regulation of radiation in nuclear plants, driving up costs, whilst not delivering any additional health benefits, in turn incentivising the use of more harmful energy sources. A recalibration is required, and this can only done by ensuring a holistic approach to risk is taken." }, @@ -935,7 +1028,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6 + "page_number": 6, + "links": [] }, "text": "4" }, @@ -945,7 +1039,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Adopting an all-hazards approach" }, @@ -955,7 +1050,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Contemporary debates around nuclear energy often reflect the precautionary principle, a problematic concept applied across a range of regulatory and policy issues. 
A ‘strong’ interpretation of the precautionary principle, or a ‘as low as possible’ approach to risk, dictates that regulation is required whenever there is a potential adverse health risk, even if the evidence is not certain and regardless of the cost of regulation." }, @@ -965,7 +1061,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "The overall regulatory philosophy, at least theoretically, used in the nuclear industry is the ALARA (As Low As Reasonably Achievable) principle, where any regulatory action on radiation should account for socio- economic benefits and costs, as opposed to making decisions based on radiation risks alone." }, @@ -975,7 +1072,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "However, the regulatory process and the policy debate around nuclear more broadly has long departed from the ALARA principle, no longer weighing cost versus benefits, or considering the overall advantages of nuclear energy, but rather looking at radiation in isolation. This has resulted in a subtle shift towards an ‘as low as possible’ mentality. Attempting to reduce radiation far below de facto safe levels has resulted in an escalation of costs and loss of public confidence, and in some cases has deprived communities of the many benefits nuclear energy provides. In practical terms, this has led to the continued use of more harmful energy sources, such as fossil fuels." }, @@ -985,7 +1083,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "If the potential of nuclear energy is to be fully realized, public health and safety approaches must be recalibrated to consider a wider range of factors when considering radiation, adopting an “all-hazards” approach. 
Such an approach must ensure that risks are placed within a proper perspective and context, rather than looking at them in isolation. We therefore must not look at the costs – be they economic, environmental, or public health – associated with an individual power plant in isolation, but rather the costs associated with it (and its alternatives) at a societal level (Figure 4). This would entail looking at the potential risks arising from the use of nuclear power and comparing these with the risks associated with not adopting nuclear power." }, @@ -995,7 +1094,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Plant-level production costs at market prices" }, @@ -1005,7 +1105,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Grid-level costs of the electricity system" }, @@ -1015,7 +1116,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Social and environmental costs of emissions, land-use, climate change, security of supply, etc." }, @@ -1025,7 +1127,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Figure 4. The different levels of cost associated with electricity generationx" }, @@ -1035,7 +1138,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "A more holistic regulatory process would be required, in which regulators move away from being siloed, looking at specific risks in isolation, with little regard for the greater picture. 
The move towards an all-hazard, holistic approach would require greater coordination between regulators, ensuring that the combined risks of a specific nuclear project are weighed against the risks posed by not advancing said project." }, @@ -1045,7 +1149,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "Equally, the adoption of an all-hazards approach means regulators should consider declaring when a risk is too low to be a public health concern, in line with what the U.S. Nuclear Regulatory Commission attempted to do with its Below Regulatory Concern policy statements in the 1980s and early 1990s. In the context of nuclear power, this means departing from the notion that LNT instils of no safe level of radiation, and adopting a regulatory framework which notes the impossibility of eradicating risks. Failing to do so will result in excessive regulation that continues to limit the full potential of nuclear power in tackling climate change and sees a continued reliance on objectively more harmful energy sources." }, @@ -1055,7 +1160,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 7 + "page_number": 7, + "links": [] }, "text": "5" }, @@ -1065,7 +1171,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "Recalibrating the risk conversation" }, @@ -1075,7 +1182,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "By looking at radiation risks in isolation, we have created something akin to a “radiation phobia”, that both directly and indirectly harms people around the world. For instance, it is well established that the vast majority of health impacts from Chernobyl and Fukushima Daiichi were not radiological, but rather psychosocial. 
There has been an observable and dramatic increase in depression, PTSD, substance abuse, and suicides following these events, which can be significantly attributed to the dissonance between the actual and perceived risks of radiation, and the stigmatization they caused." }, @@ -1085,7 +1193,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "Similarly, many of the tremendous challenges the global community faces are significantly driven by this “radiation phobia”. Indeed, several of these issues have been considerably exacerbated by the fact that certain risks are given a disproportionate amount of focus, whereas others are de facto ignored. The global conversation around climate change is a prime example of this. The historical use of fossil fuels has contributed significantly to climate change through greenhouse gas emissions, causing unprecedented changes in the liveability of the Earth. By 2025, half of the world’s population will be living in water-stressed areas, as extreme heat and droughts are exacerbating water resources. Between 2030 and 2050, climate change is expected to be the cause of an additional 250,000 deaths per year, arising from malnutrition, malaria, diarrhoea and heat stressx. Yet, despite the huge risks associated with climate change, our addiction to coal, oil, and fossil gas remains, with fossil fuels providing 84% of global primary energy in 2019xii. The continued prioritization of fossil fuels at the expense of nuclear energy results in a considerable increase in the risks posed by climate change." 
}, @@ -1095,7 +1204,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "Equally, it is well established that living without access to electricity results in illness and death around the world, caused by everything from not having access to modern healthcare to household air pollution. As of today, 770 million people around the world do not have access to electricity, with over 75% of that population living in Sub-Saharan Africa. The world's poorest 4 billion people consume a mere 5% of the energy used in developed economies, and we need to find ways of delivering reliable electricity to the entire human population in a fashion that is sustainable. Household and ambient air pollution causes 8.7 million deaths each year, largely because of the continued use of fossil fuels. Widespread electrification is a key tool for delivering a just energy transition. Investment in nuclear, has become an urgent necessity. Discarding it, based on risk perceptions divorced from science, would be to abandon the moral obligation to ensure affordable, reliable, and sustainable energy for every community around the world." }, @@ -1105,7 +1215,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 8 + "page_number": 8, + "links": [] }, "text": "6" }, @@ -1115,7 +1226,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "Clearly, we have reached a point where we must establish a new conversation about the relative risks of using nuclear, especially when risks created by other energy sources are considered. We cannot address many of the global challenges we face without a significant increase in the use of nuclear energy. 
The detrimental effects of decades of looking at nuclear risks in isolation highlights just how crucial it is that regulators and policymakers change the way they view nuclear energy, and transition towards an all-hazards approach, ensuring that actions taken to mitigate risks do not result in creating more severe risks." }, @@ -1125,7 +1237,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "We must begin to holistically look at the severity of the consequences of maintaining the current energy production system, many of which are irreversible. The ways in which we address climate change and other issues of global importance must be sustainable and not create new hazards down the line. The reality is that nuclear has always been and remains an exceptionally safe source of energy, representing the lowest risk, the most sustainable, and the most affordable ways to generate around-the-clock electricity." }, @@ -1135,7 +1248,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "Therefore, World Nuclear Association calls upon policymakers and regulators to adopt an all-hazards approach, where different risks associated with energy producing technologies are placed in perspective and the appropriate context, and examined in line with the latest scientific evidence. Policymakers and regulators must ensure that their decisions regarding radiation protection do not create greater risks elsewhere. This include the recalibration of existing regulations regarding nuclear power and radiation, weighing the cost of regulatory measures against the societal benefits provided by nuclear energy." 
}, @@ -1145,7 +1259,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 9 + "page_number": 9, + "links": [] }, "text": "7" }, @@ -1155,7 +1270,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "References" }, @@ -1165,7 +1281,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "i" }, @@ -1175,7 +1292,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "World Health Organization (2020). Road traffic injuries. Available at: https://www.who.int/news-room/fact-sheets/ detail/road-traffic-injuries" }, @@ -1185,7 +1303,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "ii" }, @@ -1195,7 +1314,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "BBC (2020). Plane crash fatalities fell more than 50% in 2019. Available at: https://www.bbc.co.uk/news/ business-50953712" }, @@ -1205,7 +1325,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "iii" }, @@ -1215,7 +1336,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "Slovic, P., 2010. The Psychology of risk. Saúde e Sociedade, 19(4), pp. 731-747." }, @@ -1225,7 +1347,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "iv United Nations Scientific Committee on the Effects of Radiation (2016). 
Report of the United Nations Scientific" }, @@ -1235,7 +1358,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "Committee on the Effects of Atomic Radiation. Accessed from: https://www.unscear.org/docs/publications/2016/ UNSCEAR_2016_GA-Report-CORR.pdf" }, @@ -1245,7 +1369,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "v" }, @@ -1255,7 +1380,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "International Energy Agency (2020). Global share of total energy supply by source, 2018. Key World Energy Statistics 2020. Available at: https://www.iea.org/data-and-statistics/charts/global-share-of-total-energy-supply-by- source-2018" }, @@ -1265,7 +1391,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "vi" }, @@ -1275,7 +1402,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "Vohra, K., Vodonos, A., Schwartz, J., Marais, E., Sulprizio, M., & Mickley, L. (2021). Global mortality from outdoor fine particle pollution generated by fossil fuel combustion: Results from GEOS-Chem. Environmental Research, 195, p. 1-8" }, @@ -1285,7 +1413,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "vii World Health Organization. (2016). Updated tables 2016 for ‘Preventing disease through health environments: a" }, @@ -1295,7 +1424,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "global assessment of the burden of disease from environmental risks’. 
Available at: https://www.who.int/data/gho/ data/themes/public-health-and-environment [Accessed on 8 April 2021]" }, @@ -1305,7 +1435,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "viii National Cancer Institute (2020). Cancer statistics. Available at: https://www.cancer.gov/about-cancer/" }, @@ -1315,7 +1446,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "understanding/statistics" }, @@ -1325,7 +1457,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "ix Cancer Research UK (n.d.). Cancer risk statistics. Available at: https://www.cancerresearchuk.org/health-" }, @@ -1335,7 +1468,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "professional/cancer-statistics/risk" }, @@ -1345,7 +1479,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "x OECD-NEA (2019). The Full Costs of Electricity Provision. Available at: https://www.oecd-nea.org/jcms/pl_14998/" }, @@ -1355,7 +1490,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "the-full-costs-of-electricity-provision?details=true" }, @@ -1365,7 +1501,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "xi World Health Organization (2018). Climate change and health. 
Available at: https://www.who.int/news-room/fact-" }, @@ -1375,7 +1512,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "sheets/detail/climate-change-and-health" }, @@ -1385,7 +1523,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "xii BP, 2020. BP Statistical Review of World Energy, London: BP." }, @@ -1395,7 +1534,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "Photo credits: Front cover & pages 1, 4, 6 left, 7 bottom: Adobe Stock; page 6 right: Getty Images; page 7 top: Uniper." }, @@ -1405,7 +1545,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 10 + "page_number": 10, + "links": [] }, "text": "8" }, @@ -1415,7 +1556,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 12 + "page_number": 12, + "links": [] }, "text": "World Nuclear Association Tower House 10 Southampton Street London WC2E 7HA United Kingdom" }, @@ -1425,7 +1567,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 12 + "page_number": 12, + "links": [] }, "text": "+44 (0)20 7451 1520 www.world-nuclear.org info@world-nuclear.org" }, @@ -1435,7 +1578,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 12 + "page_number": 12, + "links": [] }, "text": "World Nuclear Association is the international organization that represents the global nuclear industry. Its mission is to promote a wider understanding of nuclear energy among key international influencers by producing authoritative information, developing common industry positions, and contributing to the energy debate." 
}, @@ -1445,7 +1589,8 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 12 + "page_number": 12, + "links": [] }, "text": "Recalibrating risk © 2021 World Nuclear Association. Registered in England and Wales, company number 01215741" } diff --git a/test_unstructured_ingest/expected-structured-output/s3-minio/wiki_movie_plots_small.csv.json b/test_unstructured_ingest/expected-structured-output/s3-minio/wiki_movie_plots_small.csv.json new file mode 100644 index 0000000000..0a44c84aba --- /dev/null +++ b/test_unstructured_ingest/expected-structured-output/s3-minio/wiki_movie_plots_small.csv.json @@ -0,0 +1,19 @@ +[ + { + "type": "Table", + "element_id": "f078b58f281b4e231430e34a3ece07f3", + "metadata": { + "data_source": { + "url": "s3://utic-dev-tech-fixtures/wiki_movie_plots_small.csv", + "version": 103589111396252091980300895568390462924, + "record_locator": { + "protocol": "s3", + "remote_file_path": "utic-dev-tech-fixtures/wiki_movie_plots_small.csv" + } + }, + "filetype": "text/csv", + "text_as_html": "\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
    1901Kansas Saloon SmashersAmericanUnknownunknownhttps://en.wikipedia.org/wiki/Kansas_Saloon_SmashersA bartender is working at a saloon, serving drinks to customers. After he fills a stereotypically Irish man's bucket with beer, Carrie Nation and her followers burst inside. They assault the Irish man, pulling his hat over his eyes and then dumping the beer over his head. The group then begin wrecking the bar, smashing the fixtures, mirrors, and breaking the cash register. The bartender then sprays seltzer water in Nation's face before a group of policemen appear and order everybody to leave.[1]
    1901Love by the Light of the MoonAmericanUnknownunknownhttps://en.wikipedia.org/wiki/Love_by_the_Light_of_the_MoonThe moon, painted with a smiling face hangs over a park at night. A young couple walking past a fence learn on a railing and look up. The moon smiles. They embrace, and the moon's smile gets bigger. They then sit down on a bench by a tree. The moon's view is blocked, causing him to frown. In the last scene, the man fans the woman with his hat because the moon has left the sky and is perched over her shoulder to see everything better.
    1901The Martyred PresidentsAmericanUnknownunknownhttps://en.wikipedia.org/wiki/The_Martyred_PresidentsThe film, just over a minute long, is composed of two shots. In the first, a girl sits at the base of an altar or tomb, her face hidden from the camera. At the center of the altar, a viewing portal displays the portraits of three U.S. Presidents—Abraham Lincoln, James A. Garfield, and William McKinley—each victims of assassination.\\r\\nIn the second shot, which runs just over eight seconds long, an assassin kneels feet of Lady Justice.
    1901Terrible Teddy, the Grizzly KingAmericanUnknownunknownhttps://en.wikipedia.org/wiki/Terrible_Teddy,_the_Grizzly_KingLasting just 61 seconds and consisting of two shots, the first shot is set in a wood during winter. The actor representing then vice-president Theodore Roosevelt enthusiastically hurries down a hillside towards a tree in the foreground. He falls once, but rights himself and cocks his rifle. Two other men, bearing signs reading \"His Photographer\" and \"His Press Agent\" respectively, follow him into the shot; the photographer sets up his camera. \"Teddy\" aims his rifle upward at the tree and fells what appears to be a common house cat, which he then proceeds to stab. \"Teddy\" holds his prize aloft, and the press agent takes notes. The second shot is taken in a slightly different part of the wood, on a path. \"Teddy\" rides the path on his horse towards the camera and out to the left of the shot, followed closely by the press agent and photographer, still dutifully holding their signs.
    1902Jack and the BeanstalkAmericanGeorge S. Fleming, Edwin S. Porterunknownhttps://en.wikipedia.org/wiki/Jack_and_the_Beanstalk_(1902_film)The earliest known adaptation of the classic fairytale, this films shows Jack trading his cow for the beans, his mother forcing him to drop them in the front yard, and beig forced upstairs. As he sleeps, Jack is visited by a fairy who shows him glimpses of what will await him when he ascends the bean stalk. In this version, Jack is the son of a deposed king. When Jack wakes up, he finds the beanstalk has grown and he climbs to the top where he enters the giant's home. The giant finds Jack, who narrowly escapes. The giant chases Jack down the bean stalk, but Jack is able to cut it down before the giant can get to safety. He falls and is killed as Jack celebrates. The fairy then reveals that Jack may return home as a prince.
    1903Alice in WonderlandAmericanCecil HepworthMay Clarkunknownhttps://en.wikipedia.org/wiki/Alice_in_Wonderland_(1903_film)Alice follows a large white rabbit down a \"Rabbit-hole\". She finds a tiny door. When she finds a bottle labeled \"Drink me\", she does, and shrinks, but not enough to pass through the door. She then eats something labeled \"Eat me\" and grows larger. She finds a fan when enables her to shrink enough to get into the \"Garden\" and try to get a \"Dog\" to play with her. She enters the \"White Rabbit's tiny House,\" but suddenly resumes her normal size. In order to get out, she has to use the \"magic fan.\"\\r\\nShe enters a kitchen, in which there is a cook and a woman holding a baby. She persuades the woman to give her the child and takes the infant outside after the cook starts throwing things around. The baby then turns into a pig and squirms out of her grip. \"The Duchess's Cheshire Cat\" appears and disappears a couple of times to Alice and directs her to the Mad Hatter's \"Mad Tea-Party.\" After a while, she leaves.\\r\\nThe Queen invites Alice to join the \"ROYAL PROCESSION\": a parade of marching playing cards and others headed by the White Rabbit. When Alice \"unintentionally offends the Queen\", the latter summons the \"Executioner\". Alice \"boxes the ears\", then flees when all the playing cards come for her. Then she wakes up and realizes it was all a dream.
    1903The Great Train RobberyAmericanEdwin S. Porterwesternhttps://en.wikipedia.org/wiki/The_Great_Train_Robbery_(1903_film)The film opens with two bandits breaking into a railroad telegraph office, where they force the operator at gunpoint to have a train stopped and to transmit orders for the engineer to fill the locomotive's tender at the station's water tank. They then knock the operator out and tie him up. As the train stops it is boarded by the bandits‍—‌now four. Two bandits enter an express car, kill a messenger and open a box of valuables with dynamite; the others kill the fireman and force the engineer to halt the train and disconnect the locomotive. The bandits then force the passengers off the train and rifle them for their belongings. One passenger tries to escape but is instantly shot down. Carrying their loot, the bandits escape in the locomotive, later stopping in a valley where their horses had been left.\\r\\nMeanwhile, back in the telegraph office, the bound operator awakens, but he collapses again. His daughter arrives bringing him his meal and cuts him free, and restores him to consciousness by dousing him with water.\\r\\nThere is some comic relief at a dance hall, where an Eastern stranger is forced to dance while the locals fire at his feet. The door suddenly opens and the telegraph operator rushes in to tell them of the robbery. The men quickly form a posse, which overtakes the bandits, and in a final shootout kills them all and recovers the stolen mail.
    1904The SuburbaniteAmericanWallace McCutcheoncomedyhttps://en.wikipedia.org/wiki/The_SuburbaniteThe film is about a family who move to the suburbs, hoping for a quiet life. Things start to go wrong, and the wife gets violent and starts throwing crockery, leading to her arrest.
    1905The Little Train RobberyAmericanEdwin Stanton Porterunknownhttps://en.wikipedia.org/wiki/The_Little_Train_RobberyThe opening scene shows the interior of the robbers' den. The walls are decorated with the portraits of notorious criminals and pictures illustrating the exploits of famous bandits. Some of the gang are lounging about, while others are reading novels and illustrated papers. Although of youthful appearance, each is dressed like a typical Western desperado. The \"Bandit Queen,\" leading a blindfolded new recruit, now enters the room. He is led to the center of the room, raises his right hand and is solemnly sworn in. When the bandage is removed from his eyes he finds himself looking into the muzzles of a dozen or more 45's. The gang then congratulates the new member and heartily shake his hand. The \"Bandit Queen\" who is evidently the leader of the gang, now calls for volunteers to hold up a train. All respond, but she picks out seven for the job who immediately leave the cabin.\\r\\nThe next scene shows the gang breaking into a barn. They steal ponies and ride away. Upon reaching the place agreed upon they picket their ponies and leaving them in charge of a trusted member proceed to a wild mountain spot in a bend of the railroad, where the road runs over a steep embankment. The spot is an ideal one for holding up a train. Cross ties are now placed on the railroad track and the gang hide in some bushes close by and wait for the train. The train soon approaches and is brought to a stop. The engineer leaves his engine and proceeds to remove the obstruction on the track. While he is bending over one of the gang sneaks up behind them and hits him on the head with an axe, and knocks him senseless down the embankment, while the gang surround the train and hold up the passengers. 
After securing all the \"valuables,\" consisting principally of candy and dolls, the robbers uncouple the engine and one car and make their escape just in time to avoid a posse of police who appear on the scene. Further up the road they abandon the engine and car, take to the woods and soon reach their ponies.\\r\\nIn the meantime the police have learned the particulars of the hold-up from the frightened passengers and have started up the railroad tracks after the fleeing robbers. The robbers are next seen riding up the bed of a shallow stream and finally reach their den, where the remainder of the gang have been waiting for them. Believing they have successfully eluded their pursuers, they proceed to divide the \"plunder.\" The police, however, have struck the right trail and are in close pursuit. While the \"plunder\" is being divided a sentry gives the alarm and the entire gang, abandoning everything, rush from the cabin barely in time to escape capture. The police make a hurried search and again start in pursuit. The robbers are so hard pressed that they are unable to reach their ponies, and are obliged to take chances on foot. The police now get in sight of the fleeing robbers and a lively chase follows through tall weeds, over a bridge and up a steep hill. Reaching a pond the police are close on their heels. The foremost robbers jump in clothes and all and strike out for the opposite bank. Two hesitate and are captured. Boats are secured and after an exciting tussle the entire gang is rounded up. In the mix up one of the police is dragged overboard. The final scene shows the entire gang of bedraggled and crestfallen robbers tied together with a rope and being led away by the police. Two of the police are loaded down with revolvers, knives and cartridge belts, and resemble walking aresenals. As a fitting climax a confederate steals out of the woods, cuts the rope and gallantly rescues the \"Bandit Queen.\"
    1905The Night Before ChristmasAmericanEdwin Stanton Porterunknownhttps://en.wikipedia.org/wiki/The_Night_Before_Christmas_(1905_film)Scenes are introduced using lines of the poem.[2] Santa Claus, played by Harry Eytinge, is shown feeding real reindeer[4] and finishes his work in the workshop. Meanwhile, the children of a city household hang their stockings and go to bed, but unable to sleep they engage in a pillow fight. Santa Claus leaves his home on a sleigh with his reindeer. He enters the children's house through the chimney, and leaves the presents. The children come down the stairs and enjoy their presents.
    1906Dream of a Rarebit FiendAmericanWallace McCutcheon and Edwin S. Portershorthttps://en.wikipedia.org/wiki/Dream_of_a_Rarebit_Fiend_(1906_film)The Rarebit Fiend gorges on Welsh rarebit at a restaurant. When he leaves, he begins to get dizzy as he starts to hallucinate. He desperately tries to hang onto a lamppost as the world spins all around him. A man helps him get home. He falls into bed and begins having more hallucinatory dreams. During a dream sequence, the furniture begins moving around the room. Imps emerge from a floating Welsh rarebit container and begin poking his head as he sleeps. His bed then begins dancing and spinning wildly around the room before flying out the window with the Fiend in it. The bed floats across the city as the Fiend floats up and off the bed. He hangs off the back and eventually gets caught on a weathervane atop a steeple. His bedclothes tear and he falls from the sky, crashing through his bedroom ceiling. The Fiend awakens from the dream after falling out of his bed.
    1906From Leadville to Aspen: A Hold-Up in the RockiesAmericanFrancis J. Marion and Wallace McCutcheonshort action/crime westernhttps://en.wikipedia.org/wiki/From_Leadville_to_Aspen:_A_Hold-Up_in_the_RockiesThe film features a train traveling through the Rockies and a hold up created by two thugs placing logs on the line. They systematically rob the wealthy occupants at gunpoint and then make their getaway along the tracks and later by a hi-jacked horse and cart.
    1906Kathleen MavourneenAmericanEdwin S. Portershort filmhttps://en.wikipedia.org/wiki/Kathleen_Mavourneen_(1906_film)Irish villager Kathleen is a tenant of Captain Clearfield, who controls local judges and criminals. Her father owes Clearfield a large debt. Terence O'More saves the village from Clearfield, causing a large celebration.\\r\\nFilm historian Charles Musser writes of Porter's adaptation, \"O'More not only rescues Kathleen from the villain but, through marriage, renews the family for another generation.\"[1]
    1907Daniel BooneAmericanWallace McCutcheon and Ediwin S. PorterWilliam Craven, Florence Lawrencebiographicalhttps://en.wikipedia.org/wiki/Daniel_Boone_(1907_film)Boone's daughter befriends an Indian maiden as Boone and his companion start out on a hunting expedition. While he is away, Boone's cabin is attacked by the Indians, who set it on fire and abduct Boone's daughter. Boone returns, swears vengeance, then heads out on the trail to the Indian camp. His daughter escapes but is chased. The Indians encounter Boone, which sets off a huge fight on the edge of a cliff. A burning arrow gets shot into the Indian camp. Boone gets tied to the stake and tortured. The burning arrow sets the Indian camp on fire, causing panic. Boone is rescued by his horse, and Boone has a knife fight in which he kills the Indian chief.[2]
    1907How Brown Saw the Baseball GameAmericanUnknownUnknowncomedyhttps://en.wikipedia.org/wiki/How_Brown_Saw_the_Baseball_GameBefore heading out to a baseball game at a nearby ballpark, sports fan Mr. Brown drinks several highball cocktails. He arrives at the ballpark to watch the game, but has become so inebriated that the game appears to him in reverse, with the players running the bases backwards and the baseball flying back into the pitcher's hand. After the game is over, Mr. Brown is escorted home by one of his friends. When they arrive at Brown's house, they encounter his wife who becomes furious with the friend and proceeds to physically assault him, believing he is responsible for her husband's severe intoxication.[1]
    1907Laughing GasAmericanEdwin Stanton PorterBertha Regustus, Edward Bouldencomedyhttps://en.wikipedia.org/wiki/Laughing_Gas_(film)#1907_FilmThe plot is that of a black woman going to the dentist for a toothache and being given laughing gas. On her way walking home, and in other situations, she can't stop laughing, and everyone she meets \"catches\" the laughter from her, including a vendor and police officers.
    1908The Adventures of DollieAmericanD. W. GriffithArthur V. Johnson, Linda Arvidsondramahttps://en.wikipedia.org/wiki/The_Adventures_of_DollieOn a beautiful summer day a father and mother take their daughter Dollie on an outing to the river. The mother refuses to buy a gypsy's wares. The gypsy tries to rob the mother, but the father drives him off. The gypsy returns to the camp and devises a plan. They return and kidnap Dollie while her parents are distracted. A rescue crew is organized, but the gypsy takes Dollie to his camp. They gag Dollie and hide her in a barrel before the rescue party gets to the camp. Once they leave the gypsies and escapes in their wagon. As the wagon crosses the river, the barrel falls into the water. Still sealed in the barrel, Dollie is swept downstream in dangerous currents. A boy who is fishing in the river finds the barrel, and Dollie is reunited safely with her parents.
    1908The Black ViperAmericanD. W. GriffithD. W. Griffithdramahttps://en.wikipedia.org/wiki/The_Black_ViperA thug accosts a girl as she leaves her workplace but a man rescues her. The thug vows revenge and, with the help of two friends, attacks the girl and her rescuer again as they're going for a walk. This time they succeed in kidnapping the rescuer. He is bound and gagged and taken away in a cart. The girl runs home and gets help from several neighbors. They track the ruffians down to a cabin in the mountains where the gang has trapped their victim and set the cabin on fire. A thug and Rescuer fight on the roof of the house.
    1908A Calamitous ElopementAmericanD.W. GriffithHarry Solter, Linda Arvidsoncomedyhttps://en.wikipedia.org/wiki/A_Calamitous_ElopementA young couple decides to elope after being caught in the midst of a romantic moment by the woman's angry father. They make plans to leave, but a thief discovers their plans and hides in their trunk and waits for the right moment to steal their belongings.
    1908The Call of the WildAmericanD. W. GriffithCharles Insleeadventurehttps://en.wikipedia.org/wiki/The_Call_of_the_Wild_(1908_film)A white girl (Florence Lawrence) rejects a proposal from an Indian brave (Charles Inslee) in this early one-reel Western melodrama. Despite the rejection, the Indian still comes to the girl's defense when she is abducted by his warring tribe. In her first year in films, Florence Lawrence was already the most popular among the Biograph Company's anonymous stock company players. By 1909, she was known the world over as \"The Biograph Girl.\"
    1908A Christmas CarolAmericanUnknownTom Rickettsdramahttps://en.wikipedia.org/wiki/A_Christmas_Carol_(1908_film)No prints of the first American film adaptation of A Christmas Carol are known to exist,[1] but The Moving Picture World magazine provided a scene-by-scene description before the film's release.[2] Scrooge goes into his office and begins working. His nephew, along with three women who wish for Scrooge to donate enter. However, Scrooge dismisses them. On the night of Christmas Eve, his long-dead partner Jacob Marley comes as a ghost, warning him of a horrible fate if he does not change his ways. Scrooge meets three spirits that show Scrooge the real meaning of Christmas, along with his grave, the result of his parsimonious ways. The next morning, he wakes and realizes the error of his ways. Scrooge was then euphoric and generous for the rest of his life.
    1908The Fight for FreedomAmericanD. W. GriffithFlorence Auer, John G. Adolfiwesternhttps://en.wikipedia.org/wiki/The_Fight_for_FreedomThe film opens in a town on the Mexican border. A poker game is going on in the local saloon. One of the players cheats and is shot dead by another of the players, a Mexican named Pedro. In the uproar that follows Pedro is wounded as he escapes from the saloon. The sheriff is called, who tracks Pedro to his home but Pedro kills the sherriff too. While Pedro hides, his wife Juanita, is arrested on suspicion of murdering the sheriff. Pedro rescues her from the town jail and the two head for the Mexican border. Caught by the posse before they reach the border, Juanita is killed and the film ends with Pedro being arrested and taken back to town.
    " + }, + "text": "\n\n\n1901\nKansas Saloon Smashers\nAmerican\nUnknown\n\nunknown\nhttps://en.wikipedia.org/wiki/Kansas_Saloon_Smashers\nA bartender is working at a saloon, serving drinks to customers. After he fills a stereotypically Irish man's bucket with beer, Carrie Nation and her followers burst inside. They assault the Irish man, pulling his hat over his eyes and then dumping the beer over his head. The group then begin wrecking the bar, smashing the fixtures, mirrors, and breaking the cash register. The bartender then sprays seltzer water in Nation's face before a group of policemen appear and order everybody to leave.[1]\n\n\n1901\nLove by the Light of the Moon\nAmerican\nUnknown\n\nunknown\nhttps://en.wikipedia.org/wiki/Love_by_the_Light_of_the_Moon\nThe moon, painted with a smiling face hangs over a park at night. A young couple walking past a fence learn on a railing and look up. The moon smiles. They embrace, and the moon's smile gets bigger. They then sit down on a bench by a tree. The moon's view is blocked, causing him to frown. In the last scene, the man fans the woman with his hat because the moon has left the sky and is perched over her shoulder to see everything better.\n\n\n1901\nThe Martyred Presidents\nAmerican\nUnknown\n\nunknown\nhttps://en.wikipedia.org/wiki/The_Martyred_Presidents\nThe film, just over a minute long, is composed of two shots. In the first, a girl sits at the base of an altar or tomb, her face hidden from the camera. At the center of the altar, a viewing portal displays the portraits of three U.S. Presidents—Abraham Lincoln, James A. 
Garfield, and William McKinley—each victims of assassination.\\r\\nIn the second shot, which runs just over eight seconds long, an assassin kneels feet of Lady Justice.\n\n\n1901\nTerrible Teddy, the Grizzly King\nAmerican\nUnknown\n\nunknown\nhttps://en.wikipedia.org/wiki/Terrible_Teddy,_the_Grizzly_King\nLasting just 61 seconds and consisting of two shots, the first shot is set in a wood during winter. The actor representing then vice-president Theodore Roosevelt enthusiastically hurries down a hillside towards a tree in the foreground. He falls once, but rights himself and cocks his rifle. Two other men, bearing signs reading \"His Photographer\" and \"His Press Agent\" respectively, follow him into the shot; the photographer sets up his camera. \"Teddy\" aims his rifle upward at the tree and fells what appears to be a common house cat, which he then proceeds to stab. \"Teddy\" holds his prize aloft, and the press agent takes notes. The second shot is taken in a slightly different part of the wood, on a path. \"Teddy\" rides the path on his horse towards the camera and out to the left of the shot, followed closely by the press agent and photographer, still dutifully holding their signs.\n\n\n1902\nJack and the Beanstalk\nAmerican\nGeorge S. Fleming, Edwin S. Porter\n\nunknown\nhttps://en.wikipedia.org/wiki/Jack_and_the_Beanstalk_(1902_film)\nThe earliest known adaptation of the classic fairytale, this films shows Jack trading his cow for the beans, his mother forcing him to drop them in the front yard, and beig forced upstairs. As he sleeps, Jack is visited by a fairy who shows him glimpses of what will await him when he ascends the bean stalk. In this version, Jack is the son of a deposed king. When Jack wakes up, he finds the beanstalk has grown and he climbs to the top where he enters the giant's home. The giant finds Jack, who narrowly escapes. The giant chases Jack down the bean stalk, but Jack is able to cut it down before the giant can get to safety. 
He falls and is killed as Jack celebrates. The fairy then reveals that Jack may return home as a prince.\n\n\n1903\nAlice in Wonderland\nAmerican\nCecil Hepworth\nMay Clark\nunknown\nhttps://en.wikipedia.org/wiki/Alice_in_Wonderland_(1903_film)\nAlice follows a large white rabbit down a \"Rabbit-hole\". She finds a tiny door. When she finds a bottle labeled \"Drink me\", she does, and shrinks, but not enough to pass through the door. She then eats something labeled \"Eat me\" and grows larger. She finds a fan when enables her to shrink enough to get into the \"Garden\" and try to get a \"Dog\" to play with her. She enters the \"White Rabbit's tiny House,\" but suddenly resumes her normal size. In order to get out, she has to use the \"magic fan.\"\\r\\nShe enters a kitchen, in which there is a cook and a woman holding a baby. She persuades the woman to give her the child and takes the infant outside after the cook starts throwing things around. The baby then turns into a pig and squirms out of her grip. \"The Duchess's Cheshire Cat\" appears and disappears a couple of times to Alice and directs her to the Mad Hatter's \"Mad Tea-Party.\" After a while, she leaves.\\r\\nThe Queen invites Alice to join the \"ROYAL PROCESSION\": a parade of marching playing cards and others headed by the White Rabbit. When Alice \"unintentionally offends the Queen\", the latter summons the \"Executioner\". Alice \"boxes the ears\", then flees when all the playing cards come for her. Then she wakes up and realizes it was all a dream.\n\n\n1903\nThe Great Train Robbery\nAmerican\nEdwin S. Porter\n\nwestern\nhttps://en.wikipedia.org/wiki/The_Great_Train_Robbery_(1903_film)\nThe film opens with two bandits breaking into a railroad telegraph office, where they force the operator at gunpoint to have a train stopped and to transmit orders for the engineer to fill the locomotive's tender at the station's water tank. They then knock the operator out and tie him up. 
As the train stops it is boarded by the bandits‍—‌now four. Two bandits enter an express car, kill a messenger and open a box of valuables with dynamite; the others kill the fireman and force the engineer to halt the train and disconnect the locomotive. The bandits then force the passengers off the train and rifle them for their belongings. One passenger tries to escape but is instantly shot down. Carrying their loot, the bandits escape in the locomotive, later stopping in a valley where their horses had been left.\\r\\nMeanwhile, back in the telegraph office, the bound operator awakens, but he collapses again. His daughter arrives bringing him his meal and cuts him free, and restores him to consciousness by dousing him with water.\\r\\nThere is some comic relief at a dance hall, where an Eastern stranger is forced to dance while the locals fire at his feet. The door suddenly opens and the telegraph operator rushes in to tell them of the robbery. The men quickly form a posse, which overtakes the bandits, and in a final shootout kills them all and recovers the stolen mail.\n\n\n1904\nThe Suburbanite\nAmerican\nWallace McCutcheon\n\ncomedy\nhttps://en.wikipedia.org/wiki/The_Suburbanite\nThe film is about a family who move to the suburbs, hoping for a quiet life. Things start to go wrong, and the wife gets violent and starts throwing crockery, leading to her arrest.\n\n\n1905\nThe Little Train Robbery\nAmerican\nEdwin Stanton Porter\n\nunknown\nhttps://en.wikipedia.org/wiki/The_Little_Train_Robbery\nThe opening scene shows the interior of the robbers' den. The walls are decorated with the portraits of notorious criminals and pictures illustrating the exploits of famous bandits. Some of the gang are lounging about, while others are reading novels and illustrated papers. Although of youthful appearance, each is dressed like a typical Western desperado. The \"Bandit Queen,\" leading a blindfolded new recruit, now enters the room. 
He is led to the center of the room, raises his right hand and is solemnly sworn in. When the bandage is removed from his eyes he finds himself looking into the muzzles of a dozen or more 45's. The gang then congratulates the new member and heartily shake his hand. The \"Bandit Queen\" who is evidently the leader of the gang, now calls for volunteers to hold up a train. All respond, but she picks out seven for the job who immediately leave the cabin.\\r\\nThe next scene shows the gang breaking into a barn. They steal ponies and ride away. Upon reaching the place agreed upon they picket their ponies and leaving them in charge of a trusted member proceed to a wild mountain spot in a bend of the railroad, where the road runs over a steep embankment. The spot is an ideal one for holding up a train. Cross ties are now placed on the railroad track and the gang hide in some bushes close by and wait for the train. The train soon approaches and is brought to a stop. The engineer leaves his engine and proceeds to remove the obstruction on the track. While he is bending over one of the gang sneaks up behind them and hits him on the head with an axe, and knocks him senseless down the embankment, while the gang surround the train and hold up the passengers. After securing all the \"valuables,\" consisting principally of candy and dolls, the robbers uncouple the engine and one car and make their escape just in time to avoid a posse of police who appear on the scene. Further up the road they abandon the engine and car, take to the woods and soon reach their ponies.\\r\\nIn the meantime the police have learned the particulars of the hold-up from the frightened passengers and have started up the railroad tracks after the fleeing robbers. The robbers are next seen riding up the bed of a shallow stream and finally reach their den, where the remainder of the gang have been waiting for them. 
Believing they have successfully eluded their pursuers, they proceed to divide the \"plunder.\" The police, however, have struck the right trail and are in close pursuit. While the \"plunder\" is being divided a sentry gives the alarm and the entire gang, abandoning everything, rush from the cabin barely in time to escape capture. The police make a hurried search and again start in pursuit. The robbers are so hard pressed that they are unable to reach their ponies, and are obliged to take chances on foot. The police now get in sight of the fleeing robbers and a lively chase follows through tall weeds, over a bridge and up a steep hill. Reaching a pond the police are close on their heels. The foremost robbers jump in clothes and all and strike out for the opposite bank. Two hesitate and are captured. Boats are secured and after an exciting tussle the entire gang is rounded up. In the mix up one of the police is dragged overboard. The final scene shows the entire gang of bedraggled and crestfallen robbers tied together with a rope and being led away by the police. Two of the police are loaded down with revolvers, knives and cartridge belts, and resemble walking aresenals. As a fitting climax a confederate steals out of the woods, cuts the rope and gallantly rescues the \"Bandit Queen.\"\n\n\n1905\nThe Night Before Christmas\nAmerican\nEdwin Stanton Porter\n\nunknown\nhttps://en.wikipedia.org/wiki/The_Night_Before_Christmas_(1905_film)\nScenes are introduced using lines of the poem.[2] Santa Claus, played by Harry Eytinge, is shown feeding real reindeer[4] and finishes his work in the workshop. Meanwhile, the children of a city household hang their stockings and go to bed, but unable to sleep they engage in a pillow fight. Santa Claus leaves his home on a sleigh with his reindeer. He enters the children's house through the chimney, and leaves the presents. 
The children come down the stairs and enjoy their presents.\n\n\n1906\nDream of a Rarebit Fiend\nAmerican\nWallace McCutcheon and Edwin S. Porter\n\nshort\nhttps://en.wikipedia.org/wiki/Dream_of_a_Rarebit_Fiend_(1906_film)\nThe Rarebit Fiend gorges on Welsh rarebit at a restaurant. When he leaves, he begins to get dizzy as he starts to hallucinate. He desperately tries to hang onto a lamppost as the world spins all around him. A man helps him get home. He falls into bed and begins having more hallucinatory dreams. During a dream sequence, the furniture begins moving around the room. Imps emerge from a floating Welsh rarebit container and begin poking his head as he sleeps. His bed then begins dancing and spinning wildly around the room before flying out the window with the Fiend in it. The bed floats across the city as the Fiend floats up and off the bed. He hangs off the back and eventually gets caught on a weathervane atop a steeple. His bedclothes tear and he falls from the sky, crashing through his bedroom ceiling. The Fiend awakens from the dream after falling out of his bed.\n\n\n1906\nFrom Leadville to Aspen: A Hold-Up in the Rockies\nAmerican\nFrancis J. Marion and Wallace McCutcheon\n\nshort action/crime western\nhttps://en.wikipedia.org/wiki/From_Leadville_to_Aspen:_A_Hold-Up_in_the_Rockies\nThe film features a train traveling through the Rockies and a hold up created by two thugs placing logs on the line. They systematically rob the wealthy occupants at gunpoint and then make their getaway along the tracks and later by a hi-jacked horse and cart.\n\n\n1906\nKathleen Mavourneen\nAmerican\nEdwin S. Porter\n\nshort film\nhttps://en.wikipedia.org/wiki/Kathleen_Mavourneen_(1906_film)\nIrish villager Kathleen is a tenant of Captain Clearfield, who controls local judges and criminals. Her father owes Clearfield a large debt. 
Terence O'More saves the village from Clearfield, causing a large celebration.\\r\\nFilm historian Charles Musser writes of Porter's adaptation, \"O'More not only rescues Kathleen from the villain but, through marriage, renews the family for another generation.\"[1]\n\n\n1907\nDaniel Boone\nAmerican\nWallace McCutcheon and Ediwin S. Porter\nWilliam Craven, Florence Lawrence\nbiographical\nhttps://en.wikipedia.org/wiki/Daniel_Boone_(1907_film)\nBoone's daughter befriends an Indian maiden as Boone and his companion start out on a hunting expedition. While he is away, Boone's cabin is attacked by the Indians, who set it on fire and abduct Boone's daughter. Boone returns, swears vengeance, then heads out on the trail to the Indian camp. His daughter escapes but is chased. The Indians encounter Boone, which sets off a huge fight on the edge of a cliff. A burning arrow gets shot into the Indian camp. Boone gets tied to the stake and tortured. The burning arrow sets the Indian camp on fire, causing panic. Boone is rescued by his horse, and Boone has a knife fight in which he kills the Indian chief.[2]\n\n\n1907\nHow Brown Saw the Baseball Game\nAmerican\nUnknown\nUnknown\ncomedy\nhttps://en.wikipedia.org/wiki/How_Brown_Saw_the_Baseball_Game\nBefore heading out to a baseball game at a nearby ballpark, sports fan Mr. Brown drinks several highball cocktails. He arrives at the ballpark to watch the game, but has become so inebriated that the game appears to him in reverse, with the players running the bases backwards and the baseball flying back into the pitcher's hand. After the game is over, Mr. Brown is escorted home by one of his friends. 
When they arrive at Brown's house, they encounter his wife who becomes furious with the friend and proceeds to physically assault him, believing he is responsible for her husband's severe intoxication.[1]\n\n\n1907\nLaughing Gas\nAmerican\nEdwin Stanton Porter\nBertha Regustus, Edward Boulden\ncomedy\nhttps://en.wikipedia.org/wiki/Laughing_Gas_(film)#1907_Film\nThe plot is that of a black woman going to the dentist for a toothache and being given laughing gas. On her way walking home, and in other situations, she can't stop laughing, and everyone she meets \"catches\" the laughter from her, including a vendor and police officers.\n\n\n1908\nThe Adventures of Dollie\nAmerican\nD. W. Griffith\nArthur V. Johnson, Linda Arvidson\ndrama\nhttps://en.wikipedia.org/wiki/The_Adventures_of_Dollie\nOn a beautiful summer day a father and mother take their daughter Dollie on an outing to the river. The mother refuses to buy a gypsy's wares. The gypsy tries to rob the mother, but the father drives him off. The gypsy returns to the camp and devises a plan. They return and kidnap Dollie while her parents are distracted. A rescue crew is organized, but the gypsy takes Dollie to his camp. They gag Dollie and hide her in a barrel before the rescue party gets to the camp. Once they leave the gypsies and escapes in their wagon. As the wagon crosses the river, the barrel falls into the water. Still sealed in the barrel, Dollie is swept downstream in dangerous currents. A boy who is fishing in the river finds the barrel, and Dollie is reunited safely with her parents.\n\n\n1908\nThe Black Viper\nAmerican\nD. W. Griffith\nD. W. Griffith\ndrama\nhttps://en.wikipedia.org/wiki/The_Black_Viper\nA thug accosts a girl as she leaves her workplace but a man rescues her. The thug vows revenge and, with the help of two friends, attacks the girl and her rescuer again as they're going for a walk. This time they succeed in kidnapping the rescuer. He is bound and gagged and taken away in a cart. 
The girl runs home and gets help from several neighbors. They track the ruffians down to a cabin in the mountains where the gang has trapped their victim and set the cabin on fire. A thug and Rescuer fight on the roof of the house.\n\n\n1908\nA Calamitous Elopement\nAmerican\nD.W. Griffith\nHarry Solter, Linda Arvidson\ncomedy\nhttps://en.wikipedia.org/wiki/A_Calamitous_Elopement\nA young couple decides to elope after being caught in the midst of a romantic moment by the woman's angry father. They make plans to leave, but a thief discovers their plans and hides in their trunk and waits for the right moment to steal their belongings.\n\n\n1908\nThe Call of the Wild\nAmerican\nD. W. Griffith\nCharles Inslee\nadventure\nhttps://en.wikipedia.org/wiki/The_Call_of_the_Wild_(1908_film)\nA white girl (Florence Lawrence) rejects a proposal from an Indian brave (Charles Inslee) in this early one-reel Western melodrama. Despite the rejection, the Indian still comes to the girl's defense when she is abducted by his warring tribe. In her first year in films, Florence Lawrence was already the most popular among the Biograph Company's anonymous stock company players. By 1909, she was known the world over as \"The Biograph Girl.\"\n\n\n1908\nA Christmas Carol\nAmerican\nUnknown\nTom Ricketts\ndrama\nhttps://en.wikipedia.org/wiki/A_Christmas_Carol_(1908_film)\nNo prints of the first American film adaptation of A Christmas Carol are known to exist,[1] but The Moving Picture World magazine provided a scene-by-scene description before the film's release.[2] Scrooge goes into his office and begins working. His nephew, along with three women who wish for Scrooge to donate enter. However, Scrooge dismisses them. On the night of Christmas Eve, his long-dead partner Jacob Marley comes as a ghost, warning him of a horrible fate if he does not change his ways. 
Scrooge meets three spirits that show Scrooge the real meaning of Christmas, along with his grave, the result of his parsimonious ways. The next morning, he wakes and realizes the error of his ways. Scrooge was then euphoric and generous for the rest of his life.\n\n\n1908\nThe Fight for Freedom\nAmerican\nD. W. Griffith\nFlorence Auer, John G. Adolfi\nwestern\nhttps://en.wikipedia.org/wiki/The_Fight_for_Freedom\nThe film opens in a town on the Mexican border. A poker game is going on in the local saloon. One of the players cheats and is shot dead by another of the players, a Mexican named Pedro. In the uproar that follows Pedro is wounded as he escapes from the saloon. The sheriff is called, who tracks Pedro to his home but Pedro kills the sherriff too. While Pedro hides, his wife Juanita, is arrested on suspicion of murdering the sheriff. Pedro rescues her from the town jail and the two head for the Mexican border. Caught by the posse before they reach the border, Juanita is killed and the film ends with Pedro being arrested and taken back to town.\n\n\n" + } +] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/2023-Jan-economic-outlook.pdf.json b/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/2023-Jan-economic-outlook.pdf.json index 6bd7f4d877..6fd331f40f 100644 --- a/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/2023-Jan-economic-outlook.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/2023-Jan-economic-outlook.pdf.json @@ -36,7 +36,7 @@ "text": "WORLD ECONOMIC OUTLOOK UPDATE Inflation Peaking amid Low Growth" }, { - "type": "UncategorizedText", + "type": "Title", "element_id": "98e636ffa4ea25e037f659685a56f41d", "metadata": { "data_source": { @@ -91,7 +91,7 @@ }, { "type": "ListItem", - "element_id": "f1d5f4ed63a14db581e985bf15416cdd", + "element_id": "9fe27138e05d3a42d1e5cc57bc1fbc54", "metadata": { "data_source": { 
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -105,29 +105,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "Global growth is projected to fall from an estimated 3.4 percent in 2022 to 2.9 percent in 2023, then rise to 3.1 percent in 2024. The forecast for 2023 is 0.2 percentage point higher than predicted in the October 2022 World Economic Outlook (WEO) but below the historical (2000–19) average of 3.8 percent. The rise in central bank rates to fight inflation and Russia’s war in Ukraine continue to weigh on economic activity. The rapid spread of COVID-19 in China dampened growth in 2022, but the recent reopening has paved the way for a faster-than-expected recovery. Global inflation is expected to fall from 8.8 percent in 2022 to 6.6 percent in 2023 and 4.3 percent in 2024, still above pre-pandemic (2017–19) levels of about 3.5 percent." + "text": " Global growth is projected to fall from an estimated 3.4 percent in 2022 to 2.9 percent in 2023, then rise to 3.1 percent in 2024. The forecast for 2023 is 0.2 percentage point higher than predicted in the October 2022 World Economic Outlook (WEO) but below the historical (2000–19) average of 3.8 percent. The rise in central bank rates to fight inflation and Russia’s war in Ukraine continue to weigh on economic activity. The rapid spread of COVID-19 in China dampened growth in 2022, but the recent reopening has paved the way for a faster-than-expected recovery. Global inflation is expected to fall from 8.8 percent in 2022 to 6.6 percent in 2023 and 4.3 percent in 2024, still above pre-pandemic (2017–19) levels of about 3.5 percent." 
}, { "type": "ListItem", - "element_id": "c4e0168ffab999611a92e8ebd8fe48a9", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "The balance of risks remains tilted to the downside, but adverse risks have moderated since the October 2022" - }, - { - "type": "NarrativeText", - "element_id": "74180a93b38b6808f8cff7439e5d16d2", + "element_id": "56b3c7e61958b8308bb1ab927b6cdc2c", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -141,29 +123,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "WEO. On the upside, a stronger boost from pent-up demand in numerous economies or a faster fall in inflation are plausible. On the downside, severe health outcomes in China could hold back the recovery, Russia’s war in Ukraine could escalate, and tighter global financing conditions could worsen debt distress. Financial markets could also suddenly reprice in response to adverse inflation news, while further geopolitical fragmentation could hamper economic progress." + "text": "© = The balance of risks remains tilted to the downside, but adverse risks have moderated since the October 2022 WEO. On the upside, a stronger boost from pent-up demand in numerous economies or a faster fall in inflation are plausible. On the downside, severe health outcomes in China could hold back the recovery, Russia’s war in Ukraine could escalate, and tighter global financing conditions could worsen debt distress. 
Financial markets could also suddenly reprice in response to adverse inflation news, while further geopolitical fragmentation could hamper economic progress." }, { "type": "ListItem", - "element_id": "5e9b501fc056965a744f6598d022f31d", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "In most economies, amid the cost-of-living crisis, the priority remains achieving sustained disinflation. With" - }, - { - "type": "NarrativeText", - "element_id": "9f5a3fe548f011e304fda9067caa0824", + "element_id": "cdf520693b6ec6dc4877bc4aedea746c", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -252,26 +216,8 @@ "text": "In the fourth quarter of 2022, however, this uptick is estimated to have faded in most—though not all––major economies. US growth remains stronger than expected, with consumers continuing to spend from their stock of savings (the personal saving rate is at its lowest in more than 60 years, except for July 2005), unemployment near historic lows, and plentiful job opportunities. But elsewhere, high-frequency activity indicators (such as business and consumer sentiment, purchasing manager surveys, and mobility indicators) generally point to a slowdown." 
}, { - "type": "Title", - "element_id": "b3080428cb4e8896623bf36c001e868a", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "International Monetary Fund | January 2023" - }, - { - "type": "UncategorizedText", - "element_id": "6b86b273ff34fce19d6b804eff5a3f57", + "type": "ListItem", + "element_id": "c99869e52743869e29fd645e9e0df6fb", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -285,7 +231,7 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "1" + "text": "International Monetary Fund | January 2023 1" }, { "type": "Title", @@ -361,7 +307,7 @@ }, { "type": "UncategorizedText", - "element_id": "28a5aa3897d66de6c31caba99a4c337e", + "element_id": "c2c7be4534a60790d1d18451c91dc138", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -375,11 +321,11 @@ "filetype": "application/pdf", "page_number": 3 }, - "text": "–2" + "text": "16 14 12 10 8 6 4 2 0" }, { "type": "UncategorizedText", - "element_id": "c2c7be4534a60790d1d18451c91dc138", + "element_id": "28a5aa3897d66de6c31caba99a4c337e", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -393,11 +339,11 @@ "filetype": "application/pdf", "page_number": 3 }, - "text": "16 14 12 10 8 6 4 2 0" + "text": "–2" }, { - "type": "UncategorizedText", - "element_id": "c7c72889cb49cf43d9bd1f892db1be2c", + "type": "Title", + "element_id": "323d79e74460eda1fb0f8d55a2e0ff42", "metadata": { "data_source": { "url": 
"s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -411,11 +357,11 @@ "filetype": "application/pdf", "page_number": 3 }, - "text": "Jan. 2019" + "text": "Median country Brazil" }, { - "type": "UncategorizedText", - "element_id": "c7c72889cb49cf43d9bd1f892db1be2c", + "type": "Title", + "element_id": "49dca65f362fee401292ed7ada96f962", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -429,11 +375,11 @@ "filetype": "application/pdf", "page_number": 3 }, - "text": "Jan. 2019" + "text": "United States" }, { - "type": "ListItem", - "element_id": "63e35649dd179389ecc7251e1503489a", + "type": "Title", + "element_id": "007b2203e9e86a49c3108e9ffd16fbbc", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -447,11 +393,11 @@ "filetype": "application/pdf", "page_number": 3 }, - "text": "1. Headline Inflation" + "text": "Euro area" }, { - "type": "ListItem", - "element_id": "b790ab5fcad28bbedb50b568b3adeca2", + "type": "Title", + "element_id": "cc874418b59b7ecb37a2c938783fb5ce", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -465,11 +411,11 @@ "filetype": "application/pdf", "page_number": 3 }, - "text": "2. Core Inflation" + "text": "Nov. 22" }, { "type": "Title", - "element_id": "323d79e74460eda1fb0f8d55a2e0ff42", + "element_id": "cc874418b59b7ecb37a2c938783fb5ce", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -483,11 +429,11 @@ "filetype": "application/pdf", "page_number": 3 }, - "text": "Median country Brazil" + "text": "Nov. 
22" }, { - "type": "Title", - "element_id": "646612b0a62b59fd13be769b4590a9ac", + "type": "NarrativeText", + "element_id": "6814df88a59d11e9fcf76a7ed0f5fdfc", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -501,11 +447,11 @@ "filetype": "application/pdf", "page_number": 3 }, - "text": "Jul. 19" + "text": "Winter comes to Europe. European economic growth in 2022 was more resilient than expected in the face of the large negative terms-of-trade shock from the war in Ukraine. This resilience––which is" }, { - "type": "Title", - "element_id": "646612b0a62b59fd13be769b4590a9ac", + "type": "ListItem", + "element_id": "3a162049bc9ee88b56d4d4bf5897368f", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -519,11 +465,11 @@ "filetype": "application/pdf", "page_number": 3 }, - "text": "Jul. 19" + "text": "2 International Monetary Fund | January 2023" }, { "type": "Title", - "element_id": "7a4f82ed474f82c26a8b867becaf89ba", + "element_id": "95af4f3feb2d03b2310ce31abc0c435d", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -535,13 +481,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 3 + "page_number": 4 }, - "text": "Jan. 20" + "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023" }, { - "type": "Title", - "element_id": "7a4f82ed474f82c26a8b867becaf89ba", + "type": "NarrativeText", + "element_id": "83ce77349b07c275543d551c2c016370", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -553,13 +499,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 3 + "page_number": 4 }, - "text": "Jan. 
20" + "text": "visible in consumption and investment data for the third quarter––partly reflects government support of about 1.2 percent of European Union GDP (net budgetary cost) to households and firms hit by the energy crisis, as well as dynamism from economies reopening. Gas prices have declined by more than expected amid higher non-Russian pipeline and liquefied natural gas flows, compression of demand for gas, and a warmer-than-usual winter. However, the boost from reopening appears to be fading. High-frequency indicators for the fourth quarter suggest that the manufacturing and services sectors are contracting. Consumer confidence and business sentiment have worsened. With inflation at about 10 percent or above in several euro area countries and the United Kingdom, household budgets remain stretched. The accelerated pace of rate increases by the Bank of England and the European Central Bank is tightening financial conditions and cooling demand in the housing sector and beyond." }, { "type": "Title", - "element_id": "6d2f5e3c057e12c92023d5501c3fd075", + "element_id": "26a20452d058d66ad402559f659cec7c", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -571,13 +517,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 3 + "page_number": 4 }, - "text": "Jul. 20" + "text": "The Forecast" }, { "type": "Title", - "element_id": "6d2f5e3c057e12c92023d5501c3fd075", + "element_id": "5779b9b7d25794d3b4ed1fe4e61f6617", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -589,13 +535,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 3 + "page_number": 4 }, - "text": "Jul. 
20" + "text": "Growth Bottoming Out" }, { - "type": "Title", - "element_id": "49dca65f362fee401292ed7ada96f962", + "type": "NarrativeText", + "element_id": "22011dc596eec73711d7dac8d99b41b6", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -607,13 +553,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 3 + "page_number": 4 }, - "text": "United States" + "text": "Global growth, estimated at 3.4 percent in 2022, is projected to fall to 2.9 percent in 2023 before rising to 3.1 percent in 2024 (Table 1). Compared with the October forecast, the estimate for 2022 and the forecast for 2023 are both higher by about 0.2 percentage point, reflecting positive surprises and greater-than-expected resilience in numerous economies. Negative growth in global GDP or global GDP per capita—which often happens when there is a global recession—is not expected. Nevertheless, global growth projected for 2023 and 2024 is below the historical (2000–19) annual average of 3.8 percent." }, { - "type": "Title", - "element_id": "f4a93992a1b09b3fa6200542fd6fde5a", + "type": "NarrativeText", + "element_id": "97e04ee873fea0151df00f7b1fb4ca42", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -625,13 +571,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 3 + "page_number": 4 }, - "text": "Jan. 21" + "text": "The forecast of low growth in 2023 reflects the rise in central bank rates to fight inflation–– especially in advanced economies––as well as the war in Ukraine. The decline in growth in 2023 from 2022 is driven by advanced economies; in emerging market and developing economies, growth is estimated to have bottomed out in 2022. Growth is expected to pick up in China with the full reopening in 2023. 
The expected pickup in 2024 in both groups of economies reflects gradual recovery from the effects of the war in Ukraine and subsiding inflation. Following the path of global demand, world trade growth is expected to decline in 2023 to 2.4 percent, despite an easing of supply bottlenecks, before rising to 3.4 percent in 2024." }, { - "type": "Title", - "element_id": "f4a93992a1b09b3fa6200542fd6fde5a", + "type": "NarrativeText", + "element_id": "e08dfaba8a8dc7496a44cb172319d4ba", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -643,13 +589,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 3 + "page_number": 4 }, - "text": "Jan. 21" + "text": "These forecasts are based on a number of assumptions, including on fuel and nonfuel commodity prices, which have generally been revised down since October, and on interest rates, which have been revised up. In 2023, oil prices are projected to fall by about 16 percent, while nonfuel commodity prices are expected to fall by, on average, 6.3 percent. Global interest rate assumptions are revised up, reflecting intensified actual and signaled policy tightening by major central banks since October." }, { - "type": "Title", - "element_id": "81db94f58819ee2fd6c05ddef2082ccc", + "type": "NarrativeText", + "element_id": "73a39336fb540e7d57ec85dfa8e92799", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -661,13 +607,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 3 + "page_number": 4 }, - "text": "Jul. 21" + "text": "For advanced economies, growth is projected to decline sharply from 2.7 percent in 2022 to 1.2 percent in 2023 before rising to 1.4 percent in 2024, with a downward revision of 0.2 percentage point for 2024. About 90 percent of advanced economies are projected to see a decline in growth in 2023." 
}, { - "type": "Title", - "element_id": "81db94f58819ee2fd6c05ddef2082ccc", + "type": "ListItem", + "element_id": "e3b0c44298fc1c149afbf4c8996fb924", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -679,13 +625,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 3 + "page_number": 4 }, - "text": "Jul. 21" + "text": "" }, { - "type": "Title", - "element_id": "007b2203e9e86a49c3108e9ffd16fbbc", + "type": "ListItem", + "element_id": "e84075ae46df9d9ad37d947011c05a7f", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -697,13 +643,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 3 + "page_number": 4 }, - "text": "Euro area" + "text": "In the United States, growth is projected to fall from 2.0 percent in 2022 to 1.4 percent in 2023 and 1.0 percent in 2024. With growth rebounding in the second half of 2024, growth in 2024 will be faster than in 2023 on a fourth-quarter-over-fourth-quarter basis, as in most advanced" }, { - "type": "Title", - "element_id": "babfe67b3ecc6b32db9adb9da08274bf", + "type": "ListItem", + "element_id": "ab9d11a9dd37cfd5e1876f40777a4480", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -715,13 +661,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 3 + "page_number": 4 }, - "text": "Jan. 
22" + "text": "International Monetary Fund | January 2023 3" }, { "type": "Title", - "element_id": "babfe67b3ecc6b32db9adb9da08274bf", + "element_id": "95af4f3feb2d03b2310ce31abc0c435d", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -733,13 +679,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 3 + "page_number": 5 }, - "text": "Jan. 22" + "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023" }, { - "type": "Title", - "element_id": "82debf5a182b9b394ad3a9d584a870ef", + "type": "NarrativeText", + "element_id": "67f04acf5353c625d003fd003acb56f3", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -751,13 +697,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 3 + "page_number": 5 }, - "text": "Jul. 22" + "text": "economies. There is a 0.4 percentage point upward revision for annual growth in 2023, reflecting carryover effects from domestic demand resilience in 2022, but a 0.2 percentage point downward revision of growth in 2024 due to the steeper path of Federal Reserve rate hikes, to a peak of about 5.1 percent in 2023." }, { - "type": "Title", - "element_id": "82debf5a182b9b394ad3a9d584a870ef", + "type": "ListItem", + "element_id": "075ec12daaf7e03f8ce608829f7ecdda", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -769,13 +715,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 3 + "page_number": 5 }, - "text": "Jul. 22" + "text": "Growth in the ero area is projected to bottom out at 0.7 percent in 2023 before rising to 1.6 percent in 2024. 
The 0.2 percentage point upward revision to the forecast for 2023 reflects the effects of faster rate hikes by the European Central Bank and eroding real incomes, offset by the carryover from the 2022 outturn, lower wholesale energy prices, and additional announcements of fiscal purchasing power support in the form of energy price controls and cash transfers." }, { - "type": "Title", - "element_id": "cc874418b59b7ecb37a2c938783fb5ce", + "type": "ListItem", + "element_id": "531e21ce379680ba6ae82ebe340e897d", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -787,13 +733,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 3 + "page_number": 5 }, - "text": "Nov. 22" + "text": "Growth in the United Kingdom is projected to be —0.6 percent in 2023, a 0.9 percentage point downward revision from October, reflecting tighter fiscal and monetary policies and financial conditions and still-high energy retail prices weighing on household budgets." }, { - "type": "Title", - "element_id": "cc874418b59b7ecb37a2c938783fb5ce", + "type": "ListItem", + "element_id": "968cc16a6f05e1f4c40da05632df9609", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -805,13 +751,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 3 + "page_number": 5 }, - "text": "Nov. 22" + "text": "Growth in Japan is projected to rise to 1.8 percent in 2023, with continued monetary and fiscal policy support. High corporate profits from a depreciated yen and earlier delays in implementing previous projects will support business investment. In 2024, growth is expected to decline to 0.9 percent as the effects of past stimulus dissipate." 
}, { "type": "NarrativeText", - "element_id": "6814df88a59d11e9fcf76a7ed0f5fdfc", + "element_id": "497b28af5c258708a114b8a6766662ce", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -823,13 +769,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 3 + "page_number": 5 }, - "text": "Winter comes to Europe. European economic growth in 2022 was more resilient than expected in the face of the large negative terms-of-trade shock from the war in Ukraine. This resilience––which is" + "text": "For emerging market and developing economies, growth is projected to rise modestly, from 3.9 percent in 2022 to 4.0 percent in 2023 and 4.2 percent in 2024, with an upward revision of 0.3 percentage point for 2023 and a downward revision of 0.1 percentage point for 2024. About half of emerging market and developing economies have lower growth in 2023 than in 2022." }, { - "type": "UncategorizedText", - "element_id": "d4735e3a265e16eee03f59718b9b5d03", + "type": "ListItem", + "element_id": "74af5288c060a6b7bc028cc0efcf59ea", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -841,13 +787,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 3 + "page_number": 5 }, - "text": "2" + "text": "percent, respectively, after the deeper-than-expected slowdown in 2022 to 4.3 percent attributable to China’s economy. China’s real GDP slowdown in the fourth quarter of 2022 implies a 0.2 percentage point downgrade for 2022 growth to 3.0 percent—the first time in more than 40 years with China’s growth below the global average. 
Growth in China is projected to rise to 5.2 percent in 2023, reflecting rapidly improving mobility, and to fall to 4.5 percent in 2024 before settling at below 4 percent over the medium term amid declining business dynamism and slow progress on structural reforms. Growth in India is set to decline from 6.8 percent in 2022 to 6.1 percent in 2023 before picking up to 6.8 percent in 2024, with resilient domestic demand despite external headwinds. Growth in the ASEAN-5 countries (Indonesia, Malaysia, Philippines, Singapore, Thailand) is similarly projected to slow to 4.3 percent in 2023 and then pick up to 4.7 percent in 2024." }, { - "type": "Title", - "element_id": "b3080428cb4e8896623bf36c001e868a", + "type": "ListItem", + "element_id": "e3b0c44298fc1c149afbf4c8996fb924", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -859,13 +805,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 3 + "page_number": 5 }, - "text": "International Monetary Fund | January 2023" + "text": "" }, { - "type": "Title", - "element_id": "95af4f3feb2d03b2310ce31abc0c435d", + "type": "ListItem", + "element_id": "afde979c99a73646915fe253c85c5a9c", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -877,13 +823,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 4 + "page_number": 5 }, - "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023" + "text": "Growth in emerging and developing Europe is projected to have bottomed out in 2022 at 0.7 percent and, since the October forecast, has been revised up for 2023 by 0.9 percentage point to 1.5 percent. This reflects a smaller economic contraction in Russia in 2022 (estimated at –2.2 percent compared with a predicted –3.4 percent) followed by modestly positive growth in 2023. 
At the current oil price cap level of the Group of Seven, Russian crude oil export volumes are not expected to be significantly affected, with Russian trade continuing to be redirected from sanctioning to non-sanctioning countries. In Latin America and the Caribbean, growth is projected to decline from 3.9 percent in 2022 to 1.8 percent in 2023, with an upward revision for 2023 of 0.1 percentage point since October. The forecast revision reflects upgrades of 0.2 percentage point for Brazil and 0.5 percentage point for Mexico due to unexpected domestic demand resilience, higher-than-expected growth in" }, { - "type": "NarrativeText", - "element_id": "83ce77349b07c275543d551c2c016370", + "type": "ListItem", + "element_id": "25072141a0ed1c9474256def9a721513", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -895,13 +841,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 4 + "page_number": 5 }, - "text": "visible in consumption and investment data for the third quarter––partly reflects government support of about 1.2 percent of European Union GDP (net budgetary cost) to households and firms hit by the energy crisis, as well as dynamism from economies reopening. Gas prices have declined by more than expected amid higher non-Russian pipeline and liquefied natural gas flows, compression of demand for gas, and a warmer-than-usual winter. However, the boost from reopening appears to be fading. High-frequency indicators for the fourth quarter suggest that the manufacturing and services sectors are contracting. Consumer confidence and business sentiment have worsened. With inflation at about 10 percent or above in several euro area countries and the United Kingdom, household budgets remain stretched. 
The accelerated pace of rate increases by the Bank of England and the European Central Bank is tightening financial conditions and cooling demand in the housing sector and beyond." + "text": "4 International Monetary Fund | January 2023" }, { "type": "Title", - "element_id": "26a20452d058d66ad402559f659cec7c", + "element_id": "95af4f3feb2d03b2310ce31abc0c435d", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -913,13 +859,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 4 + "page_number": 6 }, - "text": "The Forecast" + "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023" }, { - "type": "Title", - "element_id": "5779b9b7d25794d3b4ed1fe4e61f6617", + "type": "NarrativeText", + "element_id": "c9b8a2f221ce7ec3213fcf4d9ce8879c", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -931,13 +877,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 4 + "page_number": 6 }, - "text": "Growth Bottoming Out" + "text": "major trading partner economies, and in Brazil, greater-than-expected fiscal support. Growth in the region is projected to rise to 2.1 percent in 2024, although with a downward revision of 0.3 percentage point, reflecting tighter financial conditions, lower prices of exported commodities, and downward revisions to trading partner growth." 
}, { - "type": "NarrativeText", - "element_id": "22011dc596eec73711d7dac8d99b41b6", + "type": "ListItem", + "element_id": "25e2f1dc031b5421b8a234945098e58b", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -949,13 +895,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 4 + "page_number": 6 }, - "text": "Global growth, estimated at 3.4 percent in 2022, is projected to fall to 2.9 percent in 2023 before rising to 3.1 percent in 2024 (Table 1). Compared with the October forecast, the estimate for 2022 and the forecast for 2023 are both higher by about 0.2 percentage point, reflecting positive surprises and greater-than-expected resilience in numerous economies. Negative growth in global GDP or global GDP per capita—which often happens when there is a global recession—is not expected. Nevertheless, global growth projected for 2023 and 2024 is below the historical (2000–19) annual average of 3.8 percent." + "text": "Growth in the Middle East and Central Asia is projected to decline from 5.3 percent in 2022 to 3.2 percent in 2023, with a downward revision of 0.4 percentage point since October, mainly attributable to a steeper-than-expected growth slowdown in Saudi Arabia, from 8.7 percent in 2022 (which was stronger than expected by 1.1 percentage points) to 2.6 percent in 2023, with a negative revision of 1.1 percentage points. The downgrade for 2023 reflects mainly lower oil production in line with an agreement through OPEC+ (Organization of the Petroleum Exporting Countries, including Russia and other non-OPEC oil exporters), while non-oil growth is expected to remain robust. In sub-Saharan Africa, growth is projected to remain moderate at 3.8 percent in 2023 amid prolonged fallout from the COVID-19 pandemic, although with a modest upward revision since October, before picking up to 4.1 percent in 2024. 
The small upward revision for 2023 (0.1 percentage point) reflects Nigeria’s rising growth in 2023 due to measures to address insecurity issues in the oil sector. In South Africa, by contrast, after a COVID-19 reopening rebound in 2022, projected growth more than halves in 2023, to 1.2 percent, reflecting weaker external demand, power shortages, and structural constraints." }, { - "type": "NarrativeText", - "element_id": "97e04ee873fea0151df00f7b1fb4ca42", + "type": "Title", + "element_id": "3dfc45d3333ae253d78008c8cde2d752", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -967,13 +913,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 4 + "page_number": 6 }, - "text": "The forecast of low growth in 2023 reflects the rise in central bank rates to fight inflation–– especially in advanced economies––as well as the war in Ukraine. The decline in growth in 2023 from 2022 is driven by advanced economies; in emerging market and developing economies, growth is estimated to have bottomed out in 2022. Growth is expected to pick up in China with the full reopening in 2023. The expected pickup in 2024 in both groups of economies reflects gradual recovery from the effects of the war in Ukraine and subsiding inflation. Following the path of global demand, world trade growth is expected to decline in 2023 to 2.4 percent, despite an easing of supply bottlenecks, before rising to 3.4 percent in 2024." 
+ "text": "Inflation Peaking" }, { "type": "NarrativeText", - "element_id": "e08dfaba8a8dc7496a44cb172319d4ba", + "element_id": "d24af8f44bd419665bb4ab6efef34fed", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -985,13 +931,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 4 + "page_number": 6 }, - "text": "These forecasts are based on a number of assumptions, including on fuel and nonfuel commodity prices, which have generally been revised down since October, and on interest rates, which have been revised up. In 2023, oil prices are projected to fall by about 16 percent, while nonfuel commodity prices are expected to fall by, on average, 6.3 percent. Global interest rate assumptions are revised up, reflecting intensified actual and signaled policy tightening by major central banks since October." + "text": "About 84 percent of countries are expected to have lower headline (consumer price index) inflation in 2023 than in 2022. Global inflation is set to fall from 8.8 percent in 2022 (annual average) to 6.6 percent in 2023 and 4.3 percent in 2024––above pre-pandemic (2017–19) levels of about 3.5 percent. The projected disinflation partly reflects declining international fuel and nonfuel commodity prices due to weaker global demand. It also reflects the cooling effects of monetary policy tightening on underlying (core) inflation, which globally is expected to decline from 6.9 percent in the fourth quarter of 2022 (year over year) to 4.5 percent by the fourth quarter of 2023. Still, disinflation will take time: by 2024, projected annual average headline and core inflation will, respectively, still be above pre-pandemic levels in 82 percent and 86 percent of economies." 
}, { "type": "NarrativeText", - "element_id": "73a39336fb540e7d57ec85dfa8e92799", + "element_id": "72d289ea524eebcd8f195a8afda1c223", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1003,13 +949,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 4 + "page_number": 6 }, - "text": "For advanced economies, growth is projected to decline sharply from 2.7 percent in 2022 to 1.2 percent in 2023 before rising to 1.4 percent in 2024, with a downward revision of 0.2 percentage point for 2024. About 90 percent of advanced economies are projected to see a decline in growth in 2023." + "text": "In advanced economies, annual average inflation is projected to decline from 7.3 percent in 2022 to 4.6 percent in 2023 and 2.6 percent in 2024––above target in several cases. In emerging market and developing economies, projected annual inflation declines from 9.9 percent in 2022 to 8.1 percent in 2023 and 5.5 percent in 2024, above the 4.9 percent pre-pandemic (2017–19) average. In low-income developing countries, inflation is projected to moderate from 14.2 percent in 2022 to 8.6 percent in 2024––still high, but close to the pre-pandemic average." 
}, { - "type": "ListItem", - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", + "type": "Title", + "element_id": "11ebd9f4c9a7cdbac41f8f7399d3950e", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1021,13 +967,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 4 + "page_number": 6 }, - "text": "" + "text": "Risks to the Outlook" }, { "type": "NarrativeText", - "element_id": "e84075ae46df9d9ad37d947011c05a7f", + "element_id": "818b1bd0fa9714f9ce4623897ba422a8", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1039,13 +985,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 4 + "page_number": 6 }, - "text": "In the United States, growth is projected to fall from 2.0 percent in 2022 to 1.4 percent in 2023 and 1.0 percent in 2024. With growth rebounding in the second half of 2024, growth in 2024 will be faster than in 2023 on a fourth-quarter-over-fourth-quarter basis, as in most advanced" + "text": "The balance of risks to the global outlook remains tilted to the downside, with scope for lower growth and higher inflation, but adverse risks have moderated since the October 2022 World Economic Outlook." }, { - "type": "Title", - "element_id": "b3080428cb4e8896623bf36c001e868a", + "type": "ListItem", + "element_id": "30c61ae1849c6b38dd09c21d3d4f5951", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1057,13 +1003,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 4 + "page_number": 6 }, - "text": "International Monetary Fund | January 2023" + "text": "International Monetary Fund | January 2023. 
5" }, { - "type": "UncategorizedText", - "element_id": "4e07408562bedb8b60ce05c1decfe3ad", + "type": "Title", + "element_id": "95af4f3feb2d03b2310ce31abc0c435d", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1075,13 +1021,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 4 + "page_number": 7 }, - "text": "3" + "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023" }, { "type": "Title", - "element_id": "95af4f3feb2d03b2310ce31abc0c435d", + "element_id": "1ad611b76683e54171ae0b1fddd827ca", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1093,13 +1039,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 7 }, - "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023" + "text": "Table 1. Overview of the World Economic Outlook Projections (Percent change, unless noted otherwise)" }, { - "type": "NarrativeText", - "element_id": "70f05b9620aa1b7236058898e7e59192", + "type": "Title", + "element_id": "d11a1c04bd3a9891350b4bd94104df58", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1111,13 +1057,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 7 }, - "text": "economies. There is a 0.4 percentage point upward revision for annual growth in 2023, reflecting carryover effects from domestic demand resilience in 2022, but a 0.2 percentage point downward revision of growth in 2024 due to the steeper path of Federal Reserve rate hikes, to a peak of about 5.1 percent in 2023." 
+ "text": "Year over Year" }, { - "type": "ListItem", - "element_id": "fd6c549473e196512c076844988f465c", + "type": "Title", + "element_id": "aa22eb2e58c7cf45c528550d68e15c51", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1129,13 +1075,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 7 }, - "text": "Growth in the euro area is projected to bottom out at 0.7 percent in 2023 before rising to 1.6" + "text": "Difference from October 2022" }, { - "type": "NarrativeText", - "element_id": "cdcaed7d1296edd658256d603cb3828c", + "type": "Title", + "element_id": "8c327a62ae0e925498f5c68b819b32b4", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1147,13 +1093,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 7 }, - "text": "percent in 2024. The 0.2 percentage point upward revision to the forecast for 2023 reflects the effects of faster rate hikes by the European Central Bank and eroding real incomes, offset by the carryover from the 2022 outturn, lower wholesale energy prices, and additional announcements of fiscal purchasing power support in the form of energy price controls and cash transfers." 
+ "text": "Q4 over Q4 2/" }, { - "type": "ListItem", - "element_id": "3be6554964c172468cceaee89294f59d", + "type": "Title", + "element_id": "fcadc00fe663ee0e7818b0ffc5c46948", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1165,13 +1111,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 7 }, - "text": "Growth in the United Kingdom is projected to be –0.6 percent in 2023, a 0.9 percentage point" + "text": "World Output" }, { - "type": "NarrativeText", - "element_id": "7e32067b6a4662d72b1244a3aac91be5", + "type": "UncategorizedText", + "element_id": "0c76bc4e35219e2a31b09428cd47d009", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1183,13 +1129,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 7 }, - "text": "downward revision from October, reflecting tighter fiscal and monetary policies and financial conditions and still-high energy retail prices weighing on household budgets." 
+ "text": "World Trade Volume (goods and services) 6/ Advanced Economies Emerging Market and Developing Economies" }, { - "type": "ListItem", - "element_id": "b24771387a5318eeda21adaa49629186", + "type": "NarrativeText", + "element_id": "3c0578f4d944258ffa4ffac7615f1ff9", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1201,13 +1147,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 7 }, - "text": "Growth in Japan is projected to rise to 1.8 percent in 2023, with continued monetary and fiscal" + "text": "Commodity Prices Oil 7/ Nonfuel (average based on world commodity import weights)" }, { - "type": "NarrativeText", - "element_id": "f8b94e8d9a593a1debae96fce2040db7", + "type": "Title", + "element_id": "b2800ff802361713acee893ebae272f6", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1219,13 +1165,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 7 }, - "text": "policy support. High corporate profits from a depreciated yen and earlier delays in implementing previous projects will support business investment. In 2024, growth is expected to decline to 0.9 percent as the effects of past stimulus dissipate." 
+ "text": "Saudi Arabia Sub-Saharan Africa" }, { - "type": "NarrativeText", - "element_id": "497b28af5c258708a114b8a6766662ce", + "type": "Title", + "element_id": "6185fd66a4e106814e65c047c15dfb1f", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1237,13 +1183,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 7 }, - "text": "For emerging market and developing economies, growth is projected to rise modestly, from 3.9 percent in 2022 to 4.0 percent in 2023 and 4.2 percent in 2024, with an upward revision of 0.3 percentage point for 2023 and a downward revision of 0.1 percentage point for 2024. About half of emerging market and developing economies have lower growth in 2023 than in 2022." + "text": "Advanced Economies United States Euro Area" }, { - "type": "ListItem", - "element_id": "2ba41350ae3c684802f0e2b785c2d11b", + "type": "Title", + "element_id": "24af2841400373443d80b6c91180918b", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1255,13 +1201,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 7 }, - "text": "Growth in emerging and developing Asia is expected to rise in 2023 and 2024 to 5.3 percent and 5.2" + "text": "Middle East and Central Asia" }, { - "type": "NarrativeText", - "element_id": "237bc02ecaaf27f074be0c466b31cc09", + "type": "Title", + "element_id": "7559320d044a32fbb21a7a8da25e9045", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1273,13 +1219,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 7 }, - "text": "percent, respectively, after the deeper-than-expected slowdown in 2022 to 4.3 percent attributable to China’s economy. 
China’s real GDP slowdown in the fourth quarter of 2022 implies a 0.2 percentage point downgrade for 2022 growth to 3.0 percent—the first time in more than 40 years with China’s growth below the global average. Growth in China is projected to rise to 5.2 percent in 2023, reflecting rapidly improving mobility, and to fall to 4.5 percent in 2024 before settling at below 4 percent over the medium term amid declining business dynamism and slow progress on structural reforms. Growth in India is set to decline from 6.8 percent in 2022 to 6.1 percent in 2023 before picking up to 6.8 percent in 2024, with resilient domestic demand despite external headwinds. Growth in the ASEAN-5 countries (Indonesia, Malaysia, Philippines, Singapore, Thailand) is similarly projected to slow to 4.3 percent in 2023 and then pick up to 4.7 percent in 2024." + "text": "Japan United Kingdom Canada Other Advanced Economies 3/" }, { - "type": "ListItem", - "element_id": "afde979c99a73646915fe253c85c5a9c", + "type": "Title", + "element_id": "ad1094978303f5aa32665083ee1ed934", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1291,13 +1237,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 7 }, - "text": "Growth in emerging and developing Europe is projected to have bottomed out in 2022 at 0.7 percent and, since the October forecast, has been revised up for 2023 by 0.9 percentage point to 1.5 percent. This reflects a smaller economic contraction in Russia in 2022 (estimated at –2.2 percent compared with a predicted –3.4 percent) followed by modestly positive growth in 2023. At the current oil price cap level of the Group of Seven, Russian crude oil export volumes are not expected to be significantly affected, with Russian trade continuing to be redirected from sanctioning to non-sanctioning countries. 
In Latin America and the Caribbean, growth is projected to decline from 3.9 percent in 2022 to 1.8 percent in 2023, with an upward revision for 2023 of 0.1 percentage point since October. The forecast revision reflects upgrades of 0.2 percentage point for Brazil and 0.5 percentage point for Mexico due to unexpected domestic demand resilience, higher-than-expected growth in" + "text": "Latin America and the Caribbean" }, { - "type": "ListItem", - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", + "type": "Title", + "element_id": "8325885b8155742cebc672e0d7072a7d", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1309,13 +1255,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 7 }, - "text": "" + "text": "Emerging and Developing Europe" }, { - "type": "UncategorizedText", - "element_id": "4b227777d4dd1fc61c6f884f48641d02", + "type": "Title", + "element_id": "a4ca51cd6c74adf51f6e9ce60165d047", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1327,13 +1273,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 7 }, - "text": "4" + "text": "Emerging Market and Developing Economies Emerging and Developing Asia" }, { - "type": "Title", - "element_id": "b3080428cb4e8896623bf36c001e868a", + "type": "UncategorizedText", + "element_id": "9e5246f529e197f84af65bbcd8e0d2a4", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1345,13 +1291,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 7 }, - "text": "International Monetary Fund | January 2023" + "text": "Memorandum World Growth Based on Market Exchange Rates European Union ASEAN-5 5/ Middle East and North Africa Emerging Market and 
Middle-Income Economies Low-Income Developing Countries" }, { "type": "Title", - "element_id": "95af4f3feb2d03b2310ce31abc0c435d", + "element_id": "e30a554d7d1cbf308651f8c267ad6872", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1363,13 +1309,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 6 + "page_number": 7 }, - "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023" + "text": "Brazil Mexico" }, { - "type": "NarrativeText", - "element_id": "e7a8e30d6d49ffbca56f87cd6883c9a0", + "type": "Title", + "element_id": "33a3d8ed92b0709ba525369922e51387", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1381,13 +1327,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 6 + "page_number": 7 }, - "text": "major trading partner economies, and in Brazil, greater-than-expected fiscal support. Growth in the region is projected to rise to 2.1 percent in 2024, although with a downward revision of 0.3 percentage point, reflecting tighter financial conditions, lower prices of exported commodities, and downward revisions to trading partner growth." 
+ "text": "Russia" }, { "type": "Title", - "element_id": "3f79bb7b435b05321651daefd374cdc6", + "element_id": "d5d29f012a1237803ee7e623a134117a", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1399,13 +1345,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 6 + "page_number": 7 }, - "text": "e" + "text": "China India 4/" }, { - "type": "ListItem", - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", + "type": "Title", + "element_id": "18231df9f753f2eca887585247231761", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1417,13 +1363,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 6 + "page_number": 7 }, - "text": "" + "text": "Germany France Italy Spain" }, { - "type": "NarrativeText", - "element_id": "25e2f1dc031b5421b8a234945098e58b", + "type": "Title", + "element_id": "05704f84f4326b5f53a04d62f7ad62fc", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1435,13 +1381,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 6 + "page_number": 7 }, - "text": "Growth in the Middle East and Central Asia is projected to decline from 5.3 percent in 2022 to 3.2 percent in 2023, with a downward revision of 0.4 percentage point since October, mainly attributable to a steeper-than-expected growth slowdown in Saudi Arabia, from 8.7 percent in 2022 (which was stronger than expected by 1.1 percentage points) to 2.6 percent in 2023, with a negative revision of 1.1 percentage points. The downgrade for 2023 reflects mainly lower oil production in line with an agreement through OPEC+ (Organization of the Petroleum Exporting Countries, including Russia and other non-OPEC oil exporters), while non-oil growth is expected to remain robust. 
In sub-Saharan Africa, growth is projected to remain moderate at 3.8 percent in 2023 amid prolonged fallout from the COVID-19 pandemic, although with a modest upward revision since October, before picking up to 4.1 percent in 2024. The small upward revision for 2023 (0.1 percentage point) reflects Nigeria’s rising growth in 2023 due to measures to address insecurity issues in the oil sector. In South Africa, by contrast, after a COVID-19 reopening rebound in 2022, projected growth more than halves in 2023, to 1.2 percent, reflecting weaker external demand, power shortages, and structural constraints." + "text": "Nigeria South Africa" }, { - "type": "Title", - "element_id": "3dfc45d3333ae253d78008c8cde2d752", + "type": "Table", + "element_id": "af79981b9ad6dea2ab3fa92cb5954958", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1453,13 +1399,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 6 + "page_number": 7 }, - "text": "Inflation Peaking" + "text": "over Estimate___ Projections WEO Projections 1/ Estimate Projections 2021 2022 2023 2024 2023 2024 2022 2023 2024 World Output 6.2 34 29 34 0.2 0.1 1.9 3.2 3.0 Advanced Economies 5.4 27 1.2 14 04 0.2 1.3 14 1.6 United States 5.9 2.0 14 1.0 04 -0.2 07 1.0 13 Euro Area 5.3 3.5 07 16 0.2 -0.2 19 0.5 24 Germany 26 19 01 14 04 0.1 14 0.0 23 France 68 26 07 16 0.0 0.0 0.5 09 18 Italy 67 3.9 06 0.9 08 -04 21 0.1 1.0 Spain 5.5 5.2 14 24 -0.1 -0.2 21 13 28 Japan 21 14 18 0.9 0.2 -04 17 1.0 1.0 United Kingdom 76 41 -06 0.9 -0.9 03 04 -05 18 Canada 5.0 3.5 15 15 0.0 0.1 23 12 1.9 Other Advanced Economies 3/ 5.3 28 20 24 -03 02 14 2a 2.2 Emerging Market and Developing Economies 67 3.9 40 42 0.3 -0.1 25 5.0 4A Emerging and Developing Asia 74 43 5.3 5.2 04 0.0 3.4 6.2 49 China 84 3.0 5.2 45 08 0.0 29 5.9 41 India 4/ 87 68 61 68 0.0 0.0 43 70 7A Emerging and Developing Europe 69 07 15 26 0.9 01 -2.0 3.5 28 Russia 47 
-2.2 0.3 21 26 06 441 1.0 2.0 Latin America and the Caribbean 7.0 3.9 18 2a 04 0.3 26 1.9 19 Brazil 5.0 34 12 15 0.2 -04 28 0.8 22 Mexico 47 34 47 16 05 -0.2 37 14 1.9 Middle East and Central Asia 45 5.3 3.2 37 -04 0.2 . . . Saudi Arabia 3.2 87 26 34 -11 0.5 46 27 35 Sub-Saharan Africa 47 38 38 41 04 0.0 = ao ao Nigeria 3.6 3.0 3.2 29 0.2 0.0 26 31 29 South Africa 49 26 12 13 01 0.0 3.0 0.5 18 Memorandum World Growth Based on Market Exchange Rates 6.0 3.41 24 25 03 -0.1 17 25 25 European Union 5.5 37 07 18 0.0 -0.3 18 1.2 2.0 ASEAN-5 5/ 3.8 5.2 43 47 0.2 -0.2 37 57 40 Middle East and North Africa 41 54 3.2 35 -04 0.2 a . . Emerging Market and Middle-Income Economies 70 38 40 44 04 0.0 25 5.0 44 Low-Income Developing Countries 441 49 49 56 0.0 01 World Trade Volume (goods and services) 6/ 10.4 5.4 24 3.4 -01 -0.3 Advanced Economies 94 66 23 27 0.0 -04 Emerging Market and Developing Economies 124 34 26 46 03 0.0 Commodity Prices 7/ 65.8 39.8 -16.2 71 33 -0.9 11.2 -98 59 Nonfuel (average based on world commodity import weights) 26.4 70 -6.3 -0.4 -01 03 -2.0 14 -0.2" }, { - "type": "NarrativeText", - "element_id": "72d289ea524eebcd8f195a8afda1c223", + "type": "UncategorizedText", + "element_id": "1bea20e1df19b12013976de2b5e0e3d1", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1471,13 +1417,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 6 + "page_number": 7 }, - "text": "In advanced economies, annual average inflation is projected to decline from 7.3 percent in 2022 to 4.6 percent in 2023 and 2.6 percent in 2024––above target in several cases. In emerging market and developing economies, projected annual inflation declines from 9.9 percent in 2022 to 8.1 percent in 2023 and 5.5 percent in 2024, above the 4.9 percent pre-pandemic (2017–19) average. 
In low-income developing countries, inflation is projected to moderate from 14.2 percent in 2022 to 8.6 percent in 2024––still high, but close to the pre-pandemic average." + "text": "2021" }, { - "type": "NarrativeText", - "element_id": "d24af8f44bd419665bb4ab6efef34fed", + "type": "UncategorizedText", + "element_id": "b432234c878eb484525dbb0c9be461fe", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1489,13 +1435,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 6 + "page_number": 7 }, - "text": "About 84 percent of countries are expected to have lower headline (consumer price index) inflation in 2023 than in 2022. Global inflation is set to fall from 8.8 percent in 2022 (annual average) to 6.6 percent in 2023 and 4.3 percent in 2024––above pre-pandemic (2017–19) levels of about 3.5 percent. The projected disinflation partly reflects declining international fuel and nonfuel commodity prices due to weaker global demand. It also reflects the cooling effects of monetary policy tightening on underlying (core) inflation, which globally is expected to decline from 6.9 percent in the fourth quarter of 2022 (year over year) to 4.5 percent by the fourth quarter of 2023. Still, disinflation will take time: by 2024, projected annual average headline and core inflation will, respectively, still be above pre-pandemic levels in 82 percent and 86 percent of economies." 
+ "text": "65.8 26.4" }, { - "type": "Title", - "element_id": "11ebd9f4c9a7cdbac41f8f7399d3950e", + "type": "UncategorizedText", + "element_id": "e4fe15854d6650b5b102d8b1c11eb0ba", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1507,13 +1453,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 6 + "page_number": 7 }, - "text": "Risks to the Outlook" + "text": "10.4 9.4 12.1" }, { - "type": "NarrativeText", - "element_id": "818b1bd0fa9714f9ce4623897ba422a8", + "type": "UncategorizedText", + "element_id": "2a9680555d457b6da4b6748492bb6f3d", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1525,13 +1471,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 6 + "page_number": 7 }, - "text": "The balance of risks to the global outlook remains tilted to the downside, with scope for lower growth and higher inflation, but adverse risks have moderated since the October 2022 World Economic Outlook." + "text": "5.4 5.9 5.3 2.6 6.8 6.7 5.5 2.1 7.6 5.0 5.3" }, { - "type": "Title", - "element_id": "8ae18586f23aa212e66aeb12a5638609", + "type": "UncategorizedText", + "element_id": "a7143daa9de8af6e0c465ca1354d45b6", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1543,13 +1489,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 6 + "page_number": 7 }, - "text": "International Monetary Fund | January 2023." 
+ "text": "6.7 7.4 8.4 8.7 6.9 4.7 7.0 5.0 4.7 4.5 3.2 4.7 3.6 4.9" }, { "type": "UncategorizedText", - "element_id": "ef2d127de37b942baad06145e54b0c61", + "element_id": "dbc6d298b0672b8176de90a623844b7f", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1561,13 +1507,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 6 + "page_number": 7 }, - "text": "5" + "text": "6.0 5.5 3.8 4.1 7.0 4.1" }, { - "type": "Title", - "element_id": "95af4f3feb2d03b2310ce31abc0c435d", + "type": "UncategorizedText", + "element_id": "69dfc187e2e6d907a0546f7e76f8ee3f", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1581,11 +1527,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023" + "text": "6.2" }, { - "type": "NarrativeText", - "element_id": "1ad611b76683e54171ae0b1fddd827ca", + "type": "Title", + "element_id": "b88d850d87e55cb1fd14ae67e5644d57", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1599,11 +1545,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "Table 1. 
Overview of the World Economic Outlook Projections (Percent change, unless noted otherwise)" + "text": "Estimate 2022" }, { - "type": "Table", - "element_id": "63bdc79def2500227001ac95d78727ab", + "type": "UncategorizedText", + "element_id": "53bcbc5ff007dd49a07f6fb79ef96ef9", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1617,11 +1563,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "Difference from October 2022 Q4 over Q4 2/ Estimate___ Projections WEO Projections 1/ Estimate Projections 2021 2022 2023 2024 2023 2024 2022 2023 2024 World Output 6.2 34 29 34 0.2 0.1 1.9 3.2 3.0 Advanced Economies 5.4 27 1.2 14 04 0.2 1.3 14 1.6 United States 5.9 2.0 14 1.0 04 -0.2 07 1.0 13 Euro Area 5.3 3.5 07 16 0.2 -0.2 19 0.5 24 Germany 26 19 01 14 04 0.1 14 0.0 23 France 68 26 07 16 0.0 0.0 0.5 09 18 Italy 67 3.9 06 0.9 08 -04 21 0.1 1.0 Spain 5.5 5.2 14 24 -0.1 -0.2 21 13 28 Japan 21 14 18 0.9 0.2 -04 17 1.0 1.0 United Kingdom 76 41 -06 0.9 -0.9 03 04 -05 18 Canada 5.0 3.5 15 15 0.0 0.1 23 12 1.9 Other Advanced Economies 3/ 5.3 28 20 24 -03 02 14 2a 2.2 Emerging Market and Developing Economies 67 3.9 40 42 0.3 -0.1 25 5.0 4A Emerging and Developing Asia 74 43 5.3 5.2 04 0.0 3.4 6.2 49 China 84 3.0 5.2 45 08 0.0 29 5.9 41 India 4/ 87 68 61 68 0.0 0.0 43 70 7A Emerging and Developing Europe 69 07 15 26 0.9 01 -2.0 3.5 28 Russia 47 -2.2 0.3 21 26 06 441 1.0 2.0 Latin America and the Caribbean 7.0 3.9 18 2a 04 0.3 26 1.9 19 Brazil 5.0 34 12 15 0.2 -04 28 0.8 22 Mexico 47 34 47 16 05 -0.2 37 14 1.9 Middle East and Central Asia 45 5.3 3.2 37 -04 0.2 . . . 
Saudi Arabia 3.2 87 26 34 -11 0.5 46 27 35 Sub-Saharan Africa 47 38 38 41 04 0.0 = ao ao Nigeria 3.6 3.0 3.2 29 0.2 0.0 26 31 29 South Africa 49 26 12 13 01 0.0 3.0 0.5 18 Memorandum World Growth Based on Market Exchange Rates 6.0 3.41 24 25 03 -0.1 17 25 25 European Union 5.5 37 07 18 0.0 -0.3 18 1.2 2.0 ASEAN-5 5/ 3.8 5.2 43 47 0.2 -0.2 37 57 40 Middle East and North Africa 41 54 3.2 35 -04 0.2 a . . Emerging Market and Middle-Income Economies 70 38 40 44 04 0.0 25 5.0 44 Low-Income Developing Countries 441 49 49 56 0.0 01 World Trade Volume (goods and services) 6/ 10.4 5.4 24 3.4 -01 -0.3 Advanced Economies 94 66 23 27 0.0 -04 Emerging Market and Developing Economies 124 34 26 46 03 0.0 Commodity Prices Oil 7/ 65.8 39.8 -16.2 71 33 -0.9 11.2 -98 59 Nonfuel (average based on world commodity import weights) 26.4 70 -6.3 -0.4 -01 03 -2.0 14 -0.2 World Consumer Prices 8/ 47 88 6.6 43 04 0.2 9.2 5.0 3.5 Advanced Economies 9/ 34 73 46 26 0.2 02 78 31 23 Emerging Market and Developing Economies 8/ 5.9 99 84 5.5 0.0 02 10.4 66 45," + "text": "3.9 4.3 3.0 6.8 0.7 –2.2 3.9 3.1 3.1 5.3 8.7 3.8 3.0 2.6" }, { - "type": "Title", - "element_id": "fcadc00fe663ee0e7818b0ffc5c46948", + "type": "UncategorizedText", + "element_id": "1baf3bebf4d4c9418858185bd491eb8f", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1635,11 +1581,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "World Output" + "text": "39.8 7.0" }, { "type": "UncategorizedText", - "element_id": "6bb1e757e09d7fa3aba323a375abd047", + "element_id": "743f3bc42f087068035515a8dec4f85a", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1653,11 +1599,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "World Consumer Prices 8/ Advanced Economies 9/ Emerging Market and Developing Economies 8/" + "text": "3.1 3.7 5.2 5.4 3.8 4.9" }, { "type": 
"UncategorizedText", - "element_id": "0c76bc4e35219e2a31b09428cd47d009", + "element_id": "72d73db944cf6d9a5f11d6c073c1dce0", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1671,11 +1617,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "World Trade Volume (goods and services) 6/ Advanced Economies Emerging Market and Developing Economies" + "text": "3.4" }, { - "type": "NarrativeText", - "element_id": "3c0578f4d944258ffa4ffac7615f1ff9", + "type": "UncategorizedText", + "element_id": "e352203d837b1096ee96e1977f1c3d0b", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1689,11 +1635,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "Commodity Prices Oil 7/ Nonfuel (average based on world commodity import weights)" + "text": "5.4 6.6 3.4" }, { - "type": "Title", - "element_id": "ad1094978303f5aa32665083ee1ed934", + "type": "UncategorizedText", + "element_id": "6976f35f9f91b539b46743f37d94014a", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1707,11 +1653,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "Latin America and the Caribbean" + "text": "2.7 2.0 3.5 1.9 2.6 3.9 5.2 1.4 4.1 3.5 2.8" }, { - "type": "Title", - "element_id": "24af2841400373443d80b6c91180918b", + "type": "UncategorizedText", + "element_id": "7268a41308c4276447de2a707b5df73c", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1725,11 +1671,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "Middle East and Central Asia" + "text": "–16.2 –6.3" }, { "type": "Title", - "element_id": "b2800ff802361713acee893ebae272f6", + "element_id": "18665f77847d326417463628d8860261", "metadata": { "data_source": { "url": 
"s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1743,11 +1689,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "Saudi Arabia Sub-Saharan Africa" + "text": "Projections 2023" }, { - "type": "Title", - "element_id": "a4ca51cd6c74adf51f6e9ce60165d047", + "type": "UncategorizedText", + "element_id": "d8236eb6a9bab4f3d37735048ab5aeee", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1761,11 +1707,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "Emerging Market and Developing Economies Emerging and Developing Asia" + "text": "1.2 1.4 0.7 0.1 0.7 0.6 1.1 1.8 –0.6 1.5 2.0" }, { - "type": "Title", - "element_id": "8325885b8155742cebc672e0d7072a7d", + "type": "UncategorizedText", + "element_id": "1ea8f3c3db2cb6c75f21ebf26acc28a5", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1779,11 +1725,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "Emerging and Developing Europe" + "text": "4.0 5.3 5.2 6.1 1.5 0.3 1.8 1.2 1.7 3.2 2.6 3.8 3.2 1.2" }, { - "type": "Title", - "element_id": "6185fd66a4e106814e65c047c15dfb1f", + "type": "UncategorizedText", + "element_id": "f491e65f8d4b8dbec7621fcedaf1b7a4", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1797,11 +1743,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "Advanced Economies United States Euro Area" + "text": "2.9" }, { "type": "UncategorizedText", - "element_id": "9e5246f529e197f84af65bbcd8e0d2a4", + "element_id": "96ccb4fe1ec705d9944d1c1ecf0938ab", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1815,11 +1761,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "Memorandum World Growth Based on Market Exchange Rates European Union ASEAN-5 
5/ Middle East and North Africa Emerging Market and Middle-Income Economies Low-Income Developing Countries" + "text": "2.4 0.7 4.3 3.2 4.0 4.9" }, { - "type": "Title", - "element_id": "7559320d044a32fbb21a7a8da25e9045", + "type": "UncategorizedText", + "element_id": "098d858ff74b2740723330ff6e43edf8", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1833,11 +1779,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "Japan United Kingdom Canada Other Advanced Economies 3/" + "text": "2.4 2.3 2.6" }, { - "type": "Title", - "element_id": "33a3d8ed92b0709ba525369922e51387", + "type": "UncategorizedText", + "element_id": "6557739a67283a8de383fc5c0997fbec", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1851,11 +1797,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "Russia" + "text": "2024" }, { - "type": "Title", - "element_id": "05704f84f4326b5f53a04d62f7ad62fc", + "type": "UncategorizedText", + "element_id": "cf39ab5ed0773cea3681c2ac35e6b706", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1869,11 +1815,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "Nigeria South Africa" + "text": "–7.1 –0.4" }, { - "type": "Title", - "element_id": "d5d29f012a1237803ee7e623a134117a", + "type": "UncategorizedText", + "element_id": "7fdc64e781146808df57eac112860f9b", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1887,11 +1833,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "China India 4/" + "text": "3.4 2.7 4.6" }, { - "type": "Title", - "element_id": "e30a554d7d1cbf308651f8c267ad6872", + "type": "UncategorizedText", + "element_id": "9d1bc5abd6f3e9c4c6ccb572ae521387", "metadata": { "data_source": { "url": 
"s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1905,11 +1851,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "Brazil Mexico" + "text": "4.2 5.2 4.5 6.8 2.6 2.1 2.1 1.5 1.6 3.7 3.4 4.1 2.9 1.3" }, { - "type": "Title", - "element_id": "18231df9f753f2eca887585247231761", + "type": "UncategorizedText", + "element_id": "35efc6ded4e13f29a8d86e4f33294be0", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1923,11 +1869,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "Germany France Italy Spain" + "text": "3.1" }, { "type": "UncategorizedText", - "element_id": "1bea20e1df19b12013976de2b5e0e3d1", + "element_id": "123157612cd26d61b4760a5ecd1f4bfc", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1941,11 +1887,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "2021" + "text": "2.5 1.8 4.7 3.5 4.1 5.6" }, { "type": "UncategorizedText", - "element_id": "e4fe15854d6650b5b102d8b1c11eb0ba", + "element_id": "777e0063772d428bf1c04383b8ad058e", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1959,11 +1905,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "10.4 9.4 12.1" + "text": "1.4 1.0 1.6 1.4 1.6 0.9 2.4 0.9 0.9 1.5 2.4" }, { - "type": "UncategorizedText", - "element_id": "b432234c878eb484525dbb0c9be461fe", + "type": "Title", + "element_id": "1968c7f7ac8a3b0483f733357bb50b16", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1977,11 +1923,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "65.8 26.4" + "text": "WEO Projections 1/" }, { "type": "UncategorizedText", - "element_id": "9db439c530ed3425c0a68724de199942", + "element_id": "d398b29d3dbbb9bf201d4c7e1c19ff9d", "metadata": { "data_source": 
{ "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1995,11 +1941,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "4.7 3.1 5.9" + "text": "2023" }, { "type": "UncategorizedText", - "element_id": "2a9680555d457b6da4b6748492bb6f3d", + "element_id": "e06f96c6cf56b11e98615192247171fa", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2013,11 +1959,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "5.4 5.9 5.3 2.6 6.8 6.7 5.5 2.1 7.6 5.0 5.3" + "text": "0.3 0.4 0.8 0.0 0.9 2.6 0.1 0.2 0.5 –0.4 –1.1 0.1 0.2 0.1" }, { "type": "UncategorizedText", - "element_id": "a7143daa9de8af6e0c465ca1354d45b6", + "element_id": "84bc47d0d0703878a250620230630525", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2031,11 +1977,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "6.7 7.4 8.4 8.7 6.9 4.7 7.0 5.0 4.7 4.5 3.2 4.7 3.6 4.9" + "text": "–3.3 –0.1" }, { "type": "UncategorizedText", - "element_id": "69dfc187e2e6d907a0546f7e76f8ee3f", + "element_id": "d35a737537febb07f01925c873444cbc", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2049,11 +1995,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "6.2" + "text": "–0.1 0.0 –0.3" }, { "type": "UncategorizedText", - "element_id": "dbc6d298b0672b8176de90a623844b7f", + "element_id": "effb80722a72ecff482b7a0d4a027e78", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2067,11 +2013,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "6.0 5.5 3.8 4.1 7.0 4.1" + "text": "0.3 0.0 –0.2 –0.4 0.4 0.0" }, { - "type": "Title", - "element_id": "b88d850d87e55cb1fd14ae67e5644d57", + "type": "UncategorizedText", + "element_id": "f22875edf393e3502ad60c82e81c5933", 
"metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2085,11 +2031,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "Estimate 2022" + "text": "0.1 0.4 0.2 0.4 0.0 0.8 –0.1 0.2 –0.9 0.0 –0.3" }, { "type": "UncategorizedText", - "element_id": "1baf3bebf4d4c9418858185bd491eb8f", + "element_id": "44896b09365746b5f7167ee4d64988a3", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2103,11 +2049,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "39.8 7.0" + "text": "0.2" }, { "type": "UncategorizedText", - "element_id": "53bcbc5ff007dd49a07f6fb79ef96ef9", + "element_id": "6557739a67283a8de383fc5c0997fbec", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2121,11 +2067,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "3.9 4.3 3.0 6.8 0.7 –2.2 3.9 3.1 3.1 5.3 8.7 3.8 3.0 2.6" + "text": "2024" }, { "type": "UncategorizedText", - "element_id": "6976f35f9f91b539b46743f37d94014a", + "element_id": "4d702c47ea48fa0dca98ce691995cc1b", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2139,11 +2085,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "2.7 2.0 3.5 1.9 2.6 3.9 5.2 1.4 4.1 3.5 2.8" + "text": "–0.1 0.0 0.0 0.0 0.1 0.6 –0.3 –0.4 –0.2 0.2 0.5 0.0 0.0 0.0" }, { "type": "UncategorizedText", - "element_id": "743f3bc42f087068035515a8dec4f85a", + "element_id": "037023840d334f9f357a6c3da2b058ff", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2157,11 +2103,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "3.1 3.7 5.2 5.4 3.8 4.9" + "text": "–0.1 –0.3 –0.2 0.2 0.0 0.1" }, { "type": "UncategorizedText", - "element_id": "72d73db944cf6d9a5f11d6c073c1dce0", + 
"element_id": "4e6611d25d5013d40f58a6f82e3aecdf", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2175,11 +2121,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "3.4" + "text": "–0.1" }, { "type": "UncategorizedText", - "element_id": "b7948d6976e997e76e343161b4b5d864", + "element_id": "2f6f72296f8ab115fda4292808436b88", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2193,11 +2139,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "8.8 7.3 9.9" + "text": "–0.2 –0.2 –0.2 –0.1 0.0 –0.4 –0.2 –0.4 0.3 –0.1 –0.2" }, { "type": "UncategorizedText", - "element_id": "e352203d837b1096ee96e1977f1c3d0b", + "element_id": "7ac5e2e700f401ccf7d2c4770d3afd44", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2211,11 +2157,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "5.4 6.6 3.4" + "text": "–0.3 –0.4 0.0" }, { "type": "UncategorizedText", - "element_id": "7268a41308c4276447de2a707b5df73c", + "element_id": "ebb1568088af8b7c7b98878b895decaf", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2229,11 +2175,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "–16.2 –6.3" + "text": "–0.9 0.3" }, { "type": "Title", - "element_id": "18665f77847d326417463628d8860261", + "element_id": "b88d850d87e55cb1fd14ae67e5644d57", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2247,11 +2193,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "Projections 2023" + "text": "Estimate 2022" }, { "type": "UncategorizedText", - "element_id": "d8236eb6a9bab4f3d37735048ab5aeee", + "element_id": "3d5c2c97e00e0c5be2a870cf1cbaac06", "metadata": { "data_source": { "url": 
"s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2265,11 +2211,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "1.2 1.4 0.7 0.1 0.7 0.6 1.1 1.8 –0.6 1.5 2.0" + "text": "11.2 –2.0" }, { "type": "UncategorizedText", - "element_id": "e7ac421147471fe341ae242e7544a44c", + "element_id": "d7b26ee43ca5481505ca9eb7c3b29b2c", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2283,11 +2229,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "6.6 4.6 8.1" + "text": "2.5 3.4 2.9 4.3 –2.0 –4.1 2.6 2.8 3.7 . . . 4.6 . . . 2.6 3.0" }, { "type": "UncategorizedText", - "element_id": "1ea8f3c3db2cb6c75f21ebf26acc28a5", + "element_id": "4d5d14d8c932363fe84036564c6c582b", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2301,11 +2247,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "4.0 5.3 5.2 6.1 1.5 0.3 1.8 1.2 1.7 3.2 2.6 3.8 3.2 1.2" + "text": "1.7 1.8 3.7 . . . 2.5 . . ." }, { "type": "UncategorizedText", - "element_id": "96ccb4fe1ec705d9944d1c1ecf0938ab", + "element_id": "eae9d4d60a1fe2df23f7b65ae3d76ca8", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2319,11 +2265,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "2.4 0.7 4.3 3.2 4.0 4.9" + "text": "1.3 0.7 1.9 1.4 0.5 2.1 2.1 1.7 0.4 2.3 1.4" }, { "type": "UncategorizedText", - "element_id": "f491e65f8d4b8dbec7621fcedaf1b7a4", + "element_id": "708c57a76a5cf81dc197cc1bd612adb2", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2337,11 +2283,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "2.9" + "text": ". . . . . . . . ." 
}, { "type": "UncategorizedText", - "element_id": "098d858ff74b2740723330ff6e43edf8", + "element_id": "eca06fdd26e513a7b8510c8660228504", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2355,11 +2301,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "2.4 2.3 2.6" + "text": "1.9" }, { "type": "Title", - "element_id": "d11a1c04bd3a9891350b4bd94104df58", + "element_id": "18665f77847d326417463628d8860261", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2373,11 +2319,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "Year over Year" + "text": "Projections 2023" }, { "type": "UncategorizedText", - "element_id": "6557739a67283a8de383fc5c0997fbec", + "element_id": "1a009e8c6bb6dada03c326655a15bedf", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2391,11 +2337,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "2024" + "text": "1.1 1.0 0.5 0.0 0.9 0.1 1.3 1.0 –0.5 1.2 2.1" }, { "type": "UncategorizedText", - "element_id": "cf39ab5ed0773cea3681c2ac35e6b706", + "element_id": "4150b86a3fffd48fc159e81c9b7325db", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2409,11 +2355,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "–7.1 –0.4" + "text": "–9.8 1.4" }, { "type": "UncategorizedText", - "element_id": "123157612cd26d61b4760a5ecd1f4bfc", + "element_id": "f4e79a2ba19a5b842cff288f8e4eafd0", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2427,11 +2373,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "2.5 1.8 4.7 3.5 4.1 5.6" + "text": "5.0 6.2 5.9 7.0 3.5 1.0 1.9 0.8 1.1 . . . 2.7 . . . 
3.1 0.5" }, { "type": "UncategorizedText", - "element_id": "9d1bc5abd6f3e9c4c6ccb572ae521387", + "element_id": "3135d2d71bff77be4838a7102bbac5b8", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2445,11 +2391,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "4.2 5.2 4.5 6.8 2.6 2.1 2.1 1.5 1.6 3.7 3.4 4.1 2.9 1.3" + "text": "3.2" }, { "type": "UncategorizedText", - "element_id": "7fdc64e781146808df57eac112860f9b", + "element_id": "98e45a005510dc136e14094ee7ed7faf", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2463,11 +2409,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "3.4 2.7 4.6" + "text": "2.5 1.2 5.7 . . . 5.0 . . ." }, { "type": "UncategorizedText", - "element_id": "35efc6ded4e13f29a8d86e4f33294be0", + "element_id": "708c57a76a5cf81dc197cc1bd612adb2", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2481,11 +2427,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "3.1" + "text": ". . . . . . . . ." 
}, { "type": "UncategorizedText", - "element_id": "4b48b0469ba9682a3e385ee7fbb6bbed", + "element_id": "6557739a67283a8de383fc5c0997fbec", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2499,47 +2445,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "4.3 2.6 5.5" + "text": "2024" }, { "type": "UncategorizedText", - "element_id": "777e0063772d428bf1c04383b8ad058e", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "1.4 1.0 1.6 1.4 1.6 0.9 2.4 0.9 0.9 1.5 2.4" - }, - { - "type": "Title", - "element_id": "aa22eb2e58c7cf45c528550d68e15c51", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "Difference from October 2022" - }, - { - "type": "Title", - "element_id": "1968c7f7ac8a3b0483f733357bb50b16", + "element_id": "301b9fd38725258f32816ff1a855be3e", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2553,11 +2463,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "WEO Projections 1/" + "text": "–5.9 –0.2" }, { "type": "UncategorizedText", - "element_id": "d398b29d3dbbb9bf201d4c7e1c19ff9d", + "element_id": "39b99440eae2f9ee75cf98100c285787", "metadata": { "data_source": { 
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2571,11 +2481,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "2023" + "text": "2.5 2.0 4.0 . . . 4.1 . . ." }, { "type": "UncategorizedText", - "element_id": "effb80722a72ecff482b7a0d4a027e78", + "element_id": "a416ea84421fa7e1351582da48235bac", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2589,11 +2499,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "0.3 0.0 –0.2 –0.4 0.4 0.0" + "text": "3.0" }, { "type": "UncategorizedText", - "element_id": "d35a737537febb07f01925c873444cbc", + "element_id": "07adb8acdd66b5d2490e542ae0604b71", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2607,11 +2517,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "–0.1 0.0 –0.3" + "text": "4.1 4.9 4.1 7.1 2.8 2.0 1.9 2.2 1.9 . . . 3.5 . . . 2.9 1.8" }, { "type": "UncategorizedText", - "element_id": "e06f96c6cf56b11e98615192247171fa", + "element_id": "708c57a76a5cf81dc197cc1bd612adb2", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2625,11 +2535,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "0.3 0.4 0.8 0.0 0.9 2.6 0.1 0.2 0.5 –0.4 –1.1 0.1 0.2 0.1" + "text": ". . . . . . . . ." 
}, { "type": "UncategorizedText", - "element_id": "84bc47d0d0703878a250620230630525", + "element_id": "1776cf91dccdf2cce268fcee416b28f6", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2643,11 +2553,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "–3.3 –0.1" + "text": "1.6 1.3 2.1 2.3 1.8 1.0 2.8 1.0 1.8 1.9 2.2" }, { "type": "UncategorizedText", - "element_id": "f22875edf393e3502ad60c82e81c5933", + "element_id": "6bb1e757e09d7fa3aba323a375abd047", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2661,11 +2571,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "0.1 0.4 0.2 0.4 0.0 0.8 –0.1 0.2 –0.9 0.0 –0.3" + "text": "World Consumer Prices 8/ Advanced Economies 9/ Emerging Market and Developing Economies 8/" }, { "type": "UncategorizedText", - "element_id": "44896b09365746b5f7167ee4d64988a3", + "element_id": "9db439c530ed3425c0a68724de199942", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2679,11 +2589,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "0.2" + "text": "4.7 3.1 5.9" }, { "type": "UncategorizedText", - "element_id": "5277334fd8abe869f6a8de2e43942c9d", + "element_id": "b7948d6976e997e76e343161b4b5d864", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2697,11 +2607,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "0.1 0.2 0.0" + "text": "8.8 7.3 9.9" }, { "type": "UncategorizedText", - "element_id": "6557739a67283a8de383fc5c0997fbec", + "element_id": "e7ac421147471fe341ae242e7544a44c", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2715,1469 +2625,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "2024" + "text": "6.6 4.6 
8.1" }, { "type": "UncategorizedText", - "element_id": "4d702c47ea48fa0dca98ce691995cc1b", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "–0.1 0.0 0.0 0.0 0.1 0.6 –0.3 –0.4 –0.2 0.2 0.5 0.0 0.0 0.0" - }, - { - "type": "UncategorizedText", - "element_id": "7ac5e2e700f401ccf7d2c4770d3afd44", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "–0.3 –0.4 0.0" - }, - { - "type": "UncategorizedText", - "element_id": "037023840d334f9f357a6c3da2b058ff", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "–0.1 –0.3 –0.2 0.2 0.0 0.1" - }, - { - "type": "UncategorizedText", - "element_id": "4e6611d25d5013d40f58a6f82e3aecdf", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "–0.1" - }, - { - "type": "UncategorizedText", - "element_id": "ebb1568088af8b7c7b98878b895decaf", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "–0.9 0.3" - }, - { - "type": "UncategorizedText", - "element_id": "2f6f72296f8ab115fda4292808436b88", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "–0.2 –0.2 –0.2 –0.1 0.0 –0.4 –0.2 –0.4 0.3 –0.1 –0.2" - }, - { - "type": "UncategorizedText", - "element_id": "44f0ab7953bb0b3696b9fa3cf0682f35", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "0.2 0.2 0.2" - }, - { - "type": "Title", - "element_id": "b88d850d87e55cb1fd14ae67e5644d57", - "metadata": { - "data_source": { - "url": 
"s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "Estimate 2022" - }, - { - "type": "UncategorizedText", - "element_id": "08e781dd2b6499b1ac8105a47f3520cc", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "9.2 7.8 10.4" - }, - { - "type": "UncategorizedText", - "element_id": "d7b26ee43ca5481505ca9eb7c3b29b2c", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "2.5 3.4 2.9 4.3 –2.0 –4.1 2.6 2.8 3.7 . . . 4.6 . . . 
2.6 3.0" - }, - { - "type": "UncategorizedText", - "element_id": "3d5c2c97e00e0c5be2a870cf1cbaac06", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "11.2 –2.0" - }, - { - "type": "UncategorizedText", - "element_id": "708c57a76a5cf81dc197cc1bd612adb2", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": ". . . . . . . . ." 
- }, - { - "type": "UncategorizedText", - "element_id": "eae9d4d60a1fe2df23f7b65ae3d76ca8", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "1.3 0.7 1.9 1.4 0.5 2.1 2.1 1.7 0.4 2.3 1.4" - }, - { - "type": "UncategorizedText", - "element_id": "eca06fdd26e513a7b8510c8660228504", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "1.9" - }, - { - "type": "UncategorizedText", - "element_id": "4d5d14d8c932363fe84036564c6c582b", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "1.7 1.8 3.7 . . . 2.5 . . ." 
- }, - { - "type": "Title", - "element_id": "8c327a62ae0e925498f5c68b819b32b4", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "Q4 over Q4 2/" - }, - { - "type": "Title", - "element_id": "18665f77847d326417463628d8860261", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "Projections 2023" - }, - { - "type": "UncategorizedText", - "element_id": "4150b86a3fffd48fc159e81c9b7325db", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "–9.8 1.4" - }, - { - "type": "UncategorizedText", - "element_id": "1a009e8c6bb6dada03c326655a15bedf", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": 
"2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "1.1 1.0 0.5 0.0 0.9 0.1 1.3 1.0 –0.5 1.2 2.1" - }, - { - "type": "UncategorizedText", - "element_id": "e586cf66e92b356a4611ee2ffdf85a16", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "5.0 3.1 6.6" - }, - { - "type": "UncategorizedText", - "element_id": "98e45a005510dc136e14094ee7ed7faf", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "2.5 1.2 5.7 . . . 5.0 . . ." 
- }, - { - "type": "UncategorizedText", - "element_id": "3135d2d71bff77be4838a7102bbac5b8", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "3.2" - }, - { - "type": "UncategorizedText", - "element_id": "708c57a76a5cf81dc197cc1bd612adb2", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": ". . . . . . . . ." - }, - { - "type": "UncategorizedText", - "element_id": "f4e79a2ba19a5b842cff288f8e4eafd0", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "5.0 6.2 5.9 7.0 3.5 1.0 1.9 0.8 1.1 . . . 2.7 . . . 
3.1 0.5" - }, - { - "type": "UncategorizedText", - "element_id": "6557739a67283a8de383fc5c0997fbec", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "2024" - }, - { - "type": "UncategorizedText", - "element_id": "301b9fd38725258f32816ff1a855be3e", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "–5.9 –0.2" - }, - { - "type": "UncategorizedText", - "element_id": "07adb8acdd66b5d2490e542ae0604b71", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "4.1 4.9 4.1 7.1 2.8 2.0 1.9 2.2 1.9 . . . 3.5 . . . 
2.9 1.8" - }, - { - "type": "UncategorizedText", - "element_id": "39b99440eae2f9ee75cf98100c285787", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "2.5 2.0 4.0 . . . 4.1 . . ." - }, - { - "type": "UncategorizedText", - "element_id": "41d85a7cc007a9c34136a786d6e61c15", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "3.5 2.3 4.5" - }, - { - "type": "UncategorizedText", - "element_id": "a416ea84421fa7e1351582da48235bac", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "3.0" - }, - { - "type": "UncategorizedText", - "element_id": "1776cf91dccdf2cce268fcee416b28f6", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "1.6 1.3 2.1 2.3 1.8 1.0 2.8 1.0 1.8 1.9 2.2" - }, - { - "type": "UncategorizedText", - "element_id": "708c57a76a5cf81dc197cc1bd612adb2", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": ". . . . . . . . ." - }, - { - "type": "NarrativeText", - "element_id": "df59a495ef85c5f70c5ba5356caf764a", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "Upside risks—Plausible upside risks include more favorable surprises to domestic spending—as in the third quarter of 2022—which, however, would increase inflation further. 
At the same time, there is room for an upside scenario with lower-than-expected inflation and less monetary tightening:" - }, - { - "type": "NarrativeText", - "element_id": "dd295fca8aff81058c48312a022b69b2", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "Note: Real effective exchange rates are assumed to remain constant at the levels prevailing during October 26, 2022--November 23, 2022. Economies are listed on the basis of economic size. The aggregated quarterly data are seasonally adjusted. WEO = World Economic Outlook. 1/ Difference based on rounded figures for the current and October 2022 WEO forecasts. Countries whose forecasts have been updated relative to October 2022 WEO forecasts account for approximately 90 percent of world GDP measured at purchasing-power-parity weights. 2/ For World Output (Emerging Market and Developing Economies), the quarterly estimates and projections account for approximately 90 percent (80 percent) of annual world (emerging market and developing economies') output at purchasing-power-parity weights. 3/ Excludes the Group of Seven (Canada, France, Germany, Italy, Japan, United Kingdom, United States) and euro area countries. 4/ For India, data and projections are presented on a fiscal year basis, with FY 2022/23 (starting in April 2022) shown in the 2022 column. India's growth projections are 5.4 percent in 2023 and 6.8 percent in 2024 based on calendar year. 5/ Indonesia, Malaysia, Philippines, Singapore, Thailand. 6/ Simple average of growth rates for export and import volumes (goods and services). 
7/ Simple average of prices of UK Brent, Dubai Fateh, and West Texas Intermediate crude oil. The average assumed price of oil in US dollars a barrel, based on futures markets (as of November 29, 2022), is $81.13 in 2023 and $75.36 in 2024. 8/ Excludes Venezuela. 9/ The inflation rate for the euro area is 5.7% in 2023 and 3.3% in 2024, that for Japan is 2.8% in 2023 and 2.0% in 2024, and that for the United States is 4.0% in 2023 and 2.2% in 2024." - }, - { - "type": "ListItem", - "element_id": "cf20f95904c591b6ac4ccd5d43fa8a98", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "Pent-up demand boost: Fueled by the stock of excess private savings from the pandemic fiscal" - }, - { - "type": "ListItem", - "element_id": "000425958dcafe9c9a9c501237d8c4d3", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "support and, in many cases, still-tight labor markets and solid wage growth, pent-up demand remains an upside risk to the growth outlook. In some advanced economies, recent data show that households are still on net adding to their stock of excess savings (as in some euro area countries and the United Kingdom) or have ample savings left (as in the United States). 
This leaves scope for a further boost to consumption—particularly of services, including tourism." - }, - { - "type": "UncategorizedText", - "element_id": "e7f6c011776e8db7cd330b54174fd76f", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "6" - }, - { - "type": "Title", - "element_id": "b3080428cb4e8896623bf36c001e868a", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "International Monetary Fund | January 2023" - }, - { - "type": "Title", - "element_id": "95af4f3feb2d03b2310ce31abc0c435d", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023" - }, - { - "type": "NarrativeText", - "element_id": "d379a79a55cecddeed62b21eb6a0ff00", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": 
{ - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "However, the boost to demand could stoke core inflation, leading to even tighter monetary policies and a stronger-than-expected slowdown later on. Pent-up demand could also fuel a stronger rebound in China." - }, - { - "type": "ListItem", - "element_id": "2bbe57e6c291db638d3fcddca9e0199a", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "Faster disinflation: An easing in labor market pressures in some advanced economies due to" - }, - { - "type": "NarrativeText", - "element_id": "3f9155fad634c620bd9b820132e20935", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "falling vacancies could cool wage inflation without necessarily increasing unemployment. A sharp fall in the prices of goods, as consumers shift back to services, could further push down inflation. Such developments could imply a “softer” landing with less monetary tightening." 
- }, - { - "type": "NarrativeText", - "element_id": "a2f806b25a06969405637298b4c85139", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "Downside risks—Numerous downside risks continue to weigh on the global outlook, lowering growth while, in a number of cases, adding further to inflation:" - }, - { - "type": "ListItem", - "element_id": "90a90e12a4c6b8b74d3c8d20a76f22dc", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "China’s recovery stalling: Amid still-low population immunity levels and insufficient hospital" - }, - { - "type": "ListItem", - "element_id": "42ac57e394bf7c98d908745cefce0b80", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "War in Ukraine escalating: An escalation of the war in Ukraine remains a major source of" - }, - { - "type": "NarrativeText", - "element_id": "1bbcee85386321e6e8235a64d4c34d73", - "metadata": { - "data_source": { - 
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "capacity, especially outside the major urban areas, significant health consequences could hamper the recovery. A deepening crisis in the real estate market remains a major source of vulnerability, with risks of widespread defaults by developers and resulting financial sector instability. Spillovers to the rest of the world would operate primarily through lower demand and potentially renewed supply chain problems." - }, - { - "type": "NarrativeText", - "element_id": "fdb59d523afa92db3942dabc88d94fc4", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "vulnerability, particularly for Europe and lower-income countries. Europe is facing lower-than- anticipated gas prices, having stored enough gas to make shortages unlikely this winter. However, refilling storage with much-diminished Russian flows will be challenging ahead of next winter, particularly if it is a very cold one and China’s energy demand picks up, causing price spikes. A possible increase in food prices from a failed extension of the Black Sea grain initiative would put further pressure on lower-income countries that are experiencing food insecurity and have limited budgetary room to cushion the impact on households and businesses. 
With elevated food and fuel prices, social unrest may increase." - }, - { - "type": "ListItem", - "element_id": "2d14934d52ff357c52e9ae1c38f7390e", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "Debt distress: Since October, sovereign spreads for emerging market and developing economies have modestly declined on the back of an easing in global financial conditions (Box 1) and dollar depreciation. About 15 percent of low-income countries are estimated to be in debt distress, with an additional 45 percent at high risk of debt distress and about 25 percent of emerging market economies also at high risk. The combination of high debt levels from the pandemic, lower growth, and higher borrowing costs exacerbates the vulnerability of these economies, especially those with significant near-term dollar financing needs. Inflation persisting: Persistent labor market tightness could translate into stronger-than-expected wage growth. Higher-than-expected oil, gas, and food prices from the war in Ukraine or from a faster rebound in China’s growth could again raise headline inflation and pass through into underlying inflation. Such developments could cause inflation expectations to de-anchor and require an even tighter monetary policy." 
- }, - { - "type": "ListItem", - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "" - }, - { - "type": "ListItem", - "element_id": "33ccff3014b460178e62d9c8021fd728", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "Sudden financial market repricing: A premature easing in financial conditions in response to lower headline inflation data could complicate anti-inflation policies and necessitate additional monetary tightening. For the same reason, unfavorable inflation data releases could trigger sudden repricing of assets and increase volatility in financial markets. Such movements could strain liquidity and the functioning of critical markets, with ripple effects on the real economy." 
- }, - { - "type": "ListItem", - "element_id": "75bd22ee0ba778cc3a616ed0a9b42292", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "Geopolitical fragmentation: The war in Ukraine and the related international sanctions aimed at  pressuring Russia to end hostilities are splitting the world economy into blocs and reinforcing" - }, - { - "type": "NarrativeText", - "element_id": "810e5a86eae657e179ac8da86f317a62", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "earlier geopolitical tensions, such as those associated with the US-China trade dispute." - }, - { - "type": "Title", - "element_id": "8ae18586f23aa212e66aeb12a5638609", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "International Monetary Fund | January 2023." 
- }, - { - "type": "UncategorizedText", - "element_id": "7902699be42c8a8e46fbbb4501726517", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "7" - }, - { - "type": "Title", - "element_id": "95af4f3feb2d03b2310ce31abc0c435d", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 9 - }, - "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023" - }, - { - "type": "NarrativeText", - "element_id": "6684fee3e3cd949ec59e7444a0c3fd0c", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 9 - }, - "text": "Fragmentation could intensify—with more restrictions on cross-border movements of capital, workers, and international payments—and could hamper multilateral cooperation on providing global public goods.1 The costs of such fragmentation are especially high in the short term, as replacing disrupted cross-border flows takes time." 
- }, - { - "type": "Title", - "element_id": "a81cc4e3ca23fd16254e2b858cdcb00a", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 9 - }, - "text": "Policy Priorities" - }, - { - "type": "NarrativeText", - "element_id": "1c464362698203e7245bdaf33c388a80", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 9 - }, - "text": "Securing global disinflation: For most economies, the priority remains achieving a sustained reduction in inflation toward target levels. Raising real policy rates and keeping them above their neutral levels until underlying inflation is clearly declining would ward off risks of inflation expectations de- anchoring. Clear central bank communication and appropriate reactions to shifts in the data will help keep inflation expectations anchored and lessen wage and price pressures. Central banks’ balance sheets will need to be unwound carefully, amid market liquidity risks. Gradual and steady fiscal tightening would contribute to cooling demand and limit the burden on monetary policy in the fight against inflation. In countries where output remains below potential and inflation is in check, maintaining monetary and fiscal accommodation may be appropriate." 
- }, - { - "type": "NarrativeText", - "element_id": "d6138134f71f953a9da2083154e2629e", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 9 - }, - "text": "Containing the reemergence of COVID-19: Addressing the ongoing pandemic requires coordinated efforts to boost vaccination and medicine access in countries where coverage remains low as well as the deployment of pandemic preparedness measures—including a global push toward sequencing and sharing data. In China, focusing vaccination efforts on vulnerable groups and maintaining sufficiently high coverage of boosters and antiviral medicines would minimize the risks of severe health outcomes and safeguard the recovery, with favorable cross-border spillovers." - }, - { - "type": "NarrativeText", - "element_id": "2457fbbf5aa862b5a8b45d070f9114cb", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 9 - }, - "text": "Ensuring financial stability: Depending on country circumstances, macroprudential tools can be used to tackle pockets of elevated financial sector vulnerabilities. Monitoring housing sector developments and conducting stress tests in economies where house prices have increased significantly over the past few years are warranted. 
In China, central government action to resolve the property crisis and reduce the risk of spillovers to financial stability and growth is a priority, including by strengthening temporary mechanisms to protect presale homebuyers from the risk of non-delivery and by restructuring troubled developers. Globally, financial sector regulations introduced after the global financial crisis have contributed to the resilience of banking sectors throughout the pandemic, but there is a need to address data and supervisory gaps in the less-regulated nonbank financial sector, where risks may have built up inconspicuously. Recent turmoil in the crypto space also highlights the urgent need to introduce common standards and reinforce oversight of crypto assets." - }, - { - "type": "NarrativeText", - "element_id": "bcef6ce9e3d4c015db21955dc4f6ce42", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 9 - }, - "text": "Restoring debt sustainability: Lower growth and higher borrowing costs have raised public debt ratios in several economies. Where debt is unsustainable, implementing restructuring or reprofiling early on as part of a package of reforms (including fiscal consolidation and growth-enhancing supply-side reforms) can avert the need for more disruptive adjustment later." 
- }, - { - "type": "NarrativeText", - "element_id": "defb87cb8f10236768732a1e5fe9519f", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 9 - }, - "text": "Supporting the vulnerable: The surge in global energy and food prices triggered a cost-of-living crisis. Governments acted swiftly with support to households and firms, which helped cushion effects on growth and at times limited the pass-through from energy prices to headline inflation through price" - }, - { - "type": "UncategorizedText", - "element_id": "40430ee7d1dc6b176a60b88df18a66c9", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 9 - }, - "text": "1 See “Geo-Economic Fragmentation and the Future of Multilateralism,” IMF Staff Discussion Note 2023/001." 
- }, - { - "type": "UncategorizedText", - "element_id": "2c624232cdd221771294dfbb310aca00", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 9 - }, - "text": "8" - }, - { - "type": "Title", - "element_id": "b3080428cb4e8896623bf36c001e868a", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 9 - }, - "text": "International Monetary Fund | January 2023" - }, - { - "type": "Title", - "element_id": "95af4f3feb2d03b2310ce31abc0c435d", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 10 - }, - "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023" - }, - { - "type": "NarrativeText", - "element_id": "2e9a0eaddd75095d1bbb4fda6f2c4feb", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 10 - }, - "text": "controls. The temporary and broad-based measures are becoming increasingly costly and should be withdrawn and replaced by targeted approaches. Preserving the energy price signal will encourage a reduction in energy consumption and limit the risks of shortages. Targeting can be achieved through social safety nets such as cash transfers to eligible households based on income or demographics or by transfers through electricity companies based on past energy consumption. Subsidies should be temporary and offset by revenue-generating measures, including one-time solidarity taxes on high- income households and companies, where appropriate." - }, - { - "type": "NarrativeText", - "element_id": "da0ef04b13917f67583290e9ba57e375", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 10 - }, - "text": "Reinforcing supply: Supply-side policies could address the key structural factors impeding growth— including market power, rent seeking, rigid regulation and planning, and inefficient education—and could help build resilience, reduce bottlenecks, and alleviate price pressures. A concerted push for investment along the supply chain of green energy technologies would bolster energy security and help advance progress on the green transition." 
- }, - { - "type": "NarrativeText", - "element_id": "c64f29a38dae74989484539db014364f", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 10 - }, - "text": "Strengthening multilateral cooperation—Urgent action is needed to limit the risks stemming from geopolitical fragmentation and to ensure cooperation on fundamental areas of common interest:" - }, - { - "type": "NarrativeText", - "element_id": "cb704f1b6d23bfe23f6b4109c471ac8b", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 10 - }, - "text": "distribution of vaccines and treatments. Public support for the development of new vaccine technologies and the design of systematic responses to future epidemics also remains essential.  Addressing debt distress: Progress has been made for countries that requested debt treatment under the Group of Twenty’s Common Framework initiative, and more will be needed to strengthen it. It is also necessary to agree on mechanisms to resolve debt in a broader set of economies, including middle-income countries that are not eligible under the Common Framework. Non– Paris Club and private creditors have a crucial role to play in ensuring coordinated, effective, and timely debt resolution processes." 
- }, - { - "type": "ListItem", - "element_id": "bd7674df887463bc9f05c8030a151dea", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 10 - }, - "text": "Restraining the pandemic: Global coordination is needed to resolve bottlenecks in the global" - }, - { - "type": "ListItem", - "element_id": "af6eef18ec41f4980c1a4cbb5b7d4fec", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 10 - }, - "text": "Strengthening global trade: Strengthening the global trading system would address risks associated" - }, - { - "type": "NarrativeText", - "element_id": "e6f343736720ae4f9bf5202294c7c9fc", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 10 - }, - "text": "trade fragmentation. 
This can be achieved by rolling back restrictions on food exports and other essential items such as medicine, upgrading World Trade Organization (WTO) rules in critical areas such as agricultural and industrial subsidies, concluding and implementing new WTO-based agreements, and fully restoring the WTO dispute settlement system." - }, - { - "type": "Title", - "element_id": "0695b563acde461fc2f8d9aebccf35c7", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 10 - }, - "text": "with" - }, - { - "type": "ListItem", - "element_id": "d6f6afcf055ed3084a0fac1093458c88", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 10 - }, - "text": "Using the global financial safety net: With the cascading of shocks to the global economy, using the global financial safety net to its fullest extent is appropriate, including by proactively utilizing the IMF’s precautionary financial arrangements and channeling aid from the international community to low-income countries facing shocks." 
- }, - { - "type": "ListItem", - "element_id": "089c5759e7030e34a3b537d9e20bcd13", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 10 - }, - "text": "Speeding the green transition: To meet governments’ climate change goals, it is necessary to swiftly" - }, - { - "type": "NarrativeText", - "element_id": "77ac1fdd449fba59a90d978745964463", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 10 - }, - "text": "implement credible mitigation policies. International coordination on carbon pricing or equivalent policies would facilitate faster decarbonization. Global cooperation is needed to build resilience to climate shocks, including through aid to vulnerable countries." 
- }, - { - "type": "Title", - "element_id": "b3080428cb4e8896623bf36c001e868a", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 10 - }, - "text": "International Monetary Fund | January 2023" - }, - { - "type": "UncategorizedText", - "element_id": "19581e27de7ced00ff1ce50b2047e7a5", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 10 - }, - "text": "9" - }, - { - "type": "Image", - "element_id": "0e1f5e74082ed333d383fa20680f0909", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 11 - }, - "text": "BOX 1. 
GLOBAL FINANCIAL STABILITY UPDATE" - }, - { - "type": "NarrativeText", - "element_id": "8b350f34fe437a1447f2722c30d1e418", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": 265756457651539296174748931590365722430, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "date_modified": "2023-02-14T07:31:28" - }, - "filetype": "application/pdf", - "page_number": 11 - }, - "text": "—— — other" - }, - { - "type": "NarrativeText", - "element_id": "a2fa3a13e51ab7dd0859ee2c869b70e5", + "element_id": "4b48b0469ba9682a3e385ee7fbb6bbed", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4189,13 +2641,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 7 }, - "text": "Overall, financial stability risks remain elevated as investors reassess their inflation and monetary policy outlook. Global financial conditions have eased somewhat since the October 2022 Global Financial Stability Report, driven largely by changing market expectations regarding the interest rate cycle (Figure 1.1). While the expected peak in policy rates—the terminal rate—has risen, markets now also expect the subsequent fall in rates will be significantly faster, and further, than what was forecast in October (Figure 1.2). As a result, global bond yields have recently declined, corporate spreads have tightened, and equity markets have rebounded. That said, central banks are likely to continue to tighten monetary policy to fight inflation, and concerns that this restrictive stance could tip the economy into a recession have increased in major advanced economies." 
+ "text": "4.3 2.6 5.5" }, { - "type": "NarrativeText", - "element_id": "261bebc8fb9b3ed5146d23644639bc26", + "type": "UncategorizedText", + "element_id": "5277334fd8abe869f6a8de2e43942c9d", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4207,13 +2659,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 7 }, - "text": "Given the tension between rising recession risks and monetary policy uncertainty, markets have seen significant volatility. While many central banks in advanced economies have stepped down the size of hikes, they have also explicitly stated they will need © —— Sources: Bloomberg Finance L.P.; and IMF staff calculations. Note: GFSR = Global Financial Stability Report. to keep rates higher, for a longer period of time, to tamp down inflation. Risk assets could face significant declines if earnings retrench further or if investors reassess theit outlook for monetary policy given central bank communications. Globally, the partial reversal of the dollar rally has contributed to recent easing due to improved risk appetite, and some emerging market central banks have paused tightening amid tentative signs that inflation may have peaked." + "text": "0.1 0.2 0.0" }, { - "type": "NarrativeText", - "element_id": "e118be83abfed92b8969eca98bb4d53b", + "type": "UncategorizedText", + "element_id": "44f0ab7953bb0b3696b9fa3cf0682f35", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4225,13 +2677,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 7 }, - "text": "Slowing aggregate demand and weaker-than-expected inflation prints in some major advanced economies have prompted investors’ anticipation of a further reduction in the pace of future policy rate hikes. 
Corporate earnings forecasts have been cut due to headwinds from slowing demand, and margins have contracted across most regions. In addition, survey-based probabilities of recession have been increasing, particularly in the United States and Europe. However, upside risks to the inflation outlook remain. Despite the recent moderation in headline inflation, core inflation remains stubbornly high across most regions, labor markets are still tight, energy prices remain pressured by Russia’s ongoing war in Ukraine, and supply chain disruptions may reappear. To keep these risks in check, financial conditions will likely need to tighten further. If not, central banks may need to increase policy rates even more in order to achieve their inflation objectives." + "text": "0.2 0.2 0.2" }, { "type": "UncategorizedText", - "element_id": "6b86b273ff34fce19d6b804eff5a3f57", + "element_id": "08e781dd2b6499b1ac8105a47f3520cc", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4243,13 +2695,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 7 }, - "text": "1" + "text": "9.2 7.8 10.4" }, { "type": "UncategorizedText", - "element_id": "d4735e3a265e16eee03f59718b9b5d03", + "element_id": "e586cf66e92b356a4611ee2ffdf85a16", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4261,13 +2713,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 7 }, - "text": "2" + "text": "5.0 3.1 6.6" }, { "type": "UncategorizedText", - "element_id": "4e07408562bedb8b60ce05c1decfe3ad", + "element_id": "41d85a7cc007a9c34136a786d6e61c15", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4279,13 +2731,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - 
"page_number": 11 + "page_number": 7 }, - "text": "3" + "text": "3.5 2.3 4.5" }, { - "type": "UncategorizedText", - "element_id": "4b227777d4dd1fc61c6f884f48641d02", + "type": "NarrativeText", + "element_id": "46c8e0c55b163d73d3d2766be8d1bf8d", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4297,13 +2749,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 7 }, - "text": "4" + "text": "Note: Real effective exchange rates are assumed to remain constant at the levels prevailing during October 26, 2022--November 23, 2022. Economies are listed on the basis of economic size. The aggregated quarterly data are seasonally adjusted. WEO = World Economic Outlook. 1/ Difference based on rounded figures for the current and October 2022 WEO forecasts. Countries whose forecasts have been updated relative to October 2022 WEO forecasts account for approximately 90 percent of world GDP measured at purchasing-power-parity weights. 2/ For World Output (Emerging Market and Developing Economies), the quarterly estimates and projections account for approximately 90 percent (80 percent) of annual world (emerging market and developing economies') output at purchasing-power-parity weights. 3/ Excludes the Group of Seven (Canada, France, Germany, Italy, Japan, United Kingdom, United States) and euro area countries. 4/ For India, data and projections are presented on a fiscal year basis, with FY 2022/23 (starting in April 2022) shown in the 2022 column. India's growth projections are 5.4 percent in 2023 and 6.8 percent in 2024 based on calendar year. 5/ Indonesia, Malaysia, Philippines, Singapore, Thailand. 6/ Simple average of growth rates for export and import volumes (goods and services). 7/ Simple average of prices of UK Brent, Dubai Fateh, and West Texas Intermediate crude oil. 
The average assumed price of oil in US dollars a barrel, based on futures markets (as of November 29, 2022), is $81.13 in 2023 and $75.36 in 2024. 8/ Excludes Venezuela. 9/ The inflation rate for the euro area is 5.7% in 2023 and 3.3% in 2024, that for Japan is 2.8% in 2023 and 2.0% in 2024, and that for the United States is 4.0% in 2023 and 2.2% in 2024." }, { - "type": "UncategorizedText", - "element_id": "ef2d127de37b942baad06145e54b0c61", + "type": "NarrativeText", + "element_id": "df59a495ef85c5f70c5ba5356caf764a", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4315,13 +2767,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 7 }, - "text": "5" + "text": "Upside risks—Plausible upside risks include more favorable surprises to domestic spending—as in the third quarter of 2022—which, however, would increase inflation further. At the same time, there is room for an upside scenario with lower-than-expected inflation and less monetary tightening:" }, { - "type": "UncategorizedText", - "element_id": "e7f6c011776e8db7cd330b54174fd76f", + "type": "ListItem", + "element_id": "000425958dcafe9c9a9c501237d8c4d3", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4333,13 +2785,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 7 }, - "text": "6" + "text": "support and, in many cases, still-tight labor markets and solid wage growth, pent-up demand remains an upside risk to the growth outlook. In some advanced economies, recent data show that households are still on net adding to their stock of excess savings (as in some euro area countries and the United Kingdom) or have ample savings left (as in the United States). This leaves scope for a further boost to consumption—particularly of services, including tourism." 
}, { - "type": "Title", - "element_id": "6ef230728534d871e5126e2a55e12b26", + "type": "ListItem", + "element_id": "f7d988c7d799cc7eec1527f363785a8c", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4351,13 +2803,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 7 }, - "text": "Figure 1.2. Market-Implied Expectations of Policy Rates (Percent)" + "text": "6 International Monetary Fund | January 2023" }, { "type": "Title", - "element_id": "57de33ba9eaa9e5980d4cf6da83abf46", + "element_id": "95af4f3feb2d03b2310ce31abc0c435d", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4369,13 +2821,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 8 }, - "text": "Figure 1.1. Global Financial Conditions: Selected Regions (Standard deviations from mean)" + "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023" }, { - "type": "UncategorizedText", - "element_id": "467792e5d9b6bec26f556875e9ccab10", + "type": "NarrativeText", + "element_id": "a6e6e147daf229e8267d85c3e49f7250", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4387,13 +2839,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 8 }, - "text": "–1" + "text": "However, the boost to demand could stoke core inflation, leading to even tighter monetary policies and a stronger-than-expected slowdown later on. Pent-up demand could also fuel a stronger rebound in China." 
}, { - "type": "UncategorizedText", - "element_id": "28a5aa3897d66de6c31caba99a4c337e", + "type": "ListItem", + "element_id": "2bbe57e6c291db638d3fcddca9e0199a", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4405,13 +2857,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 8 }, - "text": "–2" + "text": "Faster disinflation: An easing in labor market pressures in some advanced economies due to" }, { - "type": "UncategorizedText", - "element_id": "a43f5d32a34c9b54fe96097c3d491389", + "type": "ListItem", + "element_id": "668cd3ea4f48a2f080b7b764c04ab011", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4423,13 +2875,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 8 }, - "text": "–3" + "text": "Faster disinflation: An easing in labor market pressures in some advanced economies due to falling vacancies could cool wage inflation without necessarily increasing unemployment. A sharp fall in the prices of goods, as consumers shift back to services, could further push down inflation. Such developments could imply a “softer” landing with less monetary tightening." }, { "type": "NarrativeText", - "element_id": "1ac9d411aa1266cb68aba2a8a9b70379", + "element_id": "ab2ac0c0c558600b645acb6349ccf2df", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4441,13 +2893,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 8 }, - "text": "Sources: Bloomberg Finance L.P.; Haver Analytics; national data sources; and IMF staff calculations. Note: AEs = advanced economies; EMs = emerging markets. GFSR = Global Financial Stability Report." 
+ "text": "Downside risks—Numerous downside risks continue to weigh on the global outlook, lowering growth while, in a number of cases, adding further to inflation:" }, { - "type": "Title", - "element_id": "49cf8421218222b21a0fc54ffce584c9", + "type": "ListItem", + "element_id": "1bbcee85386321e6e8235a64d4c34d73", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4459,13 +2911,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 8 }, - "text": "Oct. 22" + "text": "capacity, especially outside the major urban areas, significant health consequences could hamper the recovery. A deepening crisis in the real estate market remains a major source of vulnerability, with risks of widespread defaults by developers and resulting financial sector instability. Spillovers to the rest of the world would operate primarily through lower demand and potentially renewed supply chain problems." }, { - "type": "UncategorizedText", - "element_id": "7902699be42c8a8e46fbbb4501726517", + "type": "ListItem", + "element_id": "4e2bc46d4988ddde43a4f295d1d458c2", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4477,13 +2929,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 8 }, - "text": "7" + "text": "vulnerability, particularly for Europe and lower-income countries. Europe is facing lower-than- anticipated gas prices, having stored enough gas to make shortages unlikely this winter. However, refilling storage with much-diminished Russian flows will be challenging ahead of next winter, particularly if it is a very cold one and China’s energy demand picks up, causing price spikes. 
A possible increase in food prices from a failed extension of the Black Sea grain initiative would put further pressure on lower-income countries that are experiencing food insecurity and have limited budgetary room to cushion the impact on households and businesses. With elevated food and fuel prices, social unrest may increase." }, { - "type": "UncategorizedText", - "element_id": "d4735e3a265e16eee03f59718b9b5d03", + "type": "ListItem", + "element_id": "e3b0c44298fc1c149afbf4c8996fb924", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4495,13 +2947,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 8 }, - "text": "2" + "text": "" }, { - "type": "UncategorizedText", - "element_id": "e7f6c011776e8db7cd330b54174fd76f", + "type": "ListItem", + "element_id": "2d14934d52ff357c52e9ae1c38f7390e", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4513,13 +2965,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 8 }, - "text": "6" + "text": "Debt distress: Since October, sovereign spreads for emerging market and developing economies have modestly declined on the back of an easing in global financial conditions (Box 1) and dollar depreciation. About 15 percent of low-income countries are estimated to be in debt distress, with an additional 45 percent at high risk of debt distress and about 25 percent of emerging market economies also at high risk. The combination of high debt levels from the pandemic, lower growth, and higher borrowing costs exacerbates the vulnerability of these economies, especially those with significant near-term dollar financing needs. Inflation persisting: Persistent labor market tightness could translate into stronger-than-expected wage growth. 
Higher-than-expected oil, gas, and food prices from the war in Ukraine or from a faster rebound in China’s growth could again raise headline inflation and pass through into underlying inflation. Such developments could cause inflation expectations to de-anchor and require an even tighter monetary policy." }, { - "type": "UncategorizedText", - "element_id": "ef2d127de37b942baad06145e54b0c61", + "type": "ListItem", + "element_id": "4ce40bcfac131ab024e535bf860f9495", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4531,13 +2983,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 8 }, - "text": "5" + "text": " Sudden financial market repricing: A premature easing in financial conditions in response to lower headline inflation data could complicate anti-inflation policies and necessitate additional monetary tightening. For the same reason, unfavorable inflation data releases could trigger sudden repricing of assets and increase volatility in financial markets. Such movements could strain liquidity and the functioning of critical markets, with ripple effects on the real economy." 
}, { - "type": "UncategorizedText", - "element_id": "4b227777d4dd1fc61c6f884f48641d02", + "type": "ListItem", + "element_id": "75bd22ee0ba778cc3a616ed0a9b42292", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4549,13 +3001,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 8 }, - "text": "4" + "text": "Geopolitical fragmentation: The war in Ukraine and the related international sanctions aimed at  pressuring Russia to end hostilities are splitting the world economy into blocs and reinforcing" }, { - "type": "UncategorizedText", - "element_id": "4108466a9a52ce87e39eb1836a42f6f2", + "type": "ListItem", + "element_id": "d1c38e022e1b399f4203ee41c6dc4e43", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4567,13 +3019,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 8 }, - "text": "2006 08 08" + "text": "pressuring Russia to end hostilities are splitting the world economy into blocs and reinforcing earlier geopolitical tensions, such as those associated with the US-China trade dispute." }, { - "type": "UncategorizedText", - "element_id": "5feceb66ffc86f38d952786c6d696c79", + "type": "ListItem", + "element_id": "7250b07d7951c2b7b39c79195f4e69e7", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4585,13 +3037,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 8 }, - "text": "0" + "text": "International Monetary Fund | January 2023. 
7" }, { - "type": "UncategorizedText", - "element_id": "4e07408562bedb8b60ce05c1decfe3ad", + "type": "Title", + "element_id": "95af4f3feb2d03b2310ce31abc0c435d", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4603,13 +3055,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 9 }, - "text": "3" + "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023" }, { - "type": "UncategorizedText", - "element_id": "6b86b273ff34fce19d6b804eff5a3f57", + "type": "NarrativeText", + "element_id": "6684fee3e3cd949ec59e7444a0c3fd0c", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4621,13 +3073,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 9 }, - "text": "1" + "text": "Fragmentation could intensify—with more restrictions on cross-border movements of capital, workers, and international payments—and could hamper multilateral cooperation on providing global public goods.1 The costs of such fragmentation are especially high in the short term, as replacing disrupted cross-border flows takes time." 
}, { - "type": "UncategorizedText", - "element_id": "aacd834b5cdc64a329e27649143406dd", + "type": "Title", + "element_id": "a81cc4e3ca23fd16254e2b858cdcb00a", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4639,13 +3091,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 9 }, - "text": "06" + "text": "Policy Priorities" }, { - "type": "Title", - "element_id": "24a234895630131d612fc1b4605a256e", + "type": "NarrativeText", + "element_id": "1c464362698203e7245bdaf33c388a80", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4657,13 +3109,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 9 }, - "text": "Apr. 23" + "text": "Securing global disinflation: For most economies, the priority remains achieving a sustained reduction in inflation toward target levels. Raising real policy rates and keeping them above their neutral levels until underlying inflation is clearly declining would ward off risks of inflation expectations de- anchoring. Clear central bank communication and appropriate reactions to shifts in the data will help keep inflation expectations anchored and lessen wage and price pressures. Central banks’ balance sheets will need to be unwound carefully, amid market liquidity risks. Gradual and steady fiscal tightening would contribute to cooling demand and limit the burden on monetary policy in the fight against inflation. In countries where output remains below potential and inflation is in check, maintaining monetary and fiscal accommodation may be appropriate." 
}, { - "type": "ListItem", - "element_id": "7d4f55875c970d850a152ba1d5ba02a5", + "type": "NarrativeText", + "element_id": "d6138134f71f953a9da2083154e2629e", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4675,13 +3127,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 9 }, - "text": "1. United States" + "text": "Containing the reemergence of COVID-19: Addressing the ongoing pandemic requires coordinated efforts to boost vaccination and medicine access in countries where coverage remains low as well as the deployment of pandemic preparedness measures—including a global push toward sequencing and sharing data. In China, focusing vaccination efforts on vulnerable groups and maintaining sufficiently high coverage of boosters and antiviral medicines would minimize the risks of severe health outcomes and safeguard the recovery, with favorable cross-border spillovers." }, { - "type": "Title", - "element_id": "914e31edcbd035dbe9f1cfb7b29089a9", + "type": "NarrativeText", + "element_id": "2457fbbf5aa862b5a8b45d070f9114cb", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4693,13 +3145,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 9 }, - "text": "Oct. 23" + "text": "Ensuring financial stability: Depending on country circumstances, macroprudential tools can be used to tackle pockets of elevated financial sector vulnerabilities. Monitoring housing sector developments and conducting stress tests in economies where house prices have increased significantly over the past few years are warranted. 
In China, central government action to resolve the property crisis and reduce the risk of spillovers to financial stability and growth is a priority, including by strengthening temporary mechanisms to protect presale homebuyers from the risk of non-delivery and by restructuring troubled developers. Globally, financial sector regulations introduced after the global financial crisis have contributed to the resilience of banking sectors throughout the pandemic, but there is a need to address data and supervisory gaps in the less-regulated nonbank financial sector, where risks may have built up inconspicuously. Recent turmoil in the crypto space also highlights the urgent need to introduce common standards and reinforce oversight of crypto assets." }, { - "type": "Title", - "element_id": "8730d3c2022abf1f9665e4ca1da43e4d", + "type": "NarrativeText", + "element_id": "bcef6ce9e3d4c015db21955dc4f6ce42", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4711,13 +3163,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 9 }, - "text": "Latest" + "text": "Restoring debt sustainability: Lower growth and higher borrowing costs have raised public debt ratios in several economies. Where debt is unsustainable, implementing restructuring or reprofiling early on as part of a package of reforms (including fiscal consolidation and growth-enhancing supply-side reforms) can avert the need for more disruptive adjustment later." 
}, { - "type": "UncategorizedText", - "element_id": "785329d8f1c63e8d0cdeedba9e6bc2ea", + "type": "NarrativeText", + "element_id": "defb87cb8f10236768732a1e5fe9519f", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4729,13 +3181,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 9 }, - "text": "10 10" + "text": "Supporting the vulnerable: The surge in global energy and food prices triggered a cost-of-living crisis. Governments acted swiftly with support to households and firms, which helped cushion effects on growth and at times limited the pass-through from energy prices to headline inflation through price" }, { - "type": "Title", - "element_id": "d8478f45b9790d52201238244d0e9698", + "type": "NarrativeText", + "element_id": "bda037ffd6adfee8afa08544ca03a391", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4747,13 +3199,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 9 }, - "text": "Dec. 24" + "text": "1 See “Geo-Economic Fragmentation and the Future of Multilateralism,” IMF Staff Discussion Note 2023/001." 
}, { - "type": "UncategorizedText", - "element_id": "1e46bf7c5134da75e3a2aae852d7bddf", + "type": "Footer", + "element_id": "a9811a5a7bebc1f7a97bf6ca7ca5c890", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4765,13 +3217,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 9 }, - "text": "12 12" + "text": "8 International Monetary Fund | January 2023" }, { "type": "Title", - "element_id": "fe1cc1c654c8a4fde402cfe2426326ef", + "element_id": "95af4f3feb2d03b2310ce31abc0c435d", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4783,13 +3235,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 10 }, - "text": "Dec. 26" + "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023" }, { - "type": "Title", - "element_id": "4255f2d53f6408c450b02b249d53c220", + "type": "NarrativeText", + "element_id": "2e9a0eaddd75095d1bbb4fda6f2c4feb", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4801,13 +3253,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 10 }, - "text": "United States Euro area China Other AEs Other EMs" + "text": "controls. The temporary and broad-based measures are becoming increasingly costly and should be withdrawn and replaced by targeted approaches. Preserving the energy price signal will encourage a reduction in energy consumption and limit the risks of shortages. Targeting can be achieved through social safety nets such as cash transfers to eligible households based on income or demographics or by transfers through electricity companies based on past energy consumption. 
Subsidies should be temporary and offset by revenue-generating measures, including one-time solidarity taxes on high- income households and companies, where appropriate." }, { - "type": "UncategorizedText", - "element_id": "c81a1234a265c680bbc9e96e73073acd", + "type": "NarrativeText", + "element_id": "da0ef04b13917f67583290e9ba57e375", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4819,13 +3271,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 10 }, - "text": "14 16 14" + "text": "Reinforcing supply: Supply-side policies could address the key structural factors impeding growth— including market power, rent seeking, rigid regulation and planning, and inefficient education—and could help build resilience, reduce bottlenecks, and alleviate price pressures. A concerted push for investment along the supply chain of green energy technologies would bolster energy security and help advance progress on the green transition." }, { - "type": "Title", - "element_id": "49cf8421218222b21a0fc54ffce584c9", + "type": "NarrativeText", + "element_id": "9b451c78081780087a0e1e67cc0eaa1d", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4837,13 +3289,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 10 }, - "text": "Oct. 
22" + "text": "Strengthening multilateral cooperation—Urgent action is needed to limit the risks stemming from geopolitical fragmentation and to ensure cooperation on fundamental areas of common interest:" }, { - "type": "Title", - "element_id": "53d79cec96694df67ce3baff95d8a2e3", + "type": "ListItem", + "element_id": "bd2ec14b604696a7f47651e97a351d31", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4855,13 +3307,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 10 }, - "text": "October 2022 GFSR" + "text": "e = Restraining the pandemic: Global coordination is needed to resolve bottlenecks in the global distribution of vaccines and treatments. Public support for the development of new vaccine technologies and the design of systematic responses to future epidemics also remains essential." }, { - "type": "ListItem", - "element_id": "8e655408cf212df5f74df13e05cdf02c", + "type": "NarrativeText", + "element_id": "add6f9f296b6a99cf0ef86162b3c9cfc", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4873,13 +3325,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 10 }, - "text": "2. Euro area" + "text": "distribution of vaccines and treatments. Public support for the development of new vaccine technologies and the design of systematic responses to future epidemics also remains essential.  Addressing debt distress: Progress has been made for countries that requested debt treatment under the Group of Twenty’s Common Framework initiative, and more will be needed to strengthen it. It is also necessary to agree on mechanisms to resolve debt in a broader set of economies, including middle-income countries that are not eligible under the Common Framework. 
Non– Paris Club and private creditors have a crucial role to play in ensuring coordinated, effective, and timely debt resolution processes." }, { - "type": "UncategorizedText", - "element_id": "b17ef6d19c7a5b1ee83b907c595526dc", + "type": "ListItem", + "element_id": "e0ee0812ef9249e53d6425e299200f5c", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4891,13 +3343,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 10 }, - "text": "16" + "text": "e — Strengthening global trade: Strengthening the global trading system would address risks associated with trade fragmentation. This can be achieved by rolling back restrictions on food exports and other essential items such as medicine, upgrading World Trade Organization (WTO) rules in critical areas such as agricultural and industrial subsidies, concluding and implementing new WTO-based agreements, and fully restoring the WTO dispute settlement system." }, { - "type": "Title", - "element_id": "24a234895630131d612fc1b4605a256e", + "type": "ListItem", + "element_id": "a5751b5964fbbc37b14db4811aeb37f4", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4909,13 +3361,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 10 }, - "text": "Apr. 23" + "text": " Using the global financial safety net: With the cascading of shocks to the global economy, using the global financial safety net to its fullest extent is appropriate, including by proactively utilizing the IMF’s precautionary financial arrangements and channeling aid from the international community to low-income countries facing shocks." 
}, { - "type": "UncategorizedText", - "element_id": "99cb7a0185216a0acb0ed918e7058868", + "type": "ListItem", + "element_id": "0a4c2d76937c64308220b20382ea68c6", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4927,13 +3379,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 10 }, - "text": "18 18" + "text": "e Speeding the green transition: To meet governments’ climate change goals, it is necessary to swiftly implement credible mitigation policies. International coordination on carbon pricing or equivalent policies would facilitate faster decarbonization. Global cooperation is needed to build resilience to climate shocks, including through aid to vulnerable countries." }, { - "type": "Title", - "element_id": "914e31edcbd035dbe9f1cfb7b29089a9", + "type": "ListItem", + "element_id": "cbb9553ae9412cc864f9f254b47c3efc", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4945,13 +3397,13 @@ "date_modified": "2023-02-14T07:31:28" }, "filetype": "application/pdf", - "page_number": 11 + "page_number": 10 }, - "text": "Oct. 23" + "text": "International Monetary Fund | January 2023 9" }, { - "type": "UncategorizedText", - "element_id": "0c5e98c11d7bb005adbaf731ebfbbb2c", + "type": "Image", + "element_id": "0e1f5e74082ed333d383fa20680f0909", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4965,11 +3417,11 @@ "filetype": "application/pdf", "page_number": 11 }, - "text": "20 22 22" + "text": "BOX 1. 
GLOBAL FINANCIAL STABILITY UPDATE" }, { - "type": "Title", - "element_id": "d8478f45b9790d52201238244d0e9698", + "type": "NarrativeText", + "element_id": "a2fa3a13e51ab7dd0859ee2c869b70e5", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -4983,11 +3435,11 @@ "filetype": "application/pdf", "page_number": 11 }, - "text": "Dec. 24" + "text": "Overall, financial stability risks remain elevated as investors reassess their inflation and monetary policy outlook. Global financial conditions have eased somewhat since the October 2022 Global Financial Stability Report, driven largely by changing market expectations regarding the interest rate cycle (Figure 1.1). While the expected peak in policy rates—the terminal rate—has risen, markets now also expect the subsequent fall in rates will be significantly faster, and further, than what was forecast in October (Figure 1.2). As a result, global bond yields have recently declined, corporate spreads have tightened, and equity markets have rebounded. That said, central banks are likely to continue to tighten monetary policy to fight inflation, and concerns that this restrictive stance could tip the economy into a recession have increased in major advanced economies." }, { - "type": "UncategorizedText", - "element_id": "f5ca38f748a1d6eaf726b8a42fb575c3", + "type": "NarrativeText", + "element_id": "f79a09409db68af141e82d9ac113ded8", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -5001,11 +3453,11 @@ "filetype": "application/pdf", "page_number": 11 }, - "text": "20" + "text": "Figure 1.1. 
Global Financial Conditions: Selected Regions (Standard deviations from mean)" }, { - "type": "Title", - "element_id": "53d79cec96694df67ce3baff95d8a2e3", + "type": "Image", + "element_id": "cdd008e3fd865bb8022a5facb083484d", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -5019,11 +3471,11 @@ "filetype": "application/pdf", "page_number": 11 }, - "text": "October 2022 GFSR" + "text": " 7 United States Qclober 6 Euro area 2022 : —— China GFSR — other AEs 4 other EMs 3 2 1 0 " }, { - "type": "Title", - "element_id": "fe1cc1c654c8a4fde402cfe2426326ef", + "type": "FigureCaption", + "element_id": "d78f392a386b26aa260548d71936abff", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -5037,11 +3489,11 @@ "filetype": "application/pdf", "page_number": 11 }, - "text": "Dec. 26" + "text": "Sources: Bloomberg Finance L.P.; Haver Analytics; national data sources; and IMF staff calculations. Note: AEs = advanced economies; EMs = emerging markets. GFSR = Global Financial Stability Report." }, { - "type": "UncategorizedText", - "element_id": "d4735e3a265e16eee03f59718b9b5d03", + "type": "NarrativeText", + "element_id": "e118be83abfed92b8969eca98bb4d53b", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -5055,11 +3507,11 @@ "filetype": "application/pdf", "page_number": 11 }, - "text": "2" + "text": "Slowing aggregate demand and weaker-than-expected inflation prints in some major advanced economies have prompted investors’ anticipation of a further reduction in the pace of future policy rate hikes. Corporate earnings forecasts have been cut due to headwinds from slowing demand, and margins have contracted across most regions. In addition, survey-based probabilities of recession have been increasing, particularly in the United States and Europe. 
However, upside risks to the inflation outlook remain. Despite the recent moderation in headline inflation, core inflation remains stubbornly high across most regions, labor markets are still tight, energy prices remain pressured by Russia’s ongoing war in Ukraine, and supply chain disruptions may reappear. To keep these risks in check, financial conditions will likely need to tighten further. If not, central banks may need to increase policy rates even more in order to achieve their inflation objectives." }, { - "type": "UncategorizedText", - "element_id": "4e07408562bedb8b60ce05c1decfe3ad", + "type": "Title", + "element_id": "6ef230728534d871e5126e2a55e12b26", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -5073,11 +3525,11 @@ "filetype": "application/pdf", "page_number": 11 }, - "text": "3" + "text": "Figure 1.2. Market-Implied Expectations of Policy Rates (Percent)" }, { - "type": "UncategorizedText", - "element_id": "4b227777d4dd1fc61c6f884f48641d02", + "type": "Image", + "element_id": "9a335b9a7fd0ccd069211c60419252fc", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -5091,11 +3543,11 @@ "filetype": "application/pdf", "page_number": 11 }, - "text": "4" + "text": " Latest © —— October 2022 GFSR 6 1. United States 2. Euro area 5 1 1 Oct. Apr. Oct. Dec. Dec. Oct. Apr. Oct. Dec. Dec. 22 23 23 24 26 22 2B 2B 24 2 " }, { - "type": "UncategorizedText", - "element_id": "6b86b273ff34fce19d6b804eff5a3f57", + "type": "NarrativeText", + "element_id": "da431b9817da923cc48a538c4b3b8ade", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -5109,11 +3561,11 @@ "filetype": "application/pdf", "page_number": 11 }, - "text": "1" + "text": "Given the tension between rising recession risks and monetary policy uncertainty, markets have seen significant volatility. 
While many central banks in advanced economies have stepped down the size of hikes, they have also explicitly stated they will need to keep rates higher, for a longer period of time, to tamp down inflation. Risk assets could face significant declines if earnings retrench further or if investors reassess their outlook for monetary policy given central bank communications. Globally, the partial reversal of the dollar rally has contributed to recent easing due to improved risk appetite, and some emerging market central banks have paused tightening amid tentative signs that inflation may have peaked." }, { - "type": "UncategorizedText", - "element_id": "ef2d127de37b942baad06145e54b0c61", + "type": "NarrativeText", + "element_id": "d073e054fbe8931eb0e200b268710187", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -5127,7 +3579,7 @@ "filetype": "application/pdf", "page_number": 11 }, - "text": "5" + "text": "Sources: Bloomberg Finance L.P.; and IMF staff calculations. Note: GFSR = Global Financial Stability Report." }, { "type": "NarrativeText", @@ -5148,7 +3600,7 @@ "text": "Financial market volatility is expected to remain elevated and could be exacerbated by poor market liquidity. For some asset classes (such as US Treasuries), liquidity has deteriorated to the March 2020 lows of the COVID-19 pandemic. With the process of central bank balance sheet reduction (quantitative tightening) underway, market liquidity is expected to remain challenging." 
}, { - "type": "Title", + "type": "ListItem", "element_id": "bab943d841e99d44807adb96ef9ef925", "metadata": { "data_source": { @@ -5166,8 +3618,8 @@ "text": "10 — International Monetary Fund | January 2023" }, { - "type": "UncategorizedText", - "element_id": "09b3166aab28edac8872d46b3b34ab02", + "type": "NarrativeText", + "element_id": "06d12185958a014c0c9d6afeab7426c2", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", diff --git a/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/Silent-Giant-(1).pdf.json b/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/Silent-Giant-(1).pdf.json index b5153e745f..095afab414 100644 --- a/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/Silent-Giant-(1).pdf.json +++ b/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/Silent-Giant-(1).pdf.json @@ -1,7 +1,7 @@ [ { "type": "Title", - "element_id": "57eef8242d3675c93268fde018dc9df3", + "element_id": "14547603bad3329c14c74b8c4e2ff8d9", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -15,11 +15,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "WORLD NUCLEAR //s88ciation" + "text": "//s88ciation" }, { "type": "Title", - "element_id": "9f8388cf868cb29d273fdd7328642ff8", + "element_id": "80f1cd7f1c8e281093a32842b1e5bbce", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -33,11 +33,29 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "The Silent Giant" + "text": "WORLD NUCLEAR" }, { "type": "Title", - "element_id": "f439367da08e61523302e29f153007e0", + "element_id": "51174df4a3a78fe261885b1818b66876", + "metadata": { + "data_source": { + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": 177372694731575984083482917563244941766, + "record_locator": { + "protocol": "s3", + 
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + }, + "date_modified": "2023-02-12T10:10:36" + }, + "filetype": "application/pdf", + "page_number": 1 + }, + "text": "The Silent Giant" + }, + { + "type": "NarrativeText", + "element_id": "e2b1006b190b699d597fdb0f1d73f8f9", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -55,7 +73,7 @@ }, { "type": "Title", - "element_id": "53d548aa01fc3eb72da15a5be7f235e2", + "element_id": "2fa985d0a50e61b09ec22c447cc4b2c9", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -91,7 +109,7 @@ }, { "type": "NarrativeText", - "element_id": "46385c950e7da4d8e588686a541335c2", + "element_id": "8e1e0570b2ba9211cc184c21a3ffbf90", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -105,11 +123,11 @@ "filetype": "application/pdf", "page_number": 3 }, - "text": "The reality today is that both global carbon dioxide emissions and fossil fuel use are still on the rise. This does not only make the battle against climate change much harder, but also results in hundreds of thousands of pollution deaths every year." + "text": "Nuclear energy is a proven solution with a long and well-established track record. Nuclear reactors – a grand total of 445 in 30 countries – are the low-carbon backbone of electricity systems, operating in the background, day in and day out, often out of sight and out of mind. Capable of generating immense amounts of clean power, they are the silent giants upon which we rely daily." 
}, { "type": "NarrativeText", - "element_id": "8e1e0570b2ba9211cc184c21a3ffbf90", + "element_id": "46385c950e7da4d8e588686a541335c2", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -123,7 +141,7 @@ "filetype": "application/pdf", "page_number": 3 }, - "text": "Nuclear energy is a proven solution with a long and well-established track record. Nuclear reactors – a grand total of 445 in 30 countries – are the low-carbon backbone of electricity systems, operating in the background, day in and day out, often out of sight and out of mind. Capable of generating immense amounts of clean power, they are the silent giants upon which we rely daily." + "text": "The reality today is that both global carbon dioxide emissions and fossil fuel use are still on the rise. This does not only make the battle against climate change much harder, but also results in hundreds of thousands of pollution deaths every year." }, { "type": "NarrativeText", @@ -199,7 +217,7 @@ }, { "type": "ListItem", - "element_id": "3cc3e847449fed4fa13bbd94f86e43a9", + "element_id": "9209d9a3c8ea19bed487dff9476428ee", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -213,11 +231,11 @@ "filetype": "application/pdf", "page_number": 3 }, - "text": "The need to create a level playing field that values reliability and energy security" + "text": "• The need to create a level playing field that values reliability and energy security" }, { "type": "ListItem", - "element_id": "9c4387f669c689e9af0a712fd494b2d7", + "element_id": "ae74ee3ddcecd2ffb75672d469c80a0e", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -231,7 +249,7 @@ "filetype": "application/pdf", "page_number": 3 }, - "text": "The need for harmony in the nuclear regulatory environment" + "text": "• The need for harmony in the nuclear regulatory environment" }, { "type": "ListItem", @@ 
-252,7 +270,7 @@ "text": "The need for a holistic safety paradigm for the whole electricity system." }, { - "type": "UncategorizedText", + "type": "Footer", "element_id": "6b86b273ff34fce19d6b804eff5a3f57", "metadata": { "data_source": { @@ -323,24 +341,6 @@ }, "text": "45,000" }, - { - "type": "Title", - "element_id": "e29786b8cc565a047639f24f7171c30f", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": " Marine" - }, { "type": "Title", "element_id": "563a2980d46c81119e1d7d952b375a41", @@ -360,8 +360,8 @@ "text": "h W T" }, { - "type": "UncategorizedText", - "element_id": "9925953f1faef050547e5f7b811c3f7d", + "type": "Image", + "element_id": "d5aedf7912dfff3c661af8cd17426bac", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -375,11 +375,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "40,000" + "text": "45,000 © Marine 40,000 M™@ csp 35,000 zz Solar PV Geothermal 30,000 ~ Mi Wind 25,000 — Il Bioenergy 20,000 = BB Hydro Nuclear 15,000 — Gas 10,000 — oi 5,000 __ Coal 2000 2010 2020 2030 2040" }, { "type": "UncategorizedText", - "element_id": "4ebe55cc1aee6dd892d7182d797d105a", + "element_id": "81a83544cf93c245178cbc1620030f11", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -393,11 +393,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "35,000" + "text": "2000" }, { "type": "UncategorizedText", - "element_id": "422f240e43a3226f329ba4a0236f587c", + "element_id": "7d12ba56e9f8b3dc64f77c87318c4f37", "metadata": { "data_source": { "url": 
"s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -411,11 +411,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "30,000" + "text": "2010" }, { "type": "UncategorizedText", - "element_id": "c7e6673590d2426f635c9be70bd8f057", + "element_id": "73a2af8864fc500fa49048bf3003776c", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -429,11 +429,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "25,000" + "text": "2020" }, { "type": "UncategorizedText", - "element_id": "b6b53b7d4224992f9aa86411bbc3f74b", + "element_id": "8e1f192fe25ad49be764c3f55c68beb3", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -447,11 +447,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "20,000" + "text": "2030" }, { "type": "UncategorizedText", - "element_id": "b2ee3509c1fa4f9741f894e592bda9ac", + "element_id": "df34d853f2f2f1f14b92359f695426dc", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -465,11 +465,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "15,000" + "text": "2040" }, { - "type": "UncategorizedText", - "element_id": "28ec039832f5bc96c2be0eaee016dafe", + "type": "FigureCaption", + "element_id": "578e73d091a9463a76ea7502a6a92503", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -483,11 +483,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "10,000" + "text": "Figure 1. 
IEA projected electricity production and sources to 2040 i" }, { - "type": "UncategorizedText", - "element_id": "b2008c37ee3a7cf7ba87f5ad50dd9e11", + "type": "NarrativeText", + "element_id": "427b54db6e4b434f92954bc67db93473", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -501,11 +501,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "5,000" + "text": "The challenge before us, however, goes far beyond just electricity – we will need to find ways to decarbonize all parts of the economy, and we need solutions that are sustainable in the long-term. That means changing the way we heat our homes and power our industrial processes, as well as ensuring that the way we travel, export our products and ship our food moves away from fossil fuels." }, { - "type": "UncategorizedText", - "element_id": "5feceb66ffc86f38d952786c6d696c79", + "type": "NarrativeText", + "element_id": "92f6fd6a561b87154049d083b93b611d", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -519,65 +519,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "0" + "text": "Despite the very considerable efforts to decarbonize the economy and the countless billions spent, our world remains heavily addicted to fossil fuels. The trend is clear – instead of reducing our dependence on fossil fuels, we are increasing it (Figure 2). As a direct result, greenhouse gas emissions continue to rise when they need to drastically fall." 
}, { "type": "Title", - "element_id": "4a60bf7d4bc1e485744cf7e8d0860524", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "zz" - }, - { - "type": "UncategorizedText", - "element_id": "7ace431cb61584cb9b8dc7ec08cf38ac", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "~" - }, - { - "type": "UncategorizedText", - "element_id": "bda050585a00f0f6cb502350559d7553", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "—" - }, - { - "type": "UncategorizedText", - "element_id": "380918b946a526640a40df5dced65167", + "element_id": "a5d60fc4dbbd484074d8389c35703cf7", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -591,11 +537,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "=" + "text": "h W G" }, { - "type": "UncategorizedText", - "element_id": "bda050585a00f0f6cb502350559d7553", + "type": "Image", + "element_id": 
"81fe4504e383e98273c4a560382d82ee", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -609,11 +555,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "—" + "text": "30,000,000 |_| High-carbon HE Low-carbon 25,000,000 20,000,000 15,000,000 10,000,000 5,000,000 1990 1995 2000 2005 2010 2015" }, { "type": "UncategorizedText", - "element_id": "bda050585a00f0f6cb502350559d7553", + "element_id": "a7be8e1fe282a37cd666e0632b17d933", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -627,11 +573,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "—" + "text": "1990" }, { "type": "UncategorizedText", - "element_id": "9911f4d2b18457c4726664d309385072", + "element_id": "e78f27ab3ef177a9926e6b90e572b985", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -645,7 +591,7 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "__" + "text": "1995" }, { "type": "UncategorizedText", @@ -667,25 +613,7 @@ }, { "type": "UncategorizedText", - "element_id": "7d12ba56e9f8b3dc64f77c87318c4f37", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "2010" - }, - { - "type": "UncategorizedText", - "element_id": "73a2af8864fc500fa49048bf3003776c", + "element_id": "a20a2b7bb0842d5cf8a0c06c626421fd", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -699,11 +627,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "2020" + "text": "2005" }, { "type": "UncategorizedText", - 
"element_id": "8e1f192fe25ad49be764c3f55c68beb3", + "element_id": "7d12ba56e9f8b3dc64f77c87318c4f37", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -717,101 +645,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "2030" + "text": "2010" }, { "type": "UncategorizedText", - "element_id": "df34d853f2f2f1f14b92359f695426dc", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "2040" - }, - { - "type": "Title", - "element_id": "d04999bf99ea28fc8a6b20318caac58c", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": " CSP" - }, - { - "type": "Title", - "element_id": "8af26217282646d0f64d3e3211f47512", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": " Solar PV" - }, - { - "type": "Title", - "element_id": "6e28663850f2b50ee6af2d4477b410be", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - 
"version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": " Geothermal" - }, - { - "type": "Title", - "element_id": "7e2f430d44cfb03dca12ffde615c36ec", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": " Wind" - }, - { - "type": "Title", - "element_id": "bde9df80639b681edb85ace46b4d4600", + "element_id": "a85e9db4851f7cd3efb8db7bf69a07cf", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -825,11 +663,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": " Bioenergy" + "text": "2015" }, { - "type": "Title", - "element_id": "b449cd843dc44ab907e1e9ed9c30d92e", + "type": "FigureCaption", + "element_id": "aa04bda99d06997f39a4b613c2c62be5", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -843,11 +681,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": " Hydro" + "text": "Figure 2. 
Worldwide electricity generation by fuel (1990-2016)ii" }, { - "type": "Title", - "element_id": "f35457739b3bd74c61625c986c844726", + "type": "Header", + "element_id": "53c234e5e8472b6ac51c1ae1cab3fe06", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -861,11 +699,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": " Nuclear" + "text": "2" }, { - "type": "Title", - "element_id": "0f3341ae76e0d4d7816d3620bd915110", + "type": "NarrativeText", + "element_id": "d841776bdfaae69274a3c8b898021653", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -877,13 +715,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 4 + "page_number": 5 }, - "text": " Gas" + "text": "We need to deliver a worldwide transformation that is socially, economically and environmentally sustainable. We need a system that is affordable – no one should have to choose between heating their home, and essentials like eating – as well as helping to alleviate poverty, and ensure the realization of human potential globally. We need a power source that can not only help us mitigate the effects of climate change and environmental degradation, but can also help bring the enormous benefits of reliable electricity supply to the corners of the world that do not have access to it." 
}, { - "type": "Title", - "element_id": "b001a2374d44e3085e712bb40f66270e", + "type": "NarrativeText", + "element_id": "10a72512425bbe7a4cdd6529b0337d90", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -895,1831 +733,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 4 - }, - "text": " Oil" - }, - { - "type": "Title", - "element_id": "90ad0c8c14253135efd14645e0156145", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": " Coal" - }, - { - "type": "NarrativeText", - "element_id": "578e73d091a9463a76ea7502a6a92503", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "Figure 1. 
IEA projected electricity production and sources to 2040 i" - }, - { - "type": "NarrativeText", - "element_id": "427b54db6e4b434f92954bc67db93473", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "The challenge before us, however, goes far beyond just electricity – we will need to find ways to decarbonize all parts of the economy, and we need solutions that are sustainable in the long-term. That means changing the way we heat our homes and power our industrial processes, as well as ensuring that the way we travel, export our products and ship our food moves away from fossil fuels." - }, - { - "type": "NarrativeText", - "element_id": "92f6fd6a561b87154049d083b93b611d", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "Despite the very considerable efforts to decarbonize the economy and the countless billions spent, our world remains heavily addicted to fossil fuels. The trend is clear – instead of reducing our dependence on fossil fuels, we are increasing it (Figure 2). As a direct result, greenhouse gas emissions continue to rise when they need to drastically fall." 
- }, - { - "type": "Title", - "element_id": "a5d60fc4dbbd484074d8389c35703cf7", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "h W G" - }, - { - "type": "UncategorizedText", - "element_id": "ebc18f485dc347b842b3d248d011ce6c", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "30,000,000" - }, - { - "type": "UncategorizedText", - "element_id": "dcdc1a65c75197a553fdd90554060414", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "25,000,000" - }, - { - "type": "UncategorizedText", - "element_id": "1476fd07ef61145d484f5a2e0b4e8e7d", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 
- }, - "text": "20,000,000" - }, - { - "type": "UncategorizedText", - "element_id": "a63634f2c80c7bcc81bc6faad5d53e16", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "15,000,000" - }, - { - "type": "UncategorizedText", - "element_id": "8582d26affb6928525e4f027c2cb8c08", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "10,000,000" - }, - { - "type": "UncategorizedText", - "element_id": "265e4d619f6b21971816b0e4274faf92", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "5,000,000" - }, - { - "type": "UncategorizedText", - "element_id": "5feceb66ffc86f38d952786c6d696c79", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - 
"filetype": "application/pdf", - "page_number": 4 - }, - "text": "0" - }, - { - "type": "Title", - "element_id": "e3cf3e34001852adb7a17cf424bda9fc", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": " High-carbon  Low-carbon" - }, - { - "type": "UncategorizedText", - "element_id": "a7be8e1fe282a37cd666e0632b17d933", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "1990" - }, - { - "type": "UncategorizedText", - "element_id": "e78f27ab3ef177a9926e6b90e572b985", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "1995" - }, - { - "type": "UncategorizedText", - "element_id": "81a83544cf93c245178cbc1620030f11", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": 
"2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "2000" - }, - { - "type": "UncategorizedText", - "element_id": "a20a2b7bb0842d5cf8a0c06c626421fd", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "2005" - }, - { - "type": "UncategorizedText", - "element_id": "7d12ba56e9f8b3dc64f77c87318c4f37", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "2010" - }, - { - "type": "UncategorizedText", - "element_id": "a85e9db4851f7cd3efb8db7bf69a07cf", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "2015" - }, - { - "type": "NarrativeText", - "element_id": "aa04bda99d06997f39a4b613c2c62be5", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - 
"date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "Figure 2. Worldwide electricity generation by fuel (1990-2016)ii" - }, - { - "type": "UncategorizedText", - "element_id": "d4735e3a265e16eee03f59718b9b5d03", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "2" - }, - { - "type": "NarrativeText", - "element_id": "d841776bdfaae69274a3c8b898021653", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 5 - }, - "text": "We need to deliver a worldwide transformation that is socially, economically and environmentally sustainable. We need a system that is affordable – no one should have to choose between heating their home, and essentials like eating – as well as helping to alleviate poverty, and ensure the realization of human potential globally. We need a power source that can not only help us mitigate the effects of climate change and environmental degradation, but can also help bring the enormous benefits of reliable electricity supply to the corners of the world that do not have access to it." 
- }, - { - "type": "NarrativeText", - "element_id": "10a72512425bbe7a4cdd6529b0337d90", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 5 - }, - "text": "Nuclear energy is already making a major contribution. By using nuclear energy rather than fossil fuels, we currently avoid the emission of more than 2500 million tonnes of carbon dioxide every year. To put that into perspective, it is the equivalent of removing about 400 million cars from the world’s roads." - }, - { - "type": "NarrativeText", - "element_id": "030d3154a592248139651c5f8fbef1d5", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 5 - }, - "text": "Modern society is dependent on the steady supply of electricity, every day of the year – regardless of weather, season or time of day – and nuclear energy is particularly well-suited to providing this service. Given that the majority of baseload supply is fossil-based, an increase in the use of nuclear energy would result in a rapid decarbonization of the electricity system. The International Energy Agency’s (IEA) recent report III on nuclear energy highlighted the importance of dependable baseload electricity generators and the need to properly value and compensate them for the electricity security and reliability services they provide." 
- }, - { - "type": "UncategorizedText", - "element_id": "4e07408562bedb8b60ce05c1decfe3ad", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 5 - }, - "text": "3" - }, - { - "type": "NarrativeText", - "element_id": "a53cecd93ffb9ec731b7974f1805e924", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 6 - }, - "text": "Despite impressive recent growth, the stark reality is that renewables alone will not be able to resolve our dependence on fossil fuels. Clearly, the sun does not always shine, and the wind does not always blow, and this is compounded by the fact that many times these periods coincide with when electricity demand is at its highest, but renewables can be complementary to nuclear energy. Storage solutions, such as batteries, will not be able to power our societies for days or weeks when the weather is not favourable. Natural gas is currently the most used solution for the intermittency problem, which only serves to reinforce our economy’s dependence of fossil fuels, and severely undermines the apparently ‘green credentials’ of many renewables." 
- }, - { - "type": "Title", - "element_id": "899a2c517ba69726f3808d66f442e439", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 6 - }, - "text": "Moving to a sustainable future" - }, - { - "type": "NarrativeText", - "element_id": "a8c17b6aa3cad915f2f7e0126706c2f5", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 6 - }, - "text": "The Intergovernmental Panel on Climate Change (IPCC) special report on Global Warming of 1.5°C iv examined a large number of different scenarios for limiting global warming to 1.5°C. Of those scenarios which would achieve the 1.5°C target, the mean increase in nuclear energy’s contribution to electricity production was 2.5 times higher compared to today. However, the ‘middle-of-the-road’ scenario – in which social, economic, and technological trends follow current patterns and would not require major changes to, for example, diet and travel habits – sees the need for nuclear increase by five times globally by 2050." 
- }, - { - "type": "NarrativeText", - "element_id": "7562e707e991f1fb634fff41f2cae0e4", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 6 - }, - "text": "The IEA has concluded that without an expanded contribution from nuclear energy, the already huge challenge of achieving emissions reductions will become drastically harder and more costly. In their latest report on nuclear energy v, published in 2019, they also conclude that not using nuclear would have negative implications for energy security and result in higher costs for the consumers. The IEA recommends policy reforms to ‘… ensure competition on a level playing field’ and that the ‘… focus should be on designing electricity markets in a way that values the clean energy and energy security attributes of low-carbon technologies, including nuclear power.’ Such reforms should also ensure that reliability of electricity production is properly valued and compensated." 
- }, - { - "type": "NarrativeText", - "element_id": "1cde21cc10aa769a17ca11aa1e10823e", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 6 - }, - "text": "As part of the Harmony Programme, the world’s nuclear industry has identified three key policy areas for action to unlock the true potential of nuclear energy - the need for a level playing field, the harmonization of regulations and the establishment of an effective safety paradigm." - }, - { - "type": "NarrativeText", - "element_id": "af2424b7ec665072a2ee0bdcd901e244", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 6 - }, - "text": "In regard to the need for a level playing field, we see that many of the world’s electricity markets operate in an unsustainable fashion, dominated by short-term thinking. Electricity supply which is affordable, reliable and available 24/7 generates broad societal benefits, and as seen in Figure 3, nuclear is one of the most affordable electricity sources." 
- }, - { - "type": "UncategorizedText", - "element_id": "4b227777d4dd1fc61c6f884f48641d02", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 6 - }, - "text": "4" - }, - { - "type": "UncategorizedText", - "element_id": "983bd614bb5afece5ab3b6023f71147c", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "300" - }, - { - "type": "UncategorizedText", - "element_id": "1e472b39b105d349bcd069c4a711b44a", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "250" - }, - { - "type": "UncategorizedText", - "element_id": "27badc983df1780b60c2b3fa9d3a19a0", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - 
"text": "200" - }, - { - "type": "Title", - "element_id": "e8dbac2cdc67e714e99baa9c0f6a54b9", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "h W M / $" - }, - { - "type": "UncategorizedText", - "element_id": "9ae2bdd7beedc2e766c6b76585530e16", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "150" - }, - { - "type": "UncategorizedText", - "element_id": "ad57366865126e55649ecb23ae1d4888", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "100" - }, - { - "type": "UncategorizedText", - "element_id": "1a6562590ef19d1045d06c4055742d38", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - 
"page_number": 7 - }, - "text": "50" - }, - { - "type": "UncategorizedText", - "element_id": "5feceb66ffc86f38d952786c6d696c79", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "0" - }, - { - "type": "NarrativeText", - "element_id": "4b5ebf5890b9c61b43c5daf4c40cbab0", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "C o m" - }, - { - "type": "Title", - "element_id": "8fd5206adbbb7a132889e4161057d4cf", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "m ercial Photovoltaic" - }, - { - "type": "Title", - "element_id": "8e2f99a9826b1b316f7690290f32b31f", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": 
"application/pdf", - "page_number": 7 - }, - "text": "O nshore Wind" - }, - { - "type": "Title", - "element_id": "53209d7cc67427ba22ec6d878fc8d421", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "Offshore Wind" - }, - { - "type": "Title", - "element_id": "0e6fac6a3ad129a64c2b9d6eaf6680e4", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "SS" - }, - { - "type": "Title", - "element_id": "6dc76d1e1c35d4253537250288157d0c", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "N uclear" - }, - { - "type": "Title", - "element_id": "079c085d3cb9f52f2392addf619382be", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": 
"application/pdf", - "page_number": 7 - }, - "text": "C C G T" - }, - { - "type": "Title", - "element_id": "6c25ebfc9ffd2510c4c41d4bd5cb7ea9", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "C oal" - }, - { - "type": "NarrativeText", - "element_id": "a5846cd18e790db780cc03f9e5f63278", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "Figure 3. Comparative cost projections for main electricity generators vi" - }, - { - "type": "NarrativeText", - "element_id": "9ad4cf48d0b9d0bbfd257214f3d050dd", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "However, markets fail to give due credit to electricity generators, such as nuclear energy, that are able to meet these societal demands. This has resulted in situations where nuclear energy has struggled to compete with energy sources that have been subsidized, do not pay the hidden costs brought on by their intermittency (e.g. 
costly backup provisions and investments in the grid), or do not have to take responsibility for using our common atmosphere as a dumping ground." - }, - { - "type": "NarrativeText", - "element_id": "4b3dad9b769c100e89b2c082e7d9e13e", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "In regard to the need to harmonize regulations, multiple regulatory barriers stemming from diverse national licensing processes and safety requirements currently limit global nuclear trade and investment. A lack of international standardization places unnecessary regulatory burdens on nuclear activities and causes delays in the licensing of new designs, hindering innovation." - }, - { - "type": "NarrativeText", - "element_id": "13ff2375260e277c2dfbc8826aa50a65", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "Additionally, electricity markets fail to recognize the relative costs of different forms of electricity generation. Whilst the nuclear industry takes responsibility for its lifecycle costs (including decommissioning and waste management), other electricity generators do not. 
Fossil fuel generators are rarely required to pay the price in line with the environmental and health damage that their emissions cause, whilst the cost of wind and solar does not include the disposal of the sometimes toxic materials at the end of their life." - }, - { - "type": "NarrativeText", - "element_id": "0ce74aa5e786157de72d5ae801d86cc4", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "The International Atomic Energy Agency (IAEA) has highlighted the importance of addressing this issue, concluding that the lack of regulatory harmony ‘…causes many drawbacks for the entire nuclear industry, including developers, vendors, operators and even regulators themselves…This results in increased costs and reduced predictability in project execution’. vii It is therefore crucial that we harmonize the regulatory process to address these weaknesses, and avoid unnecessary duplication and inconsistencies." 
- }, - { - "type": "UncategorizedText", - "element_id": "ef2d127de37b942baad06145e54b0c61", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "5" - }, - { - "type": "NarrativeText", - "element_id": "2cf9c478a20b21f5792941a179d926e9", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "In regard to the need for a holistic safety paradigm for the whole electricity system, we need to consider safety from a societal perspective, something the current energy system fails to do. The health, environmental and safety benefits of nuclear energy are not sufficiently understood and valued when compared with other electricity sources. Nuclear energy remains the safest form of electricity generation (Figure 4). Additionally, the use of nuclear consistently prevents many tens of thousands of deaths (mainly resulting from air pollution) every year by avoiding the use of coal - lifesaving measures which must be better recognised and valued." 
- }, - { - "type": "UncategorizedText", - "element_id": "dbae772db29058a88f9bd830e957c695", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "140" - }, - { - "type": "Title", - "element_id": "1fb2ec4fc8fc547c0de86ba79ba651e5", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "a t a F" - }, - { - "type": "NarrativeText", - "element_id": "e11247712b3df61756970b45f019ad68", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "r a e y" - }, - { - "type": "Title", - "element_id": "f83714d89302473e0e4f5399bd50e7a9", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "W T" - 
}, - { - "type": "NarrativeText", - "element_id": "f9bb49945b60897227abdd75b5f8d39b", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "r e p s e i t i l" - }, - { - "type": "UncategorizedText", - "element_id": "380918b946a526640a40df5dced65167", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "=" - }, - { - "type": "UncategorizedText", - "element_id": "911bc18af1665a604b4fa4a97d47f477", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "“99 :" - }, - { - "type": "Title", - "element_id": "3f79bb7b435b05321651daefd374cdc6", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - 
"text": "e" - }, - { - "type": "UncategorizedText", - "element_id": "2abaca4911e68fa9bfbf3482ee797fd5", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "120" - }, - { - "type": "UncategorizedText", - "element_id": "ad57366865126e55649ecb23ae1d4888", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "100" - }, - { - "type": "UncategorizedText", - "element_id": "d59eced1ded07f84c145592f65bdf854", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "40" - }, - { - "type": "UncategorizedText", - "element_id": "39fa9ec190eee7b6f4dff1100d6343e1", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - 
"page_number": 8 - }, - "text": "60" - }, - { - "type": "UncategorizedText", - "element_id": "f5ca38f748a1d6eaf726b8a42fb575c3", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "20" - }, - { - "type": "UncategorizedText", - "element_id": "48449a14a4ff7d79bb7a1b6f3d488eba", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "80" - }, - { - "type": "UncategorizedText", - "element_id": "5feceb66ffc86f38d952786c6d696c79", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "0" - }, - { - "type": "UncategorizedText", - "element_id": "e7ac0786668e0ff0f02b62bd04f45ff6", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": 
"application/pdf", - "page_number": 8 - }, - "text": ":" - }, - { - "type": "UncategorizedText", - "element_id": "5bddd069fd77ec5699d9ab00c00f47c4", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "1 :" - }, - { - "type": "Title", - "element_id": "6c25ebfc9ffd2510c4c41d4bd5cb7ea9", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "C oal" - }, - { - "type": "UncategorizedText", - "element_id": "2abaca4911e68fa9bfbf3482ee797fd5", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "120" - }, - { - "type": "UncategorizedText", - "element_id": "b725d20650649a5221675144bab5946e", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - 
"filetype": "application/pdf", - "page_number": 8 - }, - "text": "99.5" - }, - { - "type": "Title", - "element_id": "2378bdd2cf4f491cf401e6b215cbb4fd", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "Oil" - }, - { - "type": "Title", - "element_id": "4fabb98454d019811a732c4a09f31bf0", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "N atural gas" - }, - { - "type": "UncategorizedText", - "element_id": "ce3201efc2e495241a85e4fc84575f50", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "71.9" - }, - { - "type": "Title", - "element_id": "593cbe414f10662e62c0da03ce3302b8", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - 
"filetype": "application/pdf", - "page_number": 8 - }, - "text": "fe)" - }, - { - "type": "Title", - "element_id": "77cf83b127020f3a465005abc747e63f", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "Offshore wind" - }, - { - "type": "UncategorizedText", - "element_id": "e7ac0786668e0ff0f02b62bd04f45ff6", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": ":" - }, - { - "type": "UncategorizedText", - "element_id": "6b86b273ff34fce19d6b804eff5a3f57", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "1" - }, - { - "type": "Title", - "element_id": "1b16b1df538ba12dc3f97edbb85caa70", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - 
"filetype": "application/pdf", - "page_number": 8 - }, - "text": "n" - }, - { - "type": "UncategorizedText", - "element_id": "6b86b273ff34fce19d6b804eff5a3f57", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "1" - }, - { - "type": "UncategorizedText", - "element_id": "cdb4ee2aea69cc6a83331bbe96dc2caa", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "." 
- }, - { - "type": "UncategorizedText", - "element_id": "77e43ef38dbfcec0511535d9c7dbee5c", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "(U K)" - }, - { - "type": "UncategorizedText", - "element_id": "cc6f2aa507f6a1f7de2db7e09ddef042", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "8.5" - }, - { - "type": "NarrativeText", - "element_id": "50a78acc78a3c5b4acc8c439af743a0a", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "O nshore wind (G erm any)" - }, - { - "type": "UncategorizedText", - "element_id": "5d48c7c6dce082d397fecf99b8f1ac7f", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - 
"page_number": 8 - }, - "text": "1.78" - }, - { - "type": "Title", - "element_id": "bbf2011ddebee240452a3ab98416afb4", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "S olar P V" - }, - { - "type": "UncategorizedText", - "element_id": "f1ced6d8a7d437fd3748f56bb2358f9a", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "0.245" - }, - { - "type": "Title", - "element_id": "f280c2a253ebd5a7389dd0790fcbd56c", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "N uclear*" - }, - { - "type": "UncategorizedText", - "element_id": "efc293f64a092b9bfe153be9357f9580", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": 
"application/pdf", - "page_number": 8 - }, - "text": "<0.01" - }, - { - "type": "NarrativeText", - "element_id": "445676822969fb5177c0081d07449a70", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "Figure 4. Comparison of number of fatalities due to electricity generation viii" - }, - { - "type": "Title", - "element_id": "98d83a387e3ac2261daaf8d936bf3e27", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "Nuclear for a sustainable tomorrow" - }, - { - "type": "NarrativeText", - "element_id": "1119369ba9a68924c64155762de72d8e", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "Nuclear energy is already making a significant contribution to providing the world with clean and abundant electricity, and has a proven track record of being a reliable workhorse around the world. 
Countries like France, Sweden and Switzerland have proven that it is possible to divorce economic growth from an increase in damaging emissions and over the timescales required to effectively challenge climate change and environmental degradation (Figures 5 and 6). Nuclear can ensure that fast-growing populations achieve rising standards of living – without having to sacrifice the planet or their own well-being." - }, - { - "type": "UncategorizedText", - "element_id": "ad57366865126e55649ecb23ae1d4888", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "100" - }, - { - "type": "UncategorizedText", - "element_id": "69f59c273b6e669ac32a6dd5e1b2cb63", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": "90" - }, - { - "type": "Title", - "element_id": "90ad0c8c14253135efd14645e0156145", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": " Coal" - }, - { - "type": "Title", - "element_id": "3fd264839410c464bf2640d6dbf3ed86", 
- "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": " Gas/Oil" - }, - { - "type": "UncategorizedText", - "element_id": "48449a14a4ff7d79bb7a1b6f3d488eba", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 + "page_number": 5 }, - "text": "80" + "text": "Nuclear energy is already making a major contribution. By using nuclear energy rather than fossil fuels, we currently avoid the emission of more than 2500 million tonnes of carbon dioxide every year. To put that into perspective, it is the equivalent of removing about 400 million cars from the world’s roads." }, { - "type": "Title", - "element_id": "9a1f49cd39fe9698fc556924b6b889da", + "type": "NarrativeText", + "element_id": "030d3154a592248139651c5f8fbef1d5", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -2731,13 +751,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 8 + "page_number": 5 }, - "text": " Biofuels/Waste" + "text": "Modern society is dependent on the steady supply of electricity, every day of the year – regardless of weather, season or time of day – and nuclear energy is particularly well-suited to providing this service. 
Given that the majority of baseload supply is fossil-based, an increase in the use of nuclear energy would result in a rapid decarbonization of the electricity system. The International Energy Agency’s (IEA) recent report III on nuclear energy highlighted the importance of dependable baseload electricity generators and the need to properly value and compensate them for the electricity security and reliability services they provide." }, { - "type": "UncategorizedText", - "element_id": "ff5a1ae012afa5d4c889c50ad427aaf5", + "type": "Footer", + "element_id": "4e07408562bedb8b60ce05c1decfe3ad", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -2749,13 +769,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 8 + "page_number": 5 }, - "text": "70" + "text": "3" }, { - "type": "Title", - "element_id": "c4fad0ce9772d241be8c8624896ada86", + "type": "NarrativeText", + "element_id": "a53cecd93ffb9ec731b7974f1805e924", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -2767,13 +787,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 8 + "page_number": 6 }, - "text": " Wind/Solar" + "text": "Despite impressive recent growth, the stark reality is that renewables alone will not be able to resolve our dependence on fossil fuels. Clearly, the sun does not always shine, and the wind does not always blow, and this is compounded by the fact that many times these periods coincide with when electricity demand is at its highest, but renewables can be complementary to nuclear energy. Storage solutions, such as batteries, will not be able to power our societies for days or weeks when the weather is not favourable. 
Natural gas is currently the most used solution for the intermittency problem, which only serves to reinforce our economy’s dependence of fossil fuels, and severely undermines the apparently ‘green credentials’ of many renewables." }, { - "type": "UncategorizedText", - "element_id": "39fa9ec190eee7b6f4dff1100d6343e1", + "type": "Title", + "element_id": "899a2c517ba69726f3808d66f442e439", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -2785,13 +805,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 8 + "page_number": 6 }, - "text": "60" + "text": "Moving to a sustainable future" }, { - "type": "Title", - "element_id": "b449cd843dc44ab907e1e9ed9c30d92e", + "type": "NarrativeText", + "element_id": "a8c17b6aa3cad915f2f7e0126706c2f5", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -2803,13 +823,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 8 + "page_number": 6 }, - "text": " Hydro" + "text": "The Intergovernmental Panel on Climate Change (IPCC) special report on Global Warming of 1.5°C iv examined a large number of different scenarios for limiting global warming to 1.5°C. Of those scenarios which would achieve the 1.5°C target, the mean increase in nuclear energy’s contribution to electricity production was 2.5 times higher compared to today. However, the ‘middle-of-the-road’ scenario – in which social, economic, and technological trends follow current patterns and would not require major changes to, for example, diet and travel habits – sees the need for nuclear increase by five times globally by 2050." 
}, { - "type": "Title", - "element_id": "f35457739b3bd74c61625c986c844726", + "type": "NarrativeText", + "element_id": "7562e707e991f1fb634fff41f2cae0e4", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -2821,13 +841,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 8 + "page_number": 6 }, - "text": " Nuclear" + "text": "The IEA has concluded that without an expanded contribution from nuclear energy, the already huge challenge of achieving emissions reductions will become drastically harder and more costly. In their latest report on nuclear energy v, published in 2019, they also conclude that not using nuclear would have negative implications for energy security and result in higher costs for the consumers. The IEA recommends policy reforms to ‘… ensure competition on a level playing field’ and that the ‘… focus should be on designing electricity markets in a way that values the clean energy and energy security attributes of low-carbon technologies, including nuclear power.’ Such reforms should also ensure that reliability of electricity production is properly valued and compensated." }, { - "type": "UncategorizedText", - "element_id": "bbf3f11cb5b43e700273a78d12de55e4", + "type": "NarrativeText", + "element_id": "1cde21cc10aa769a17ca11aa1e10823e", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -2839,13 +859,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 8 + "page_number": 6 }, - "text": "%" + "text": "As part of the Harmony Programme, the world’s nuclear industry has identified three key policy areas for action to unlock the true potential of nuclear energy - the need for a level playing field, the harmonization of regulations and the establishment of an effective safety paradigm." 
}, { - "type": "UncategorizedText", - "element_id": "1a6562590ef19d1045d06c4055742d38", + "type": "NarrativeText", + "element_id": "af2424b7ec665072a2ee0bdcd901e244", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -2857,13 +877,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 8 + "page_number": 6 }, - "text": "50" + "text": "In regard to the need for a level playing field, we see that many of the world’s electricity markets operate in an unsustainable fashion, dominated by short-term thinking. Electricity supply which is affordable, reliable and available 24/7 generates broad societal benefits, and as seen in Figure 3, nuclear is one of the most affordable electricity sources." }, { - "type": "UncategorizedText", - "element_id": "d59eced1ded07f84c145592f65bdf854", + "type": "Footer", + "element_id": "4b227777d4dd1fc61c6f884f48641d02", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -2875,13 +895,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 8 + "page_number": 6 }, - "text": "40" + "text": "4" }, { - "type": "UncategorizedText", - "element_id": "624b60c58c9d8bfb6ff1886c2fd605d2", + "type": "Title", + "element_id": "e8dbac2cdc67e714e99baa9c0f6a54b9", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -2893,13 +913,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 8 + "page_number": 7 }, - "text": "30" + "text": "h W M / $" }, { - "type": "UncategorizedText", - "element_id": "f5ca38f748a1d6eaf726b8a42fb575c3", + "type": "Image", + "element_id": "5b5f659ab2c445e9ed688dd79280a53e", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -2911,13 +931,13 @@ "date_modified": "2023-02-12T10:10:36" }, 
"filetype": "application/pdf", - "page_number": 8 + "page_number": 7 }, - "text": "20" + "text": " a ro) 0 » ec $ Se SW SS is é e » Pe US X? oe fe)" }, { - "type": "UncategorizedText", - "element_id": "4a44dc15364204a80fe80e9039455cc1", + "type": "FigureCaption", + "element_id": "a5846cd18e790db780cc03f9e5f63278", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -2929,13 +949,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 8 + "page_number": 7 }, - "text": "10" + "text": "Figure 3. Comparative cost projections for main electricity generators vi" }, { - "type": "UncategorizedText", - "element_id": "5feceb66ffc86f38d952786c6d696c79", + "type": "NarrativeText", + "element_id": "13ff2375260e277c2dfbc8826aa50a65", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -2947,13 +967,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 8 + "page_number": 7 }, - "text": "0" + "text": "Additionally, electricity markets fail to recognize the relative costs of different forms of electricity generation. Whilst the nuclear industry takes responsibility for its lifecycle costs (including decommissioning and waste management), other electricity generators do not. Fossil fuel generators are rarely required to pay the price in line with the environmental and health damage that their emissions cause, whilst the cost of wind and solar does not include the disposal of the sometimes toxic materials at the end of their life." 
}, { - "type": "Title", - "element_id": "7a1ca4ef7515f7276bae7230545829c2", + "type": "NarrativeText", + "element_id": "9ad4cf48d0b9d0bbfd257214f3d050dd", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -2965,13 +985,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 8 + "page_number": 7 }, - "text": "France" + "text": "However, markets fail to give due credit to electricity generators, such as nuclear energy, that are able to meet these societal demands. This has resulted in situations where nuclear energy has struggled to compete with energy sources that have been subsidized, do not pay the hidden costs brought on by their intermittency (e.g. costly backup provisions and investments in the grid), or do not have to take responsibility for using our common atmosphere as a dumping ground." }, { - "type": "Title", - "element_id": "853637136575897a73cba3c5fb085e8c", + "type": "NarrativeText", + "element_id": "4b3dad9b769c100e89b2c082e7d9e13e", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -2983,13 +1003,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 8 + "page_number": 7 }, - "text": "Sweden" + "text": "In regard to the need to harmonize regulations, multiple regulatory barriers stemming from diverse national licensing processes and safety requirements currently limit global nuclear trade and investment. A lack of international standardization places unnecessary regulatory burdens on nuclear activities and causes delays in the licensing of new designs, hindering innovation." 
}, { - "type": "Title", - "element_id": "2275583196d791405892aaca0d87743c", + "type": "NarrativeText", + "element_id": "0ce74aa5e786157de72d5ae801d86cc4", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3001,13 +1021,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 8 + "page_number": 7 }, - "text": "Switzerland" + "text": "The International Atomic Energy Agency (IAEA) has highlighted the importance of addressing this issue, concluding that the lack of regulatory harmony ‘…causes many drawbacks for the entire nuclear industry, including developers, vendors, operators and even regulators themselves…This results in increased costs and reduced predictability in project execution’. vii It is therefore crucial that we harmonize the regulatory process to address these weaknesses, and avoid unnecessary duplication and inconsistencies." }, { - "type": "NarrativeText", - "element_id": "fd1b6d076800203a708efab109d9393a", + "type": "Footer", + "element_id": "ef2d127de37b942baad06145e54b0c61", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3019,13 +1039,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 8 + "page_number": 7 }, - "text": "Figure 5. 
The importance of nuclear in ensuring clean energy systems in France, Sweden and Switzerland ix" + "text": "5" }, { - "type": "UncategorizedText", - "element_id": "e7f6c011776e8db7cd330b54174fd76f", + "type": "NarrativeText", + "element_id": "2cf9c478a20b21f5792941a179d926e9", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3039,11 +1059,11 @@ "filetype": "application/pdf", "page_number": 8 }, - "text": "6" + "text": "In regard to the need for a holistic safety paradigm for the whole electricity system, we need to consider safety from a societal perspective, something the current energy system fails to do. The health, environmental and safety benefits of nuclear energy are not sufficiently understood and valued when compared with other electricity sources. Nuclear energy remains the safest form of electricity generation (Figure 4). Additionally, the use of nuclear consistently prevents many tens of thousands of deaths (mainly resulting from air pollution) every year by avoiding the use of coal - lifesaving measures which must be better recognised and valued." 
}, { "type": "UncategorizedText", - "element_id": "284b7e6d788f363f910f7beb1910473e", + "element_id": "dbae772db29058a88f9bd830e957c695", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3055,13 +1075,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 9 + "page_number": 8 }, - "text": "600" + "text": "140" }, { - "type": "UncategorizedText", - "element_id": "0604cd3138feed202ef293e062da2f47", + "type": "NarrativeText", + "element_id": "e11247712b3df61756970b45f019ad68", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3073,13 +1093,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 9 + "page_number": 8 }, - "text": "500" + "text": "r a e y" }, { "type": "Title", - "element_id": "de7d1b721a1e0632b7cf04edf5032c8e", + "element_id": "3f79bb7b435b05321651daefd374cdc6", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3091,13 +1111,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 9 + "page_number": 8 }, - "text": "i" + "text": "e" }, { "type": "Title", - "element_id": "baa49be4a9f9fab3b991718e0adb565e", + "element_id": "f83714d89302473e0e4f5399bd50e7a9", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3109,13 +1129,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 9 + "page_number": 8 }, - "text": " Non-hydro" + "text": "W T" }, { - "type": "Title", - "element_id": "293e9366a39d6ed33a894e4dbe0b8700", + "type": "UncategorizedText", + "element_id": "380918b946a526640a40df5dced65167", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3127,13 +1147,13 @@ "date_modified": "2023-02-12T10:10:36" }, 
"filetype": "application/pdf", - "page_number": 9 + "page_number": 8 }, - "text": "ren. & waste" + "text": "=" }, { - "type": "UncategorizedText", - "element_id": "26d228663f13a88592a12d16cf9587ca", + "type": "NarrativeText", + "element_id": "f9bb49945b60897227abdd75b5f8d39b", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3145,13 +1165,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 9 + "page_number": 8 }, - "text": "400" + "text": "r e p s e i t i l" }, { "type": "Title", - "element_id": "30b160442c1de4494644bbb253d47d62", + "element_id": "1fb2ec4fc8fc547c0de86ba79ba651e5", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3163,13 +1183,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 9 + "page_number": 8 }, - "text": "z=" + "text": "a t a F" }, { - "type": "Title", - "element_id": "f35457739b3bd74c61625c986c844726", + "type": "Image", + "element_id": "0fece208b80790baa3ae323ace21f818", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3181,13 +1201,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 9 + "page_number": 8 }, - "text": " Nuclear" + "text": " 140 120 120 1 : 100 99.5 : 80 71.9 1 n 60 . 1 40 : “99 : 85 7g 0245 <0.01 0 : : : > S & 3} cs s\\ é fos < < Qg eS S ew ee © RS Rs ~a S Se fe) we" }, { - "type": "Title", - "element_id": "563a2980d46c81119e1d7d952b375a41", + "type": "FigureCaption", + "element_id": "445676822969fb5177c0081d07449a70", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3199,13 +1219,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 9 + "page_number": 8 }, - "text": "h W T" + "text": "Figure 4. 
Comparison of number of fatalities due to electricity generation viii" }, { - "type": "UncategorizedText", - "element_id": "983bd614bb5afece5ab3b6023f71147c", + "type": "Title", + "element_id": "98d83a387e3ac2261daaf8d936bf3e27", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3217,13 +1237,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 9 + "page_number": 8 }, - "text": "300" + "text": "Nuclear for a sustainable tomorrow" }, { - "type": "UncategorizedText", - "element_id": "27badc983df1780b60c2b3fa9d3a19a0", + "type": "NarrativeText", + "element_id": "1119369ba9a68924c64155762de72d8e", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3235,13 +1255,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 9 + "page_number": 8 }, - "text": "200" + "text": "Nuclear energy is already making a significant contribution to providing the world with clean and abundant electricity, and has a proven track record of being a reliable workhorse around the world. Countries like France, Sweden and Switzerland have proven that it is possible to divorce economic growth from an increase in damaging emissions and over the timescales required to effectively challenge climate change and environmental degradation (Figures 5 and 6). Nuclear can ensure that fast-growing populations achieve rising standards of living – without having to sacrifice the planet or their own well-being." 
}, { "type": "UncategorizedText", - "element_id": "0b06ee5051e3d7dd686665a41ae1f2d9", + "element_id": "ad57366865126e55649ecb23ae1d4888", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3253,13 +1273,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 9 + "page_number": 8 }, - "text": "y ——" + "text": "100" }, { - "type": "ListItem", - "element_id": "bda050585a00f0f6cb502350559d7553", + "type": "UncategorizedText", + "element_id": "bbf3f11cb5b43e700273a78d12de55e4", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3271,13 +1291,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 9 + "page_number": 8 }, - "text": "—" + "text": "%" }, { - "type": "ListItem", - "element_id": "bda050585a00f0f6cb502350559d7553", + "type": "Image", + "element_id": "e56f1d3df6ddf93348f20c095337d639", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3289,13 +1309,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 9 + "page_number": 8 }, - "text": "—" + "text": " 100 90 IB Coal i Gas/Oil 80 IB Biofuels/Waste 70 i Wind/Solar @ Hydro 60 @ Nuclear 50 40 30 20 10 0) " }, { - "type": "Title", - "element_id": "f6e172956a9472fa43f9a895f99c2836", + "type": "UncategorizedText", + "element_id": "5feceb66ffc86f38d952786c6d696c79", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3307,13 +1327,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 9 + "page_number": 8 }, - "text": " Natural gas" + "text": "0" }, { "type": "Title", - "element_id": "b449cd843dc44ab907e1e9ed9c30d92e", + "element_id": "7a1ca4ef7515f7276bae7230545829c2", "metadata": { "data_source": { "url": 
"s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3325,13 +1345,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 9 + "page_number": 8 }, - "text": " Hydro" + "text": "France" }, { - "type": "Title", - "element_id": "b001a2374d44e3085e712bb40f66270e", + "type": "FigureCaption", + "element_id": "853637136575897a73cba3c5fb085e8c", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3343,13 +1363,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 9 + "page_number": 8 }, - "text": " Oil" + "text": "Sweden" }, { - "type": "Title", - "element_id": "90ad0c8c14253135efd14645e0156145", + "type": "FigureCaption", + "element_id": "2275583196d791405892aaca0d87743c", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3361,13 +1381,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 9 + "page_number": 8 }, - "text": " Coal" + "text": "Switzerland" }, { - "type": "UncategorizedText", - "element_id": "ad57366865126e55649ecb23ae1d4888", + "type": "FigureCaption", + "element_id": "fd1b6d076800203a708efab109d9393a", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3379,13 +1399,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 9 + "page_number": 8 }, - "text": "100" + "text": "Figure 5. 
The importance of nuclear in ensuring clean energy systems in France, Sweden and Switzerland ix" }, { - "type": "UncategorizedText", - "element_id": "5feceb66ffc86f38d952786c6d696c79", + "type": "Footer", + "element_id": "06e9d52c1720fca412803e3b07c4b228", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3397,13 +1417,13 @@ "date_modified": "2023-02-12T10:10:36" }, "filetype": "application/pdf", - "page_number": 9 + "page_number": 8 }, - "text": "0" + "text": "6" }, { - "type": "UncategorizedText", - "element_id": "ec54e99514663edb97adef400fbf34a7", + "type": "Title", + "element_id": "563a2980d46c81119e1d7d952b375a41", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3417,11 +1437,11 @@ "filetype": "application/pdf", "page_number": 9 }, - "text": "1974" + "text": "h W T" }, { - "type": "UncategorizedText", - "element_id": "a2c54f65d066210267b404e8386a7f4c", + "type": "Image", + "element_id": "77d8044f595648ff9853b27fadd6ef94", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3435,11 +1455,11 @@ "filetype": "application/pdf", "page_number": 9 }, - "text": "1980 1985 1990 1995 2000 2005 2010" + "text": " BB Non-hydro 500 i ren. 
& waste 400 z= Nuclear Natural gas 300 y -— EB Hydro i oil 200 —— -— BB Coal 100" }, { - "type": "UncategorizedText", - "element_id": "46e67c525617663b392a53c0e94ba79e", + "type": "FigureCaption", + "element_id": "ff8db11f410c00860c60393cc143175f", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3453,11 +1473,11 @@ "filetype": "application/pdf", "page_number": 9 }, - "text": "2017" + "text": "1974 1980 1985 1990 1995 2000 2005 2010 2017" }, { - "type": "NarrativeText", - "element_id": "338d3e15917414641f2b559473f168f8", + "type": "FigureCaption", + "element_id": "3b5b3755bac62d7f53eb84cadc34c528", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3474,8 +1494,8 @@ "text": "Figure 6. The lasting decarbonization of French electricity and nuclear’s ability to meet growing demand x" }, { - "type": "FigureCaption", - "element_id": "eeda9f9210dfe4be7e82b4385290d3ca", + "type": "NarrativeText", + "element_id": "4f5cc927b953f3c49c562a22c88f863f", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3489,11 +1509,11 @@ "filetype": "application/pdf", "page_number": 9 }, - "text": "One fuel pellet contains as much energy as a tonne of coal" + "text": "The incredible energy density of uranium means that just a few kilos is all that is required to provide one person with enough power for a lifetime. Uranium is abundant and can be found in many parts of the world, as well as in seawater. Furthermore, spent nuclear fuel is well managed and can in most cases be recycled to produce even more power. By using nuclear energy, countries are able to take charge of their own destinies by decreasing their reliance on imported energy – enhanced independence and security in uncertain times." 
}, { - "type": "NarrativeText", - "element_id": "4f5cc927b953f3c49c562a22c88f863f", + "type": "Image", + "element_id": "36ca9b7cdbbcba729a46487cf86c07eb", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3507,7 +1527,7 @@ "filetype": "application/pdf", "page_number": 9 }, - "text": "The incredible energy density of uranium means that just a few kilos is all that is required to provide one person with enough power for a lifetime. Uranium is abundant and can be found in many parts of the world, as well as in seawater. Furthermore, spent nuclear fuel is well managed and can in most cases be recycled to produce even more power. By using nuclear energy, countries are able to take charge of their own destinies by decreasing their reliance on imported energy – enhanced independence and security in uncertain times." + "text": "One fuel pellet contains as much energy as a tonne of coal" }, { "type": "NarrativeText", @@ -3546,7 +1566,7 @@ "text": "Nuclear energy offers a multitude of services beyond just electricity. With nuclear, we can decarbonize the way we heat our homes, provide process heat for industry, and ensure access to clean water. As freshwater supplies come under increasing pressure worldwide, nuclear reactors can provide desalination, ensuring a reliable flow of fresh drinking water in areas where it is scarce." }, { - "type": "UncategorizedText", + "type": "Header", "element_id": "7902699be42c8a8e46fbbb4501726517", "metadata": { "data_source": { @@ -3619,7 +1639,7 @@ }, { "type": "Title", - "element_id": "69824d3b0e70ca6aaa0da1613b65fd91", + "element_id": "e56261e0bd30965b8e68ed2abb15b141", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3816,8 +1836,8 @@ "text": "International Energy Agency (2018), World Energy Outlook 2018. 
Data accessed from https://www.iea.org/weo/ – Based on the New Policies Scenario, which incorporates existing energy policies as well as an assessment of the results likely to stem from the implementation of announced policy intentions – with visual modification by World Nuclear Association. International Energy Agency (n.d.), Statistics. Accessed from: https://www.iea.org/statistics/?country=WORLD&year=2016&category=Electricity&indicator=ElecGenByFuel&mode =chart&dataTable=ELECTRICITYANDHEAT – with visual modifications by World Nuclear Association. International Energy Agency (2019), Nuclear Power in a Clean Energy System. Accessed from: https://www.iea.org/ publications/nuclear/ Intergovernmental Panel on Climate Change (2018), Special Report on Global Warming of 1.5 °C. Accessed from: https://www.ipcc.ch/sr15/ International Energy Agency (2019), Nuclear Power in a Clean Energy System. Accessed from: https://www.iea.org/ publications/nuclear/ International Energy Agency & OECD Nuclear Energy Agency (2015), Projected Costs of generating Electricity – 2015 Edition. Accessed from: https://www.oecd-nea.org/ndd/pubs/2015/7057-proj-costs-electricity-2015.pdf International Atomic Energy Agency (2015), Technical challenges in the application and licensing of digital instrumentation and control systems in nuclear power plants. Accessed from: https://www-pub.iaea.org/MTCD/ Publications/PDF/P1695_web.pdf" }, { - "type": "NarrativeText", - "element_id": "b6396ecd6f60e3dcca17c045c00846c1", + "type": "Title", + "element_id": "ed171375d0bf81eaa5512140c3a29b8f", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3831,11 +1851,11 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "viii Paul-Scherrer Institute. 
Data for nuclear accidents modified to reflect UNSCEAR findings/recommendations (2012)" + "text": "ix" }, { - "type": "Title", - "element_id": "ed171375d0bf81eaa5512140c3a29b8f", + "type": "ListItem", + "element_id": "c5693c397679aaeed0a80ac0c6b6dd20", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3849,11 +1869,11 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "ix" + "text": "x bid." }, { - "type": "Title", - "element_id": "2d711642b726b04401627ca9fbac32f5", + "type": "ListItem", + "element_id": "9ec2f70cbe42f5dc5073a88246db2b7a", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3867,7 +1887,7 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "x" + "text": "and NRC SOARCA study 2015 Paul-Scherrer Institute. Data for nuclear accidents modified to reflect UNSCEAR findings/recommendations (2012)" }, { "type": "UncategorizedText", @@ -3906,8 +1926,8 @@ "text": "Photo credits: Front cover: Mike Baird; page 2: Vattenfall; page 4: Getty Images; page 5: Adobe Stock; page 6: Rosatom; page 8: Dean Calma, IAEA; page 10: Kazatomprom; page 11: EDF." 
}, { - "type": "UncategorizedText", - "element_id": "2c624232cdd221771294dfbb310aca00", + "type": "Footer", + "element_id": "aa67a169b0bba217aa0aa88a65346920", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3924,8 +1944,8 @@ "text": "8" }, { - "type": "UncategorizedText", - "element_id": "481e5a54650b0a4ac7bc2568ddad436d", + "type": "NarrativeText", + "element_id": "c48603fd38d3449d3afcd2dc18903083", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3943,7 +1963,7 @@ }, { "type": "NarrativeText", - "element_id": "cff66c7267104eeade830b3dc8675acd", + "element_id": "de49f1c955d7c8a4d1d6d261c1cf21ba", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3960,8 +1980,8 @@ "text": "The Silent Giant © 2019 World Nuclear Association. Registered in England and Wales, company number 01215741" }, { - "type": "Title", - "element_id": "2ef1a5c0752085d3a6935132ad9e597c", + "type": "NarrativeText", + "element_id": "821daa4396c0087d9d5ee9240bc5c85c", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -3979,7 +1999,7 @@ }, { "type": "NarrativeText", - "element_id": "20ef77d9aa66e60f1443750cdbaa9014", + "element_id": "705da4db5e220010ddfd03d9452855e4", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", diff --git a/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/recalibrating-risk-report.pdf.json b/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/recalibrating-risk-report.pdf.json index a2f18f4d0d..6b44b1a62a 100644 --- a/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/recalibrating-risk-report.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/recalibrating-risk-report.pdf.json @@ -18,8 +18,8 @@ "text": "WORLD 
ASSOCIATION" }, { - "type": "NarrativeText", - "element_id": "1536456ece03fdb7bdbb6b848116dfde", + "type": "Title", + "element_id": "d72f07e2c764ae90417305db928ebce1", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -37,7 +37,7 @@ }, { "type": "NarrativeText", - "element_id": "38ae4eaf24988f8ff8a9f5b2eaab7449", + "element_id": "c875f7e098e5ea1b337a189c28e80ac3", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -54,8 +54,8 @@ "text": "Putting nuclear risk in context and perspective" }, { - "type": "Title", - "element_id": "e2371e8e756ef68aaf76eb397e9e8f32", + "type": "NarrativeText", + "element_id": "327be60d66a34747047e1365e6bab727", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -91,7 +91,7 @@ }, { "type": "Title", - "element_id": "53d548aa01fc3eb72da15a5be7f235e2", + "element_id": "2fa985d0a50e61b09ec22c447cc4b2c9", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -145,7 +145,7 @@ }, { "type": "NarrativeText", - "element_id": "ee4ac543bf2035b86b6818e06e3a0a90", + "element_id": "f62c49fcf0a7960d0b509e37507d76d3", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -159,7 +159,7 @@ "filetype": "application/pdf", "page_number": 3 }, - "text": "Expanding the use of nuclear energy is essential for solving some of the biggest challenges facing humanity. Nuclear power has already played a major role in avoiding the emission of air pollutants and greenhouse gases, a role that will have to be greatly expanded in the future to ensure global energy supplies are decarbonized by 2050. 
Nuclear energy will also play a major part in ensuring that the transition to a low-carbon future is done in an equitable fashion, providing people across the world with a high-powered and sustainable future." + "text": "Therefore, World Nuclear Association calls upon policymakers and regulators to adopt an all-hazards approach, where different risks associated with energy producing technologies are placed in perspective and the appropriate context, and examined in line with the latest scientific evidence. Policymakers and regulators must ensure that their decisions regarding radiation protection do not create greater risks elsewhere. This include the recalibration of existing regulations regarding nuclear power and radiation, weighing the cost of regulatory measures against the societal benefits provided by nuclear energy." }, { "type": "NarrativeText", @@ -181,7 +181,7 @@ }, { "type": "NarrativeText", - "element_id": "f62c49fcf0a7960d0b509e37507d76d3", + "element_id": "ee4ac543bf2035b86b6818e06e3a0a90", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -195,10 +195,10 @@ "filetype": "application/pdf", "page_number": 3 }, - "text": "Therefore, World Nuclear Association calls upon policymakers and regulators to adopt an all-hazards approach, where different risks associated with energy producing technologies are placed in perspective and the appropriate context, and examined in line with the latest scientific evidence. Policymakers and regulators must ensure that their decisions regarding radiation protection do not create greater risks elsewhere. This include the recalibration of existing regulations regarding nuclear power and radiation, weighing the cost of regulatory measures against the societal benefits provided by nuclear energy." + "text": "Expanding the use of nuclear energy is essential for solving some of the biggest challenges facing humanity. 
Nuclear power has already played a major role in avoiding the emission of air pollutants and greenhouse gases, a role that will have to be greatly expanded in the future to ensure global energy supplies are decarbonized by 2050. Nuclear energy will also play a major part in ensuring that the transition to a low-carbon future is done in an equitable fashion, providing people across the world with a high-powered and sustainable future." }, { - "type": "UncategorizedText", + "type": "Footer", "element_id": "6b86b273ff34fce19d6b804eff5a3f57", "metadata": { "data_source": { @@ -216,8 +216,8 @@ "text": "1" }, { - "type": "NarrativeText", - "element_id": "f193ae2dc90e6bc6856125ad88fdab12", + "type": "Title", + "element_id": "6b3149c1769f5cd200ec2a0017b936dc", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -235,7 +235,7 @@ }, { "type": "NarrativeText", - "element_id": "3cf0a9c5ad0cacc724f90abbe99664d9", + "element_id": "ce5bcf6b4fe24d62bd24d156d5bc965e", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -249,11 +249,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "In reality, radiation is a natural part of life; indeed, we are all exposed to radiation every day, on average receiving 2-3 millisieverts (mSv) per year. Most of this radiation is naturally occurring, with radon gas from the ground being the main source of exposure. The nuclear industry is responsible for a very small part of radiation exposure to the public, as seen in Figure 2. To put this into perspective, eating 10 bananas or two Brazil nuts results in the same radiation dose as living nearby a nuclear power plant for a year. Humans are also naturally radioactive, and the radiation dose from sleeping next to someone else each night for a year is ten times higher than the exposure from living nearby a nuclear power plant for the same time span." 
+ "text": "It is widely accepted that humans have skewed perceptions of risks, and the way we respond to them is shaped by these perceptions, rather than the actual threats posed. Approximately 1.35 millioni people die every year because of traffic accidents, in comparison with 257 aviation fatalities in 2019ii, yet more people are nervous about flying, fearing a rare deadly crash, than being in a fatal traffic accident. These numbers tell a powerful and well-established story: evaluations of risk are largely the result of emotions, rather than logic or facts. Although it is hard to recognize and accept that our perceptions may mislead us and curtail effective decision making, this is a well-established characteristic of humanity." }, { "type": "NarrativeText", - "element_id": "ce5bcf6b4fe24d62bd24d156d5bc965e", + "element_id": "45e9c81bf6ccdc498a6ac5640d786736", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -267,11 +267,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "It is widely accepted that humans have skewed perceptions of risks, and the way we respond to them is shaped by these perceptions, rather than the actual threats posed. Approximately 1.35 millioni people die every year because of traffic accidents, in comparison with 257 aviation fatalities in 2019ii, yet more people are nervous about flying, fearing a rare deadly crash, than being in a fatal traffic accident. These numbers tell a powerful and well-established story: evaluations of risk are largely the result of emotions, rather than logic or facts. Although it is hard to recognize and accept that our perceptions may mislead us and curtail effective decision making, this is a well-established characteristic of humanity." + "text": "Nuclear energy and the risk of radiation is one of the most extreme cases in which perceived and actual risks have diverged. 
The fear of radiation, whilst pre- dating the Second World War, was firmly established by the debate on the potential impacts of low-dose radiation from the fallout from nuclear weapons testing in the early years of the Cold War. Radiation in many ways became linked with the mental imagery of nuclear war, playing an important role in increasing public concern about radiation and its health effects. There is a well-established discrepancy between fact-based risk assessments and public perception of different risks. This is very much the case with nuclear power, and this is clearly highlighted in Figure 1, with laypersons ranking nuclear power as the highest risk out of 30 activities and technologies, with experts ranking nuclear as 20th. In many ways, popular culture’s depiction of radiation has played a role in ensuring that this discrepancy has remained, be it Godzilla, The Incredible Hulk, or The Simpsons, which regularly plays on the notion of radiation from nuclear power plants causing three-eyed fish, something that has been firmly rejected as unscientific." }, { "type": "NarrativeText", - "element_id": "45e9c81bf6ccdc498a6ac5640d786736", + "element_id": "3cf0a9c5ad0cacc724f90abbe99664d9", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -285,7 +285,7 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "Nuclear energy and the risk of radiation is one of the most extreme cases in which perceived and actual risks have diverged. The fear of radiation, whilst pre- dating the Second World War, was firmly established by the debate on the potential impacts of low-dose radiation from the fallout from nuclear weapons testing in the early years of the Cold War. Radiation in many ways became linked with the mental imagery of nuclear war, playing an important role in increasing public concern about radiation and its health effects. 
There is a well-established discrepancy between fact-based risk assessments and public perception of different risks. This is very much the case with nuclear power, and this is clearly highlighted in Figure 1, with laypersons ranking nuclear power as the highest risk out of 30 activities and technologies, with experts ranking nuclear as 20th. In many ways, popular culture’s depiction of radiation has played a role in ensuring that this discrepancy has remained, be it Godzilla, The Incredible Hulk, or The Simpsons, which regularly plays on the notion of radiation from nuclear power plants causing three-eyed fish, something that has been firmly rejected as unscientific." + "text": "In reality, radiation is a natural part of life; indeed, we are all exposed to radiation every day, on average receiving 2-3 millisieverts (mSv) per year. Most of this radiation is naturally occurring, with radon gas from the ground being the main source of exposure. The nuclear industry is responsible for a very small part of radiation exposure to the public, as seen in Figure 2. To put this into perspective, eating 10 bananas or two Brazil nuts results in the same radiation dose as living nearby a nuclear power plant for a year. Humans are also naturally radioactive, and the radiation dose from sleeping next to someone else each night for a year is ten times higher than the exposure from living nearby a nuclear power plant for the same time span." 
}, { "type": "Title", @@ -305,6 +305,24 @@ }, "text": "Rank Order Laypersons" }, + { + "type": "Table", + "element_id": "07e04cdff751f52e042c08c1b265b6f5", + "metadata": { + "data_source": { + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": 306475068461766865312866697521104206816, + "record_locator": { + "protocol": "s3", + "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + }, + "date_modified": "2023-02-12T10:09:32" + }, + "filetype": "application/pdf", + "page_number": 4 + }, + "text": "_Laypersons Experts 1 2 3 Handguns 4 + Nuclear power 20 Motor vehicles 1 4 Smoking 2 17 Electric power (non-nuclear) 9 1 | + + 22 xrays 7 30 Vaccinations 25" + }, { "type": "UncategorizedText", "element_id": "4523540f1504cd17100c4835e85b7eef", @@ -325,7 +343,7 @@ }, { "type": "UncategorizedText", - "element_id": "785f3ec7eb32f30b90cd0fcf3657d388", + "element_id": "624b60c58c9d8bfb6ff1886c2fd605d2", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -339,11 +357,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "22" + "text": "30" }, { "type": "UncategorizedText", - "element_id": "624b60c58c9d8bfb6ff1886c2fd605d2", + "element_id": "785f3ec7eb32f30b90cd0fcf3657d388", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -357,7 +375,7 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "30" + "text": "22" }, { "type": "UncategorizedText", @@ -415,7 +433,7 @@ }, { "type": "UncategorizedText", - "element_id": "d4735e3a265e16eee03f59718b9b5d03", + "element_id": "6b86b273ff34fce19d6b804eff5a3f57", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -429,11 +447,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "2" + "text": "1" }, { "type": "UncategorizedText", - 
"element_id": "4e07408562bedb8b60ce05c1decfe3ad", + "element_id": "d4735e3a265e16eee03f59718b9b5d03", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -447,11 +465,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "3" + "text": "2" }, { "type": "UncategorizedText", - "element_id": "6b86b273ff34fce19d6b804eff5a3f57", + "element_id": "4e07408562bedb8b60ce05c1decfe3ad", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -465,7 +483,7 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "1" + "text": "3" }, { "type": "UncategorizedText", @@ -505,7 +523,7 @@ }, { "type": "Title", - "element_id": "1656c455012b016fbac5eac0a38397bd", + "element_id": "eda8f72476c539920d2c0e3515ba4b07", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -519,11 +537,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "Electric power (non-nuclear)" + "text": "Smoking" }, { "type": "Title", - "element_id": "ed3861e631428b9b77e2bdc0384d2cbe", + "element_id": "f8e3740e358309bd0570d4f3ca141793", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -537,11 +555,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "Vaccinations" + "text": "Handguns" }, { "type": "Title", - "element_id": "eda8f72476c539920d2c0e3515ba4b07", + "element_id": "ed3861e631428b9b77e2bdc0384d2cbe", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -555,11 +573,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "Smoking" + "text": "Vaccinations" }, { "type": "Title", - "element_id": "f8e3740e358309bd0570d4f3ca141793", + "element_id": "82a60569029ed9032f1b08891e8524c2", "metadata": { "data_source": { "url": 
"s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -573,7 +591,7 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "Handguns" + "text": "Nuclear power" }, { "type": "Title", @@ -595,7 +613,7 @@ }, { "type": "Title", - "element_id": "82a60569029ed9032f1b08891e8524c2", + "element_id": "1656c455012b016fbac5eac0a38397bd", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -609,7 +627,7 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "Nuclear power" + "text": "Electric power (non-nuclear)" }, { "type": "Title", @@ -703,7 +721,7 @@ }, { "type": "UncategorizedText", - "element_id": "d1429f8178a04f7fc73a66edf10ab8b5", + "element_id": "19581e27de7ced00ff1ce50b2047e7a5", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -717,11 +735,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "" + "text": "9" }, { "type": "UncategorizedText", - "element_id": "4b227777d4dd1fc61c6f884f48641d02", + "element_id": "d4735e3a265e16eee03f59718b9b5d03", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -735,11 +753,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "4" + "text": "2" }, { "type": "UncategorizedText", - "element_id": "19581e27de7ced00ff1ce50b2047e7a5", + "element_id": "7902699be42c8a8e46fbbb4501726517", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -753,11 +771,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "9" + "text": "7" }, { "type": "UncategorizedText", - "element_id": "6b86b273ff34fce19d6b804eff5a3f57", + "element_id": "4b227777d4dd1fc61c6f884f48641d02", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -771,11 +789,11 @@ "filetype": 
"application/pdf", "page_number": 4 }, - "text": "1" + "text": "4" }, { "type": "UncategorizedText", - "element_id": "7902699be42c8a8e46fbbb4501726517", + "element_id": "6b86b273ff34fce19d6b804eff5a3f57", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -789,11 +807,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "7" + "text": "1" }, { "type": "UncategorizedText", - "element_id": "d4735e3a265e16eee03f59718b9b5d03", + "element_id": "d1429f8178a04f7fc73a66edf10ab8b5", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -807,7 +825,7 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "2" + "text": "" }, { "type": "NarrativeText", @@ -829,7 +847,7 @@ }, { "type": "NarrativeText", - "element_id": "3d8430367bf97300ddf3963de02bb5f4", + "element_id": "0d28f703c3b3aa9fee1f9f08fa688409", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -846,8 +864,8 @@ "text": "1 The original study was published in 1978, but its findings have been confirmed by numerous studies since." 
}, { - "type": "UncategorizedText", - "element_id": "d4735e3a265e16eee03f59718b9b5d03", + "type": "Footer", + "element_id": "53c234e5e8472b6ac51c1ae1cab3fe06", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -864,8 +882,8 @@ "text": "2" }, { - "type": "Title", - "element_id": "d6acb6d51cfc574936fc79bc06b8a371", + "type": "Image", + "element_id": "aa493f4c5f573e209dc5e56d5e2a341f", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -879,11 +897,11 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": "Natural" + "text": "Natural Artificial @ 48% Radon @ 11% Medicine @ 14% Buildings & soil @ 0.4% = Fallout @ 12% Food & water @ 0.4% Miscellaneous @ 10% Cosmic @ 0.2% Occupational @ 4% = Thoron @ 0.04% Nuclear discharges " }, { - "type": "Title", - "element_id": "d6acb6d51cfc574936fc79bc06b8a371", + "type": "FigureCaption", + "element_id": "9b657ab0d2ea482c887c7877ba86598d", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -897,11 +915,11 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": "Natural" + "text": "Figure 2. Global average exposure from different sources of radiation" }, { - "type": "UncategorizedText", - "element_id": "d4a293a7987bc37f4a826e0da1961aab", + "type": "NarrativeText", + "element_id": "4469b98946c004fbae47ad6285c9bba4", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -915,11 +933,11 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": " 48% Radon  14% Buildings & soil  12% Food & water  10% Cosmic  4% Thoron" + "text": "Fossil fuels – currently accounting for around 81% of total energy supplyiv – cause significant levels of emissions in terms of both greenhouse gases and air pollutants. 
Despite the serious and ongoing health and environmental harms caused by air pollution, it is often considered to be an inevitable consequence of economic development. Air pollution’s contribution to the burden of disease is profound, with an estimated 8.7 million people dying worldwide prematurely in 2018 alonev,vi. Despite this, it fails to induce the same fears and anxieties in people as nuclear energy does." }, { - "type": "Title", - "element_id": "8c3274ea479fd4a25c0b5611a8e48662", + "type": "NarrativeText", + "element_id": "cbf390f564b0b1197deb5bf3dd999291", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -933,11 +951,11 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": "Artificial" + "text": "In terms of accidents, hydropower is the deadliest electricity generator, mostly due to collapsing dams and the consequences of flooding. The Banqiao Dam failure in 1975 led to at least 26,000 people drowning, and as many as 150,000 deaths resulting from the secondary effects of the accident. In comparison, radiation exposure following Chernobyl caused 54 deaths2, while no casualties due to radiation are likely to occur from the accident at Fukushima Daiichi." 
}, { "type": "UncategorizedText", - "element_id": "0f748653e413fbddbb18262352d56b23", + "element_id": "6a3adc54db5128f797d4a12855193373", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -951,11 +969,11 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": " 11% Medicine  0.4%  0.4% Miscellaneous  0.2% Occupational  0.04% Nuclear discharges" + "text": "24.6" }, { - "type": "Title", - "element_id": "039bede24e51e7c42ce352c25b6427c0", + "type": "NarrativeText", + "element_id": "e11247712b3df61756970b45f019ad68", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -969,11 +987,11 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": "Fallout" + "text": "r a e y" }, { - "type": "NarrativeText", - "element_id": "9b657ab0d2ea482c887c7877ba86598d", + "type": "Title", + "element_id": "3f79bb7b435b05321651daefd374cdc6", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -987,11 +1005,11 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": "Figure 2. Global average exposure from different sources of radiation" + "text": "e" }, { - "type": "NarrativeText", - "element_id": "4469b98946c004fbae47ad6285c9bba4", + "type": "Title", + "element_id": "f83714d89302473e0e4f5399bd50e7a9", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1005,11 +1023,11 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": "Fossil fuels – currently accounting for around 81% of total energy supplyiv – cause significant levels of emissions in terms of both greenhouse gases and air pollutants. Despite the serious and ongoing health and environmental harms caused by air pollution, it is often considered to be an inevitable consequence of economic development. 
Air pollution’s contribution to the burden of disease is profound, with an estimated 8.7 million people dying worldwide prematurely in 2018 alonev,vi. Despite this, it fails to induce the same fears and anxieties in people as nuclear energy does." + "text": "W T" }, { "type": "NarrativeText", - "element_id": "cbf390f564b0b1197deb5bf3dd999291", + "element_id": "f9bb49945b60897227abdd75b5f8d39b", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1023,11 +1041,11 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": "In terms of accidents, hydropower is the deadliest electricity generator, mostly due to collapsing dams and the consequences of flooding. The Banqiao Dam failure in 1975 led to at least 26,000 people drowning, and as many as 150,000 deaths resulting from the secondary effects of the accident. In comparison, radiation exposure following Chernobyl caused 54 deaths2, while no casualties due to radiation are likely to occur from the accident at Fukushima Daiichi." 
+ "text": "r e p s e i t i l" }, { - "type": "UncategorizedText", - "element_id": "b7a56873cd771f2c446d369b649430b6", + "type": "Title", + "element_id": "1fb2ec4fc8fc547c0de86ba79ba651e5", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1041,11 +1059,11 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": "25" + "text": "a t a F" }, { - "type": "UncategorizedText", - "element_id": "6a3adc54db5128f797d4a12855193373", + "type": "Image", + "element_id": "226de27a8eeb930616d6b9c4aa4dc574", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1059,11 +1077,11 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": "24.6" + "text": " 05 24.6 20 18.4 S15 10 46 28 5 || 0.07 0.04 0.02 0.01 > SS I ~— ~— es ° & Se es oe oe & ro se s& e as" }, { - "type": "UncategorizedText", - "element_id": "f5ca38f748a1d6eaf726b8a42fb575c3", + "type": "FigureCaption", + "element_id": "8e44807922e69a38594c4b389cd0be54", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1077,11 +1095,11 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": "20" + "text": "Figure 3. Comparison of number of fatalities due to electricity generation, including accidents and air pollution3" }, { - "type": "UncategorizedText", - "element_id": "dfb6b8c404e0fa2b32def4ba49e00b3c", + "type": "NarrativeText", + "element_id": "bf88d949b16b32347c420a66fa413d49", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1095,11 +1113,11 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": "18.4" + "text": "Contrary to perceptions, nuclear is an incredibly safe source of energy (see Figure 3 for a comparison). 
What is also clear is that the continued use of alternative energy sources in preference to nuclear energy – in particular fossil fuels – poses a far greater risk to public health by significantly contributing to climate change and air pollution." }, { - "type": "NarrativeText", - "element_id": "e11247712b3df61756970b45f019ad68", + "type": "ListItem", + "element_id": "9f9b01127f5b3b297b3759a8e205ad59", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1113,11 +1131,11 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": "r a e y" + "text": "$ Sources drawn upon: Markandya, A., & Wilkinson, P. (2007), Sovacool et al. (2016). Data for nuclear accidents modified to reflect the 2012 UNSCEAR report and the 2015 US NRC SOARCA study." }, { - "type": "UncategorizedText", - "element_id": "dca468ba69cda6650ce03d976c274c66", + "type": "NarrativeText", + "element_id": "e450813fe6430d87c4caa64e4792bc74", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1131,11 +1149,11 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": "S15" + "text": "2 Including 28 firefighters that were exposed to lethal amounts of radiation during the accident night, and 15 fatal cases of thyroid cancer. 3 Sources drawn upon: Markandya, A., & Wilkinson, P. (2007), Sovacool et al. (2016). 
Data for nuclear accidents modified to reflect the" }, { - "type": "Title", - "element_id": "3f79bb7b435b05321651daefd374cdc6", + "type": "Header", + "element_id": "4e07408562bedb8b60ce05c1decfe3ad", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1149,11 +1167,11 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": "e" + "text": "3" }, { - "type": "UncategorizedText", - "element_id": "e629fa6598d732768f7c726b4b621285", + "type": "Title", + "element_id": "b6812463b15ddda3f2402dfda95d2c86", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1165,13 +1183,13 @@ "date_modified": "2023-02-12T10:09:32" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 6 }, - "text": "15" + "text": "The low-dose question" }, { - "type": "Title", - "element_id": "f83714d89302473e0e4f5399bd50e7a9", + "type": "NarrativeText", + "element_id": "ec0fb27e2a16f77899bf83591cd2d0de", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1183,13 +1201,13 @@ "date_modified": "2023-02-12T10:09:32" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 6 }, - "text": "W T" + "text": "Since the 1950s, the Linear No-Threshold (LNT) theory has been used to inform regulatory decisions, positing that any dose of radiation, regardless of the amount or the duration over which it is received, poses a risk. Assuming that LNT is correct, we should expect to see that people living in areas of the world where background doses are higher (e.g. India, Iran and northern Europe) have a higher incidence of cancer. 
However, despite people living in areas of the world where radiation doses are naturally higher than those that would be received in parts of the evacuation zones around Chernobyl and Fukushima Daiichi, there is no evidence that these populations exhibit any negative health effects. Living nearby a nuclear power plant on average exposes the local population to 0.00009mSv/year, which according to LNT would increase the risk of developing cancer by 0.00000045%. After Chernobyl, the average dose to those evacuated was 30mSv, which would theoretically increase the risk of cancer at some point in their lifetime by 0.15% (on top of the average baseline lifetime risk of cancer, which is 39.5% in the USviii, 50% in the UKix)." }, { "type": "NarrativeText", - "element_id": "f9bb49945b60897227abdd75b5f8d39b", + "element_id": "d6bd9451ceee595c090d110656bb1b2b", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1201,13 +1219,13 @@ "date_modified": "2023-02-12T10:09:32" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 6 }, - "text": "r e p s e i t i l" + "text": "Since the 1980s, there has been considerable scientific debate as to whether the LNT theory is valid, following scientific breakthroughs within, for example, radiobiology and medicine. Indeed, the Chernobyl accident helped illuminate some of the issues associated with LNT. Multiplication of the low doses after the accident (many far too low to be of any health concern) with large populations – using the assumptions made by LNT – led to a large number of predicted cancer deaths, which have not, and likely will not materialize. This practice has been heavily criticized for being inappropriate in making risk assessments by UNSCEAR, the International Commission on Radiation Protection and a large number of independent scientists." 
}, { - "type": "Title", - "element_id": "1fb2ec4fc8fc547c0de86ba79ba651e5", + "type": "NarrativeText", + "element_id": "d8c68c0317a4a3867de201703e068e2e", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1219,13 +1237,13 @@ "date_modified": "2023-02-12T10:09:32" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 6 }, - "text": "a t a F" + "text": "Determining the precise risk (or lack thereof) of the extremely small radiation doses associated with the routine operations of nuclear power plants, the disposal of nuclear waste or even extremely rare nuclear accidents is a purely academic exercise, that tries to determine whether the risk is extremely low, too small to detect, or non- existent. The risks of low-level radiation pale in comparison to other societal risks such as obesity, smoking, and air pollution." }, { - "type": "UncategorizedText", - "element_id": "4a44dc15364204a80fe80e9039455cc1", + "type": "NarrativeText", + "element_id": "e5dec03340d86adfd26612d5d06ab5e6", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1237,13 +1255,13 @@ "date_modified": "2023-02-12T10:09:32" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 6 }, - "text": "10" + "text": "By looking at radiation risks in isolation, we prolong the over-regulation of radiation in nuclear plants, driving up costs, whilst not delivering any additional health benefits, in turn incentivising the use of more harmful energy sources. A recalibration is required, and this can only done by ensuring a holistic approach to risk is taken." 
}, { - "type": "UncategorizedText", - "element_id": "ef2d127de37b942baad06145e54b0c61", + "type": "Footer", + "element_id": "7de1555df0c2700329e815b93b32c571", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1255,13 +1273,13 @@ "date_modified": "2023-02-12T10:09:32" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 6 }, - "text": "5" + "text": "4" }, { - "type": "UncategorizedText", - "element_id": "5feceb66ffc86f38d952786c6d696c79", + "type": "Title", + "element_id": "3506b7d2b1626663985ae1a521a60fe1", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1273,13 +1291,13 @@ "date_modified": "2023-02-12T10:09:32" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 7 }, - "text": "0" + "text": "Adopting an all-hazards approach" }, { - "type": "UncategorizedText", - "element_id": "8bf40d0515e8461bd30866c2eb8ac250", + "type": "NarrativeText", + "element_id": "07ed21008ec3f8801f7cbb1fc670d4db", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1291,13 +1309,13 @@ "date_modified": "2023-02-12T10:09:32" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 7 }, - "text": "4.6" + "text": "The overall regulatory philosophy, at least theoretically, used in the nuclear industry is the ALARA (As Low As Reasonably Achievable) principle, where any regulatory action on radiation should account for socio- economic benefits and costs, as opposed to making decisions based on radiation risks alone." 
}, { - "type": "Title", - "element_id": "51229f9593cbcb7c8e25059c004d67b0", + "type": "NarrativeText", + "element_id": "00548dbd288df8370c39789adb302f50", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1309,13 +1327,13 @@ "date_modified": "2023-02-12T10:09:32" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 7 }, - "text": "|| es" + "text": "Contemporary debates around nuclear energy often reflect the precautionary principle, a problematic concept applied across a range of regulatory and policy issues. A ‘strong’ interpretation of the precautionary principle, or a ‘as low as possible’ approach to risk, dictates that regulation is required whenever there is a potential adverse health risk, even if the evidence is not certain and regardless of the cost of regulation." }, { - "type": "Title", - "element_id": "6c25ebfc9ffd2510c4c41d4bd5cb7ea9", + "type": "NarrativeText", + "element_id": "ba80f89ec0449fefee24b33fbb7e29b6", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1327,13 +1345,13 @@ "date_modified": "2023-02-12T10:09:32" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 7 }, - "text": "C oal" + "text": "However, the regulatory process and the policy debate around nuclear more broadly has long departed from the ALARA principle, no longer weighing cost versus benefits, or considering the overall advantages of nuclear energy, but rather looking at radiation in isolation. This has resulted in a subtle shift towards an ‘as low as possible’ mentality. Attempting to reduce radiation far below de facto safe levels has resulted in an escalation of costs and loss of public confidence, and in some cases has deprived communities of the many benefits nuclear energy provides. In practical terms, this has led to the continued use of more harmful energy sources, such as fossil fuels." 
}, { - "type": "Title", - "element_id": "2378bdd2cf4f491cf401e6b215cbb4fd", + "type": "NarrativeText", + "element_id": "9e9ed8938e271667a9512898d2ca629b", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1345,13 +1363,13 @@ "date_modified": "2023-02-12T10:09:32" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 7 }, - "text": "Oil" + "text": "If the potential of nuclear energy is to be fully realized, public health and safety approaches must be recalibrated to consider a wider range of factors when considering radiation, adopting an “all-hazards” approach. Such an approach must ensure that risks are placed within a proper perspective and context, rather than looking at them in isolation. We therefore must not look at the costs – be they economic, environmental, or public health – associated with an individual power plant in isolation, but rather the costs associated with it (and its alternatives) at a societal level (Figure 4). This would entail looking at the potential risks arising from the use of nuclear power and comparing these with the risks associated with not adopting nuclear power." }, { - "type": "Title", - "element_id": "3a21fb0158c2ea04834163deee74a836", + "type": "Image", + "element_id": "72b1be8b707acf2f917fef7ea176ec32", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1363,13 +1381,13 @@ "date_modified": "2023-02-12T10:09:32" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 7 }, - "text": "Bio m ass" + "text": "ae) Plant-level Social and flea productio Grid-level costs environmental costs of at market pri of the electricity emissions, land-use, system climate change, security of supply, etc. 
" }, { - "type": "Title", - "element_id": "4fabb98454d019811a732c4a09f31bf0", + "type": "FigureCaption", + "element_id": "567f470fb4fb5c58b115fbe79a425970", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1381,13 +1399,13 @@ "date_modified": "2023-02-12T10:09:32" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 7 }, - "text": "N atural gas" + "text": "Figure 4. The different levels of cost associated with electricity generationx" }, { - "type": "UncategorizedText", - "element_id": "c020bad937ece011339d7447ee0ac9fa", + "type": "NarrativeText", + "element_id": "6595e50969f899bd2fa05c0d7a8a682c", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1399,549 +1417,9 @@ "date_modified": "2023-02-12T10:09:32" }, "filetype": "application/pdf", - "page_number": 5 + "page_number": 7 }, - "text": "2.8" - }, - { - "type": "Title", - "element_id": "d151346fe7eea3c6a0865199579ca601", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 5 - }, - "text": "Wind" - }, - { - "type": "UncategorizedText", - "element_id": "91539d7445b231b3612c4f68bd077160", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 5 - }, - "text": "0.07" - }, - 
{ - "type": "NarrativeText", - "element_id": "5275a384f63ded9bf8541f52dec2c2cb", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 5 - }, - "text": "H ydropo w er" - }, - { - "type": "UncategorizedText", - "element_id": "a888fe9e2469182b8e3e3bca241d3189", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 5 - }, - "text": "0.04" - }, - { - "type": "Title", - "element_id": "d3d1de6bcd7ebe2351be9f53551f7eb9", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 5 - }, - "text": "S olar" - }, - { - "type": "UncategorizedText", - "element_id": "a7e46abf169710b34fe8898b950d57ec", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": 
"2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 5 - }, - "text": "0.02" - }, - { - "type": "Title", - "element_id": "6dc76d1e1c35d4253537250288157d0c", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 5 - }, - "text": "N uclear" - }, - { - "type": "UncategorizedText", - "element_id": "312b95ee5a344d2f7a16ad817ff70788", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 5 - }, - "text": "0.01" - }, - { - "type": "NarrativeText", - "element_id": "8921c0f3c29bc04c22c9c40f4eef6613", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 5 - }, - "text": "Figure 3. 
Comparison of number of fatalities due to electricity generation, including accidents and air pollution3" - }, - { - "type": "NarrativeText", - "element_id": "bf88d949b16b32347c420a66fa413d49", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 5 - }, - "text": "Contrary to perceptions, nuclear is an incredibly safe source of energy (see Figure 3 for a comparison). What is also clear is that the continued use of alternative energy sources in preference to nuclear energy – in particular fossil fuels – poses a far greater risk to public health by significantly contributing to climate change and air pollution." - }, - { - "type": "NarrativeText", - "element_id": "e450813fe6430d87c4caa64e4792bc74", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 5 - }, - "text": "2 Including 28 firefighters that were exposed to lethal amounts of radiation during the accident night, and 15 fatal cases of thyroid cancer. 3 Sources drawn upon: Markandya, A., & Wilkinson, P. (2007), Sovacool et al. (2016). 
Data for nuclear accidents modified to reflect the" - }, - { - "type": "Title", - "element_id": "31138d5dc0c297144d27d5dbd15d5ef0", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 5 - }, - "text": "2012 UNSCEAR report and the 2015 US NRC SOARCA study." - }, - { - "type": "UncategorizedText", - "element_id": "4e07408562bedb8b60ce05c1decfe3ad", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 5 - }, - "text": "3" - }, - { - "type": "Title", - "element_id": "f5bda7d6ba9ea7120d7f4c11c8b8f1ae", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 6 - }, - "text": "The low-dose question" - }, - { - "type": "NarrativeText", - "element_id": "ec0fb27e2a16f77899bf83591cd2d0de", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 6 - }, - "text": "Since the 1950s, the Linear No-Threshold (LNT) theory has been used to inform regulatory decisions, positing that any dose of radiation, regardless of the amount or the duration over which it is received, poses a risk. Assuming that LNT is correct, we should expect to see that people living in areas of the world where background doses are higher (e.g. India, Iran and northern Europe) have a higher incidence of cancer. However, despite people living in areas of the world where radiation doses are naturally higher than those that would be received in parts of the evacuation zones around Chernobyl and Fukushima Daiichi, there is no evidence that these populations exhibit any negative health effects. Living nearby a nuclear power plant on average exposes the local population to 0.00009mSv/year, which according to LNT would increase the risk of developing cancer by 0.00000045%. After Chernobyl, the average dose to those evacuated was 30mSv, which would theoretically increase the risk of cancer at some point in their lifetime by 0.15% (on top of the average baseline lifetime risk of cancer, which is 39.5% in the USviii, 50% in the UKix)." 
- }, - { - "type": "NarrativeText", - "element_id": "d6bd9451ceee595c090d110656bb1b2b", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 6 - }, - "text": "Since the 1980s, there has been considerable scientific debate as to whether the LNT theory is valid, following scientific breakthroughs within, for example, radiobiology and medicine. Indeed, the Chernobyl accident helped illuminate some of the issues associated with LNT. Multiplication of the low doses after the accident (many far too low to be of any health concern) with large populations – using the assumptions made by LNT – led to a large number of predicted cancer deaths, which have not, and likely will not materialize. This practice has been heavily criticized for being inappropriate in making risk assessments by UNSCEAR, the International Commission on Radiation Protection and a large number of independent scientists." 
- }, - { - "type": "NarrativeText", - "element_id": "d8c68c0317a4a3867de201703e068e2e", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 6 - }, - "text": "Determining the precise risk (or lack thereof) of the extremely small radiation doses associated with the routine operations of nuclear power plants, the disposal of nuclear waste or even extremely rare nuclear accidents is a purely academic exercise, that tries to determine whether the risk is extremely low, too small to detect, or non- existent. The risks of low-level radiation pale in comparison to other societal risks such as obesity, smoking, and air pollution." - }, - { - "type": "NarrativeText", - "element_id": "e5dec03340d86adfd26612d5d06ab5e6", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 6 - }, - "text": "By looking at radiation risks in isolation, we prolong the over-regulation of radiation in nuclear plants, driving up costs, whilst not delivering any additional health benefits, in turn incentivising the use of more harmful energy sources. A recalibration is required, and this can only done by ensuring a holistic approach to risk is taken." 
- }, - { - "type": "UncategorizedText", - "element_id": "4b227777d4dd1fc61c6f884f48641d02", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 6 - }, - "text": "4" - }, - { - "type": "Title", - "element_id": "3506b7d2b1626663985ae1a521a60fe1", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "Adopting an all-hazards approach" - }, - { - "type": "NarrativeText", - "element_id": "ba80f89ec0449fefee24b33fbb7e29b6", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "However, the regulatory process and the policy debate around nuclear more broadly has long departed from the ALARA principle, no longer weighing cost versus benefits, or considering the overall advantages of nuclear energy, but rather looking at radiation in isolation. This has resulted in a subtle shift towards an ‘as low as possible’ mentality. 
Attempting to reduce radiation far below de facto safe levels has resulted in an escalation of costs and loss of public confidence, and in some cases has deprived communities of the many benefits nuclear energy provides. In practical terms, this has led to the continued use of more harmful energy sources, such as fossil fuels." - }, - { - "type": "NarrativeText", - "element_id": "07ed21008ec3f8801f7cbb1fc670d4db", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "The overall regulatory philosophy, at least theoretically, used in the nuclear industry is the ALARA (As Low As Reasonably Achievable) principle, where any regulatory action on radiation should account for socio- economic benefits and costs, as opposed to making decisions based on radiation risks alone." - }, - { - "type": "NarrativeText", - "element_id": "00548dbd288df8370c39789adb302f50", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "Contemporary debates around nuclear energy often reflect the precautionary principle, a problematic concept applied across a range of regulatory and policy issues. 
A ‘strong’ interpretation of the precautionary principle, or a ‘as low as possible’ approach to risk, dictates that regulation is required whenever there is a potential adverse health risk, even if the evidence is not certain and regardless of the cost of regulation." - }, - { - "type": "NarrativeText", - "element_id": "9e9ed8938e271667a9512898d2ca629b", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "If the potential of nuclear energy is to be fully realized, public health and safety approaches must be recalibrated to consider a wider range of factors when considering radiation, adopting an “all-hazards” approach. Such an approach must ensure that risks are placed within a proper perspective and context, rather than looking at them in isolation. We therefore must not look at the costs – be they economic, environmental, or public health – associated with an individual power plant in isolation, but rather the costs associated with it (and its alternatives) at a societal level (Figure 4). This would entail looking at the potential risks arising from the use of nuclear power and comparing these with the risks associated with not adopting nuclear power." 
- }, - { - "type": "Title", - "element_id": "7ec686735b6e51f8276b057051369b15", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "ae) flea" - }, - { - "type": "Title", - "element_id": "2470c376b60fd11fd9639e0e440ce0f5", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "Plant-level production costs at market prices" - }, - { - "type": "Title", - "element_id": "dde91891334d5ac0e2b4569680eb6f1e", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "Grid-level costs of the electricity system" - }, - { - "type": "UncategorizedText", - "element_id": "fd38688f30f8b6e597d540ab0134278f", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "Social and environmental costs of emissions, land-use, climate change, security of supply, etc." - }, - { - "type": "NarrativeText", - "element_id": "567f470fb4fb5c58b115fbe79a425970", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "Figure 4. The different levels of cost associated with electricity generationx" - }, - { - "type": "NarrativeText", - "element_id": "6595e50969f899bd2fa05c0d7a8a682c", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 7 - }, - "text": "A more holistic regulatory process would be required, in which regulators move away from being siloed, looking at specific risks in isolation, with little regard for the greater picture. The move towards an all-hazard, holistic approach would require greater coordination between regulators, ensuring that the combined risks of a specific nuclear project are weighed against the risks posed by not advancing said project." 
+ "text": "A more holistic regulatory process would be required, in which regulators move away from being siloed, looking at specific risks in isolation, with little regard for the greater picture. The move towards an all-hazard, holistic approach would require greater coordination between regulators, ensuring that the combined risks of a specific nuclear project are weighed against the risks posed by not advancing said project." }, { "type": "NarrativeText", @@ -1962,7 +1440,7 @@ "text": "Equally, the adoption of an all-hazards approach means regulators should consider declaring when a risk is too low to be a public health concern, in line with what the U.S. Nuclear Regulatory Commission attempted to do with its Below Regulatory Concern policy statements in the 1980s and early 1990s. In the context of nuclear power, this means departing from the notion that LNT instils of no safe level of radiation, and adopting a regulatory framework which notes the impossibility of eradicating risks. Failing to do so will result in excessive regulation that continues to limit the full potential of nuclear power in tackling climate change and sees a continued reliance on objectively more harmful energy sources." }, { - "type": "UncategorizedText", + "type": "Header", "element_id": "ef2d127de37b942baad06145e54b0c61", "metadata": { "data_source": { @@ -2034,7 +1512,7 @@ "text": "Similarly, many of the tremendous challenges the global community faces are significantly driven by this “radiation phobia”. Indeed, several of these issues have been considerably exacerbated by the fact that certain risks are given a disproportionate amount of focus, whereas others are de facto ignored. The global conversation around climate change is a prime example of this. The historical use of fossil fuels has contributed significantly to climate change through greenhouse gas emissions, causing unprecedented changes in the liveability of the Earth. 
By 2025, half of the world’s population will be living in water-stressed areas, as extreme heat and droughts are exacerbating water resources. Between 2030 and 2050, climate change is expected to be the cause of an additional 250,000 deaths per year, arising from malnutrition, malaria, diarrhoea and heat stressx. Yet, despite the huge risks associated with climate change, our addiction to coal, oil, and fossil gas remains, with fossil fuels providing 84% of global primary energy in 2019xii. The continued prioritization of fossil fuels at the expense of nuclear energy results in a considerable increase in the risks posed by climate change." }, { - "type": "NarrativeText", + "type": "FigureCaption", "element_id": "29215d2c137a392941315c6c7a67e8fd", "metadata": { "data_source": { @@ -2052,7 +1530,7 @@ "text": "Equally, it is well established that living without access to electricity results in illness and death around the world, caused by everything from not having access to modern healthcare to household air pollution. As of today, 770 million people around the world do not have access to electricity, with over 75% of that population living in Sub-Saharan Africa. The world's poorest 4 billion people consume a mere 5% of the energy used in developed economies, and we need to find ways of delivering reliable electricity to the entire human population in a fashion that is sustainable. Household and ambient air pollution causes 8.7 million deaths each year, largely because of the continued use of fossil fuels. Widespread electrification is a key tool for delivering a just energy transition. Investment in nuclear, has become an urgent necessity. Discarding it, based on risk perceptions divorced from science, would be to abandon the moral obligation to ensure affordable, reliable, and sustainable energy for every community around the world." 
}, { - "type": "UncategorizedText", + "type": "Header", "element_id": "e7f6c011776e8db7cd330b54174fd76f", "metadata": { "data_source": { @@ -2071,7 +1549,7 @@ }, { "type": "NarrativeText", - "element_id": "0714f9ff88637006bdb76908c7c936bf", + "element_id": "d754d8d468346f652657279272a11897", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -2085,11 +1563,11 @@ "filetype": "application/pdf", "page_number": 9 }, - "text": "We must begin to holistically look at the severity of the consequences of maintaining the current energy production system, many of which are irreversible. The ways in which we address climate change and other issues of global importance must be sustainable and not create new hazards down the line. The reality is that nuclear has always been and remains an exceptionally safe source of energy, representing the lowest risk, the most sustainable, and the most affordable ways to generate around-the-clock electricity." + "text": "Clearly, we have reached a point where we must establish a new conversation about the relative risks of using nuclear, especially when risks created by other energy sources are considered. We cannot address many of the global challenges we face without a significant increase in the use of nuclear energy. The detrimental effects of decades of looking at nuclear risks in isolation highlights just how crucial it is that regulators and policymakers change the way they view nuclear energy, and transition towards an all-hazards approach, ensuring that actions taken to mitigate risks do not result in creating more severe risks." 
}, { "type": "NarrativeText", - "element_id": "f62c49fcf0a7960d0b509e37507d76d3", + "element_id": "0714f9ff88637006bdb76908c7c936bf", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -2103,11 +1581,11 @@ "filetype": "application/pdf", "page_number": 9 }, - "text": "Therefore, World Nuclear Association calls upon policymakers and regulators to adopt an all-hazards approach, where different risks associated with energy producing technologies are placed in perspective and the appropriate context, and examined in line with the latest scientific evidence. Policymakers and regulators must ensure that their decisions regarding radiation protection do not create greater risks elsewhere. This include the recalibration of existing regulations regarding nuclear power and radiation, weighing the cost of regulatory measures against the societal benefits provided by nuclear energy." + "text": "We must begin to holistically look at the severity of the consequences of maintaining the current energy production system, many of which are irreversible. The ways in which we address climate change and other issues of global importance must be sustainable and not create new hazards down the line. The reality is that nuclear has always been and remains an exceptionally safe source of energy, representing the lowest risk, the most sustainable, and the most affordable ways to generate around-the-clock electricity." 
}, { "type": "NarrativeText", - "element_id": "d754d8d468346f652657279272a11897", + "element_id": "f62c49fcf0a7960d0b509e37507d76d3", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -2121,10 +1599,10 @@ "filetype": "application/pdf", "page_number": 9 }, - "text": "Clearly, we have reached a point where we must establish a new conversation about the relative risks of using nuclear, especially when risks created by other energy sources are considered. We cannot address many of the global challenges we face without a significant increase in the use of nuclear energy. The detrimental effects of decades of looking at nuclear risks in isolation highlights just how crucial it is that regulators and policymakers change the way they view nuclear energy, and transition towards an all-hazards approach, ensuring that actions taken to mitigate risks do not result in creating more severe risks." + "text": "Therefore, World Nuclear Association calls upon policymakers and regulators to adopt an all-hazards approach, where different risks associated with energy producing technologies are placed in perspective and the appropriate context, and examined in line with the latest scientific evidence. Policymakers and regulators must ensure that their decisions regarding radiation protection do not create greater risks elsewhere. This include the recalibration of existing regulations regarding nuclear power and radiation, weighing the cost of regulatory measures against the societal benefits provided by nuclear energy." 
}, { - "type": "UncategorizedText", + "type": "Footer", "element_id": "7902699be42c8a8e46fbbb4501726517", "metadata": { "data_source": { @@ -2143,7 +1621,7 @@ }, { "type": "Title", - "element_id": "69824d3b0e70ca6aaa0da1613b65fd91", + "element_id": "e56261e0bd30965b8e68ed2abb15b141", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -2178,80 +1656,8 @@ "text": "i" }, { - "type": "NarrativeText", - "element_id": "d85940c91ae6b53fc4b41bd5137e7371", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 10 - }, - "text": "xi World Health Organization (2018). Climate change and health. Available at: https://www.who.int/news-room/fact-" - }, - { - "type": "NarrativeText", - "element_id": "26a84724035df76d7d8a6610a6fa4627", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 10 - }, - "text": "x OECD-NEA (2019). The Full Costs of Electricity Provision. 
Available at: https://www.oecd-nea.org/jcms/pl_14998/" - }, - { - "type": "NarrativeText", - "element_id": "94178a8c2e84bf4b8f2eed9c79d7cfd5", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 10 - }, - "text": "ix Cancer Research UK (n.d.). Cancer risk statistics. Available at: https://www.cancerresearchuk.org/health-" - }, - { - "type": "NarrativeText", - "element_id": "794a96b3ab9a3e860f65549c3a106704", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 10 - }, - "text": "viii National Cancer Institute (2020). Cancer statistics. Available at: https://www.cancer.gov/about-cancer/" - }, - { - "type": "NarrativeText", - "element_id": "9a236889bced20048d1619798291d194", + "type": "ListItem", + "element_id": "c06ac75f019ceac1ff2baecfc090fd3e", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -2265,11 +1671,11 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "vii World Health Organization. (2016). Updated tables 2016 for ‘Preventing disease through health environments: a" + "text": "World Health Organization (2020). Road traffic injuries. 
Available at: https://www.who.int/news-room/fact-sheets/ detail/road-traffic-injuries" }, { - "type": "NarrativeText", - "element_id": "9d45931b60fa1041a13243a1ee1bb170", + "type": "Title", + "element_id": "5d7f49449ab22deac22d767b89549c55", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -2283,7 +1689,7 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "xii BP, 2020. BP Statistical Review of World Energy, London: BP." + "text": "ii" }, { "type": "Title", @@ -2322,62 +1728,8 @@ "text": "vi" }, { - "type": "NarrativeText", - "element_id": "4051afedda98549176dc28aaa9087e81", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 10 - }, - "text": "iv United Nations Scientific Committee on the Effects of Radiation (2016). 
Report of the United Nations Scientific" - }, - { - "type": "Title", - "element_id": "f5557d4fcf727a981a3c315aca733eef", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 10 - }, - "text": "iii" - }, - { - "type": "Title", - "element_id": "5d7f49449ab22deac22d767b89549c55", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": 306475068461766865312866697521104206816, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "date_modified": "2023-02-12T10:09:32" - }, - "filetype": "application/pdf", - "page_number": 10 - }, - "text": "ii" - }, - { - "type": "NarrativeText", - "element_id": "b6c39a9b3890b5132e4310c83d06b310", + "type": "ListItem", + "element_id": "af64bcc9f6d36d2c339a592dc2ae75ff", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -2391,11 +1743,11 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "Photo credits: Front cover & pages 1, 4, 6 left, 7 bottom: Adobe Stock; page 6 right: Getty Images; page 7 top: Uniper." + "text": "xii BP, 2020. BP Statistical Review of World Energy, London: BP." 
}, { - "type": "NarrativeText", - "element_id": "c328c06c32c00c43471cd3c9d257c68b", + "type": "ListItem", + "element_id": "18b2cdcbf43cbcab942c6ffa69abdc51", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -2409,11 +1761,11 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "International Energy Agency (2020). Global share of total energy supply by source, 2018. Key World Energy Statistics 2020. Available at: https://www.iea.org/data-and-statistics/charts/global-share-of-total-energy-supply-by- source-2018" + "text": "Slovic, P., 2010. The Psychology of risk. Saúde e Sociedade, 19(4), pp. 731-747." }, { - "type": "NarrativeText", - "element_id": "6bbd046b939157389606adf4059fe1f3", + "type": "ListItem", + "element_id": "46c6ddac9c0dadbc38d874f4b35fa235", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -2427,11 +1779,11 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "Vohra, K., Vodonos, A., Schwartz, J., Marais, E., Sulprizio, M., & Mickley, L. (2021). Global mortality from outdoor fine particle pollution generated by fossil fuel combustion: Results from GEOS-Chem. Environmental Research, 195, p. 1-8" + "text": "National Cancer Institute (2020). Cancer statistics. Available at: https://www.cancer.gov/about-cancer/ understanding/statistics" }, { - "type": "NarrativeText", - "element_id": "2ef1e8614bc32af635d2a0c894b2ed3c", + "type": "ListItem", + "element_id": "acdfef838c7c3dd2d1d6bfe41f4156e6", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -2445,11 +1797,11 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "Slovic, P., 2010. The Psychology of risk. Saúde e Sociedade, 19(4), pp. 731-747." + "text": "Cancer Research UK (n.d.). Cancer risk statistics. 
Available at: https:/Awww.cancerresearchuk.org/health- professional/cancer-statistics/risk" }, { - "type": "NarrativeText", - "element_id": "d5658e2a49995a2f4ca4b45d95f2058b", + "type": "ListItem", + "element_id": "6febbd0bffa8633c6c188165767c843c", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -2463,11 +1815,11 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "global assessment of the burden of disease from environmental risks’. Available at: https://www.who.int/data/gho/ data/themes/public-health-and-environment [Accessed on 8 April 2021]" + "text": "United Nations Scientific Committee on the Effects of Radiation (2016). Report of the United Nations Scientific Committee on the Effects of Atomic Radiation. Accessed from: https:/Avww.unscear.org/docs/publications/2016/ UNSCEAR_2016_GA-Report-CORR.pdf" }, { - "type": "NarrativeText", - "element_id": "e4d7c811a799c3c8e706125556f8a370", + "type": "ListItem", + "element_id": "2f9b2ba9ed7265891caea2b618d2968c", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -2481,11 +1833,11 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "BBC (2020). Plane crash fatalities fell more than 50% in 2019. Available at: https://www.bbc.co.uk/news/ business-50953712" + "text": "VIL World Health Organization. (2016). Updated tables 2016 for ‘Preventing disease through health environments: a global assessment of the burden of disease from environmental risks’. 
Available at: https://www.who.int/data/gho/ data/themes/public-health-and-environment [Accessed on 8 April 2021]" }, { - "type": "Title", - "element_id": "6e98dee26ce2439cd4b8af82426e894e", + "type": "ListItem", + "element_id": "0765b3700a8d5cdd4e4cdb9283835ade", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -2499,11 +1851,11 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "understanding/statistics" + "text": "OECD-NEA (2019). The Full Costs of Electricity Provision. Available at: https:/Avww.oecd-nea.org/jcms/pl_14998/ the-full-costs-of-electricity-provision?details=true" }, { - "type": "Title", - "element_id": "759772833f6756e511150b2a49233864", + "type": "ListItem", + "element_id": "8bfb0188dff570fe23d75b3873051528", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -2517,11 +1869,11 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "professional/cancer-statistics/risk" + "text": "xi World Health Organization (2018). Climate change and health. Available at: https:/Awww.who.int/news-room/fact- sheets/detail/climate-change-and-health" }, { - "type": "Title", - "element_id": "7267222b91f507e040c69dad9af7941f", + "type": "ListItem", + "element_id": "69bd2cd5a46ac8850a9e3ea2df80de60", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -2535,11 +1887,11 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "the-full-costs-of-electricity-provision?details=true" + "text": "Vohra, K., Vodonos, A., Schwartz, J., Marais, E., Sulprizio, M., & Mickley, L. (2021). Global mortality from outdoor fine particle pollution generated by fossil fuel combustion: Results from GEOS-Chem. Environmental Research, 195, p. 
1-8" }, { - "type": "NarrativeText", - "element_id": "e72fdf383c0b4d8cba0284d4f7ff06d5", + "type": "ListItem", + "element_id": "81be06e67a1b533cb1278b15860c51db", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -2553,11 +1905,11 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "World Health Organization (2020). Road traffic injuries. Available at: https://www.who.int/news-room/fact-sheets/ detail/road-traffic-injuries" + "text": "International Energy Agency (2020). Global share of total energy supply by source, 2018. Key World Energy Statistics 2020. Available at: https://www.iea.org/data-and-statistics/charts/global-share-of-total-energy-supply-by- source-2018" }, { - "type": "Title", - "element_id": "86c0a0cef7faa217f386f75ead17dbec", + "type": "ListItem", + "element_id": "199440a0821e16b612f4697aa2306cb2", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -2571,11 +1923,11 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "sheets/detail/climate-change-and-health" + "text": "BBC (2020). Plane crash fatalities fell more than 50% in 2019. Available at: https://www.bbc.co.uk/news/ business-50953712" }, { "type": "NarrativeText", - "element_id": "98e5f594de0e79990a0650489fdf295c", + "element_id": "10407d498f2636f50597e71d97cc001a", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -2589,10 +1941,10 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "Committee on the Effects of Atomic Radiation. Accessed from: https://www.unscear.org/docs/publications/2016/ UNSCEAR_2016_GA-Report-CORR.pdf" + "text": "Photo credits: Front cover & pages 1, 4, 6 left, 7 bottom: Adobe Stock; page 6 right: Getty Images; page 7 top: Uniper." 
}, { - "type": "UncategorizedText", + "type": "Header", "element_id": "2c624232cdd221771294dfbb310aca00", "metadata": { "data_source": { @@ -2610,8 +1962,8 @@ "text": "8" }, { - "type": "UncategorizedText", - "element_id": "481e5a54650b0a4ac7bc2568ddad436d", + "type": "NarrativeText", + "element_id": "c48603fd38d3449d3afcd2dc18903083", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -2629,7 +1981,7 @@ }, { "type": "NarrativeText", - "element_id": "36d3613fc20527bb317afd4e447d1c74", + "element_id": "fc5faebaec5a1349ce932f1863bdd842", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -2646,8 +1998,8 @@ "text": "Recalibrating risk © 2021 World Nuclear Association. Registered in England and Wales, company number 01215741" }, { - "type": "Title", - "element_id": "2ef1a5c0752085d3a6935132ad9e597c", + "type": "NarrativeText", + "element_id": "821daa4396c0087d9d5ee9240bc5c85c", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -2665,7 +2017,7 @@ }, { "type": "NarrativeText", - "element_id": "20ef77d9aa66e60f1443750cdbaa9014", + "element_id": "705da4db5e220010ddfd03d9452855e4", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", diff --git a/test_unstructured_ingest/files/azure_cognitive_index_schema.json b/test_unstructured_ingest/files/azure_cognitive_index_schema.json index 2abdc7b1d4..d77fd8da32 100644 --- a/test_unstructured_ingest/files/azure_cognitive_index_schema.json +++ b/test_unstructured_ingest/files/azure_cognitive_index_schema.json @@ -109,6 +109,10 @@ } ] }, + { + "name": "languages", + "type": "Collection(Edm.String)" + }, { "name": "page_number", "type": "Edm.String" diff --git a/test_unstructured_ingest/test-ingest-against-api.sh b/test_unstructured_ingest/test-ingest-against-api.sh index 
15200172d6..3f7a43d807 100755 --- a/test_unstructured_ingest/test-ingest-against-api.sh +++ b/test_unstructured_ingest/test-ingest-against-api.sh @@ -10,7 +10,7 @@ SCRIPT_DIR=$(dirname "$(realpath "$0")") cd "$SCRIPT_DIR"/.. || exit 1 OUTPUT_FOLDER_NAME=api-ingest-output OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh diff --git a/test_unstructured_ingest/test-ingest-airtable-diff.sh b/test_unstructured_ingest/test-ingest-airtable-diff.sh index e7b557a64d..8c69a31146 100755 --- a/test_unstructured_ingest/test-ingest-airtable-diff.sh +++ b/test_unstructured_ingest/test-ingest-airtable-diff.sh @@ -10,11 +10,18 @@ cd "$SCRIPT_DIR"/.. || exit 1 OUTPUT_FOLDER_NAME=airtable-diff OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +CI=${CI:-"false"} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh -trap 'cleanup_dir "$OUTPUT_DIR"' EXIT +function cleanup() { + cleanup_dir "$OUTPUT_DIR" + if [ "$CI" == "true" ]; then + cleanup_dir "$DOWNLOAD_DIR" + fi +} +trap cleanup EXIT VARIED_DATA_BASE_ID="app5YQxSfp220fWtm" VARIED_DATA_BASE_ID_2="appJ43QmP8I17zu88" diff --git a/test_unstructured_ingest/test-ingest-airtable-large.sh b/test_unstructured_ingest/test-ingest-airtable-large.sh index 242dc36f47..b87e728187 100755 --- a/test_unstructured_ingest/test-ingest-airtable-large.sh +++ b/test_unstructured_ingest/test-ingest-airtable-large.sh @@ -11,11 +11,18 @@ cd "$SCRIPT_DIR"/.. 
|| exit 1 OUTPUT_FOLDER_NAME=airtable-large OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} +CI=${CI:-"false"} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh -trap 'cleanup_dir "$OUTPUT_DIR"' EXIT +function cleanup() { + cleanup_dir "$OUTPUT_DIR" + if [ "$CI" == "true" ]; then + cleanup_dir "$DOWNLOAD_DIR" + fi +} +trap cleanup EXIT if [ -z "$AIRTABLE_PERSONAL_ACCESS_TOKEN" ]; then echo "Skipping Airtable ingest test because the AIRTABLE_PERSONAL_ACCESS_TOKEN is not set." diff --git a/test_unstructured_ingest/test-ingest-azure.sh b/test_unstructured_ingest/test-ingest-azure.sh index 6d177bf43a..9fdb9dd5e5 100755 --- a/test_unstructured_ingest/test-ingest-azure.sh +++ b/test_unstructured_ingest/test-ingest-azure.sh @@ -7,11 +7,14 @@ cd "$SCRIPT_DIR"/.. || exit 1 OUTPUT_FOLDER_NAME=azure OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh -trap 'cleanup_dir "$OUTPUT_DIR"' EXIT +function cleanup() { + cleanup_dir "$OUTPUT_DIR" +} +trap cleanup EXIT PYTHONPATH=. ./unstructured/ingest/main.py \ azure \ diff --git a/test_unstructured_ingest/test-ingest-biomed-api.sh b/test_unstructured_ingest/test-ingest-biomed-api.sh index 6137901914..bf0de6998f 100755 --- a/test_unstructured_ingest/test-ingest-biomed-api.sh +++ b/test_unstructured_ingest/test-ingest-biomed-api.sh @@ -8,11 +8,14 @@ cd "$SCRIPT_DIR"/.. 
|| exit 1 OUTPUT_FOLDER_NAME=biomed-api OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh -trap 'cleanup_dir "$OUTPUT_DIR"' EXIT +function cleanup() { + cleanup_dir "$OUTPUT_DIR" +} +trap cleanup EXIT "$SCRIPT_DIR"/check-num-files-expected-output.sh 2 $OUTPUT_FOLDER_NAME 10k diff --git a/test_unstructured_ingest/test-ingest-biomed-path.sh b/test_unstructured_ingest/test-ingest-biomed-path.sh index 9915d38d8e..b726364ef3 100755 --- a/test_unstructured_ingest/test-ingest-biomed-path.sh +++ b/test_unstructured_ingest/test-ingest-biomed-path.sh @@ -8,11 +8,14 @@ cd "$SCRIPT_DIR"/.. || exit 1 OUTPUT_FOLDER_NAME=biomed-path OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh -trap 'cleanup_dir "$OUTPUT_DIR"' EXIT +function cleanup() { + cleanup_dir "$OUTPUT_DIR" +} +trap cleanup EXIT "$SCRIPT_DIR"/check-num-files-expected-output.sh 1 $OUTPUT_FOLDER_NAME 10k diff --git a/test_unstructured_ingest/test-ingest-box.sh b/test_unstructured_ingest/test-ingest-box.sh index 20a167c862..43a8ad38ff 100755 --- a/test_unstructured_ingest/test-ingest-box.sh +++ b/test_unstructured_ingest/test-ingest-box.sh @@ -10,11 +10,18 @@ cd "$SCRIPT_DIR"/.. 
|| exit 1 OUTPUT_FOLDER_NAME=box OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} +CI=${CI:-"false"} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh -trap 'cleanup_dir "$OUTPUT_DIR"' EXIT +function cleanup() { + cleanup_dir "$OUTPUT_DIR" + if [ "$CI" == "true" ]; then + cleanup_dir "$DOWNLOAD_DIR" + fi +} +trap cleanup EXIT if [ -z "$BOX_APP_CONFIG" ] && [ -z "$BOX_APP_CONFIG_PATH" ]; then echo "Skipping Box ingest test because neither BOX_APP_CONFIG nor BOX_APP_CONFIG_PATH env vars are set." diff --git a/test_unstructured_ingest/test-ingest-confluence-diff.sh b/test_unstructured_ingest/test-ingest-confluence-diff.sh index 52ba2d5954..c9c0c21483 100755 --- a/test_unstructured_ingest/test-ingest-confluence-diff.sh +++ b/test_unstructured_ingest/test-ingest-confluence-diff.sh @@ -9,11 +9,18 @@ cd "$SCRIPT_DIR"/.. || exit 1 OUTPUT_FOLDER_NAME=confluence-diff OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} +CI=${CI:-"false"} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh -trap 'cleanup_dir "$OUTPUT_DIR"' EXIT +function cleanup() { + cleanup_dir "$OUTPUT_DIR" + if [ "$CI" == "true" ]; then + cleanup_dir "$DOWNLOAD_DIR" + fi +} +trap cleanup EXIT if [ -z "$CONFLUENCE_USER_EMAIL" ] || [ -z "$CONFLUENCE_API_TOKEN" ]; then echo "Skipping Confluence ingest test because the CONFLUENCE_USER_EMAIL or CONFLUENCE_API_TOKEN env var is not set." 
diff --git a/test_unstructured_ingest/test-ingest-confluence-large.sh b/test_unstructured_ingest/test-ingest-confluence-large.sh index cd686fe0a8..c1196bdd3d 100755 --- a/test_unstructured_ingest/test-ingest-confluence-large.sh +++ b/test_unstructured_ingest/test-ingest-confluence-large.sh @@ -11,11 +11,18 @@ cd "$SCRIPT_DIR"/.. || exit 1 OUTPUT_FOLDER_NAME=confluence-large OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} +CI=${CI:-"false"} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh -trap 'cleanup_dir "$OUTPUT_DIR"' EXIT +function cleanup() { + cleanup_dir "$OUTPUT_DIR" + if [ "$CI" == "true" ]; then + cleanup_dir "$DOWNLOAD_DIR" + fi +} +trap cleanup EXIT if [ -z "$CONFLUENCE_USER_EMAIL" ] || [ -z "$CONFLUENCE_API_TOKEN" ]; then echo "Skipping Confluence ingest test because the CONFLUENCE_USER_EMAIL or CONFLUENCE_API_TOKEN env var is not set." diff --git a/test_unstructured_ingest/test-ingest-delta-table.sh b/test_unstructured_ingest/test-ingest-delta-table.sh index a952fc18fb..d4c79a8f0d 100755 --- a/test_unstructured_ingest/test-ingest-delta-table.sh +++ b/test_unstructured_ingest/test-ingest-delta-table.sh @@ -8,7 +8,8 @@ OUTPUT_FOLDER_NAME=delta-table OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME DESTINATION_TABLE=$SCRIPT_DIR/delta-table-dest -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} +CI=${CI:-"false"} if [ -z "$AWS_ACCESS_KEY_ID" ] && [ -z "$AWS_SECRET_ACCESS_KEY" ]; then echo "Skipping Delta Table ingest test because either AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY env var was not set." 
@@ -21,6 +22,9 @@ source "$SCRIPT_DIR"/cleanup.sh function cleanup() { cleanup_dir "$DESTINATION_TABLE" cleanup_dir "$OUTPUT_DIR" + if [ "$CI" == "true" ]; then + cleanup_dir "$DOWNLOAD_DIR" + fi } trap cleanup EXIT diff --git a/test_unstructured_ingest/test-ingest-discord.sh b/test_unstructured_ingest/test-ingest-discord.sh index d845d76f1e..7aedb2b352 100755 --- a/test_unstructured_ingest/test-ingest-discord.sh +++ b/test_unstructured_ingest/test-ingest-discord.sh @@ -7,11 +7,18 @@ cd "$SCRIPT_DIR"/.. || exit 1 OUTPUT_FOLDER_NAME=discord OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} +CI=${CI:-"false"} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh -trap 'cleanup_dir "$OUTPUT_DIR"' EXIT +function cleanup() { + cleanup_dir "$OUTPUT_DIR" + if [ "$CI" == "true" ]; then + cleanup_dir "$DOWNLOAD_DIR" + fi +} +trap cleanup EXIT if [ -z "$DISCORD_TOKEN" ]; then echo "Skipping Discord ingest test because the DISCORD_TOKEN env var is not set." diff --git a/test_unstructured_ingest/test-ingest-dropbox.sh b/test_unstructured_ingest/test-ingest-dropbox.sh index 52514cb3d0..b591f0cdd8 100755 --- a/test_unstructured_ingest/test-ingest-dropbox.sh +++ b/test_unstructured_ingest/test-ingest-dropbox.sh @@ -7,11 +7,18 @@ cd "$SCRIPT_DIR"/.. 
|| exit 1 OUTPUT_FOLDER_NAME=dropbox OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} +CI=${CI:-"false"} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh -trap 'cleanup_dir "$OUTPUT_DIR"' EXIT +function cleanup() { + cleanup_dir "$OUTPUT_DIR" + if [ "$CI" == "true" ]; then + cleanup_dir "$DOWNLOAD_DIR" + fi +} +trap cleanup EXIT if [ -z "$DROPBOX_APP_KEY" ] || [ -z "$DROPBOX_APP_SECRET" ] || [ -z "$DROPBOX_REFRESH_TOKEN" ]; then echo "Skipping Dropbox ingest test because one or more of these env vars is not set:" diff --git a/test_unstructured_ingest/test-ingest-elasticsearch.sh b/test_unstructured_ingest/test-ingest-elasticsearch.sh index 71dcf26a35..7b181f90ba 100755 --- a/test_unstructured_ingest/test-ingest-elasticsearch.sh +++ b/test_unstructured_ingest/test-ingest-elasticsearch.sh @@ -8,19 +8,21 @@ echo "SCRIPT_DIR: $SCRIPT_DIR" OUTPUT_FOLDER_NAME=elasticsearch OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} +CI=${CI:-"false"} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh function cleanup() { # Kill the container so the script can be repeatedly run using the same ports - if docker ps --filter "name=es-test"; then - echo "Stopping Elasticsearch Docker container" - docker stop es-test - fi + echo "Stopping Elasticsearch Docker container" + docker-compose -f scripts/elasticsearch-test-helpers/docker-compose.yaml down --remove-orphans -v cleanup_dir "$OUTPUT_DIR" + if [ "$CI" == "true" ]; then + cleanup_dir "$DOWNLOAD_DIR" + fi } trap cleanup EXIT diff --git 
a/test_unstructured_ingest/test-ingest-gcs.sh b/test_unstructured_ingest/test-ingest-gcs.sh index df87bb5c76..dd43710941 100755 --- a/test_unstructured_ingest/test-ingest-gcs.sh +++ b/test_unstructured_ingest/test-ingest-gcs.sh @@ -7,11 +7,18 @@ cd "$SCRIPT_DIR"/.. || exit 1 OUTPUT_FOLDER_NAME=gcs OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} +CI=${CI:-"false"} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh -trap 'cleanup_dir "$OUTPUT_DIR"' EXIT +function cleanup() { + cleanup_dir "$OUTPUT_DIR" + if [ "$CI" == "true" ]; then + cleanup_dir "$DOWNLOAD_DIR" + fi +} +trap cleanup EXIT if [ -z "$GCP_INGEST_SERVICE_KEY" ]; then echo "Skipping Google Drive ingest test because the GCP_INGEST_SERVICE_KEY env var is not set." diff --git a/test_unstructured_ingest/test-ingest-github.sh b/test_unstructured_ingest/test-ingest-github.sh index 08cd9216f3..4061bea956 100755 --- a/test_unstructured_ingest/test-ingest-github.sh +++ b/test_unstructured_ingest/test-ingest-github.sh @@ -7,11 +7,18 @@ cd "$SCRIPT_DIR"/.. 
|| exit 1 OUTPUT_FOLDER_NAME=github OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} +CI=${CI:-"false"} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh -trap 'cleanup_dir "$OUTPUT_DIR"' EXIT +function cleanup() { + cleanup_dir "$OUTPUT_DIR" + if [ "$CI" == "true" ]; then + cleanup_dir "$DOWNLOAD_DIR" + fi +} +trap cleanup EXIT GH_READ_ONLY_ACCESS_TOKEN=${GH_READ_ONLY_ACCESS_TOKEN:-none} diff --git a/test_unstructured_ingest/test-ingest-gitlab.sh b/test_unstructured_ingest/test-ingest-gitlab.sh index 537ceedc80..1a9031c7a7 100755 --- a/test_unstructured_ingest/test-ingest-gitlab.sh +++ b/test_unstructured_ingest/test-ingest-gitlab.sh @@ -7,11 +7,18 @@ cd "$SCRIPT_DIR"/.. || exit 1 OUTPUT_FOLDER_NAME=gitlab OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} +CI=${CI:-"false"} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh -trap 'cleanup_dir "$OUTPUT_DIR"' EXIT +function cleanup() { + cleanup_dir "$OUTPUT_DIR" + if [ "$CI" == "true" ]; then + cleanup_dir "$DOWNLOAD_DIR" + fi +} +trap cleanup EXIT PYTHONPATH=. ./unstructured/ingest/main.py \ gitlab \ diff --git a/test_unstructured_ingest/test-ingest-google-drive.sh b/test_unstructured_ingest/test-ingest-google-drive.sh index a259451bed..218a5cfe0a 100755 --- a/test_unstructured_ingest/test-ingest-google-drive.sh +++ b/test_unstructured_ingest/test-ingest-google-drive.sh @@ -7,11 +7,18 @@ cd "$SCRIPT_DIR"/.. 
|| exit 1 OUTPUT_FOLDER_NAME=google-drive OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} +CI=${CI:-"false"} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh -trap 'cleanup_dir "$OUTPUT_DIR"' EXIT +function cleanup() { + cleanup_dir "$OUTPUT_DIR" + if [ "$CI" == "true" ]; then + cleanup_dir "$DOWNLOAD_DIR" + fi +} +trap cleanup EXIT if [ -z "$GCP_INGEST_SERVICE_KEY" ]; then echo "Skipping Google Drive ingest test because the GCP_INGEST_SERVICE_KEY env var is not set." diff --git a/test_unstructured_ingest/test-ingest-jira.sh b/test_unstructured_ingest/test-ingest-jira.sh index 54f0930d11..173fc4f94b 100755 --- a/test_unstructured_ingest/test-ingest-jira.sh +++ b/test_unstructured_ingest/test-ingest-jira.sh @@ -8,11 +8,18 @@ cd "$SCRIPT_DIR"/.. || exit 1 OUTPUT_FOLDER_NAME=jira-diff OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} +CI=${CI:-"false"} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh -trap 'cleanup_dir "$OUTPUT_DIR"' EXIT +function cleanup() { + cleanup_dir "$OUTPUT_DIR" + if [ "$CI" == "true" ]; then + cleanup_dir "$DOWNLOAD_DIR" + fi +} +trap cleanup EXIT if [ -z "$JIRA_INGEST_USER_EMAIL" ] || [ -z "$JIRA_INGEST_API_TOKEN" ]; then echo "Skipping Jira ingest test because the JIRA_INGEST_USER_EMAIL or JIRA_INGEST_API_TOKEN env var is not set." 
diff --git a/test_unstructured_ingest/test-ingest-local-single-file-with-encoding.sh b/test_unstructured_ingest/test-ingest-local-single-file-with-encoding.sh index 1e93cce011..6442eec0b3 100755 --- a/test_unstructured_ingest/test-ingest-local-single-file-with-encoding.sh +++ b/test_unstructured_ingest/test-ingest-local-single-file-with-encoding.sh @@ -6,7 +6,7 @@ SCRIPT_DIR=$(dirname "$(realpath "$0")") cd "$SCRIPT_DIR"/.. || exit 1 OUTPUT_FOLDER_NAME=local-single-file-with-encoding OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh diff --git a/test_unstructured_ingest/test-ingest-local-single-file-with-pdf-infer-table-structure.sh b/test_unstructured_ingest/test-ingest-local-single-file-with-pdf-infer-table-structure.sh index b5aa2ce7a2..9d15a0e55c 100755 --- a/test_unstructured_ingest/test-ingest-local-single-file-with-pdf-infer-table-structure.sh +++ b/test_unstructured_ingest/test-ingest-local-single-file-with-pdf-infer-table-structure.sh @@ -6,7 +6,7 @@ SCRIPT_DIR=$(dirname "$(realpath "$0")") cd "$SCRIPT_DIR"/.. 
|| exit 1 OUTPUT_FOLDER_NAME=local-single-file-with-pdf-infer-table-structure OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh diff --git a/test_unstructured_ingest/test-ingest-local-single-file.sh b/test_unstructured_ingest/test-ingest-local-single-file.sh index 090c29f87b..24954c1821 100755 --- a/test_unstructured_ingest/test-ingest-local-single-file.sh +++ b/test_unstructured_ingest/test-ingest-local-single-file.sh @@ -6,7 +6,7 @@ SCRIPT_DIR=$(dirname "$(realpath "$0")") cd "$SCRIPT_DIR"/.. || exit 1 OUTPUT_FOLDER_NAME=local-single-file OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh diff --git a/test_unstructured_ingest/test-ingest-local.sh b/test_unstructured_ingest/test-ingest-local.sh index 5278d2812d..0e1b3856c1 100755 --- a/test_unstructured_ingest/test-ingest-local.sh +++ b/test_unstructured_ingest/test-ingest-local.sh @@ -6,7 +6,7 @@ SCRIPT_DIR=$(dirname "$(realpath "$0")") cd "$SCRIPT_DIR"/.. || exit 1 OUTPUT_FOLDER_NAME=local OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh diff --git a/test_unstructured_ingest/test-ingest-notion.sh b/test_unstructured_ingest/test-ingest-notion.sh index 72e435cf02..2a83a47bb3 100755 --- a/test_unstructured_ingest/test-ingest-notion.sh +++ b/test_unstructured_ingest/test-ingest-notion.sh @@ -7,11 +7,18 @@ cd "$SCRIPT_DIR"/.. 
|| exit 1 OUTPUT_FOLDER_NAME=notion OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} +CI=${CI:-"false"} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh -trap 'cleanup_dir "$OUTPUT_DIR"' EXIT +function cleanup() { + cleanup_dir "$OUTPUT_DIR" + if [ "$CI" == "true" ]; then + cleanup_dir "$DOWNLOAD_DIR" + fi +} +trap cleanup EXIT if [ -z "$NOTION_API_KEY" ]; then echo "Skipping Notion ingest test because the NOTION_API_KEY env var is not set." diff --git a/test_unstructured_ingest/test-ingest-onedrive.sh b/test_unstructured_ingest/test-ingest-onedrive.sh index 41f7210f90..290643815d 100755 --- a/test_unstructured_ingest/test-ingest-onedrive.sh +++ b/test_unstructured_ingest/test-ingest-onedrive.sh @@ -7,11 +7,18 @@ cd "$SCRIPT_DIR"/.. || exit 1 OUTPUT_FOLDER_NAME=onedrive OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} +CI=${CI:-"false"} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh -trap 'cleanup_dir "$OUTPUT_DIR"' EXIT +function cleanup() { + cleanup_dir "$OUTPUT_DIR" + if [ "$CI" == "true" ]; then + cleanup_dir "$DOWNLOAD_DIR" + fi +} +trap cleanup EXIT if [ -z "$MS_CLIENT_ID" ] || [ -z "$MS_CLIENT_CRED" ] || [ -z "$MS_USER_PNAME" ]; then echo "Skipping OneDrive ingest test because the MS_CLIENT_ID, MS_CLIENT_CRED, MS_USER_PNAME env var is not set." 
diff --git a/test_unstructured_ingest/test-ingest-outlook.sh b/test_unstructured_ingest/test-ingest-outlook.sh index 2be6b88b75..384287e7ea 100755 --- a/test_unstructured_ingest/test-ingest-outlook.sh +++ b/test_unstructured_ingest/test-ingest-outlook.sh @@ -7,11 +7,18 @@ cd "$SCRIPT_DIR"/.. || exit 1 OUTPUT_FOLDER_NAME=outlook OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} +CI=${CI:-"false"} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh -trap 'cleanup_dir "$OUTPUT_DIR"' EXIT +function cleanup() { + cleanup_dir "$OUTPUT_DIR" + if [ "$CI" == "true" ]; then + cleanup_dir "$DOWNLOAD_DIR" + fi +} +trap cleanup EXIT if [ -z "$MS_CLIENT_ID" ] || [ -z "$MS_CLIENT_CRED" ] || [ -z "$MS_TENANT_ID" ] || [ -z "$MS_USER_EMAIL" ]; then echo "Skipping Outlook ingest test because the MS_CLIENT_ID or MS_CLIENT_CRED or MS_TENANT_ID or MS_USER_EMAIL env var is not set." diff --git a/test_unstructured_ingest/test-ingest-pdf-fast-reprocess.sh b/test_unstructured_ingest/test-ingest-pdf-fast-reprocess.sh index 46bfd32b19..96acee7bd3 100755 --- a/test_unstructured_ingest/test-ingest-pdf-fast-reprocess.sh +++ b/test_unstructured_ingest/test-ingest-pdf-fast-reprocess.sh @@ -8,11 +8,18 @@ cd "$SCRIPT_DIR"/.. 
|| exit 1 OUTPUT_FOLDER_NAME=pdf-fast-reprocess OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME INPUT_PATH=$SCRIPT_DIR/download -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} +CI=${CI:-"false"} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh -trap 'cleanup_dir "$OUTPUT_DIR"' EXIT +function cleanup() { + cleanup_dir "$OUTPUT_DIR" + if [ "$CI" == "true" ]; then + cleanup_dir "$INPUT_PATH" + fi +} +trap cleanup EXIT echo "REPROCESS INPUT PATH" ls "$INPUT_PATH" diff --git a/test_unstructured_ingest/test-ingest-s3-minio.sh b/test_unstructured_ingest/test-ingest-s3-minio.sh new file mode 100755 index 0000000000..000c28e28b --- /dev/null +++ b/test_unstructured_ingest/test-ingest-s3-minio.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash + +set -e + + +SCRIPT_DIR=$(dirname "$(realpath "$0")") +cd "$SCRIPT_DIR"/.. || exit 1 +OUTPUT_FOLDER_NAME=s3-minio +OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME +DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} +secret_key=minioadmin +access_key=minioadmin + +# shellcheck disable=SC1091 +source "$SCRIPT_DIR"/cleanup.sh + +function cleanup() { + # Kill the container so the script can be repeatedly run using the same ports + echo "Stopping Minio Docker container" + docker-compose -f scripts/minio-test-helpers/docker-compose.yaml down --remove-orphans -v + + cleanup_dir "$OUTPUT_DIR" +} + +trap cleanup EXIT + +# shellcheck source=/dev/null +scripts/minio-test-helpers/create-and-check-minio.sh +wait + +AWS_SECRET_ACCESS_KEY=$secret_key AWS_ACCESS_KEY_ID=$access_key PYTHONPATH=. 
./unstructured/ingest/main.py \ + s3 \ + --num-processes "$max_processes" \ + --download-dir "$DOWNLOAD_DIR" \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.data_source.date_modified,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --strategy hi_res \ + --preserve-downloads \ + --reprocess \ + --output-dir "$OUTPUT_DIR" \ + --verbose \ + --remote-url s3://utic-dev-tech-fixtures/ \ + --endpoint-url http://localhost:9000 + + +"$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME diff --git a/test_unstructured_ingest/test-ingest-s3.sh b/test_unstructured_ingest/test-ingest-s3.sh index 4b28a00106..214a70ab71 100755 --- a/test_unstructured_ingest/test-ingest-s3.sh +++ b/test_unstructured_ingest/test-ingest-s3.sh @@ -8,11 +8,14 @@ cd "$SCRIPT_DIR"/.. || exit 1 OUTPUT_FOLDER_NAME=s3 OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh -trap 'cleanup_dir "$OUTPUT_DIR"' EXIT +function cleanup() { + cleanup_dir "$OUTPUT_DIR" +} +trap cleanup EXIT "$SCRIPT_DIR"/check-num-files-expected-output.sh 3 $OUTPUT_FOLDER_NAME 20k diff --git a/test_unstructured_ingest/test-ingest-salesforce.sh b/test_unstructured_ingest/test-ingest-salesforce.sh index 13b8018b76..04f686e1d9 100755 --- a/test_unstructured_ingest/test-ingest-salesforce.sh +++ b/test_unstructured_ingest/test-ingest-salesforce.sh @@ -10,11 +10,18 @@ cd "$SCRIPT_DIR"/.. 
|| exit 1 OUTPUT_FOLDER_NAME=salesforce OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} +CI=${CI:-"false"} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh -trap 'cleanup_dir "$OUTPUT_DIR"' EXIT +function cleanup() { + cleanup_dir "$OUTPUT_DIR" + if [ "$CI" == "true" ]; then + cleanup_dir "$DOWNLOAD_DIR" + fi +} +trap cleanup EXIT if [ -z "$SALESFORCE_PRIVATE_KEY" ] && [ -z "$SALESFORCE_PRIVATE_KEY_PATH" ]; then echo "Skipping Salesforce ingest test because neither SALESFORCE_PRIVATE_KEY nor SALESFORCE_PRIVATE_KEY_PATH env vars are set." diff --git a/test_unstructured_ingest/test-ingest-sharepoint-embed-cog-index.sh b/test_unstructured_ingest/test-ingest-sharepoint-embed-cog-index.sh index af9d8f33ae..738848e008 100755 --- a/test_unstructured_ingest/test-ingest-sharepoint-embed-cog-index.sh +++ b/test_unstructured_ingest/test-ingest-sharepoint-embed-cog-index.sh @@ -11,6 +11,7 @@ DESTINATION_INDEX="utic-test-ingest-fixtures-output-$(date +%s)" # The vector configs on the schema currently only exist on versions: # 2023-07-01-Preview, 2021-04-30-Preview, 2020-06-30-Preview API_VERSION=2023-07-01-Preview +CI=${CI:-"false"} if [ -z "$SHAREPOINT_CLIENT_ID" ] || [ -z "$SHAREPOINT_CRED" ] ; then echo "Skipping Sharepoint ingest test because the SHAREPOINT_CLIENT_ID or SHAREPOINT_CRED env var is not set." 
@@ -27,6 +28,9 @@ if [ -z "$AZURE_SEARCH_ENDPOINT" ] && [ -z "$AZURE_SEARCH_API_KEY" ]; then exit 0 fi +# shellcheck disable=SC1091 +source "$SCRIPT_DIR"/cleanup.sh + function cleanup { response_code=$(curl -s -o /dev/null -w "%{http_code}" \ "https://utic-test-ingest-fixtures.search.windows.net/indexes/$DESTINATION_INDEX?api-version=$API_VERSION" \ @@ -41,6 +45,11 @@ function cleanup { else echo "Index $DESTINATION_INDEX does not exist, nothing to delete" fi + + cleanup_dir "$OUTPUT_DIR" + if [ "$CI" == "true" ]; then + cleanup_dir "$DOWNLOAD_DIR" + fi } trap cleanup EXIT @@ -77,6 +86,8 @@ PYTHONPATH=. ./unstructured/ingest/main.py \ --path "Shared Documents" \ --recursive \ --embedding-api-key "$OPENAI_API_KEY" \ + --chunk-elements \ + --chunk-multipage-sections \ azure-cognitive-search \ --key "$AZURE_SEARCH_API_KEY" \ --endpoint "$AZURE_SEARCH_ENDPOINT" \ diff --git a/test_unstructured_ingest/test-ingest-sharepoint.sh b/test_unstructured_ingest/test-ingest-sharepoint.sh index 15913cdbcb..8eefa87a60 100755 --- a/test_unstructured_ingest/test-ingest-sharepoint.sh +++ b/test_unstructured_ingest/test-ingest-sharepoint.sh @@ -7,11 +7,18 @@ cd "$SCRIPT_DIR"/.. || exit 1 OUTPUT_FOLDER_NAME=Sharepoint OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} +CI=${CI:-"false"} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh -trap 'cleanup_dir "$OUTPUT_DIR"' EXIT +function cleanup() { + cleanup_dir "$OUTPUT_DIR" + if [ "$CI" == "true" ]; then + cleanup_dir "$DOWNLOAD_DIR" + fi +} +trap cleanup EXIT if [ -z "$SHAREPOINT_CLIENT_ID" ] || [ -z "$SHAREPOINT_CRED" ]; then echo "Skipping Sharepoint ingest test because the SHAREPOINT_CLIENT_ID or SHAREPOINT_CRED env var is not set." 
diff --git a/test_unstructured_ingest/test-ingest-slack.sh b/test_unstructured_ingest/test-ingest-slack.sh index 86a5b6a73d..ff51d63692 100755 --- a/test_unstructured_ingest/test-ingest-slack.sh +++ b/test_unstructured_ingest/test-ingest-slack.sh @@ -7,11 +7,18 @@ cd "$SCRIPT_DIR"/.. || exit 1 OUTPUT_FOLDER_NAME=slack OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} +CI=${CI:-"false"} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh -trap 'cleanup_dir "$OUTPUT_DIR"' EXIT +function cleanup() { + cleanup_dir "$OUTPUT_DIR" + if [ "$CI" == "true" ]; then + cleanup_dir "$DOWNLOAD_DIR" + fi +} +trap cleanup EXIT if [ -z "$SLACK_TOKEN" ]; then echo "Skipping Slack ingest test because the SLACK_TOKEN env var is not set." diff --git a/test_unstructured_ingest/test-ingest-wikipedia.sh b/test_unstructured_ingest/test-ingest-wikipedia.sh index 5cb127acbb..1dc5e428b4 100755 --- a/test_unstructured_ingest/test-ingest-wikipedia.sh +++ b/test_unstructured_ingest/test-ingest-wikipedia.sh @@ -7,10 +7,18 @@ cd "$SCRIPT_DIR"/.. || exit 1 OUTPUT_FOLDER_NAME=wikipedia OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python -c "import os; print(os.cpu_count())")} +max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} +CI=${CI:-"false"} + # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh -trap 'cleanup_dir "$OUTPUT_DIR"' EXIT +function cleanup() { + cleanup_dir "$OUTPUT_DIR" + if [ "$CI" == "true" ]; then + cleanup_dir "$DOWNLOAD_DIR" + fi +} +trap cleanup EXIT PYTHONPATH=. 
./unstructured/ingest/main.py \ wikipedia \ diff --git a/test_unstructured_ingest/test-ingest.sh b/test_unstructured_ingest/test-ingest.sh index 926821943e..8c2dffc977 100755 --- a/test_unstructured_ingest/test-ingest.sh +++ b/test_unstructured_ingest/test-ingest.sh @@ -10,7 +10,12 @@ export OMP_THREAD_LIMIT=1 scripts=( 'test-ingest-s3.sh' +'test-ingest-s3-minio.sh' 'test-ingest-azure.sh' +'test-ingest-biomed-api.sh' +'test-ingest-biomed-path.sh' +## NOTE(yuming): The following test should be put after any tests with --preserve-downloads option +'test-ingest-pdf-fast-reprocess.sh' 'test-ingest-box.sh' 'test-ingest-discord.sh' 'test-ingest-dropbox.sh' @@ -18,8 +23,6 @@ scripts=( 'test-ingest-gitlab.sh' 'test-ingest-google-drive.sh' 'test-ingest-wikipedia.sh' -'test-ingest-biomed-api.sh' -'test-ingest-biomed-path.sh' 'test-ingest-local.sh' 'test-ingest-slack.sh' 'test-ingest-against-api.sh' @@ -39,8 +42,6 @@ scripts=( 'test-ingest-delta-table.sh' 'test-ingest-salesforce.sh' 'test-ingest-jira.sh' -## NOTE(yuming): The following test should be put after any tests with --preserve-downloads option -'test-ingest-pdf-fast-reprocess.sh' 'test-ingest-sharepoint.sh' ) diff --git a/typings/docx/document.pyi b/typings/docx/document.pyi index 779b6513dd..964b357300 100644 --- a/typings/docx/document.pyi +++ b/typings/docx/document.pyi @@ -11,7 +11,7 @@ from docx.text.paragraph import Paragraph class Document(BlockItemContainer): def add_paragraph( - self, text: str = "", style: Union[_ParagraphStyle, str, None] = None + self, text: str = "", style: Union[_ParagraphStyle, str, None] = None, ) -> Paragraph: ... @property def element(self) -> CT_Document: ... 
diff --git a/unstructured/__version__.py b/unstructured/__version__.py index 7100ed29da..3d63527b85 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.10.17-dev9" # pragma: no cover +__version__ = "0.10.19-dev10" # pragma: no cover diff --git a/unstructured/chunking/title.py b/unstructured/chunking/title.py index 8fd38d62f3..0c5bde799c 100644 --- a/unstructured/chunking/title.py +++ b/unstructured/chunking/title.py @@ -1,6 +1,7 @@ +import copy import functools import inspect -from typing import Any, Callable, Dict, List, TypeVar +from typing import Any, Callable, Dict, List, Optional, TypeVar, Union from typing_extensions import ParamSpec @@ -9,96 +10,130 @@ Element, ElementMetadata, Table, + TableChunk, Text, Title, ) +def chunk_table_element( + element: Table, + max_characters: Optional[int] = 500, +) -> List[Union[Table, TableChunk]]: + text = element.text + html = getattr(element, "text_as_html", None) + + if len(text) <= max_characters and ( # type: ignore + html is None or len(html) <= max_characters # type: ignore + ): + return [element] + + chunks: List[Union[Table, TableChunk]] = [] + metadata = copy.copy(element.metadata) + is_continuation = False + + while text or html: + text_chunk, text = text[:max_characters], text[max_characters:] + table_chunk = TableChunk(text=text_chunk, metadata=copy.copy(metadata)) + + if html: + html_chunk, html = html[:max_characters], html[max_characters:] + table_chunk.metadata.text_as_html = html_chunk + + if is_continuation: + table_chunk.metadata.is_continuation = True + + chunks.append(table_chunk) + is_continuation = True + + return chunks + + def chunk_by_title( elements: List[Element], multipage_sections: bool = True, - combine_under_n_chars: int = 500, - new_after_n_chars: int = 1500, + combine_text_under_n_chars: int = 500, + new_after_n_chars: int = 500, + max_characters: int = 500, ) -> List[Element]: """Uses title elements to identify sections within the 
document for chunking. Splits off into a new section when a title is detected or if metadata changes, which happens when page numbers or sections change. Cuts off sections once they have exceeded - a character length of new_after_n_chars. + a character length of max_characters. Parameters ---------- elements - A list of unstructured elements. Usually the ouput of a partition functions. + A list of unstructured elements. Usually the output of a partition functions. multipage_sections If True, sections can span multiple pages. Defaults to True. - combine_under_n_chars + combine_text_under_n_chars Combines elements (for example a series of titles) until a section reaches a length of n characters. new_after_n_chars - Cuts off new sections once they reach a length of n characters + Cuts off new sections once they reach a length of n characters (soft max) + max_characters + Chunks table elements text and text_as_html into chunks of length n characters (hard max) + TODO: (amanda) extend to other elements """ if ( - combine_under_n_chars is not None + combine_text_under_n_chars is not None and new_after_n_chars is not None + and max_characters is not None and ( - combine_under_n_chars > new_after_n_chars - or combine_under_n_chars < 0 + combine_text_under_n_chars > new_after_n_chars + or combine_text_under_n_chars < 0 or new_after_n_chars < 0 + or max_characters <= 0 + or combine_text_under_n_chars > max_characters ) ): raise ValueError( - "Invalid values for combine_under_n_chars and/or new_after_n_chars.", + "Invalid values for combine_text_under_n_chars and/or max_characters.", ) chunked_elements: List[Element] = [] sections = _split_elements_by_title_and_table( elements, multipage_sections=multipage_sections, - combine_under_n_chars=combine_under_n_chars, + combine_text_under_n_chars=combine_text_under_n_chars, new_after_n_chars=new_after_n_chars, ) - for section in sections: if not section: continue - if not isinstance(section[0], Text) or isinstance(section[0], 
Table): - chunked_elements.extend(section) - elif isinstance(section[0], Text): - text = "" - metadata = section[0].metadata + first_element = section[0] - for i, element in enumerate(section): - if isinstance(element, Text): - text += "\n\n" if text else "" - start_char = len(text) - text += element.text + if not isinstance(first_element, Text): + chunked_elements.extend(section) + continue - for attr, value in vars(element.metadata).items(): - if not isinstance(value, list): - continue + elif isinstance(first_element, Table): + chunked_elements.extend(chunk_table_element(first_element, max_characters)) + continue - _value = getattr(metadata, attr, []) - if _value is None: - _value = [] + text = "" + metadata = first_element.metadata + start_char = 0 + for element in section: + if isinstance(element, Text): + text += "\n\n" if text else "" + start_char = len(text) + text += element.text + for attr, value in vars(element.metadata).items(): + if isinstance(value, list): + _value = getattr(metadata, attr, []) or [] if attr == "regex_metadata": for item in value: item["start"] += start_char item["end"] += start_char - if i > 0: - # NOTE(newelh): Previously, _value was extended with value. - # This caused a memory error if the content was a list of strings - # with a large number of elements -- doubling the list size each time. - # This now instead ensures that the _value list is unique and updated. 
- for item in value: - if item not in _value: - _value.append(item) - - setattr(metadata, attr, _value) + _value.extend(item for item in value if item not in _value) + setattr(metadata, attr, _value) - chunked_elements.append(CompositeElement(text=text, metadata=metadata)) + chunked_elements.append(CompositeElement(text=text, metadata=metadata)) return chunked_elements @@ -106,8 +141,8 @@ def chunk_by_title( def _split_elements_by_title_and_table( elements: List[Element], multipage_sections: bool = True, - combine_under_n_chars: int = 500, - new_after_n_chars: int = 1500, + combine_text_under_n_chars: int = 500, + new_after_n_chars: int = 500, ) -> List[List[Element]]: sections: List[List[Element]] = [] section: List[Element] = [] @@ -123,11 +158,11 @@ def _split_elements_by_title_and_table( ) section_length = sum([len(str(element)) for element in section]) - new_section = (isinstance(element, Title) and section_length > combine_under_n_chars) or ( - not metadata_matches or section_length > new_after_n_chars - ) + new_section = ( + isinstance(element, Title) and section_length > combine_text_under_n_chars + ) or (not metadata_matches or section_length > new_after_n_chars) - if isinstance(element, Table) or not isinstance(element, Text): + if not isinstance(element, Text) or isinstance(element, Table): sections.append(section) sections.append([element]) section = [] @@ -185,7 +220,7 @@ def add_chunking_strategy() -> Callable[[Callable[_P, List[Element]]], Callable[ """Decorator for chuncking text. Uses title elements to identify sections within the document for chunking. Splits off a new section when a title is detected or if metadata changes, which happens when page numbers or sections change. 
Cuts off sections once they have exceeded - a character length of new_after_n_chars.""" + a character length of max_characters.""" def decorator(func: Callable[_P, List[Element]]) -> Callable[_P, List[Element]]: if func.__doc__ and ( @@ -199,11 +234,15 @@ def decorator(func: Callable[_P, List[Element]]) -> Callable[_P, List[Element]]: + "\n\tAdditional Parameters:" + "\n\t\tmultipage_sections" + "\n\t\t\tIf True, sections can span multiple pages. Defaults to True." - + "\n\t\tcombine_under_n_chars" + + "\n\t\tcombine_text_under_n_chars" + "\n\t\t\tCombines elements (for example a series of titles) until a section" + "\n\t\t\treaches a length of n characters." + "\n\t\tnew_after_n_chars" - + "\n\t\t\tCuts off new sections once they reach a length of n characters" + + "\n\t\t\t Cuts off new sections once they reach a length of n characters" + + "\n\t\t\t a soft max." + + "\n\t\tmax_characters" + + "\n\t\t\tChunks table elements text and text_as_html into chunks" + + "\n\t\t\tof length n characters, a hard max." 
) @functools.wraps(func) @@ -218,8 +257,9 @@ def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> List[Element]: elements = chunk_by_title( elements, multipage_sections=params.get("multipage_sections", True), - combine_under_n_chars=params.get("combine_under_n_chars", 500), - new_after_n_chars=params.get("new_after_n_chars", 1500), + combine_text_under_n_chars=params.get("combine_text_under_n_chars", 500), + new_after_n_chars=params.get("new_after_n_chars", 500), + max_characters=params.get("max_characters", 500), ) return elements diff --git a/unstructured/cleaners/core.py b/unstructured/cleaners/core.py index 088cfa170e..ba7ec592db 100644 --- a/unstructured/cleaners/core.py +++ b/unstructured/cleaners/core.py @@ -2,6 +2,9 @@ import re import sys import unicodedata +from typing import Tuple + +import numpy as np from unstructured.file_utils.encoding import ( format_encoding_str, @@ -412,3 +415,46 @@ def bytes_string_to_string(text: str, encoding: str = "utf-8"): text_bytes = bytes([ord(char) for char in text]) formatted_encoding = format_encoding_str(encoding) return text_bytes.decode(formatted_encoding) + + +def clean_extra_whitespace_with_index_run(text: str) -> Tuple[str, np.ndarray]: + """Cleans extra whitespace characters that appear between words. + Calculate distance between characters of original text and cleaned text. + + Returns cleaned text along with array of indices it has moved from original. + + Example + ------- + ITEM 1. BUSINESS -> ITEM 1. 
BUSINESS + array([0., 0., 0., 0., 0., 0., 0., 0., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.])) + """ + + cleaned_text = re.sub(r"[\xa0\n]", " ", text) + cleaned_text = re.sub(r"([ ]{2,})", " ", cleaned_text) + + cleaned_text = cleaned_text.strip() + + moved_indices = np.zeros(len(text)) + + distance, original_index, cleaned_index = 0, 0, 0 + while cleaned_index < len(cleaned_text): + if text[original_index] == cleaned_text[cleaned_index] or ( + bool(re.match("[\xa0\n]", text[original_index])) + and bool(re.match(" ", cleaned_text[cleaned_index])) + ): + moved_indices[cleaned_index] = distance + original_index += 1 + cleaned_index += 1 + continue + + distance += 1 + moved_indices[cleaned_index] = distance + original_index += 1 + + moved_indices[cleaned_index:] = distance + + return cleaned_text, moved_indices + + +def index_adjustment_after_clean_extra_whitespace(index, moved_indices) -> int: + return int(index - moved_indices[index]) diff --git a/unstructured/documents/elements.py b/unstructured/documents/elements.py index de52e7447d..7fa34f1d84 100644 --- a/unstructured/documents/elements.py +++ b/unstructured/documents/elements.py @@ -50,7 +50,11 @@ def to_dict(self): @classmethod def from_dict(cls, input_dict): - return cls(**input_dict) + # Only use existing fields when constructing + supported_fields = [f.name for f in dc.fields(cls)] + args = {k: v for k, v in input_dict.items() if k in supported_fields} + + return cls(**args) @dc.dataclass @@ -129,6 +133,7 @@ class Link(TypedDict): text: Optional[str] url: str + start_index: int @dc.dataclass @@ -157,6 +162,7 @@ class ElementMetadata: url: Optional[str] = None link_urls: Optional[List[str]] = None link_texts: Optional[List[str]] = None + links: Optional[List[Link]] = None # E-mail specific metadata fields sent_from: Optional[List[str]] = None @@ -179,6 +185,10 @@ class ElementMetadata: # Metadata extracted via regex regex_metadata: Optional[Dict[str, List[RegexMetadata]]] = None + # Chunking metadata 
fields + num_characters: Optional[int] = None + is_continuation: Optional[bool] = None + # Detection Model Class Probabilities from Unstructured-Inference Hi-Res detection_class_prob: Optional[float] = None @@ -212,7 +222,12 @@ def from_dict(cls, input_dict: Dict[str, Any]) -> Self: constructor_args["data_source"] = DataSourceMetadata.from_dict( constructor_args["data_source"], ) - return cls(**constructor_args) + + # Only use existing fields when constructing + supported_fields = [f.name for f in dc.fields(cls)] + args = {k: v for k, v in constructor_args.items() if k in supported_fields} + + return cls(**args) def merge(self, other: ElementMetadata): for k in self.__dict__: @@ -555,6 +570,14 @@ class Table(Text): pass +class TableChunk(Table): + """An element for capturing chunks of tables.""" + + category = "Table" + + pass + + class Header(Text): """An element for capturing document headers.""" @@ -594,7 +617,8 @@ class Footer(Text): "Page-footer": Footer, "Page-header": Header, # Title? 
"Picture": Image, - "Section-header": Header, + # this mapping favors ensures yolox produces backward compatible categories + "Section-header": Title, "Headline": Title, "Subheadline": Title, "Abstract": NarrativeText, diff --git a/unstructured/documents/html.py b/unstructured/documents/html.py index 75299fe898..77afae1e4a 100644 --- a/unstructured/documents/html.py +++ b/unstructured/documents/html.py @@ -273,12 +273,13 @@ def doc_after_cleaners( def _get_links_from_tag(tag_elem: etree.Element) -> List[Link]: links: List[Link] = [] href = tag_elem.get("href") + # TODO(klaijan) - add html href start_index if href: - links.append({"text": tag_elem.text, "url": href}) + links.append({"text": tag_elem.text, "url": href, "start_index": -1}) for tag in tag_elem.iterdescendants(): href = tag.get("href") if href: - links.append({"text": tag.text, "url": href}) + links.append({"text": tag.text, "url": href, "start_index": -1}) return links @@ -388,7 +389,7 @@ def _text_to_element( links=links, emphasized_texts=emphasized_texts, ) - elif is_possible_title(text): + elif is_heading_tag(tag) or is_possible_title(text): return HTMLTitle( text, tag=tag, @@ -416,7 +417,7 @@ def _is_container_with_text(tag_elem: etree.Element) -> bool:
    Please read my message!
    """ - if tag_elem.tag not in SECTION_TAGS or len(tag_elem) == 0: + if tag_elem.tag not in SECTION_TAGS + ["body"] or len(tag_elem) == 0: return False if tag_elem.text is None or tag_elem.text.strip() == "": @@ -430,6 +431,11 @@ def is_narrative_tag(text: str, tag: str) -> bool: return tag not in HEADING_TAGS and is_possible_narrative_text(text) +def is_heading_tag(tag: str) -> bool: + """Uses tag information to infer whether text is a heading.""" + return tag in HEADING_TAGS + + def _construct_text(tag_elem: etree.Element, include_tail_text: bool = True) -> str: """Extracts text from a text tag element.""" text = "" @@ -450,6 +456,12 @@ def _has_break_tags(tag_elem: etree._Element) -> bool: # pyright: ignore[report def _unfurl_break_tags(tag_elem: etree.Element) -> List[etree.Element]: unfurled = [] + + if tag_elem.text: + _tag_elem = etree.Element(tag_elem.tag) + _tag_elem.text = tag_elem.text + unfurled.append(_tag_elem) + children = tag_elem.getchildren() for child in children: if not _has_break_tags(child): @@ -473,13 +485,13 @@ def _is_text_tag(tag_elem: etree.Element, max_predecessor_len: int = 5) -> bool: if len(tag_elem) > max_predecessor_len + empty_elems_len: return False - if tag_elem.tag in TEXT_TAGS + HEADING_TAGS: + if tag_elem.tag in TEXT_TAGS + HEADING_TAGS + TEXTBREAK_TAGS: return True # NOTE(robinson) - This indicates that a div tag has no children. 
If that's the # case and the tag has text, its potential a text tag children = tag_elem.getchildren() - if tag_elem.tag in SECTION_TAGS and len(children) == 0: + if tag_elem.tag in SECTION_TAGS + ["body"] and len(children) == 0: return True if _has_adjacent_bulleted_spans(tag_elem, children): diff --git a/unstructured/embed/openai.py b/unstructured/embed/openai.py index dd5a360970..b79763f8ec 100644 --- a/unstructured/embed/openai.py +++ b/unstructured/embed/openai.py @@ -1,5 +1,5 @@ import types -from typing import List, Optional +from typing import List import numpy as np @@ -12,7 +12,7 @@ class OpenAIEmbeddingEncoder(BaseEmbeddingEncoder): - def __init__(self, api_key: str, model_name: Optional[str] = "text-embedding-ada-002"): + def __init__(self, api_key: str, model_name: str = "text-embedding-ada-002"): self.api_key = api_key self.model_name = model_name self.initialize() diff --git a/unstructured/ingest/README.md b/unstructured/ingest/README.md index f024769219..4cb3b1832d 100644 --- a/unstructured/ingest/README.md +++ b/unstructured/ingest/README.md @@ -60,17 +60,29 @@ In checklist form, the above steps are summarized as: - [ ] Create a new module under [unstructured/ingest/connector/](unstructured/ingest/connector/) implementing the 3 abstract base classes, similar to [unstructured/ingest/connector/github.py](unstructured/ingest/connector/github.py). - [ ] The subclass of `BaseIngestDoc` overrides `process_file()` if extra processing logic is needed other than what is provided by [auto.partition()](unstructured/partition/auto.py). + - [ ] If the IngestDoc relies on a connection or session that could be reused, the subclass of `BaseConnectorConfig` implements a session handle to manage connections. The ConnectorConfig subclass should also inherit from `ConfigSessionHandleMixin` and the IngestDoc subclass should also inherit from `IngestDocSessionHandleMixin`. 
Check [here](https://github.com/Unstructured-IO/unstructured/pull/1058/files#diff-dae96d30f58cffe1b348c036d006b48bdc7e2e47fbd7c8ec1c45d63face1542d) for a detailed example. + - [ ] The subclass of `BaseIngestDoc` implements relevant data source properties to include metadata. Check [this PR](https://github.com/Unstructured-IO/unstructured/pull/1283) for detailed examples. + - [ ] The `record_locator` property should include all of the information required to reach the document in the source platform. + - [ ] Add the relevant decorators from `unstructured.ingest.error` on top of relevant methods to handle errors such as a source connection error, destination connection error, or a partition error. For examples, check [here](https://github.com/Unstructured-IO/unstructured/commit/92692ad8d7d5001601dd88fef869a29660f492cb). - [ ] Update [unstructured/ingest/cli](unstructured/ingest/cli) with support for the new connector. - [ ] Create a folder under [examples/ingest](examples/ingest) that includes at least one well-documented script. - [ ] Add a script test_unstructured_ingest/test-ingest-\<the-new-data-source\>.sh. Its JSON output files should total no more than 100K. - [ ] Git add the expected outputs under test_unstructured_ingest/expected-structured-output/\<folder-name\> so the above test passes in CI. - [ ] Add a line to [test_unstructured_ingest/test-ingest.sh](test_unstructured_ingest/test-ingest.sh) invoking the new test script. +- [ ] Make sure the tests for the connector are running and not skipped by reviewing the logs in CI. - [ ] If additional Python dependencies are needed for the new connector: - [ ] Add them as an extra to [setup.py](unstructured/setup.py). - [ ] Update the Makefile, adding a target for `install-ingest-<name>` and adding another `pip-compile` line to the `pip-compile` make target. See [this commit](https://github.com/Unstructured-IO/unstructured/commit/ab542ca3c6274f96b431142262d47d727f309e37) for a reference.
- [ ] The added dependencies should be imported at runtime when the new connector is invoked, rather than as top-level imports. - [ ] Add the decorator `unstructured.utils.requires_dependencies` on top of each class instance or function that uses those connector-specific dependencies, e.g. for `GitHubConnector` it should look like `@requires_dependencies(dependencies=["github"], extras="github")` - [ ] Run `make tidy` and `make check` to ensure linting checks pass. +- [ ] Update ingest documentation [here](https://github.com/Unstructured-IO/unstructured/tree/eb8ce8913729826b62fd4e1224f70d67c5289b9d/docs/source) +- [ ] For team members that are developing in the original repository: + - [ ] If there are secret variables created for the connector tests, make sure to: + - [ ] add the secrets to GitHub (contact someone with access) + - [ ] include the secret variables in [`ci.yml`](https://github.com/Unstructured-IO/unstructured/blob/eb8ce8913729826b62fd4e1224f70d67c5289b9d/.github/workflows/ci.yml) and [`ingest-test-fixtures-update-pr.yml`](https://github.com/Unstructured-IO/unstructured/blob/eb8ce8913729826b62fd4e1224f70d67c5289b9d/.github/workflows/ingest-test-fixtures-update-pr.yml) + - [ ] add a `make install` line to the workflow configurations so the workflow machine has the connector's required dependencies while testing + - [ ] Whenever necessary, use the [ingest update test fixtures](https://github.com/Unstructured-IO/unstructured/actions/workflows/ingest-test-fixtures-update-pr.yml) workflow to update the test fixtures. - [ ] Honors the conventions of `BaseConnectorConfig` defined in [unstructured/ingest/interfaces.py](unstructured/ingest/interfaces.py) which is passed through [the CLI](unstructured/ingest/main.py): - [ ] If running with an `.output_dir` where structured outputs already exist for a given file, the file content is not re-downloaded from the data source nor is it reprocessed.
This is made possible by implementing the call to `MyIngestDoc.has_output()` which is invoked in [MainProcess._filter_docs_with_outputs](ingest-prep-for-many/unstructured/ingest/main.py). - [ ] Unless `.reprocess` is `True`, then documents are always reprocessed. diff --git a/unstructured/ingest/cli/cmds/azure_cognitive_search.py b/unstructured/ingest/cli/cmds/azure_cognitive_search.py index 22eded4373..241a66b2ba 100644 --- a/unstructured/ingest/cli/cmds/azure_cognitive_search.py +++ b/unstructured/ingest/cli/cmds/azure_cognitive_search.py @@ -9,6 +9,7 @@ log_options, ) from unstructured.ingest.cli.interfaces import ( + CliChunkingConfig, CliEmbeddingsConfig, CliMixin, CliPartitionConfig, @@ -74,6 +75,7 @@ def azure_cognitive_search_dest(ctx: click.Context, **options): read_config = CliReadConfig.from_dict(parent_options) partition_config = CliPartitionConfig.from_dict(parent_options) embedding_config = CliEmbeddingsConfig.from_dict(parent_options) + chunking_config = CliChunkingConfig.from_dict(parent_options) # Run for schema validation AzureCognitiveSearchCliWriteConfig.from_dict(options) runner = runner_map[source_cmd] @@ -93,6 +95,7 @@ def azure_cognitive_search_dest(ctx: click.Context, **options): writer_type="azure_cognitive_search", writer_kwargs=options, embedding_config=embedding_config, + chunking_config=chunking_config, ) runner_instance.run( **parent_options, diff --git a/unstructured/ingest/cli/cmds/s3.py b/unstructured/ingest/cli/cmds/s3.py index 88c46fdb25..34a7845f1b 100644 --- a/unstructured/ingest/cli/cmds/s3.py +++ b/unstructured/ingest/cli/cmds/s3.py @@ -1,4 +1,5 @@ import logging +import typing as t from dataclasses import dataclass import click @@ -22,6 +23,7 @@ @dataclass class S3CliConfig(BaseConfig, CliMixin): anonymous: bool = False + endpoint_url: t.Optional[str] = None @staticmethod def add_cli_options(cmd: click.Command) -> None: @@ -32,6 +34,13 @@ def add_cli_options(cmd: click.Command) -> None: default=False, help="Connect to s3 
without local AWS credentials.", ), + click.Option( + ["--endpoint-url"], + type=str, + default=None, + help="Use this endpoint_url, if specified. Needed for " + "connecting to non-AWS S3 buckets.", + ), ] cmd.params.extend(options) diff --git a/unstructured/ingest/cli/cmds/sharepoint.py b/unstructured/ingest/cli/cmds/sharepoint.py index 2457f474c8..5027fe3a80 100644 --- a/unstructured/ingest/cli/cmds/sharepoint.py +++ b/unstructured/ingest/cli/cmds/sharepoint.py @@ -9,6 +9,7 @@ log_options, ) from unstructured.ingest.cli.interfaces import ( + CliChunkingConfig, CliEmbeddingsConfig, CliMixin, CliPartitionConfig, @@ -86,6 +87,7 @@ def sharepoint_source(ctx: click.Context, **options): read_config = CliReadConfig.from_dict(options) partition_config = CliPartitionConfig.from_dict(options) embedding_config = CliEmbeddingsConfig.from_dict(options) + chunking_config = CliChunkingConfig.from_dict(options) # Run for schema validation SharepointCliConfig.from_dict(options) sharepoint_runner = SharePoint( @@ -93,6 +95,7 @@ def sharepoint_source(ctx: click.Context, **options): partition_config=partition_config, verbose=verbose, embedding_config=embedding_config, + chunking_config=chunking_config, ) sharepoint_runner.run(**options) except Exception as e: @@ -109,5 +112,6 @@ def get_source_cmd() -> click.Group: CliReadConfig.add_cli_options(cmd) CliPartitionConfig.add_cli_options(cmd) CliEmbeddingsConfig.add_cli_options(cmd) + CliChunkingConfig.add_cli_options(cmd) cmd.params.append(click.Option(["-v", "--verbose"], is_flag=True, default=False)) return cmd diff --git a/unstructured/ingest/cli/interfaces.py b/unstructured/ingest/cli/interfaces.py index 2190744b5b..7ec4660a6f 100644 --- a/unstructured/ingest/cli/interfaces.py +++ b/unstructured/ingest/cli/interfaces.py @@ -4,7 +4,13 @@ from dataclasses_json.core import Json, _decode_dataclass from unstructured.ingest.cli.cmds.utils import DelimitedString -from unstructured.ingest.interfaces import BaseConfig, EmbeddingConfig, 
PartitionConfig, ReadConfig +from unstructured.ingest.interfaces import ( + BaseConfig, + ChunkingConfig, + EmbeddingConfig, + PartitionConfig, + ReadConfig, +) class CliMixin: @@ -212,7 +218,7 @@ def from_dict( ): """ Extension of the dataclass from_dict() to avoid a naming conflict with other CLI params. - This allows CLI arguments to be prepended with embedding_ during CLI invocation but + This allows CLI arguments to be prepended with chunk_ during CLI invocation but doesn't require that as part of the field names in this class """ if isinstance(kvs, dict): @@ -225,3 +231,61 @@ def from_dict( return None return _decode_dataclass(cls, new_kvs, infer_missing) return _decode_dataclass(cls, kvs, infer_missing) + + +class CliChunkingConfig(ChunkingConfig, CliMixin): + @staticmethod + def add_cli_options(cmd: click.Command) -> None: + options = [ + click.Option( + ["--chunk-elements"], + is_flag=True, + default=False, + ), + click.Option( + ["--chunk-multipage-sections"], + is_flag=True, + default=False, + ), + click.Option( + ["--chunk-combine-under-n-chars"], + type=int, + default=500, + show_default=True, + ), + click.Option( + ["--chunk-new-after-n-chars"], + type=int, + default=1500, + show_default=True, + ), + ] + cmd.params.extend(options) + + @classmethod + def from_dict( + cls, + kvs: Json, + *, + infer_missing=False, + ): + """ + Extension of the dataclass from_dict() to avoid a naming conflict with other CLI params. 
+ This allows CLI arguments to be prepended with chunking_ during CLI invocation but + doesn't require that as part of the field names in this class + """ + if isinstance(kvs, dict): + new_kvs = {} + if "chunk_elements" in kvs: + new_kvs["chunk_elements"] = kvs.pop("chunk_elements") + new_kvs.update( + { + k[len("chunking_") :]: v # noqa: E203 + for k, v in kvs.items() + if k.startswith("chunking_") + }, + ) + if len(new_kvs.keys()) == 0: + return None + return _decode_dataclass(cls, new_kvs, infer_missing) + return _decode_dataclass(cls, kvs, infer_missing) diff --git a/unstructured/ingest/connector/delta_table.py b/unstructured/ingest/connector/delta_table.py index 92594cad26..976e5fbddf 100644 --- a/unstructured/ingest/connector/delta_table.py +++ b/unstructured/ingest/connector/delta_table.py @@ -3,6 +3,7 @@ import typing as t from dataclasses import dataclass from datetime import datetime as dt +from multiprocessing import Process from pathlib import Path import pandas as pd @@ -15,6 +16,7 @@ BaseSourceConnector, IngestDocCleanupMixin, SourceConnectorCleanupMixin, + SourceMetadata, WriteConfig, ) from unstructured.ingest.logger import logger @@ -50,26 +52,10 @@ def uri_filename(self) -> str: basename = os.path.basename(self.uri) return os.path.splitext(basename)[0] - @property - def source_url(self) -> t.Optional[str]: - """The url of the source document.""" - return self.uri - - @property - def date_created(self) -> t.Optional[str]: - """This is the creation time of the table itself, not the file or specific record""" - # TODO get creation time of file/record - return self.created_at - @property def filename(self): return (Path(self.read_config.download_dir) / f"{self.uri_filename()}.csv").resolve() - @property - def date_modified(self) -> t.Optional[str]: - """The date the document was last modified on the source system.""" - return self.modified_date - @property def _output_filename(self): """Create filename document id combined with a hash of the query to 
uniquely identify @@ -80,11 +66,8 @@ def _create_full_tmp_dir_path(self): self.filename.parent.mkdir(parents=True, exist_ok=True) self._output_filename.parent.mkdir(parents=True, exist_ok=True) - @SourceConnectionError.wrap - @BaseIngestDoc.skip_if_file_exists @requires_dependencies(["fsspec"], extras="delta-table") - def get_file(self): - import pyarrow.parquet as pq + def _get_fs_from_uri(self): from fsspec.core import url_to_fs try: @@ -94,6 +77,29 @@ def get_file(self): f"uri {self.uri} may be associated with a filesystem that " f"requires additional dependencies: {error}", ) + return fs + + def update_source_metadata(self, **kwargs): + fs = kwargs.get("fs", self._get_fs_from_uri()) + version = ( + fs.checksum(self.uri) if fs.protocol != "gs" else fs.info(self.uri).get("etag", "") + ) + file_exists = fs.exists(self.uri) + self.source_metadata = SourceMetadata( + date_created=self.created_at, + date_modified=self.modified_date, + version=version, + source_url=self.uri, + exists=file_exists, + ) + + @SourceConnectionError.wrap + @BaseIngestDoc.skip_if_file_exists + def get_file(self): + import pyarrow.parquet as pq + + fs = self._get_fs_from_uri() + self.update_source_metadata(fs=fs) logger.info(f"using a {fs} filesystem to collect table data") self._create_full_tmp_dir_path() logger.debug(f"Fetching {self} - PID: {os.getpid()}") @@ -177,8 +183,17 @@ def write(self, docs: t.List[BaseIngestDoc]) -> None: f"writing {len(json_list)} rows to destination " f"table at {self.connector_config.table_uri}", ) - write_deltalake( - table_or_uri=self.connector_config.table_uri, - data=pd.DataFrame(data={self.write_config.write_column: json_list}), - mode=self.write_config.mode, + # NOTE: deltalake writer on Linux sometimes can finish but still trigger a SIGABRT and cause + # ingest to fail, even though all tasks are completed normally. 
Putting the writer into a + # process mitigates this issue by ensuring python interpreter waits properly for deltalake's + # rust backend to finish + writer = Process( + target=write_deltalake, + kwargs={ + "table_or_uri": self.connector_config.table_uri, + "data": pd.DataFrame(data={self.write_config.write_column: json_list}), + "mode": self.write_config.mode, + }, ) + writer.start() + writer.join() diff --git a/unstructured/ingest/connector/discord.py b/unstructured/ingest/connector/discord.py index 2f8f689195..d9b40d3bb6 100644 --- a/unstructured/ingest/connector/discord.py +++ b/unstructured/ingest/connector/discord.py @@ -77,6 +77,7 @@ def _get_messages(self): from discord.ext import commands messages: t.List[discord.Message] = [] + jumpurl: t.List[str] = [] intents = discord.Intents.default() intents.message_content = True bot = commands.Bot(command_prefix=">", intents=intents) @@ -88,15 +89,17 @@ async def on_ready(): if self.days: after_date = dt.datetime.utcnow() - dt.timedelta(days=self.days) channel = bot.get_channel(int(self.channel)) + jumpurl.append(channel.jump_url) # type: ignore async for msg in channel.history(after=after_date): # type: ignore messages.append(msg) await bot.close() except Exception: logger.error("Error fetching messages") await bot.close() + raise bot.run(self.token) - jump_url = bot.get_channel(int(self.channel)).jump_url # type: ignore + jump_url = None if len(jumpurl) < 1 else jumpurl[0] return messages, jump_url def update_source_metadata(self, **kwargs): diff --git a/unstructured/ingest/connector/sharepoint.py b/unstructured/ingest/connector/sharepoint.py index 0dacea83d4..9fdcf87c9e 100644 --- a/unstructured/ingest/connector/sharepoint.py +++ b/unstructured/ingest/connector/sharepoint.py @@ -5,6 +5,7 @@ from pathlib import Path from urllib.parse import urlparse +from unstructured.documents.elements import Element from unstructured.embed.interfaces import BaseEmbeddingEncoder from unstructured.file_utils.filetype import 
EXT_TO_FILETYPE from unstructured.ingest.error import SourceConnectionError @@ -12,6 +13,7 @@ BaseConnectorConfig, BaseIngestDoc, BaseSourceConnector, + ChunkingConfig, EmbeddingConfig, IngestDocCleanupMixin, SourceConnectorCleanupMixin, @@ -69,6 +71,19 @@ class SharepointIngestDoc(IngestDocCleanupMixin, BaseIngestDoc): file_path: str registry_name: str = "sharepoint" embedding_config: t.Optional[EmbeddingConfig] = None + chunking_config: t.Optional[ChunkingConfig] = None + + def run_chunking(self, elements: t.List[Element]) -> t.List[Element]: + if self.chunking_config: + logger.info( + "Running chunking to split up elements with config: " + f"{self.chunking_config.to_dict()}", + ) + chunked_elements = self.chunking_config.chunk(elements=elements) + logger.info(f"chunked {len(elements)} elements into {len(chunked_elements)}") + return chunked_elements + else: + return elements @property def embedder(self) -> t.Optional[BaseEmbeddingEncoder]: @@ -244,6 +259,7 @@ def get_file(self): class SharepointSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): connector_config: SimpleSharepointConfig embedding_config: t.Optional[EmbeddingConfig] = None + chunking_config: t.Optional[ChunkingConfig] = None @requires_dependencies(["office365"], extras="sharepoint") def _list_files(self, folder, recursive) -> t.List["File"]: @@ -283,6 +299,7 @@ def _prepare_ingest_doc(self, obj: t.Union["File", "SitePage"], base_url, is_pag is_page=is_page, file_path=file_path, embedding_config=self.embedding_config, + chunking_config=self.chunking_config, ) @requires_dependencies(["office365"], extras="sharepoint") diff --git a/unstructured/ingest/doc_processor/generalized.py b/unstructured/ingest/doc_processor/generalized.py index 849b53853c..f44b2fa8f4 100644 --- a/unstructured/ingest/doc_processor/generalized.py +++ b/unstructured/ingest/doc_processor/generalized.py @@ -62,8 +62,9 @@ def process_document(ingest_doc_json: str, **partition_kwargs) -> Optional[List[ 
doc.write_result() except Exception: # TODO(crag) save the exception instead of print? - logger.error(f"Failed to process {doc}", exc_info=True) + logger.error(f"Failed to process {doc}") + raise Exception finally: if doc: doc.cleanup_file() - return isd_elems_no_filename + return isd_elems_no_filename diff --git a/unstructured/ingest/interfaces.py b/unstructured/ingest/interfaces.py index c708938bfd..caefa50afd 100644 --- a/unstructured/ingest/interfaces.py +++ b/unstructured/ingest/interfaces.py @@ -13,7 +13,8 @@ import requests from dataclasses_json import DataClassJsonMixin -from unstructured.documents.elements import DataSourceMetadata +from unstructured.chunking.title import chunk_by_title +from unstructured.documents.elements import DataSourceMetadata, Element from unstructured.embed.interfaces import BaseEmbeddingEncoder from unstructured.embed.openai import OpenAIEmbeddingEncoder from unstructured.ingest.error import PartitionError, SourceConnectionError @@ -78,6 +79,25 @@ def get_embedder(self) -> BaseEmbeddingEncoder: return OpenAIEmbeddingEncoder(**kwargs) +@dataclass +class ChunkingConfig(BaseConfig): + chunk_elements: bool = False + multipage_sections: bool = True + combine_text_under_n_chars: int = 500 + max_characters: int = 1500 + + def chunk(self, elements: t.List[Element]) -> t.List[Element]: + if self.chunk_elements: + return chunk_by_title( + elements=elements, + multipage_sections=self.multipage_sections, + combine_text_under_n_chars=self.combine_text_under_n_chars, + max_characters=self.max_characters, + ) + else: + return elements + + @dataclass class WriteConfig(BaseConfig): pass @@ -115,6 +135,9 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self._date_processed = None + def run_chunking(self, elements: t.List[Element]) -> t.List[Element]: + return elements + @property def embedder(self) -> t.Optional[BaseEmbeddingEncoder]: return None @@ -263,6 +286,7 @@ def partition_file(self, **partition_kwargs) -> 
t.List[t.Dict[str, t.Any]]: if response.status_code != 200: raise RuntimeError(f"Caught {response.status_code} from API: {response.text}") elements = elements_from_json(text=json.dumps(response.json())) + elements = self.run_chunking(elements=elements) if self.embedder: logger.info("Running embedder to add vector content to elements") elements = self.embedder.embed_documents(elements) diff --git a/unstructured/ingest/processor.py b/unstructured/ingest/processor.py index a133b72732..a91fc671b5 100644 --- a/unstructured/ingest/processor.py +++ b/unstructured/ingest/processor.py @@ -107,10 +107,11 @@ def process_documents( verbose: bool, dest_doc_connector: t.Optional[BaseDestinationConnector] = None, ) -> None: + languages = partition_config.ocr_languages.split("+") if partition_config.ocr_languages else [] process_document_with_partition_args = partial( process_document, strategy=partition_config.strategy, - ocr_languages=partition_config.ocr_languages, + languages=languages, encoding=partition_config.encoding, pdf_infer_table_structure=partition_config.pdf_infer_table_structure, ) diff --git a/unstructured/ingest/runner/airtable.py b/unstructured/ingest/runner/airtable.py index 92f5bd735d..48fc109b6a 100644 --- a/unstructured/ingest/runner/airtable.py +++ b/unstructured/ingest/runner/airtable.py @@ -10,11 +10,11 @@ def airtable( - verbose: bool, read_config: ReadConfig, partition_config: PartitionConfig, personal_access_token: str, - list_of_paths: t.Optional[str], + verbose: bool = False, + list_of_paths: t.Optional[str] = None, writer_type: t.Optional[str] = None, writer_kwargs: t.Optional[dict] = None, **kwargs, diff --git a/unstructured/ingest/runner/azure.py b/unstructured/ingest/runner/azure.py index 90b08e0654..58e2594b4d 100644 --- a/unstructured/ingest/runner/azure.py +++ b/unstructured/ingest/runner/azure.py @@ -9,14 +9,14 @@ def azure( - verbose: bool, read_config: ReadConfig, partition_config: PartitionConfig, account_name: t.Optional[str], account_key: 
t.Optional[str], connection_string: t.Optional[str], remote_url: str, - recursive: bool, + verbose: bool = False, + recursive: bool = False, writer_type: t.Optional[str] = None, writer_kwargs: t.Optional[dict] = None, **kwargs, diff --git a/unstructured/ingest/runner/base_runner.py b/unstructured/ingest/runner/base_runner.py index 772e282f0d..c12bdce1e0 100644 --- a/unstructured/ingest/runner/base_runner.py +++ b/unstructured/ingest/runner/base_runner.py @@ -2,7 +2,12 @@ from abc import ABC, abstractmethod from dataclasses import dataclass -from unstructured.ingest.interfaces import EmbeddingConfig, PartitionConfig, ReadConfig +from unstructured.ingest.interfaces import ( + ChunkingConfig, + EmbeddingConfig, + PartitionConfig, + ReadConfig, +) @dataclass @@ -13,6 +18,7 @@ class Runner(ABC): writer_type: t.Optional[str] = None writer_kwargs: t.Optional[dict] = None embedding_config: t.Optional[EmbeddingConfig] = None + chunking_config: t.Optional[ChunkingConfig] = None @abstractmethod def run(self, *args, **kwargs): diff --git a/unstructured/ingest/runner/biomed.py b/unstructured/ingest/runner/biomed.py index 62e6bb1671..fe23aa34ca 100644 --- a/unstructured/ingest/runner/biomed.py +++ b/unstructured/ingest/runner/biomed.py @@ -13,13 +13,13 @@ def biomed( verbose: bool, read_config: ReadConfig, partition_config: PartitionConfig, - path: t.Optional[str], - api_id: t.Optional[str], - api_from: t.Optional[str], - api_until: t.Optional[str], max_retries: int, max_request_time: int, decay: float, + path: t.Optional[str] = None, + api_id: t.Optional[str] = None, + api_from: t.Optional[str] = None, + api_until: t.Optional[str] = None, writer_type: t.Optional[str] = None, writer_kwargs: t.Optional[dict] = None, **kwargs, diff --git a/unstructured/ingest/runner/box.py b/unstructured/ingest/runner/box.py index 7ac9d44d7e..20f066dfa3 100644 --- a/unstructured/ingest/runner/box.py +++ b/unstructured/ingest/runner/box.py @@ -9,12 +9,12 @@ def box( - verbose: bool, read_config: 
ReadConfig, partition_config: PartitionConfig, remote_url: str, - recursive: bool, - box_app_config: t.Optional[str], + verbose: bool = False, + recursive: bool = False, + box_app_config: t.Optional[str] = None, writer_type: t.Optional[str] = None, writer_kwargs: t.Optional[dict] = None, **kwargs, @@ -23,7 +23,7 @@ def box( ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) read_config.download_dir = update_download_dir_remote_url( - connector_name="azure", + connector_name="box", read_config=read_config, remote_url=remote_url, logger=logger, diff --git a/unstructured/ingest/runner/confluence.py b/unstructured/ingest/runner/confluence.py index 64db4233c2..5192d07dc2 100644 --- a/unstructured/ingest/runner/confluence.py +++ b/unstructured/ingest/runner/confluence.py @@ -10,7 +10,6 @@ def confluence( - verbose: bool, read_config: ReadConfig, partition_config: PartitionConfig, url: str, @@ -18,6 +17,7 @@ def confluence( api_token: str, max_num_of_spaces: int, max_num_of_docs_from_each_space: int, + verbose: bool = False, spaces: t.Optional[t.List[str]] = None, writer_type: t.Optional[str] = None, writer_kwargs: t.Optional[dict] = None, diff --git a/unstructured/ingest/runner/delta_table.py b/unstructured/ingest/runner/delta_table.py index a547831dbd..f19a4d9c4a 100644 --- a/unstructured/ingest/runner/delta_table.py +++ b/unstructured/ingest/runner/delta_table.py @@ -11,12 +11,12 @@ def delta_table( - verbose: bool, read_config: ReadConfig, partition_config: PartitionConfig, table_uri: t.Union[str, Path], version: t.Optional[int] = None, storage_options: t.Optional[str] = None, + verbose: bool = False, without_files: bool = False, columns: t.Optional[t.List[str]] = None, writer_type: t.Optional[str] = None, diff --git a/unstructured/ingest/runner/discord.py b/unstructured/ingest/runner/discord.py index d5a44a5086..de1a7d4cbb 100644 --- a/unstructured/ingest/runner/discord.py +++ b/unstructured/ingest/runner/discord.py @@ -10,12 +10,12 @@ def discord( 
- verbose: bool, read_config: ReadConfig, partition_config: PartitionConfig, channels: t.List[str], token: str, - period: t.Optional[int], + verbose: bool = False, + period: t.Optional[int] = None, writer_type: t.Optional[str] = None, writer_kwargs: t.Optional[dict] = None, **kwargs, diff --git a/unstructured/ingest/runner/dropbox.py b/unstructured/ingest/runner/dropbox.py index bacb3b8127..e30ab36af3 100644 --- a/unstructured/ingest/runner/dropbox.py +++ b/unstructured/ingest/runner/dropbox.py @@ -9,12 +9,12 @@ def dropbox( - verbose: bool, read_config: ReadConfig, partition_config: PartitionConfig, remote_url: str, - recursive: bool, - token: t.Optional[str], + verbose: bool = False, + recursive: bool = False, + token: t.Optional[str] = None, writer_type: t.Optional[str] = None, writer_kwargs: t.Optional[dict] = None, **kwargs, diff --git a/unstructured/ingest/runner/elasticsearch.py b/unstructured/ingest/runner/elasticsearch.py index cd02a2f638..8c5a511576 100644 --- a/unstructured/ingest/runner/elasticsearch.py +++ b/unstructured/ingest/runner/elasticsearch.py @@ -10,12 +10,12 @@ def elasticsearch( - verbose: bool, read_config: ReadConfig, partition_config: PartitionConfig, url: str, index_name: str, - jq_query: t.Optional[str], + verbose: bool = False, + jq_query: t.Optional[str] = None, writer_type: t.Optional[str] = None, writer_kwargs: t.Optional[dict] = None, **kwargs, diff --git a/unstructured/ingest/runner/fsspec.py b/unstructured/ingest/runner/fsspec.py index f0260af409..7822b30140 100644 --- a/unstructured/ingest/runner/fsspec.py +++ b/unstructured/ingest/runner/fsspec.py @@ -11,11 +11,11 @@ def fsspec( - verbose: bool, read_config: ReadConfig, partition_config: PartitionConfig, remote_url: str, - recursive: bool, + verbose: bool = False, + recursive: bool = False, writer_type: t.Optional[str] = None, writer_kwargs: t.Optional[dict] = None, **kwargs, diff --git a/unstructured/ingest/runner/gcs.py b/unstructured/ingest/runner/gcs.py index 
eab4fb4bc6..a442a28916 100644 --- a/unstructured/ingest/runner/gcs.py +++ b/unstructured/ingest/runner/gcs.py @@ -9,12 +9,12 @@ def gcs( - verbose: bool, read_config: ReadConfig, partition_config: PartitionConfig, remote_url: str, - recursive: bool, - token: t.Optional[str], + verbose: bool = False, + recursive: bool = False, + token: t.Optional[str] = None, writer_type: t.Optional[str] = None, writer_kwargs: t.Optional[dict] = None, **kwargs, diff --git a/unstructured/ingest/runner/github.py b/unstructured/ingest/runner/github.py index 4bbf09e5aa..ff726da597 100644 --- a/unstructured/ingest/runner/github.py +++ b/unstructured/ingest/runner/github.py @@ -10,13 +10,13 @@ def github( - verbose: bool, read_config: ReadConfig, partition_config: PartitionConfig, url: str, git_branch: str, - git_access_token: t.Optional[str], - git_file_glob: t.Optional[str], + verbose: bool = False, + git_access_token: t.Optional[str] = None, + git_file_glob: t.Optional[str] = None, writer_type: t.Optional[str] = None, writer_kwargs: t.Optional[dict] = None, **kwargs, diff --git a/unstructured/ingest/runner/gitlab.py b/unstructured/ingest/runner/gitlab.py index 4d15385a98..a4e6d9b947 100644 --- a/unstructured/ingest/runner/gitlab.py +++ b/unstructured/ingest/runner/gitlab.py @@ -10,13 +10,13 @@ def gitlab( - verbose: bool, read_config: ReadConfig, partition_config: PartitionConfig, url: str, git_branch: str, - git_access_token: t.Optional[str], - git_file_glob: t.Optional[str], + verbose: bool = False, + git_access_token: t.Optional[str] = None, + git_file_glob: t.Optional[str] = None, writer_type: t.Optional[str] = None, writer_kwargs: t.Optional[dict] = None, **kwargs, diff --git a/unstructured/ingest/runner/google_drive.py b/unstructured/ingest/runner/google_drive.py index 2f6f437086..27ad5979bd 100644 --- a/unstructured/ingest/runner/google_drive.py +++ b/unstructured/ingest/runner/google_drive.py @@ -10,13 +10,13 @@ def gdrive( - verbose: bool, read_config: ReadConfig, 
partition_config: PartitionConfig, service_account_key: str, - recursive: bool, drive_id: str, - extension: t.Optional[str], + verbose: bool = False, + recursive: bool = False, + extension: t.Optional[str] = None, writer_type: t.Optional[str] = None, writer_kwargs: t.Optional[dict] = None, **kwargs, diff --git a/unstructured/ingest/runner/jira.py b/unstructured/ingest/runner/jira.py index bcecda323b..e9875e51ee 100644 --- a/unstructured/ingest/runner/jira.py +++ b/unstructured/ingest/runner/jira.py @@ -10,15 +10,15 @@ def jira( - verbose: bool, read_config: ReadConfig, partition_config: PartitionConfig, url: str, user_email: str, api_token: str, - projects: t.Optional[t.List[str]], - boards: t.Optional[t.List[str]], - issues: t.Optional[t.List[str]], + verbose: bool = False, + projects: t.Optional[t.List[str]] = None, + boards: t.Optional[t.List[str]] = None, + issues: t.Optional[t.List[str]] = None, writer_type: t.Optional[str] = None, writer_kwargs: t.Optional[dict] = None, **kwargs, diff --git a/unstructured/ingest/runner/local.py b/unstructured/ingest/runner/local.py index 6278079324..a52ee598ec 100644 --- a/unstructured/ingest/runner/local.py +++ b/unstructured/ingest/runner/local.py @@ -8,12 +8,12 @@ def local( - verbose: bool, read_config: ReadConfig, partition_config: PartitionConfig, input_path: str, - recursive: bool, - file_glob: t.Optional[str], + verbose: bool = False, + recursive: bool = False, + file_glob: t.Optional[str] = None, writer_type: t.Optional[str] = None, writer_kwargs: t.Optional[dict] = None, **kwargs, diff --git a/unstructured/ingest/runner/notion.py b/unstructured/ingest/runner/notion.py index 9bd10e9b03..7aa22e9c4e 100644 --- a/unstructured/ingest/runner/notion.py +++ b/unstructured/ingest/runner/notion.py @@ -11,11 +11,11 @@ def notion( - verbose: bool, read_config: ReadConfig, partition_config: PartitionConfig, api_key: str, - recursive: bool, + verbose: bool = False, + recursive: bool = False, page_ids: t.Optional[t.List[str]] = 
None, database_ids: t.Optional[t.List[str]] = None, writer_type: t.Optional[str] = None, diff --git a/unstructured/ingest/runner/onedrive.py b/unstructured/ingest/runner/onedrive.py index 3cee6b9467..abf3d18938 100644 --- a/unstructured/ingest/runner/onedrive.py +++ b/unstructured/ingest/runner/onedrive.py @@ -10,16 +10,16 @@ def onedrive( - verbose: bool, read_config: ReadConfig, partition_config: PartitionConfig, tenant: str, user_pname: str, client_id: str, client_cred: str, - authority_url: t.Optional[str], - path: t.Optional[str], - recursive: bool, + verbose: bool = False, + authority_url: t.Optional[str] = None, + path: t.Optional[str] = None, + recursive: bool = False, writer_type: t.Optional[str] = None, writer_kwargs: t.Optional[dict] = None, **kwargs, diff --git a/unstructured/ingest/runner/outlook.py b/unstructured/ingest/runner/outlook.py index 3592634bd0..d0613ce340 100644 --- a/unstructured/ingest/runner/outlook.py +++ b/unstructured/ingest/runner/outlook.py @@ -10,15 +10,15 @@ def outlook( - verbose: bool, read_config: ReadConfig, partition_config: PartitionConfig, user_email: str, - client_id: t.Optional[str], - client_cred: t.Optional[str], - tenant: t.Optional[str], - authority_url: t.Optional[str], - recursive: bool, + verbose: bool = False, + recursive: bool = False, + client_id: t.Optional[str] = None, + client_cred: t.Optional[str] = None, + tenant: t.Optional[str] = None, + authority_url: t.Optional[str] = None, outlook_folders: t.Optional[t.List[str]] = None, writer_type: t.Optional[str] = None, writer_kwargs: t.Optional[dict] = None, diff --git a/unstructured/ingest/runner/reddit.py b/unstructured/ingest/runner/reddit.py index 2003723789..fea56f1f12 100644 --- a/unstructured/ingest/runner/reddit.py +++ b/unstructured/ingest/runner/reddit.py @@ -10,15 +10,15 @@ def reddit( - verbose: bool, read_config: ReadConfig, partition_config: PartitionConfig, subreddit_name: str, - client_id: t.Optional[str], - client_secret: t.Optional[str], 
user_agent: str, - search_query: t.Optional[str], num_posts: int, + verbose: bool = False, + client_id: t.Optional[str] = None, + client_secret: t.Optional[str] = None, + search_query: t.Optional[str] = None, writer_type: t.Optional[str] = None, writer_kwargs: t.Optional[dict] = None, **kwargs, diff --git a/unstructured/ingest/runner/s3.py b/unstructured/ingest/runner/s3.py index 45f27ce43d..e3646305fa 100644 --- a/unstructured/ingest/runner/s3.py +++ b/unstructured/ingest/runner/s3.py @@ -9,12 +9,13 @@ def s3( - verbose: bool, read_config: ReadConfig, partition_config: PartitionConfig, remote_url: str, - recursive: bool, - anonymous: bool, + verbose: bool = False, + recursive: bool = False, + anonymous: bool = False, + endpoint_url: t.Optional[str] = None, writer_type: t.Optional[str] = None, writer_kwargs: t.Optional[dict] = None, **kwargs, @@ -31,11 +32,14 @@ def s3( from unstructured.ingest.connector.s3 import S3SourceConnector, SimpleS3Config + access_kwargs: t.Dict[str, t.Any] = {"anon": anonymous} + if endpoint_url: + access_kwargs["endpoint_url"] = endpoint_url source_doc_connector = S3SourceConnector( # type: ignore connector_config=SimpleS3Config( path=remote_url, recursive=recursive, - access_kwargs={"anon": anonymous}, + access_kwargs=access_kwargs, ), read_config=read_config, partition_config=partition_config, diff --git a/unstructured/ingest/runner/salesforce.py b/unstructured/ingest/runner/salesforce.py index ad0f050ed5..415d9be79b 100644 --- a/unstructured/ingest/runner/salesforce.py +++ b/unstructured/ingest/runner/salesforce.py @@ -10,14 +10,14 @@ def salesforce( - verbose: bool, read_config: ReadConfig, partition_config: PartitionConfig, - recursive: bool, username: str, consumer_key: str, private_key_path: str, categories: t.List[str], + verbose: bool = False, + recursive: bool = False, writer_type: t.Optional[str] = None, writer_kwargs: t.Optional[dict] = None, **kwargs, diff --git a/unstructured/ingest/runner/sharepoint.py 
b/unstructured/ingest/runner/sharepoint.py index a20e64bdf8..1781e2cbd9 100644 --- a/unstructured/ingest/runner/sharepoint.py +++ b/unstructured/ingest/runner/sharepoint.py @@ -14,9 +14,9 @@ def run( site: str, client_id: str, client_cred: str, - files_only: bool, path: str, - recursive: bool, + files_only: bool = False, + recursive: bool = False, **kwargs, ): writer_kwargs = self.writer_kwargs if self.writer_kwargs else {} @@ -51,6 +51,7 @@ def run( read_config=self.read_config, partition_config=self.partition_config, embedding_config=self.embedding_config, + chunking_config=self.chunking_config, ) dest_doc_connector = None diff --git a/unstructured/ingest/runner/slack.py b/unstructured/ingest/runner/slack.py index d2c61e9faf..0b9919c216 100644 --- a/unstructured/ingest/runner/slack.py +++ b/unstructured/ingest/runner/slack.py @@ -10,13 +10,13 @@ def slack( - verbose: bool, read_config: ReadConfig, partition_config: PartitionConfig, channels: t.List[str], token: str, - start_date: t.Optional[str], - end_date: t.Optional[str], + verbose: bool = False, + start_date: t.Optional[str] = None, + end_date: t.Optional[str] = None, writer_type: t.Optional[str] = None, writer_kwargs: t.Optional[dict] = None, **kwargs, diff --git a/unstructured/ingest/runner/wikipedia.py b/unstructured/ingest/runner/wikipedia.py index 44a031cd60..8914042cad 100644 --- a/unstructured/ingest/runner/wikipedia.py +++ b/unstructured/ingest/runner/wikipedia.py @@ -10,11 +10,11 @@ def wikipedia( - verbose: bool, read_config: ReadConfig, partition_config: PartitionConfig, page_title: str, - auto_suggest: bool, + verbose: bool = False, + auto_suggest: bool = False, writer_type: t.Optional[str] = None, writer_kwargs: t.Optional[dict] = None, **kwargs, diff --git a/unstructured/ingest/runner/writers.py b/unstructured/ingest/runner/writers.py index 46a875035e..7be5073c0f 100644 --- a/unstructured/ingest/runner/writers.py +++ b/unstructured/ingest/runner/writers.py @@ -9,6 +9,7 @@ def s3_writer( 
remote_url: str, anonymous: bool, + endpoint_url: t.Optional[str] = None, verbose: bool = False, **kwargs, ): @@ -17,11 +18,15 @@ def s3_writer( SimpleS3Config, ) + access_kwargs: t.Dict[str, t.Any] = {"anon": anonymous} + if endpoint_url: + access_kwargs["endpoint_url"] = endpoint_url + return S3DestinationConnector( write_config=WriteConfig(), connector_config=SimpleS3Config( path=remote_url, - access_kwargs={"anon": anonymous}, + access_kwargs=access_kwargs, ), ) diff --git a/unstructured/partition/common.py b/unstructured/partition/common.py index 3fcf31d1b5..18a59fc664 100644 --- a/unstructured/partition/common.py +++ b/unstructured/partition/common.py @@ -461,7 +461,8 @@ def convert_to_bytes( def convert_ms_office_table_to_text( - table: Union["docxtable", "pptxtable"], as_html: bool = True + table: Union["docxtable", "pptxtable"], + as_html: bool = True, ) -> str: """ Convert a table object from a Word document to an HTML table string using the tabulate library. diff --git a/unstructured/partition/csv.py b/unstructured/partition/csv.py index 6a7314de03..2528f321cd 100644 --- a/unstructured/partition/csv.py +++ b/unstructured/partition/csv.py @@ -4,6 +4,7 @@ import pandas as pd from lxml.html.soupparser import fromstring as soupparser_fromstring +from unstructured.chunking.title import add_chunking_strategy from unstructured.documents.elements import ( Element, ElementMetadata, @@ -21,6 +22,7 @@ @process_metadata() @add_metadata_with_filetype(FileType.CSV) +@add_chunking_strategy() def partition_csv( filename: Optional[str] = None, file: Optional[Union[IO[bytes], SpooledTemporaryFile]] = None, diff --git a/unstructured/partition/doc.py b/unstructured/partition/doc.py index 18ae998c67..5b3283cd18 100644 --- a/unstructured/partition/doc.py +++ b/unstructured/partition/doc.py @@ -1,6 +1,6 @@ import os import tempfile -from typing import IO, List, Optional +from typing import IO, Any, List, Optional from unstructured.chunking.title import add_chunking_strategy 
from unstructured.documents.elements import Element, process_metadata @@ -26,7 +26,7 @@ def partition_doc( metadata_last_modified: Optional[str] = None, libre_office_filter: Optional[str] = "MS Word 2007 XML", chunking_strategy: Optional[str] = None, - **kwargs, + **kwargs: Any, ) -> List[Element]: """Partitions Microsoft Word Documents in .doc format into its document elements. diff --git a/unstructured/partition/docx.py b/unstructured/partition/docx.py index 7aefbd9be4..506e1e6459 100644 --- a/unstructured/partition/docx.py +++ b/unstructured/partition/docx.py @@ -411,6 +411,7 @@ def iter_footer(footer: _Footer, header_footer_type: str) -> Iterator[Footer]: metadata=ElementMetadata( filename=self._metadata_filename, header_footer_type=header_footer_type, + category_depth=0, ), ) @@ -438,6 +439,7 @@ def iter_header(header: _Header, header_footer_type: str) -> Iterator[Header]: metadata=ElementMetadata( filename=self._metadata_filename, header_footer_type=header_footer_type, + category_depth=0, # -- headers are always at the root level ), ) @@ -554,12 +556,14 @@ def _paragraph_emphasis(self, paragraph: Paragraph) -> Tuple[List[str], List[str def _paragraph_metadata(self, paragraph: Paragraph) -> ElementMetadata: """ElementMetadata object describing `paragraph`.""" emphasized_text_contents, emphasized_text_tags = self._paragraph_emphasis(paragraph) + category_depth = self._parse_category_depth_by_style(paragraph) return ElementMetadata( filename=self._metadata_filename, page_number=self._page_number, last_modified=self._last_modified, emphasized_text_contents=emphasized_text_contents or None, emphasized_text_tags=emphasized_text_tags or None, + category_depth=category_depth, ) def _parse_paragraph_text_for_element_type(self, paragraph: Paragraph) -> Optional[Type[Text]]: @@ -634,6 +638,52 @@ def _table_emphasis(self, table: DocxTable) -> Tuple[List[str], List[str]]: iter_tbl_emph, iter_tbl_emph_2 = itertools.tee(self._iter_table_emphasis(table)) return ([e["text"] 
for e in iter_tbl_emph], [e["tag"] for e in iter_tbl_emph_2]) + def _parse_category_depth_by_style(self, paragraph: Paragraph) -> int: + """Determine category depth from paragraph metadata""" + + # Determine category depth from paragraph ilvl xpath + xpath = paragraph._element.xpath("./w:pPr/w:numPr/w:ilvl/@w:val") + if xpath: + return int(xpath[0]) + + # Determine category depth from style name + style_name = (paragraph.style and paragraph.style.name) or "Normal" + depth = self._parse_category_depth_by_style_name(style_name) + + if depth > 0: + return depth + else: + # Check if category depth can be determined from style ilvl + return self._parse_category_depth_by_style_ilvl() + + def _parse_category_depth_by_style_name(self, style_name: str) -> int: + """Parse category-depth from the style-name of `paragraph`. + + Category depth is 0-indexed and relative to the other element types in the document. + """ + + def _extract_number(suffix: str) -> int: + return int(suffix.split()[-1]) - 1 if suffix.split()[-1].isdigit() else 0 + + # Heading styles + if style_name.startswith("Heading"): + return _extract_number(style_name) + + if style_name == "Subtitle": + return 1 + + # List styles + list_prefixes = ["List", "List Bullet", "List Continue", "List Number"] + if any(style_name.startswith(prefix) for prefix in list_prefixes): + return _extract_number(style_name) + + # Other styles + return 0 + + def _parse_category_depth_by_style_ilvl(self) -> int: + # TODO(newelh) Parsing category depth by style ilvl is not yet implemented + return 0 + class _SectBlockItemIterator: """Generates the block-items in a section. 
diff --git a/unstructured/partition/pdf.py b/unstructured/partition/pdf.py index 16fefa29f6..ddd6e7b845 100644 --- a/unstructured/partition/pdf.py +++ b/unstructured/partition/pdf.py @@ -1,24 +1,40 @@ +import contextlib import io import os import re import warnings from tempfile import SpooledTemporaryFile -from typing import IO, Any, BinaryIO, Iterator, List, Optional, Tuple, Union, cast +from typing import IO, Any, BinaryIO, Iterator, List, Optional, Sequence, Tuple, Union, cast +import numpy as np import pdf2image import PIL -from pdfminer.high_level import extract_pages -from pdfminer.layout import LTContainer, LTImage, LTItem, LTTextBox +from pdfminer.converter import PDFPageAggregator, PDFResourceManager +from pdfminer.layout import ( + LAParams, + LTChar, + LTContainer, + LTImage, + LTItem, + LTTextBox, +) +from pdfminer.pdfinterp import PDFPageInterpreter +from pdfminer.pdfpage import PDFPage +from pdfminer.pdftypes import PDFObjRef from pdfminer.utils import open_filename from unstructured.chunking.title import add_chunking_strategy -from unstructured.cleaners.core import clean_extra_whitespace +from unstructured.cleaners.core import ( + clean_extra_whitespace_with_index_run, + index_adjustment_after_clean_extra_whitespace, +) from unstructured.documents.coordinates import PixelSpace, PointSpace from unstructured.documents.elements import ( CoordinatesMetadata, Element, ElementMetadata, Image, + Link, ListItem, PageBreak, Text, @@ -54,6 +70,13 @@ RE_MULTISPACE_INCLUDING_NEWLINES = re.compile(pattern=r"\s+", flags=re.DOTALL) +def default_hi_res_model() -> str: + # a light config for the hi res model; this is not defined as a constant so that no setting of + # the default hi res model name is done on importing of this submodule; this allows (if user + # prefers) for setting env after importing the sub module and changing the default model name + return os.environ.get("UNSTRUCTURED_HI_RES_MODEL_NAME", "yolox_quantized") + + @process_metadata() 
@add_metadata_with_filetype(FileType.PDF) @add_chunking_strategy() @@ -71,6 +94,7 @@ def partition_pdf( metadata_filename: Optional[str] = None, metadata_last_modified: Optional[str] = None, chunking_strategy: Optional[str] = None, + links: Sequence[Link] = [], **kwargs, ) -> List[Element]: """Parses a pdf document into a list of interpreted elements. @@ -312,7 +336,7 @@ def _partition_pdf_or_image_local( ocr_languages = prepare_languages_for_tesseract(languages) - model_name = model_name if model_name else os.environ.get("UNSTRUCTURED_HI_RES_MODEL_NAME") + model_name = model_name or default_hi_res_model() pdf_image_dpi = kwargs.pop("pdf_image_dpi", None) extract_images_in_pdf = kwargs.get("extract_images_in_pdf", False) image_output_dir_path = kwargs.get("image_output_dir_path", None) @@ -454,15 +478,42 @@ def _process_pdfminer_pages( elements: List[Element] = [] sort_mode = kwargs.get("sort_mode", SORT_MODE_XY_CUT) - for i, page in enumerate(extract_pages(fp)): # type: ignore - width, height = page.width, page.height + rsrcmgr = PDFResourceManager() + laparams = LAParams() + device = PDFPageAggregator(rsrcmgr, laparams=laparams) + interpreter = PDFPageInterpreter(rsrcmgr, device) + + for i, page in enumerate(PDFPage.get_pages(fp)): # type: ignore + interpreter.process_page(page) + page_layout = device.get_result() + + width, height = page_layout.width, page_layout.height - text_segments = [] page_elements = [] - for obj in page: - x1, y2, x2, y1 = obj.bbox - y1 = height - y1 - y2 = height - y2 + annotation_list = [] + + coordinate_system = PixelSpace( + width=width, + height=height, + ) + if page.annots: + annotation_list = get_uris(page.annots, height, coordinate_system, i + 1) + + for obj in page_layout: + x1, y1, x2, y2 = rect_to_bbox(obj.bbox, height) + bbox = (x1, y1, x2, y2) + + urls_metadata = [] + + if len(annotation_list) > 0 and isinstance(obj, LTTextBox): + annotations_within_element = check_annotations_within_element( + annotation_list, + bbox, + i + 
1, + ) + _, words = get_word_bounding_box_from_element(obj, height) + for annot in annotations_within_element: + urls_metadata.append(map_bbox_and_index(words, annot)) if hasattr(obj, "get_text"): _text_snippets = [obj.get_text()] @@ -471,13 +522,8 @@ def _process_pdfminer_pages( _text_snippets = re.split(PARAGRAPH_PATTERN, _text) for _text in _text_snippets: - _text = clean_extra_whitespace(_text) + _text, moved_indices = clean_extra_whitespace_with_index_run(_text) if _text.strip(): - text_segments.append(_text) - coordinate_system = PixelSpace( - width=width, - height=height, - ) points = ((x1, y1), (x1, y2), (x2, y2), (x2, y1)) element = element_from_text( _text, @@ -488,11 +534,27 @@ def _process_pdfminer_pages( points=points, system=coordinate_system, ) + + links: List[Link] = [] + for url in urls_metadata: + with contextlib.suppress(IndexError): + links.append( + { + "text": url["text"], + "url": url["uri"], + "start_index": index_adjustment_after_clean_extra_whitespace( + url["start_index"], + moved_indices, + ), + }, + ) + element.metadata = ElementMetadata( filename=filename, page_number=i + 1, coordinates=coordinates_metadata, last_modified=metadata_last_modified, + links=links, ) page_elements.append(element) list_item = 0 @@ -533,7 +595,7 @@ def _process_pdfminer_pages( system=coordinate_system, ) page_element = list_page_element - updated_page_elements.pop() + updated_page_elements.pop(0) updated_page_elements.append(page_element) @@ -792,12 +854,340 @@ def check_coords_within_boundary( line_height = boundary_y_max - boundary_y_min x_within_boundary = ( - (coordinates.points[0][0] < boundary_x_min + (horizontal_threshold * line_width)) + (coordinates.points[0][0] > boundary_x_min - (horizontal_threshold * line_width)) and (coordinates.points[2][0] < boundary_x_max + (horizontal_threshold * line_width)) and (coordinates.points[0][0] >= boundary_x_min) ) y_within_boundary = ( coordinates.points[0][1] < boundary_y_max + (vertical_threshold * line_height) 
def get_uris(
    annots: Union[PDFObjRef, List[PDFObjRef]],
    height: float,
    coordinate_system: Union[PixelSpace, PointSpace],
    page_number: int,
) -> List[dict]:
    """Extract URI annotations from the annotations entry of one PDF page.

    Depending on how the PDF was written, `annots` is either already a list of annotation
    references or a single reference that resolves to such a list. Both shapes are normalized
    to a list and handed to `get_uris_from_annots()`.

    Args:
        annots: a single PDF object reference or a list of them, representing the annotations
            on the page.
        height: the height of the page in the specified coordinate system.
        coordinate_system: the coordinate system used to represent the annotations'
            coordinates.
        page_number: the page number the annotations belong to.

    Returns:
        A list of dictionaries, each describing one link annotation: its coordinates, bounding
        box, action type, URI and page number.
    """
    if not isinstance(annots, list):
        annots = annots.resolve()
    return get_uris_from_annots(annots, height, coordinate_system, page_number)


def get_uris_from_annots(
    annots: List[PDFObjRef],
    height: Union[int, float],
    coordinate_system: Union[PixelSpace, PointSpace],
    page_number: int,
) -> List[dict]:
    """Extract link annotations from a list of PDF object references.

    Only annotations whose subtype is `Link` and that carry an action (`A`) are returned.
    Annotations that lack a subtype are skipped instead of raising.

    Args:
        annots: PDF object references representing the annotations on a page.
        height: the height of the page in the specified coordinate system.
        coordinate_system: the coordinate system used to represent the annotations'
            coordinates.
        page_number: the page number the annotations belong to.

    Returns:
        A list of dictionaries with the keys "coordinates", "bbox", "type", "uri" and
        "page_number". "uri" is None when the action is neither a URI nor a GoTo action, or
        when its target cannot be decoded.
    """
    annotation_list = []
    for annotation in annots:
        annotation_dict = try_resolve(annotation)

        # -- an annotation without a subtype (or one that did not resolve to a mapping)
        # -- cannot be a link; skip it rather than fail the whole page --
        try:
            subtype = str(annotation_dict["Subtype"])
        except (KeyError, TypeError):
            continue
        if subtype != "/'Link'" or "A" not in annotation_dict:
            continue

        # -- the rectangle may itself be stored as an indirect object --
        x1, y1, x2, y2 = rect_to_bbox(try_resolve(annotation_dict["Rect"]), height)
        uri_dict = try_resolve(annotation_dict["A"])
        uri_type = str(uri_dict["S"])

        # -- reset for every annotation: otherwise an action type other than URI/GoTo would
        # -- either hit an unbound name or silently reuse the previous annotation's target --
        uri = None
        try:
            if uri_type == "/'URI'":
                uri = try_resolve(try_resolve(uri_dict["URI"])).decode("utf-8")
            elif uri_type == "/'GoTo'":
                uri = try_resolve(try_resolve(uri_dict["D"])).decode("utf-8")
        except (KeyError, AttributeError, TypeError, UnicodeDecodeError):
            # -- best effort: keep the annotation, just without a usable target --
            uri = None

        points = ((x1, y1), (x1, y2), (x2, y2), (x2, y1))
        coordinates_metadata = CoordinatesMetadata(
            points=points,
            system=coordinate_system,
        )

        annotation_list.append(
            {
                "coordinates": coordinates_metadata,
                "bbox": (x1, y1, x2, y2),
                "type": uri_type,
                "uri": uri,
                "page_number": page_number,
            },
        )
    return annotation_list


def try_resolve(annot: PDFObjRef):
    """Resolve a PDF object reference if possible.

    Returns the resolved object on success. Anything that cannot be resolved, including
    values that are not references at all, is returned unchanged.
    """
    try:
        return annot.resolve()
    except Exception:
        # -- deliberately broad: this helper is a best-effort pass-through --
        return annot


def rect_to_bbox(
    rect: Tuple[float, float, float, float],
    height: float,
) -> Tuple[float, float, float, float]:
    """Convert a PDF rectangle to a bounding box measured from the top of the page.

    PDF rectangles have their origin at the bottom-left of the page, so the vertical
    coordinates are flipped using the page height.

    Args:
        rect: a PDF rectangle (x1, y1, x2, y2) with the y-axis pointing up.
        height: the height of the page in the specified coordinate system.

    Returns:
        The bounding box (x1, y1, x2, y2) with the y-coordinates measured from the top of the
        page.
    """
    left, pdf_bottom, right, pdf_top = rect
    return (left, height - pdf_top, right, height - pdf_bottom)
def calculate_intersection_area(
    bbox1: Tuple[float, float, float, float],
    bbox2: Tuple[float, float, float, float],
) -> float:
    """Calculate the area of intersection between two bounding boxes.

    Args:
        bbox1: the first bounding box in the format (x1, y1, x2, y2).
        bbox2: the second bounding box in the format (x1, y1, x2, y2).

    Returns:
        The area shared by both boxes, or 0.0 when they do not overlap (boxes that merely
        touch along an edge do not overlap).
    """
    left = max(bbox1[0], bbox2[0])
    top = max(bbox1[1], bbox2[1])
    right = min(bbox1[2], bbox2[2])
    bottom = min(bbox1[3], bbox2[3])

    if left < right and top < bottom:
        return calculate_bbox_area((left, top, right, bottom))
    return 0.0


def calculate_bbox_area(bbox: Tuple[float, float, float, float]) -> float:
    """Calculate the area of a bounding box.

    Args:
        bbox: the bounding box in the format (x1, y1, x2, y2).

    Returns:
        The product of the box's width and height.
    """
    x1, y1, x2, y2 = bbox
    return (x2 - x1) * (y2 - y1)


def check_annotations_within_element(
    annotation_list: List[dict],
    element_bbox: Tuple[float, float, float, float],
    page_number: int,
    threshold: float = 0.9,
) -> List[dict]:
    """Filter annotations that lie within, or highly overlap with, an element on a page.

    An annotation is kept when it is on `page_number` and the share of its own area that is
    covered by `element_bbox` exceeds `threshold`.

    Args:
        annotation_list: dictionaries describing annotations; each needs the keys "bbox" and
            "page_number".
        element_bbox: the bounding box of the element in the format (x1, y1, x2, y2).
        page_number: the page number the element belongs to.
        threshold: the minimum share (between 0.0 and 1.0) of the annotation's area that must
            overlap the element. Default is 0.9.

    Returns:
        The annotations from `annotation_list` that satisfy the overlap criterion, in their
        original order.
    """
    annotations_within_element = []
    for annotation in annotation_list:
        if annotation["page_number"] != page_number:
            continue

        annotation_area = calculate_bbox_area(annotation["bbox"])
        # -- a degenerate (zero-area) rectangle cannot overlap anything and would otherwise
        # -- cause a division by zero below --
        if annotation_area == 0:
            continue

        overlap = calculate_intersection_area(element_bbox, annotation["bbox"])
        if overlap / annotation_area > threshold:
            annotations_within_element.append(annotation)
    return annotations_within_element
def map_bbox_and_index(words: List[dict], annot: dict):
    """Map an annotation's bounding box to the words it covers.

    The first covered word is the one whose top-left corner is closest to the annotation's
    top-left corner; the last covered word is the one whose bottom-right corner is closest to
    the annotation's bottom-right corner.

    Args:
        words: dictionaries describing words, each with the keys "text", "bbox" and
            "start_index". Entries with empty text or an incomplete bbox are ignored.
        annot: the annotation dictionary; it must contain "bbox" and is updated in place with
            the keys "text" and "start_index".

    Returns:
        The same `annot` dictionary. "text" holds the covered words joined by single spaces
        and "start_index" the start index of the first covered word. When there is no usable
        word, "text" is "" and "start_index" is -1.
    """
    # -- words without text or with a placeholder bbox carry no position information and
    # -- would break the distance computation below --
    usable_words = [word for word in words if word["text"] and None not in word["bbox"]]
    if not usable_words:
        annot["text"] = ""
        annot["start_index"] = -1
        return annot

    annot_x1, annot_y1, annot_x2, annot_y2 = annot["bbox"]
    boxes = np.array([word["bbox"] for word in usable_words], dtype=float)

    distance_from_bbox_start = np.sqrt(
        (annot_x1 - boxes[:, 0]) ** 2 + (annot_y1 - boxes[:, 1]) ** 2,
    )
    distance_from_bbox_end = np.sqrt(
        (annot_x2 - boxes[:, 2]) ** 2 + (annot_y2 - boxes[:, 3]) ** 2,
    )
    closest_start = try_argmin(distance_from_bbox_start)
    closest_end = try_argmin(distance_from_bbox_end)

    # NOTE(klaijan) - get the word from closest start only if the end index comes after start
    # index
    if closest_end >= closest_start:
        covered_words = usable_words[closest_start : closest_end + 1]
        text = " ".join(word["text"] for word in covered_words)
    else:
        text = usable_words[closest_start]["text"]

    annot["text"] = text.strip()
    annot["start_index"] = usable_words[closest_start]["start_index"]
    return annot


def try_argmin(array: np.ndarray) -> int:
    """Find the index of the minimum value in a NumPy array.

    Args:
        array: the NumPy array in which to find the minimum value's index.

    Returns:
        The index of the minimum value, or -1 when the array is empty.
    """
    try:
        return int(np.argmin(array))
    except (ValueError, IndexError):
        # -- numpy raises ValueError ("attempt to get argmin of an empty sequence") --
        return -1


def get_word_bounding_box_from_element(
    obj: LTTextBox,
    height: float,
) -> Tuple[List[LTChar], List[dict]]:
    """Extract characters and word bounding boxes from a PDF text element.

    A word is a maximal run of characters of the same kind (alphanumeric vs. not) that is not
    interrupted by whitespace or by a non-character layout item.

    Args:
        obj: the PDF text element from which to extract characters and words.
        height: the height of the page, used to measure y-coordinates from the top.

    Returns:
        A tuple of two lists:
        - the LTChar objects found in the element, in reading order,
        - one dictionary per word with the keys "text", "bbox" (x1, y1, x2, y2) and
          "start_index" (position of the word's first item within the element).
    """
    characters: List[LTChar] = []
    words: List[dict] = []
    text_len = 0  # number of layout items in the lines processed so far

    for text_line in obj:
        word = ""
        x1, y1, x2, y2 = None, None, None, None
        start_index = 0
        is_alnum = False

        def _flush() -> None:
            # -- only emit real words; an empty word has no text and no valid bbox --
            if word:
                words.append(
                    {"text": word, "bbox": (x1, y1, x2, y2), "start_index": start_index},
                )

        for index, character in enumerate(text_line):
            if not isinstance(character, LTChar):
                # -- NOTE(review): non-LTChar items in a text line are presumably the
                # -- spaces/newlines inserted by layout analysis; they end the current word --
                _flush()
                word = ""
                continue

            characters.append(character)
            char = character.get_text()

            if not char.strip():
                _flush()
                word = ""
                continue

            # TODO(klaijan) - isalnum() only works with A-Z, a-z and 0-9
            # will need to switch to some pattern matching once we support more languages
            if word and char.isalnum() != is_alnum:
                # -- switching between alphanumeric and other characters starts a new word --
                _flush()
                word = ""

            if not word:
                is_alnum = char.isalnum()
                start_index = text_len + index
                x1 = character.x0
                y1 = height - character.y1

            # -- the right/bottom edge always follows the most recent character --
            x2 = character.x1
            y2 = height - character.y0
            word += char

        # -- a line that does not end in whitespace still ends its last word --
        _flush()
        text_len += len(text_line)
    return characters, words
def coordinates_to_bbox(coordinates: CoordinatesMetadata) -> Tuple[int, int, int, int]:
    """Convert coordinates to a bounding box representation.

    Parameters:
        coordinates (CoordinatesMetadata): Metadata whose first point is the top-left corner
        and whose third point is the bottom-right corner of the box.

    Returns:
        Tuple[int, int, int, int]: The bounding box in the format (left, top, right, bottom),
        with every value truncated to an integer.
    """
    corner_points = coordinates.points
    (left, top), (right, bottom) = corner_points[0], corner_points[2]
    return (int(left), int(top), int(right), int(bottom))


def shrink_bbox(bbox: Tuple[int, int, int, int], shrink_factor) -> Tuple[int, int, int, int]:
    """Shrink a bounding box around its center.

    Parameters:
        bbox (Tuple[int, int, int, int]): The original bounding box represented by
        (left, top, right, bottom).
        shrink_factor (float): The fraction of the original width and height to keep
        (0.0 to 1.0).

    Returns:
        Tuple[int, int, int, int]: The shrunken bounding box represented by
        (left, top, right, bottom), with every value truncated to an integer.
    """
    left, top, right, bottom = bbox
    width, height = right - left, bottom - top

    # -- the margin trimmed from each side is half of what the box loses along that axis --
    x_margin = (width - width * shrink_factor) / 2
    y_margin = (height - height * shrink_factor) / 2

    return (
        int(left + x_margin),
        int(top + y_margin),
        int(right - x_margin),
        int(bottom - y_margin),
    )
""" + shrink_factor = float( + os.environ.get("UNSTRUCTURED_XY_CUT_BBOX_SHRINK_FACTOR", shrink_factor), + ) + if not page_elements: return [] @@ -82,9 +131,18 @@ def _coords_ok(strict_points: bool): if sort_mode == SORT_MODE_XY_CUT: if not _coords_ok(strict_points=True): return page_elements - boxes = [coordinates_to_bbox(coords) for coords in coordinates_list] + shrunken_bboxes = [] + for coords in coordinates_list: + bbox = coordinates_to_bbox(coords) + shrunken_bbox = shrink_bbox(bbox, shrink_factor) + shrunken_bboxes.append(shrunken_bbox) + res: List[int] = [] - recursive_xy_cut(np.asarray(boxes).astype(int), np.arange(len(boxes)), res) + recursive_xy_cut( + np.asarray(shrunken_bboxes).astype(int), + np.arange(len(shrunken_bboxes)), + res, + ) sorted_page_elements = [page_elements[i] for i in res] elif sort_mode == SORT_MODE_BASIC: if not _coords_ok(strict_points=False): diff --git a/unstructured/partition/xlsx.py b/unstructured/partition/xlsx.py index 2f4538210f..ebffd6cdf9 100644 --- a/unstructured/partition/xlsx.py +++ b/unstructured/partition/xlsx.py @@ -4,6 +4,7 @@ import pandas as pd from lxml.html.soupparser import fromstring as soupparser_fromstring +from unstructured.chunking.title import add_chunking_strategy from unstructured.documents.elements import ( Element, ElementMetadata, @@ -21,6 +22,7 @@ @process_metadata() @add_metadata_with_filetype(FileType.XLSX) +@add_chunking_strategy() def partition_xlsx( filename: Optional[str] = None, file: Optional[Union[IO[bytes], SpooledTemporaryFile]] = None, diff --git a/unstructured/partition/xml.py b/unstructured/partition/xml.py index ff733300c5..f8bdc48211 100644 --- a/unstructured/partition/xml.py +++ b/unstructured/partition/xml.py @@ -1,6 +1,8 @@ -import xml.etree.ElementTree as ET +from io import BytesIO from tempfile import SpooledTemporaryFile -from typing import IO, BinaryIO, List, Optional, Union, cast +from typing import IO, BinaryIO, Iterator, List, Optional, Union, cast + +from lxml import etree 
def get_leaf_elements(
    filename: Optional[str] = None,
    file: Optional[Union[IO[bytes], SpooledTemporaryFile]] = None,
    text: Optional[str] = None,
    xml_path: Optional[str] = None,
) -> Iterator[Optional[str]]:
    """Get leaf elements from the XML tree defined in filename, file, or text.

    The source is normalized to something `_get_leaf_elements()` accepts (a path or a binary
    file-like object) and the resulting iterator of text values is returned as is.

    Args:
        filename: path of the XML file to read.
        file: binary file-like object containing the XML document.
        text: the XML document as a string; it is encoded as UTF-8 before parsing.
        xml_path: optional XPath expression, passed through to `_get_leaf_elements()`.
    """
    # -- presumably rejects calls that do not provide exactly one of the three sources --
    exactly_one(filename=filename, file=file, text=text)
    if filename:
        return _get_leaf_elements(filename, xml_path=xml_path)
    elif file:
        f = cast(
            IO[bytes],
            spooled_to_bytes_io_if_needed(
                cast(Union[BinaryIO, SpooledTemporaryFile], file),
            ),
        )
        return _get_leaf_elements(f, xml_path=xml_path)
    else:
        # -- the parser is fed bytes, so wrap the UTF-8 encoded text in an in-memory buffer --
        b = BytesIO(bytes(cast(str, text), encoding="utf-8"))
        return _get_leaf_elements(b, xml_path=xml_path)


def _get_leaf_elements(
    file: Union[str, IO[bytes]],
    xml_path: Optional[str] = None,
) -> Iterator[Optional[str]]:
    """Parse the XML tree in a memory efficient manner if possible.

    Yields the `.text` of every element whose text is not None and not only whitespace.
    Without `xml_path` the document is consumed event by event and each element is cleared
    once its "end" event has been handled. With `xml_path` only the elements selected by
    that XPath expression are visited.

    Args:
        file: path or binary file-like object handed to `etree.iterparse()`.
        xml_path: optional XPath expression selecting the elements to visit.
    """
    element_stack = []

    element_iterator = etree.iterparse(file, events=("start", "end"))
    # NOTE(alan) If xml_path is used for filtering, I've yet to find a good way to stream
    # elements through in a memory efficient way, so we bite the bullet and load it all into
    # memory.
    if xml_path is not None:
        # NOTE(review): the XPath is evaluated on the element delivered by the first parse
        # event, before the remaining events are consumed - confirm the tree is fully
        # populated at that point for large inputs.
        _, element = next(element_iterator)
        compiled_path = etree.XPath(xml_path)
        # -- present the matches as synthetic "end" events so the loop below handles them --
        element_iterator = (("end", el) for el in compiled_path(element))

    for event, element in element_iterator:
        if event == "start":
            element_stack.append(element)

        if event == "end":
            if element.text is not None and element.text.strip():
                yield element.text

            # -- the element's content has been handled; clear it --
            element.clear()

        # -- drop stack entries whose element has no parent; the stack is not read anywhere
        # -- else in this function --
        while element_stack and element_stack[-1].getparent() is None:
            element_stack.pop()