Skip to content

Commit

Permalink
refactor: add OCRMode enum
Browse files — browse the repository at this point in the history
  • Loading branch information
christinestraub committed Oct 4, 2023
1 parent d3b5a8f commit cd82e31
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 7 deletions.
12 changes: 7 additions & 5 deletions unstructured/partition/ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
# unstructured.documents.elements.Image
from PIL import Image as PILImage
from PIL import ImageSequence

from unstructured.partition.utils.constants import OCRMode
from unstructured_inference.inference.elements import (
Rectangle,
TextRegion,
Expand All @@ -31,7 +33,7 @@ def process_data_with_ocr(
inferred_layout: "DocumentLayout",
is_image: bool = False,
ocr_languages: str = "eng",
ocr_mode: str = "entire_page",
ocr_mode: str = OCRMode.FULL_PAGE.value,
pdf_image_dpi: int = 200,
) -> "DocumentLayout":
"""
Expand Down Expand Up @@ -77,7 +79,7 @@ def process_file_with_ocr(
inferred_layout: "DocumentLayout",
is_image: bool = False,
ocr_languages: str = "eng",
ocr_mode: str = "entire_page",
ocr_mode: str = OCRMode.FULL_PAGE.value,
pdf_image_dpi: int = 200,
) -> "DocumentLayout":
"""
Expand Down Expand Up @@ -149,7 +151,7 @@ def supplement_page_layout_with_ocr(
inferred_page_layout: "PageLayout",
image: PILImage,
ocr_languages: str = "eng",
ocr_mode: str = "entire_page",
ocr_mode: str = OCRMode.FULL_PAGE.value,
) -> "PageLayout":
"""
Supplement an inferred PageLayout with OCR results depending on OCR mode.
Expand All @@ -166,7 +168,7 @@ def supplement_page_layout_with_ocr(
"Environment variable ENTIRE_PAGE_OCR",
" must be set to 'tesseract' or 'paddle'.",
)
if ocr_mode == "entire_page":
if ocr_mode == OCRMode.FULL_PAGE.value:
ocr_layout = get_ocr_layout_from_image(
image,
ocr_languages=ocr_languages,
Expand All @@ -178,7 +180,7 @@ def supplement_page_layout_with_ocr(
)
inferred_page_layout.elements[:] = merged_page_layout_elements
return inferred_page_layout
elif ocr_mode == "individual_blocks":
elif ocr_mode == OCRMode.INDIVIDUAL_BLOCKS.value:
elements = inferred_page_layout.elements
for i, element in enumerate(elements):
if element.text == "":
Expand Down
4 changes: 2 additions & 2 deletions unstructured/partition/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
)
from unstructured.partition.strategies import determine_pdf_or_image_strategy
from unstructured.partition.text import element_from_text, partition_text
from unstructured.partition.utils.constants import SORT_MODE_BASIC, SORT_MODE_XY_CUT
from unstructured.partition.utils.constants import SORT_MODE_BASIC, SORT_MODE_XY_CUT, OCRMode
from unstructured.partition.utils.sorting import (
coord_has_valid_points,
sort_page_elements,
Expand Down Expand Up @@ -322,7 +322,7 @@ def _partition_pdf_or_image_local(
infer_table_structure: bool = False,
include_page_breaks: bool = False,
languages: List[str] = ["eng"],
ocr_mode: str = "entire_page",
ocr_mode: str = OCRMode.FULL_PAGE.value,
model_name: Optional[str] = None,
metadata_last_modified: Optional[str] = None,
**kwargs,
Expand Down
8 changes: 8 additions & 0 deletions unstructured/partition/utils/constants.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,10 @@
from enum import Enum


class OCRMode(str, Enum):
    """Closed set of OCR modes accepted by the ``ocr_mode`` parameters.

    Members are also ``str`` instances, so a member can be passed or compared
    wherever the plain string was previously expected
    (``OCRMode.FULL_PAGE == "entire_page"`` is true). ``.value`` still returns
    the raw string and remains the safest way to obtain it, because
    ``str()``/``format()`` of a mixed-in enum member may render the qualified
    member name rather than the value, depending on the Python version.
    """

    # Selects the branch that works through the already-inferred layout
    # elements one at a time.
    INDIVIDUAL_BLOCKS = "individual_blocks"
    # Selects the branch that builds an OCR layout from the whole page image.
    # The value stays "entire_page" (not "full_page") because that is the
    # string callers have been passing as the default.
    FULL_PAGE = "entire_page"


# String identifiers for the element sort modes.
# NOTE(review): presumably consumed by the helpers in
# unstructured.partition.utils.sorting — confirm against callers.
SORT_MODE_BASIC = "basic"
SORT_MODE_XY_CUT = "xy-cut"

0 comments on commit cd82e31

Please sign in to comment.