refactor: add OCRMode enum

Unstructured-IO · Oct 4, 2023 · cd82e31
1 parent d3b5a8f
commit cd82e31
Show file tree

Hide file tree

Showing 3 changed files with 17 additions and 7 deletions.
diff --git a/unstructured/partition/ocr.py b/unstructured/partition/ocr.py
@@ -10,6 +10,8 @@
 # unstructured.documents.elements.Image
 from PIL import Image as PILImage
 from PIL import ImageSequence
+
+from unstructured.partition.utils.constants import OCRMode
 from unstructured_inference.inference.elements import (
     Rectangle,
     TextRegion,
@@ -31,7 +33,7 @@ def process_data_with_ocr(
     inferred_layout: "DocumentLayout",
     is_image: bool = False,
     ocr_languages: str = "eng",
-    ocr_mode: str = "entire_page",
+    ocr_mode: str = OCRMode.FULL_PAGE.value,
     pdf_image_dpi: int = 200,
 ) -> "DocumentLayout":
     """
@@ -77,7 +79,7 @@ def process_file_with_ocr(
     inferred_layout: "DocumentLayout",
     is_image: bool = False,
     ocr_languages: str = "eng",
-    ocr_mode: str = "entire_page",
+    ocr_mode: str = OCRMode.FULL_PAGE.value,
     pdf_image_dpi: int = 200,
 ) -> "DocumentLayout":
     """
@@ -149,7 +151,7 @@ def supplement_page_layout_with_ocr(
     inferred_page_layout: "PageLayout",
     image: PILImage,
     ocr_languages: str = "eng",
-    ocr_mode: str = "entire_page",
+    ocr_mode: str = OCRMode.FULL_PAGE.value,
 ) -> "PageLayout":
     """
     Supplement an inferred PageLayout with OCR results depending on OCR mode.
@@ -166,7 +168,7 @@ def supplement_page_layout_with_ocr(
             "Environment variable ENTIRE_PAGE_OCR",
             " must be set to 'tesseract' or 'paddle'.",
         )
-    if ocr_mode == "entire_page":
+    if ocr_mode == OCRMode.FULL_PAGE.value:
         ocr_layout = get_ocr_layout_from_image(
             image,
             ocr_languages=ocr_languages,
@@ -178,7 +180,7 @@ def supplement_page_layout_with_ocr(
         )
         inferred_page_layout.elements[:] = merged_page_layout_elements
         return inferred_page_layout
-    elif ocr_mode == "individual_blocks":
+    elif ocr_mode == OCRMode.INDIVIDUAL_BLOCKS.value:
         elements = inferred_page_layout.elements
         for i, element in enumerate(elements):
             if element.text == "":

diff --git a/unstructured/partition/pdf.py b/unstructured/partition/pdf.py
@@ -60,7 +60,7 @@
 )
 from unstructured.partition.strategies import determine_pdf_or_image_strategy
 from unstructured.partition.text import element_from_text, partition_text
-from unstructured.partition.utils.constants import SORT_MODE_BASIC, SORT_MODE_XY_CUT
+from unstructured.partition.utils.constants import SORT_MODE_BASIC, SORT_MODE_XY_CUT, OCRMode
 from unstructured.partition.utils.sorting import (
     coord_has_valid_points,
     sort_page_elements,
@@ -322,7 +322,7 @@ def _partition_pdf_or_image_local(
     infer_table_structure: bool = False,
     include_page_breaks: bool = False,
     languages: List[str] = ["eng"],
-    ocr_mode: str = "entire_page",
+    ocr_mode: str = OCRMode.FULL_PAGE.value,
     model_name: Optional[str] = None,
     metadata_last_modified: Optional[str] = None,
     **kwargs,

diff --git a/unstructured/partition/utils/constants.py b/unstructured/partition/utils/constants.py
@@ -1,2 +1,10 @@
+from enum import Enum
+
+
+class OCRMode(Enum):
+    INDIVIDUAL_BLOCKS = "individual_blocks"
+    FULL_PAGE = "entire_page"
+
+
 SORT_MODE_XY_CUT = "xy-cut"
 SORT_MODE_BASIC = "basic"