Skip to content

Commit

Permalink
refactor: rename `get_word_bounding_box_from_element()` to `get_words_from_obj()`
Browse files Browse the repository at this point in the history
  • Loading branch information
christinestraub committed Oct 30, 2024
1 parent c2cfd66 commit d2332ca
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
4 changes: 2 additions & 2 deletions unstructured/partition/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
clean_pdfminer_inner_elements,
get_links_in_element,
get_uris,
get_word_bounding_box_from_element,
get_words_from_obj,
map_bbox_and_index,
merge_inferred_with_extracted_layout,
)
Expand Down Expand Up @@ -459,7 +459,7 @@ def _process_pdfminer_pages(
page_number,
annotation_threshold,
)
_, words = get_word_bounding_box_from_element(obj, height)
_, words = get_words_from_obj(obj, height)
for annot in annotations_within_element:
urls_metadata.append(map_bbox_and_index(words, annot))

Expand Down
4 changes: 2 additions & 2 deletions unstructured/partition/pdf_image/pdfminer_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def process_data_with_pdfminer(
page_number,
annotation_threshold,
)
_, words = get_word_bounding_box_from_element(obj, height)
_, words = get_words_from_obj(obj, height)
for annot in annotations_within_element:
urls_metadata.append(map_bbox_and_index(words, annot))

Expand Down Expand Up @@ -516,7 +516,7 @@ def check_annotations_within_element(
return annotations_within_element


def get_word_bounding_box_from_element(
def get_words_from_obj(
obj: LTTextBox,
height: float,
) -> tuple[list[LTChar], list[dict[str, Any]]]:
Expand Down

0 comments on commit d2332ca

Please sign in to comment.