Skip to content

Commit

Permalink
rfctr: normalize method order
Browse files Browse the repository at this point in the history
Keep implementation methods in alphabetical order so they're easier to
locate by name when scanning folds or when search is not readily
available.
  • Loading branch information
scanny committed Oct 3, 2023
1 parent 9beec58 commit 0799e4a
Showing 1 changed file with 46 additions and 46 deletions.
92 changes: 46 additions & 46 deletions unstructured/partition/docx.py
Original file line number Diff line number Diff line change
Expand Up @@ -575,6 +575,52 @@ def _paragraph_metadata(self, paragraph: Paragraph) -> ElementMetadata:
category_depth=category_depth,
)

def _parse_category_depth_by_style(self, paragraph: Paragraph) -> int:
    """Resolve the category depth of `paragraph`.

    Three sources are consulted in order and the first usable one wins:

    1. The `w:ilvl` value found under `w:pPr/w:numPr` on the paragraph element itself.
    2. The depth parsed from the paragraph's style name, used only when it is positive.
    3. The style-ilvl lookup (`_parse_category_depth_by_style_ilvl()`).
    """
    # -- a numbering level declared directly on the paragraph takes precedence --
    ilvl_values = paragraph._element.xpath("./w:pPr/w:numPr/w:ilvl/@w:val")
    if ilvl_values:
        return int(ilvl_values[0])

    # -- otherwise consult the style name; "Normal" stands in when the style or its name
    # -- is falsy
    style_name = (paragraph.style and paragraph.style.name) or "Normal"
    depth_from_name = self._parse_category_depth_by_style_name(style_name)
    if depth_from_name > 0:
        return depth_from_name

    # -- last resort: see whether the style's own ilvl can provide a depth --
    return self._parse_category_depth_by_style_ilvl()

def _parse_category_depth_by_style_ilvl(self) -> int:
# TODO(newelh) Parsing category depth by style ilvl is not yet implemented
return 0

def _parse_category_depth_by_style_name(self, style_name: str) -> int:
"""Parse category-depth from the style-name of `paragraph`.
Category depth is 0-indexed and relative to the other element types in the document.
"""

def _extract_number(suffix: str) -> int:
return int(suffix.split()[-1]) - 1 if suffix.split()[-1].isdigit() else 0

# Heading styles
if style_name.startswith("Heading"):
return _extract_number(style_name)

if style_name == "Subtitle":
return 1

# List styles
list_prefixes = ["List", "List Bullet", "List Continue", "List Number"]
if any(style_name.startswith(prefix) for prefix in list_prefixes):
return _extract_number(style_name)

# Other styles
return 0

def _parse_paragraph_text_for_element_type(self, paragraph: Paragraph) -> Optional[Type[Text]]:
"""Attempt to differentiate the element-type by inspecting the raw text."""
text = paragraph.text.strip()
Expand Down Expand Up @@ -647,52 +693,6 @@ def _table_emphasis(self, table: DocxTable) -> Tuple[List[str], List[str]]:
iter_tbl_emph, iter_tbl_emph_2 = itertools.tee(self._iter_table_emphasis(table))
return ([e["text"] for e in iter_tbl_emph], [e["tag"] for e in iter_tbl_emph_2])

def _parse_category_depth_by_style(self, paragraph: Paragraph) -> int:
    """Determine the category depth for `paragraph`.

    The paragraph's own `w:ilvl` attribute (under `w:pPr/w:numPr`) is used when present.
    Otherwise the depth parsed from the style name is used when it is greater than zero,
    and the style-ilvl lookup is the final fallback.
    """
    ilvl_attr_vals = paragraph._element.xpath("./w:pPr/w:numPr/w:ilvl/@w:val")

    # -- explicit numbering level on the paragraph wins outright --
    if ilvl_attr_vals:
        return int(ilvl_attr_vals[0])

    # -- a falsy style or style name is treated as the "Normal" style --
    style_name = (paragraph.style and paragraph.style.name) or "Normal"
    named_depth = self._parse_category_depth_by_style_name(style_name)

    # -- a non-positive depth from the name defers to the style-ilvl lookup --
    return named_depth if named_depth > 0 else self._parse_category_depth_by_style_ilvl()

def _parse_category_depth_by_style_name(self, style_name: str) -> int:
"""Parse category-depth from the style-name of `paragraph`.
Category depth is 0-indexed and relative to the other element types in the document.
"""

def _extract_number(suffix: str) -> int:
return int(suffix.split()[-1]) - 1 if suffix.split()[-1].isdigit() else 0

# Heading styles
if style_name.startswith("Heading"):
return _extract_number(style_name)

if style_name == "Subtitle":
return 1

# List styles
list_prefixes = ["List", "List Bullet", "List Continue", "List Number"]
if any(style_name.startswith(prefix) for prefix in list_prefixes):
return _extract_number(style_name)

# Other styles
return 0

def _parse_category_depth_by_style_ilvl(self) -> int:
# TODO(newelh) Parsing category depth by style ilvl is not yet implemented
return 0


class _SectBlockItemIterator:
"""Generates the block-items in a section.
Expand Down

0 comments on commit 0799e4a

Please sign in to comment.