Skip to content

Commit

Permalink
tidy
Browse files (browse the repository at this point in the history)
  • Loading branch information
ryannikolaidis committed Oct 4, 2023
1 parent dcb41ad commit b20837d
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 2 deletions.
2 changes: 1 addition & 1 deletion test_unstructured/chunking/test_title.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@ def test_add_chunking_strategy_raises_error_for_invalid_n_chars(
max_characters=max_characters,
)


def test_chunk_by_title_drops_detection_class_prob():
elements = [
Title(
Expand Down Expand Up @@ -327,7 +328,6 @@ def test_chunk_by_title_drops_detection_class_prob():
assert str(chunks[1]) == str(CompositeElement("An Okay Day\n\nToday is an okay day."))



def test_chunk_by_title_drops_extra_metadata():
elements = [
Title(
Expand Down
9 changes: 8 additions & 1 deletion unstructured/chunking/title.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,14 @@ def _drop_extra_metadata(
metadata_dict: Dict[str, Any],
include_pages: bool = True,
) -> Dict[str, Any]:
- keys_to_drop = ["element_id", "type", "coordinates", "parent_id", "category_depth", "detection_class_prob"]
+ keys_to_drop = [
+ "element_id",
+ "type",
+ "coordinates",
+ "parent_id",
+ "category_depth",
+ "detection_class_prob",
+ ]
if not include_pages and "page_number" in metadata_dict:
keys_to_drop.append("page_number")

Expand Down

0 comments on commit b20837d

Please sign in to comment.