diff --git a/test_unstructured/chunking/test_title.py b/test_unstructured/chunking/test_title.py index 3185eb61c1..1e5bc88ef8 100644 --- a/test_unstructured/chunking/test_title.py +++ b/test_unstructured/chunking/test_title.py @@ -287,6 +287,7 @@ def test_add_chunking_strategy_raises_error_for_invalid_n_chars( max_characters=max_characters, ) + def test_chunk_by_title_drops_detection_class_prob(): elements = [ Title( @@ -327,7 +328,6 @@ def test_chunk_by_title_drops_detection_class_prob(): assert str(chunks[1]) == str(CompositeElement("An Okay Day\n\nToday is an okay day.")) - def test_chunk_by_title_drops_extra_metadata(): elements = [ Title( diff --git a/unstructured/chunking/title.py b/unstructured/chunking/title.py index f24dceab61..a41ae82534 100644 --- a/unstructured/chunking/title.py +++ b/unstructured/chunking/title.py @@ -197,7 +197,14 @@ def _drop_extra_metadata( metadata_dict: Dict[str, Any], include_pages: bool = True, ) -> Dict[str, Any]: - keys_to_drop = ["element_id", "type", "coordinates", "parent_id", "category_depth", "detection_class_prob"] + keys_to_drop = [ + "element_id", + "type", + "coordinates", + "parent_id", + "category_depth", + "detection_class_prob", + ] if not include_pages and "page_number" in metadata_dict: keys_to_drop.append("page_number")