Skip to content

Commit

Permalink
Remove duplicated entries from relationships and nodes (#1333)
Browse files: browse the repository at this point in the history
  • Loading branch information
AlonsoGuevara authored Oct 29, 2024
1 parent 083de12 commit 83026bd
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 2 deletions.
4 changes: 4 additions & 0 deletions .semversioner/next-release/patch-20241029003722814474.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"type": "patch",
"description": "Remove duplicated relationships and nodes"
}
3 changes: 2 additions & 1 deletion graphrag/index/flows/create_final_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,5 @@ async def create_final_nodes(
)
joined.rename(columns={"label": "title", "cluster": "community"}, inplace=True)

return joined
# TODO: Find duplication source
return joined.drop_duplicates(subset=["title", "community"])
3 changes: 2 additions & 1 deletion graphrag/index/flows/create_final_relationships.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,5 @@ async def create_final_relationships(
"text_unit_ids"
].str.split(",")

return edge_combined_degree
# TODO: Find duplication source
return edge_combined_degree.drop_duplicates(subset=["source", "target"])

0 comments on commit 83026bd

Please sign in to comment.