Skip to content

Commit

Permalink
Update file path and delimiter
Browse files (browse the repository at this point in the history)
  • Loading branch information
Andy Z authored and committed on Aug 29, 2023
1 parent c1c7d9e commit d7f7674
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion scripts/data_overlap/compute_data_overlap_metrics.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -211,8 +211,10 @@ def compute_document_data_overlap(
stats_key_to_intersection_ids[entry_overlap_key.stats_key].add(id)
if output_ngrams:
entry_overlap_key_to_ngram_counts[entry_overlap_key][document_ngram] += 1
with open(f'{output_path}_{document_ngram}', 'a') as f:
ngram_str = ' '.join(document_ngram)
with open(f'{output_path}_{ngram_str}', 'a') as f:
f.write(document)
f.write('------------- DOCUMENT DELIMITER --------------\n\n')

if __name__ == "__main__":
args = get_data_overlap_args()
Expand Down

0 comments on commit d7f7674

Please sign in to comment.