From 762cd2c6fff9b747462ff812cc8f318fddf08b6e Mon Sep 17 00:00:00 2001 From: kylacochrane Date: Tue, 23 Apr 2024 10:32:29 -0400 Subject: [PATCH] Changed abundance calculations to use re-estimated Bracken total reads --- bin/adjust_bracken_for_unclassified_reads.py | 31 ++++++++++---------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/bin/adjust_bracken_for_unclassified_reads.py b/bin/adjust_bracken_for_unclassified_reads.py index b1ac994..ae1810e 100755 --- a/bin/adjust_bracken_for_unclassified_reads.py +++ b/bin/adjust_bracken_for_unclassified_reads.py @@ -85,19 +85,6 @@ def get_num_unclassified_seqs(parsed_kraken_report): return num_unclassified_seqs -def get_num_classified_seqs(parsed_kraken_report): - root_records = list( - filter(lambda x: x["ncbi_taxonomy_id"] == "1", parsed_kraken_report) - ) - num_classified_seqs = 0 - if len(root_records) > 0: - root_record = root_records[0] - if "num_seqs_this_clade" in root_record: - num_classified_seqs = root_record["num_seqs_this_clade"] - - return num_classified_seqs - - def adjust_bracken_report(bracken_report, num_unclassified_seqs): adjusted_bracken_report = [] unclassified_record = { @@ -129,6 +116,19 @@ def adjust_bracken_report(bracken_report, num_unclassified_seqs): return adjusted_bracken_report +def get_num_classified_seqs(adjusted_bracken_report): + root_records = list( + filter(lambda x: x["ncbi_taxonomy_id"] == "1", adjusted_bracken_report) + ) + num_classified_seqs = 0 + if len(root_records) > 0: + root_record = root_records[0] + if "num_seqs_this_clade" in root_record: + num_classified_seqs = root_record["num_seqs_this_clade"] + + return num_classified_seqs + + def adjust_bracken_abundances( bracken_abundances, num_total_seqs, num_unclassified_seqs ): @@ -158,11 +158,12 @@ def adjust_bracken_abundances( def main(args): kraken_report = parse_kraken_report(args.kraken_report) + bracken_report = parse_kraken_report(args.bracken_report) + num_unclassified_seqs = get_num_unclassified_seqs(kraken_report) - num_classified_seqs = get_num_classified_seqs(kraken_report) + num_classified_seqs = get_num_classified_seqs(bracken_report) num_total_seqs = num_unclassified_seqs + num_classified_seqs - bracken_report = parse_kraken_report(args.bracken_report) adjusted_bracken_report = adjust_bracken_report( bracken_report, num_unclassified_seqs )