Skip to content

Commit

Permalink
updated file names and changed merge debugging graph output
Browse files Browse the repository at this point in the history
  • Loading branch information
malonge committed May 17, 2021
1 parent b9eaf73 commit 1d9421e
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 26 deletions.
44 changes: 22 additions & 22 deletions ragtag_correct.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,22 +91,22 @@ def read_genome_alignments(aln_file, query_blacklist, ref_blacklist):
def get_median_read_coverage(output_path, num_threads, overwrite_files):
""" Given the read alignments, use samtools stats to return an approximate median coverage value. """
log("INFO", "Calculating global read coverage")
if os.path.isfile(output_path + "c_reads_against_query.s.bam.stats"):
if os.path.isfile(output_path + "ragtag.correct.reads.s.bam.stats"):
if not overwrite_files:
log("INFO", "Retaining pre-existing file: " + output_path + "c_reads_against_query.s.bam.stats")
log("INFO", "Retaining pre-existing file: " + output_path + "ragtag.correct.reads.s.bam.stats")
else:
log("INFO", "Overwriting pre-existing file: " + output_path + "c_reads_against_query.s.bam.stats")
st = pysam.stats("-@", str(num_threads), output_path + "c_reads_against_query.s.bam")
with open(output_path + "c_reads_against_query.s.bam.stats", "w") as f:
log("INFO", "Overwriting pre-existing file: " + output_path + "ragtag.correct.reads.s.bam.stats")
st = pysam.stats("-@", str(num_threads), output_path + "ragtag.correct.reads.s.bam")
with open(output_path + "ragtag.correct.reads.s.bam.stats", "w") as f:
f.write(st)
else:
st = pysam.stats("-@", str(num_threads), output_path + "c_reads_against_query.s.bam")
with open(output_path + "c_reads_against_query.s.bam.stats", "w") as f:
st = pysam.stats("-@", str(num_threads), output_path + "ragtag.correct.reads.s.bam")
with open(output_path + "ragtag.correct.reads.s.bam.stats", "w") as f:
f.write(st)

# Get the coverage histogram (for 1 to 1k)
covs = []
with open(output_path + "c_reads_against_query.s.bam.stats") as f:
with open(output_path + "ragtag.correct.reads.s.bam.stats") as f:
for line in f:
if line.startswith("COV"):
covs.append(int(line.split("\t")[3]))
Expand All @@ -127,26 +127,26 @@ def get_median_read_coverage(output_path, num_threads, overwrite_files):

def run_samtools(output_path, num_threads, overwrite_files):
""" Compress, sort and index alignments with pysam. """
if os.path.isfile(output_path + "c_reads_against_query.s.bam"):
if os.path.isfile(output_path + "ragtag.correct.reads.s.bam"):
if not overwrite_files:
log("INFO", "Retaining pre-existing file: " + output_path + "c_reads_against_query.s.bam")
log("INFO", "Retaining pre-existing file: " + output_path + "ragtag.correct.reads.s.bam")
else:
log("INFO", "Overwriting pre-existing file: " + output_path + "c_reads_against_query.s.bam")
pysam.view("-@", str(num_threads), "-b", "-o", output_path + "c_reads_against_query.bam", output_path + "c_reads_against_query.sam", catch_stdout=False)
pysam.sort("-@", str(num_threads), "-o", output_path + "c_reads_against_query.s.bam", output_path + "c_reads_against_query.bam", catch_stdout=False)
log("INFO", "Overwriting pre-existing file: " + output_path + "ragtag.correct.reads.s.bam")
pysam.view("-@", str(num_threads), "-b", "-o", output_path + "ragtag.correct.reads.bam", output_path + "ragtag.correct.reads.sam", catch_stdout=False)
pysam.sort("-@", str(num_threads), "-o", output_path + "ragtag.correct.reads.s.bam", output_path + "ragtag.correct.reads.bam", catch_stdout=False)
else:
pysam.view("-@", str(num_threads), "-b", "-o", output_path + "c_reads_against_query.bam", output_path + "c_reads_against_query.sam", catch_stdout=False)
pysam.sort("-@", str(num_threads), "-o", output_path + "c_reads_against_query.s.bam", output_path + "c_reads_against_query.bam", catch_stdout=False)
pysam.view("-@", str(num_threads), "-b", "-o", output_path + "ragtag.correct.reads.bam", output_path + "ragtag.correct.reads.sam", catch_stdout=False)
pysam.sort("-@", str(num_threads), "-o", output_path + "ragtag.correct.reads.s.bam", output_path + "ragtag.correct.reads.bam", catch_stdout=False)

log("INFO", "Indexing read alignments")
if os.path.isfile(output_path + "c_reads_against_query.s.bam.bai"):
if os.path.isfile(output_path + "ragtag.correct.reads.s.bam.bai"):
if not overwrite_files:
log("INFO", "Retaining pre-existing file: " + output_path + "c_reads_against_query.s.bam.bai")
log("INFO", "Retaining pre-existing file: " + output_path + "ragtag.correct.reads.s.bam.bai")
else:
log("INFO", "Overwriting pre-existing file: " + output_path + "c_reads_against_query.s.bam.bai")
pysam.index(output_path + "c_reads_against_query.s.bam", catch_stdout=False)
log("INFO", "Overwriting pre-existing file: " + output_path + "ragtag.correct.reads.s.bam.bai")
pysam.index(output_path + "ragtag.correct.reads.s.bam", catch_stdout=False)
else:
pysam.index(output_path + "c_reads_against_query.s.bam", catch_stdout=False)
pysam.index(output_path + "ragtag.correct.reads.s.bam", catch_stdout=False)


def clean_breaks(val_breaks, d):
Expand Down Expand Up @@ -179,7 +179,7 @@ def validate_breaks(ctg_breaks, output_path, num_threads, overwrite_files, min_b
log("INFO", "The max and min coverage thresholds are %dX and %dX, respectively" % (max_cutoff, min_cutoff))

# Go through each break point and query the coverage within the vicinity of the breakpoint.
bam = pysam.AlignmentFile(output_path + "c_reads_against_query.s.bam")
bam = pysam.AlignmentFile(output_path + "ragtag.correct.reads.s.bam")
validated_ctg_breaks = dict()
for ctg in ctg_breaks:
val_breaks = []
Expand All @@ -194,7 +194,7 @@ def validate_breaks(ctg_breaks, output_path, num_threads, overwrite_files, min_b
continue

region = "%s:%d-%d" % (ctg, min_range, max_range-1)
depth_out = pysam.samtools.depth("-aa", "-r", region, output_path + "c_reads_against_query.s.bam")
depth_out = pysam.samtools.depth("-aa", "-r", region, output_path + "ragtag.correct.reads.s.bam")
covs = np.asarray(
[j.split("\t")[2] for j in [i for i in depth_out.rstrip().split("\n")]],
dtype=np.int32
Expand Down
7 changes: 5 additions & 2 deletions ragtag_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ def main():
if min_comp_len:
agp_multi_sg.filter_by_seq_len(min_comp_len)
if debug_mode:
agp_multi_sg.connect_and_write_gml(output_path + "ragtag.merge.msg.gml")
nx.readwrite.gml.write_gml(agp_multi_sg.graph, output_path + "ragtag.merge.msg.gml")

# Merge the SAG
log("INFO", "Merging the scaffold graph")
Expand Down Expand Up @@ -406,7 +406,10 @@ def main():
log("INFO", "Computing a scaffolding solution")
cover_graph = get_maximal_matching(agp_sg)
if debug_mode:
nx.readwrite.gml.write_gml(cover_graph, output_path + file_prefix + ".covergraph.gml")
tmp_cover_graph = nx.Graph()
for u, v in cover_graph.edges:
tmp_cover_graph.add_edge(u, v)
nx.readwrite.gml.write_gml(tmp_cover_graph, output_path + file_prefix + ".covergraph.gml")

# Write the scaffolding output to an AGP file
log("INFO", "Writing results")
Expand Down
4 changes: 2 additions & 2 deletions tests/integration_tests/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,8 @@ mecho "Validating AGP files and associated fasta files:"
echo ""

bash scripts/validate_agp.sh $E_QUERY \
ragtag_output_ecoli_val/$E_QUERY_PREF.corrected.fasta \
ragtag_output_ecoli_val/ragtag.correction.agp
ragtag_output_ecoli_val/ragtag.correct.fasta \
ragtag_output_ecoli_val/ragtag.correct.agp


# Run ragtag merge
Expand Down

0 comments on commit 1d9421e

Please sign in to comment.