-
Notifications
You must be signed in to change notification settings - Fork 1
/
bam2fq
executable file
·88 lines (80 loc) · 2.29 KB
/
bam2fq
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/env perl
use strict;
my ($bam, $outpref, $nosingle) = @ARGV;
if ($outpref eq '--no-single') {
$nosingle = 1;
$outpref = undef;
} elsif ($nosingle eq '--no-single') {
$nosingle = 1;
} elsif ($nosingle) {
die "$0: Unexpected third argument: only '--no-single' allowed, not '$nosingle'!\n";
}
($outpref = $bam) =~ s/\.bam$// unless $outpref;
print "OUTPREF: $outpref\n";
my $tmp = "bam2fq.$$.tmp.fq";
my (%ends, %unps);
## Process paired reads
open my $FQ1, '|-', "gzip > ${outpref}_1.fastq.gz";
open my $FQ2, '|-', "gzip > ${outpref}_2.fastq.gz";
my $cmd = $nosingle ? "samtools bam2fq $bam" : "samtools bam2fq -s $tmp $bam";
open my $IN1, '-|', $cmd;
my ($i, $end, @rec);
while (<$IN1>) {
$i++;
push @rec, $_;
if ($i == 1) {
chomp;
$_ =~ s!/([12])$!!;
$end = $1||1;
$ends{$end}++;
} elsif ($i == 4) {
if ($end == 1) {
print $FQ1 @rec;
} else {
print $FQ2 @rec;
}
@rec = ();
$i = 0;
}
}
close $_ foreach ($IN1, $FQ1, $FQ2);
system "rm -f ${outpref}_1.fastq.gz" unless $ends{1};
system "rm -f ${outpref}_2.fastq.gz" unless $ends{2};
## Process orphanss
if (-e $tmp) {
open my $FQ1, '|-', "gzip > ${outpref}_1_orphans.fastq.gz";
open my $FQ2, '|-', "gzip > ${outpref}_2_orphans.fastq.gz";
open my $IN1, '<', $tmp;
my ($i, $end, @rec);
while (<$IN1>) {
$i++;
push @rec, $_;
if ($i == 1) {
chomp;
$_ =~ s!/([12])$!!;
$end = $1;
$unps{$end}++;
} elsif ($i == 4) {
if ($end == 1) {
print $FQ1 @rec;
} else {
print $FQ2 @rec;
}
@rec = ();
$i = 0;
}
}
close $_ foreach ($IN1, $FQ1, $FQ2);
system "rm -f ${outpref}_1_orphans.fastq.gz" unless $unps{1};
system "rm -f ${outpref}_2_orphans.fastq.gz" unless $unps{2};
system "rm -f $tmp";
}
if ($ends{2}) {
print "Paired-end fastqs are imbalanced: $ends{1} E1 != $ends{2} E2 !\n" if $ends{2} != $ends{1};
print "Paired-end fastqs are balanced: $ends{1} E1 == $ends{2} E2 !\n" if $ends{2} == $ends{1};
print "Orphans: $unps{1} E1 | $unps{2} E2\n";
} else {
print "Reads: $ends{1} E1\n";
}
print "$0 $bam complete!\n";
exit;