-
Notifications
You must be signed in to change notification settings - Fork 2
/
convertMapping.pl
126 lines (105 loc) · 2.68 KB
/
convertMapping.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/usr/bin/perl
# John M. Gaspar ([email protected])
# June 2013
# This program takes a list of mapping files
# intended for use with QIIME and converts
# them for use with FlowClus.
# Primers are named in order (0, 1, 2, ...)
# -- there are no names in the QIIME file!
use strict;
use warnings;
# At least one mapping file must be named on the command line.
@ARGV
    or die "Error -- please list file names on command line\n";

# Pick an output name that is not already taken by a regular file:
# master.csv first, then master1.csv, master2.csv, ... as needed.
my $out = "master.csv";
my ($stem, $ext) = split(/\./, $out);
for (my $suffix = 1; -f $out; $suffix++) {
    $out = $stem . $suffix . "." . $ext;
}
# Data collected across every input file.  Primers are numbered in the
# order in which they are first seen; element $i of each array below
# describes primer number $i.
my @primer  = ();    # primer sequences (LinkerPrimerSequence column)
my @reverse = ();    # reverse primer stored with each primer, or 0 if none
my @mids    = ();    # per primer: array ref of BarcodeSequence values
my @samples = ();    # per primer: array ref of #SampleID values (parallel to @mids)
my %primer_index;    # primer sequence => its position in @primer

# Header names that are searched for, and the slot each one gets in @order.
my %slot_of = (
    "#SampleID"            => 0,
    "BarcodeSequence"      => 1,
    "LinkerPrimerSequence" => 2,
    "ReversePrimer"        => 3,
);

# analyze files
# (the defined() test keeps a file literally named "0" from ending the loop)
while (defined(my $file = shift @ARGV)) {
    # Three-argument open: the name is always treated as a plain file to
    # read, never as a mode prefix or a command.
    my $in;
    if (!open($in, '<', $file)) {
        print "$file: Warning -- cannot open: $!\n";
        next;
    }

    # get header line (an empty file is handled as an empty header, which
    # then fails the required-column check below)
    my $line = <$in>;
    $line = "" if (!defined $line);
    $line =~ s/[\n\r]//g;
    my @header = split("\t", $line);

    # load order: $order[slot] is the column index of that field, or -1
    # when the header does not contain it
    my @order = (-1) x 4;
    for (my $x = 0; $x < scalar @header; $x++) {
        my $slot = $slot_of{$header[$x]};
        $order[$slot] = $x if (defined $slot);
    }

    # check for missing information: sample ID, barcode and primer columns
    # are required; the reverse primer column is optional
    for my $x (0 .. 2) {
        die "$file: Error -- information missing\n" if ($order[$x] == -1);
    }
    if ($order[3] == -1) {
        print "$file: Warning -- no reverse primer specified\n",
            "\tDo not specify search for reverse primer\n";
    }

    # analyze lines
    while ($line = <$in>) {
        next if (substr($line, 0, 1) eq "#");
        $line =~ s/[\n\r]//g;
        next if ($line !~ /\S/);    # silently ignore blank lines

        my @spl = split("\t", $line);
        my ($sample, $mid, $prim) = @spl[ @order[0 .. 2] ];

        # A row too short to reach a required column would otherwise be
        # stored as an undefined primer/barcode/sample.
        if (!defined $sample || !defined $mid || !defined $prim) {
            print "$file: Warning -- skipping incomplete line $.\n";
            next;
        }

        # look up the primer; register it on first sight
        my $idx = $primer_index{$prim};
        if (!defined $idx) {
            $idx = scalar @primer;
            $primer_index{$prim} = $idx;
            $primer[$idx] = $prim;

            # The reverse primer is taken from the first row seen for this
            # primer.  split() drops trailing empty fields, so the value
            # can be undefined even when the column exists.
            my $rev = ($order[3] != -1) ? $spl[$order[3]] : 0;
            $reverse[$idx] = defined $rev ? $rev : 0;
        }
        push @{$mids[$idx]}, $mid;
        push @{$samples[$idx]}, $sample;
    }
    close $in;
}
# print output
# One "primer,<index>,<sequence>" line per primer, followed by an optional
# "reverse,<sequence>" line (only when a true value was stored for it) and
# one "midtag,<sample>,<mid>" line for each entry collected for that primer.
die "Error -- no files processed\n" if (! scalar @primer);

# Stop with a message if the file cannot be created, rather than printing
# to an unopened handle and then reporting a file that does not exist.
open(my $out_fh, '>', $out)
    or die "Error -- cannot create $out: $!\n";
for my $x (0 .. $#primer) {
    print {$out_fh} "primer,$x,$primer[$x]\n";
    print {$out_fh} "reverse,$reverse[$x]\n" if ($reverse[$x]);
    for my $y (0 .. $#{$mids[$x]}) {
        print {$out_fh} "midtag,$samples[$x][$y],$mids[$x][$y]\n";
    }
}

# Buffered write errors (e.g. a full disk) are only reported at close.
close($out_fh)
    or die "Error -- cannot write $out: $!\n";
print "New mapping file: $out\n";