forked from sc-zhang/bioscripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
split_groups_with_ctg_chr_blast_for_AllHiC.py
executable file
·62 lines (52 loc) · 1.36 KB
/
split_groups_with_ctg_chr_blast_for_AllHiC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/python
import sys, os, shutil
import re
def make_group(db):
new_db = {}
for tig_name in db:
max_name = ''
max = 0
for chrn in db[tig_name]:
if max < db[tig_name][chrn]:
max_name = chrn
if chrn not in new_db:
new_db[chrn] = []
new_db[max_name].append(tig_name)
return new_db
def split_group(blast_out, out_dir):
if os.path.isdir(out_dir):
shutil.rmtree(out_dir)
os.mkdir(out_dir)
print("Reading blast result")
blast_db = {}
with open(blast_out, 'r') as f_in:
for line in f_in:
data = line.strip().split()
if data[1][:3].lower() != 'chr':
continue
if data[1][3].isdigit() == False:
continue
tig_name = data[0].split("_")[0]
chrn = data[1]
if tig_name not in blast_db:
blast_db[tig_name] = {}
if chrn not in blast_db[tig_name]:
blast_db[tig_name][chrn] = 0
blast_db[tig_name][chrn] += 1
print("Making groups")
group_db = make_group(blast_db)
f_db = {}
print("Writing results")
for chrn in group_db:
f_db[chrn] = open(out_dir+"/"+chrn+".ordering", 'w')
f_db[chrn].write("\n".join(group_db[chrn]))
f_db[chrn].close()
print("Success")
if __name__ == "__main__":
if len(sys.argv) < 3:
print("Notice: group contigs with blast result")
print("Usage python "+sys.argv[0]+" <blast_out> <out_dir>")
else:
blast_out = sys.argv[1]
out_dir = sys.argv[2]
split_group(blast_out, out_dir)