-
Notifications
You must be signed in to change notification settings - Fork 4
/
02_uninterleave_fasta_in_file_and_format_fastaName.py
executable file
·87 lines (65 loc) · 3.04 KB
/
02_uninterleave_fasta_in_file_and_format_fastaName.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/python
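# Un-interleave a FASTA file: each record is rewritten as its header line followed
# by the whole sequence on a single line. No PHYLIP conversion happens here;
# presumably this is a preparatory step for the PHYLIP/PAML workflow.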
import string, os, sys
# Command-line arguments: <input FASTA path> <output FASTA path>.
if len(sys.argv) < 3:
    # Exit with a usage message instead of a bare IndexError traceback
    # when one or both paths are missing.
    sys.exit("Usage: %s <input_fasta> <output_fasta>" % sys.argv[0])
path_IN = sys.argv[1]   # FASTA file to read (sequences may span several lines)
path_OUT = sys.argv[2]  # file to write (one line per sequence)
###### def1 ######
def _write_fasta_record(file_OUT, header, sequence_lines):
    """Write one record: the header line, then the sequence on a single line."""
    file_OUT.write("%s\n" % header)
    file_OUT.write("%s\n" % "".join(sequence_lines))


def extract_interleavedFASTA(path_IN, path_OUT):
    """Rewrite a FASTA file so that every sequence sits on a single line.

    Each record read from ``path_IN`` is written to ``path_OUT`` as two
    lines: the header line (the line starting with ">") and the sequence
    with all of its line breaks removed. After each record is written, the
    running number of records written so far is printed to stdout.

    Args:
        path_IN: path of the FASTA file to read.
        path_OUT: path of the file to create (opened in "w" mode, so an
            existing file is overwritten).

    Returns:
        None.

    Raises:
        ValueError: if non-blank content appears before the first ">"
            header line (blank lines before the first header are skipped).
    """
    header = None          # header of the record being read, terminator removed
    sequence_lines = []    # sequence lines of the record being read
    records_written = 0

    # `with` guarantees both files are closed even if reading/writing fails.
    with open(path_IN, "r") as file_IN, open(path_OUT, "w") as file_OUT:
        for raw_line in file_IN:
            # Strip only the line terminator ("\n" or "\r\n"), so a final
            # line without a trailing newline keeps its last character.
            line = raw_line.rstrip("\r\n")

            if line.startswith(">"):
                # A new header closes the previous record, if there is one.
                if header is not None:
                    _write_fasta_record(file_OUT, header, sequence_lines)
                    records_written += 1
                    # Single-argument print(...) behaves the same on
                    # Python 2 and Python 3.
                    print(records_written)
                header = line
                sequence_lines = []
            elif header is not None:
                # Collect pieces and join once at write time (linear cost).
                sequence_lines.append(line)
            elif line.strip():
                raise ValueError(
                    "%s: sequence data found before the first '>' header"
                    % path_IN
                )

        # End of file: flush the last record (nothing to do for empty input).
        if header is not None:
            _write_fasta_record(file_OUT, header, sequence_lines)
            records_written += 1
            print(records_written)
#-#-#-#-#-#-#-#-#-#-#
###################
### RUN RUN RUN ###
###################
import string
# Run the conversion (DEF1) on the two paths taken from the command line.
# The function returns nothing, so `file` ends up bound to None.
file = extract_interleavedFASTA(path_IN=path_IN, path_OUT=path_OUT)