-
Notifications
You must be signed in to change notification settings - Fork 3
/
genes_to_vector.py
67 lines (56 loc) · 1.04 KB
/
genes_to_vector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/python
import sys
# Turn per-patient gene lists into fixed-length numeric vectors.
#
# Usage: genes_to_vector.py GENE_LIST_FILE < patient_lines > vectors.csv
#
# GENE_LIST_FILE holds one gene name per line.  Every stdin line is split on
# whitespace into: patient id, label, one ignored field, and then one
# comma-separated gene list per entry of BINS.  Any '*' in a gene name is
# removed before comparison.
#
# stdout receives one CSV row per patient and no header row.  The columns are
# the patient's index in the sorted id list, the label's index in the sorted
# label list, and then one value per gene of the sorted gene list (0 when the
# gene was not seen for that patient).  The two sorted lists are written to
# the file 'lookup' so the indices can be mapped back.

# Value stored for a gene found in the 1st, 2nd, 3rd and 4th gene-list field.
# NOTE(review): what these weights stand for is not visible in this file.
BINS = (72, 30, 15, 6)

# Zero-based position of the first gene-list field on a stdin line.
FIRST_BIN_FIELD = 3

# File that receives the label and patient-id lookup lists.
LOOKUP_PATH = 'lookup'


def _clean(name):
    """Return *name* with every '*' character removed."""
    return name.replace('*', '')


def load_gene_list(path):
    """Return the sorted, de-duplicated gene names listed in file *path*.

    Each line is stripped and has its '*' characters removed.  Names that
    end up with one character or fewer are dropped.
    """
    genes = set()
    with open(path) as handle:
        for raw in handle:
            name = _clean(raw.strip())
            if len(name) > 1:
                genes.add(name)
    return sorted(genes)


def parse_patients(lines, genes, bins=BINS):
    """Collect the per-patient gene values from an iterable of text lines.

    Args:
        lines: iterable of input lines (for example ``sys.stdin``).
        genes: gene names to keep; every other name is ignored.
        bins: value assigned to genes found in each successive gene-list
            field.

    Returns:
        A ``(patients, labels, label_names)`` tuple.  ``patients`` maps a
        patient id to a ``{gene: value}`` dict, ``labels`` maps a patient id
        to the label of the first line seen for it, and ``label_names`` is
        the sorted list of every label seen on any line.

    Raises:
        ValueError: a non-blank line has too few fields.
    """
    wanted = set(genes)  # O(1) membership instead of scanning the sorted list
    needed = FIRST_BIN_FIELD + len(bins)
    patients = {}
    labels = {}
    seen_labels = set()
    for number, raw in enumerate(lines, 1):
        fields = raw.strip().split()
        if not fields:
            continue  # tolerate blank lines instead of crashing on fields[0]
        if len(fields) < needed:
            raise ValueError(
                "line %d: expected at least %d whitespace-separated fields, "
                "got %d" % (number, needed, len(fields)))
        patient, label = fields[0], fields[1]
        seen_labels.add(label)
        found = patients.setdefault(patient, {})
        labels.setdefault(patient, label)
        for offset, value in enumerate(bins):
            for name in fields[FIRST_BIN_FIELD + offset].split(','):
                name = _clean(name)
                if name in wanted:
                    # The first value recorded for a gene is kept, both
                    # across fields and across lines of the same patient.
                    found.setdefault(name, value)
    return patients, labels, sorted(seen_labels)


def iter_rows(patients, labels, genes, label_names):
    """Yield one comma-joined row per patient.

    Rows follow the iteration order of *patients*; the first column carries
    the patient's index in the sorted id list, so each row stays identifiable.
    """
    # Precomputed positions replace a list.index() scan per emitted row.
    patient_index = {key: pos for pos, key in enumerate(sorted(patients))}
    label_index = {key: pos for pos, key in enumerate(label_names)}
    for patient in patients:
        found = patients[patient]
        cells = [str(patient_index[patient]),
                 str(label_index[labels[patient]])]
        cells.extend(str(found.get(name, 0)) for name in genes)
        yield ",".join(cells)


def write_lookup(path, label_names, patient_names):
    """Write the two lookup lists to *path*, one printed list per line."""
    with open(path, 'w') as handle:
        handle.write("%s\n" % (label_names,))
        handle.write("%s\n" % (patient_names,))


def main(argv=None):
    """Run the conversion and return the process exit status."""
    argv = sys.argv if argv is None else argv
    if len(argv) != 2:
        # stdout is the data channel, so the usage text goes to stderr.
        sys.stderr.write("%s gene list\n" % argv[0])
        return 1
    genes = load_gene_list(argv[1])
    patients, labels, label_names = parse_patients(sys.stdin, genes)
    for row in iter_rows(patients, labels, genes, label_names):
        sys.stdout.write(row + "\n")
    write_lookup(LOOKUP_PATH, label_names, sorted(patients))
    return 0


if __name__ == "__main__":
    sys.exit(main())