forked from jhammelman/DeepAccess
-
Notifications
You must be signed in to change notification settings - Fork 1
/
ensemble_utils.py
35 lines (32 loc) · 973 Bytes
/
ensemble_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import os
import numpy as np
def ensure_dir(file_path):
    """Create the directory ``file_path`` (and any missing parents).

    Despite the parameter name, ``file_path`` is treated as the directory to
    create, not as a file whose parent directory should be created.

    Calling this on a directory that already exists is a no-op.

    Args:
        file_path: Path of the directory to create.

    Raises:
        FileExistsError: If ``file_path`` exists but is not a directory.
        OSError: If the directory cannot be created (e.g. permissions).
    """
    # exist_ok=True avoids the check-then-create race of testing
    # os.path.exists() first: another process may create the directory
    # between the check and the makedirs() call.
    os.makedirs(file_path, exist_ok=True)
def act_to_class(act):
    """Load a whitespace-delimited integer label table into an array.

    The first line of the file is treated as a header and skipped. On every
    following line the first column is discarded and the remaining columns
    are parsed as integers. Whitespace-only lines are ignored.

    Args:
        act: Path to the text file to read.

    Returns:
        ``numpy.ndarray`` with one row per data line and one column per
        value after the first column. An empty array if there are no data
        lines.

    Raises:
        ValueError: If a value after the first column is not an integer.
    """
    rows = []
    # Context manager guarantees the handle is closed, even on parse errors.
    with open(act) as handle:
        next(handle, None)  # skip the header line (if any)
        for line in handle:
            fields = line.split()
            if not fields:
                # A blank (e.g. trailing) line would otherwise add an empty
                # row and make the resulting array ragged.
                continue
            rows.append([int(value) for value in fields[1:]])
    return np.array(rows)
def fa_to_onehot(fa):
    """One-hot encode every sequence in a FASTA file.

    Each record starts at a line beginning with ``>``; all following lines up
    to the next header are concatenated into that record's sequence, so
    wrapped (multi-line) records are read in full. Bases are matched
    case-insensitively against ``A``, ``C``, ``G``, ``T`` (columns 0-3).
    Any other character is encoded as 0.25 in all four columns. Sequences
    shorter than the longest one are right-padded with all-zero rows.

    Args:
        fa: Path to the FASTA file to read.

    Returns:
        ``numpy.ndarray`` of floats with shape
        ``(number of records, longest sequence length, 4)``. For a file with
        no records the shape is ``(0, 0, 4)``.
    """
    base_index = {"A": 0, "C": 1, "G": 2, "T": 3}

    # Collect the sequence lines of each record; text before the first
    # header line is ignored.
    records = []
    current = None
    with open(fa) as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if line.startswith(">"):
                current = []
                records.append(current)
            elif current is not None and line:
                current.append(line.upper())
    sequences = ["".join(parts) for parts in records]

    # default=0 keeps an empty file from raising in max().
    slen = max((len(seq) for seq in sequences), default=0)
    onehot = np.zeros((len(sequences), slen, 4))
    for i, seq in enumerate(sequences):
        for j, base in enumerate(seq):
            column = base_index.get(base)
            if column is None:
                # Unknown/ambiguous symbol: spread probability uniformly.
                onehot[i, j, :] = 0.25
            else:
                onehot[i, j, column] = 1
    return onehot