-
Notifications
You must be signed in to change notification settings - Fork 0
/
npy2PerseusHom.py
256 lines (241 loc) · 8.67 KB
/
npy2PerseusHom.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
"""
Module to convert RBC frames (npy files) to input required by Perseus software
created by kel 6/22/2012
NOTE: -------- FOR COMPUTING REGULAR HOMOLOGY ----------
Perseus computes standard homology by substituting '1' for intensity value
New Cells: 110125, 130125, 140125, 40125, 50125
Old Cells: 100125, 120125, 50125, 90125
NOTE: Discrepancies have been found between the sparse and dense cubical
formats for blocks (6/22/2012)
"""
import numpy
import matplotlib.pylab as plt
import re
import os
import rbc_npy2Perseus as n2p
def write_file ( fname, output ):
    """
    ----DENSE FORMAT---
    - Write contents of single 2D frame to a Perseus dense cubical file
    e.g. new cell name on simplex:
    '/data/jberwald/wyss/data/Cells_Jesse/New/frames/new_110125/new_110125-concatenated-ASCII_324.npy'
    e.g. old cell name on simplex:
    '/data/jberwald/wyss/data/Cells_Jesse/Old/frames/old_100125/old_100125-concatenated-ASCII_1525.npy'
    Perseus input requires following format:
    #Dimension of cubical grid
    #Number of cubes in first dimension
    #Number of cubes in second dimension
    #Number of cubes in ... dimension
    #Birth time of cube anchored at (0,0,...,0)
    #Birth time of cube anchored at (0,0,...,1)

    Arguments:
    - fname  : path of the .npy frame to convert
    - output : path of the output file WITHOUT extension ('.txt' is appended)

    Every pixel whose integer value is 0 is written as -1, every other
    pixel as 1 (regular homology: intensity is replaced by '1').
    Returns None.
    """
    #Load npy file
    data = numpy.load(fname)
    size = data.shape
    #Header: number of dimensions followed by each resolution
    lines = [str(len(size))]
    lines.extend(str(extent) for extent in size)
    #Body: strictly 2D, rows first, columns fastest
    for i in range(size[0]):
        for j in range(size[1]):
            lines.append("-1" if int(data[i][j]) == 0 else "1")
    #Content is built before the file is opened so that a failure above
    #does not leave a partial output; 'with' guarantees the handle is closed
    with open(output + ".txt", "w") as text:
        text.write("\n".join(lines) + "\n")
def write_sparse_file ( fname, output):
    """
    ----SPARSE FORMAT---
    - Write contents of single 2D frame to a sparse Perseus file

    Arguments:
    - fname  : path of the .npy frame to convert
    - output : path of the output file WITHOUT extension ('.txt' is appended)

    The file starts with the number of dimensions (2), followed by one
    line 'i j 1' for every pixel (row i, column j) whose integer value is
    nonzero. Zero pixels are omitted.
    Returns None.
    """
    #Load the frame first: an unreadable input must not create an output file
    data = numpy.load(fname)
    size = data.shape
    #Header: number of dimensions (strictly 2D)
    lines = [str(2)]
    for i in range(size[0]):
        for j in range(size[1]):
            if int(data[i][j]) != 0:
                lines.append(str(i) + ' ' + str(j) + ' ' + str(1))
    #'with' guarantees the handle is closed even if the write fails
    with open(output + ".txt", "w") as text:
        text.write("\n".join(lines) + "\n")
def write_All_Cells ( dir, output):
    """
    Collects all cells in same directory (say '../New/frames') and writes
    perseus output (currently writes sparse output, via write_Cell)

    Arguments:
    - dir    : directory whose sub-folders starting with 'new' or 'old'
               are treated as cells
    - output : root output location; each cell is written under
               output/<cell folder name>/
    Note: a trailing '/' on dir and output is accepted but no longer required
    Ex dir: '/data/rbc-Diff-Frames/New/frames/10-diff/'
    ex output: '/home/kellys/Persistence/Perseus/Formatted_Cells/frames/New/diff-10/'
    Returns None.
    """
    #Collect every (cell folder, output prefix) pair before writing anything
    cells = []
    for f in os.listdir(dir):
        #os.path.join copes with dir given with or without a trailing '/'
        if os.path.isdir(os.path.join(dir, f)) and f.startswith(('new', 'old')):
            #the empty last component keeps the trailing separator on both paths
            cells.append((os.path.join(dir, f, ''), os.path.join(output, f, '')))
    for cellFolder, cellOutput in cells:
        write_Cell (cellFolder, cellOutput)
def write_Cell ( files, output ):
    """
    - Using write_sparse_file for cell's stack of frames
    - files is the directory holding the cell's .npy frames
    - output is the prefix put in front of each frame name (a directory
    ending with '/' to get one file per frame inside that directory)
    e.g. files = '/data/jberwald/wyss/data/Cells_Jesse/Old/frames/old_120125/'
    output = '/home/kellys/Dropbox/rbc_shared/'
    - warning: careful use of output (probably not in Dropbox)
    - prints 'error' and returns without writing when files is not a directory
    Returns None.
    """
    if not os.path.isdir (files):
        print('error')
        #nothing to convert; stop here instead of failing on an undefined list
        return
    #grab files, skip directories
    fdir = files + '/'
    frames = [f for f in os.listdir(fdir)
              if f.endswith('npy') and not os.path.isdir(fdir+f)]
    #sort frames (key comes from rbc_npy2Perseus; presumably a natural,
    #number-aware ordering -- confirm against that module)
    frames.sort(key=n2p.natural_key)
    #for all frames call write_sparse_file
    suffix = '.npy'
    for frame in frames:
        #remove the extension as a suffix; rstrip('.npy') would strip any
        #trailing '.', 'n', 'p', 'y' characters and mangle names like 'happy.npy'
        if frame.endswith(suffix):
            stem = frame[:-len(suffix)]
        else:
            stem = frame
        write_sparse_file ( fdir+frame, output+stem )
def write_Comp_Cell ( compArray, output ):
    """
    - WRITE COMPLEMENT CELL
    - compArray is the path of a .npy file holding a stack of 2D frames
    (first axis indexes the frames)
    - output is the prefix of the output files; frame number ind is written
    to output + '_' + str(ind) + '.txt'
    e.g. output = '/home/kellys/Dropbox/rbc_shared/old_120125'
    - warning: careful use of output (probably not in Dropbox)
    - Each file is in sparse format: the number of dimensions (2), then one
    line 'i j value' per pixel whose integer value is nonzero
    Returns None.
    """
    data = numpy.load(compArray)
    #dimension array
    numDim = 2
    for ind in range(data.shape[0]):
        frame = data[ind]
        size = frame.shape
        lines = [str(numDim)]
        #Write strictly 2D file
        for i in range(size[0]):
            for j in range(size[1]):
                value = int(frame[i][j])
                if value != 0:
                    lines.append(str(i) + ' ' + str(j) + ' ' + str(value))
        #'with' closes each per-frame file even if the write fails
        with open(output + '_' + str(ind) + ".txt", "w") as text:
            text.write("\n".join(lines) + "\n")
def write_block_dense ( files, output, m, n ):
    """
    - Write contents of 3D stack of frames to output
    required by perseus
    e.g. files = '/data/jberwald/wyss/data/Cells_Jesse/Old/frames/old_120125/'
    - DENSE CUBICAL TOPLEX FORMAT
    - files is the directory holding the .npy frames
    - output is the path of the output file WITHOUT extension ('.txt' is appended)
    - m is index of frame to start from (index start at 0)
    - n is index of frame to end at (inclusive)
    - The header holds the number of dimensions (3) followed by the frame
    width, the frame height and the number of frames in the block
    (n - m + 1), so that it agrees with the amount of data written
    - Zero pixels are written as -1, all other pixels as 1
    - Prints a message and returns without writing when files is not a
    directory or when the block bounds are invalid
    Returns None.
    """
    if not os.path.isdir (files):
        print('Error - First argument not a directory!!')
        return
    #grab files, skip directories
    fdir = files + '/'
    frames = [f for f in os.listdir(fdir)
              if f.endswith('npy') and not os.path.isdir(fdir+f)]
    #Error handling for block bounds (applied to every m, n combination;
    #an empty directory is rejected here as well)
    if m < 0:
        print('Starting index out of bounds')
        return
    if n > (len(frames)-1):
        print('End index out of bounds')
        return
    if n < m:
        print('End index precedes starting index')
        return
    #Sort frames (key comes from rbc_npy2Perseus, as in write_Cell;
    #presumably a natural, number-aware ordering -- confirm against that module)
    frames.sort(key=n2p.natural_key)
    block = frames[m:n+1]
    #Get necessary dimensions; every frame is assumed to share this shape
    numDim = 3
    imgSize = numpy.load(fdir + block[0]).shape
    dataDims = [len(block)]
    dataDims.extend(imgSize)
    with open (output + ".txt", "w") as text:
        text.write(str(numDim) + "\n")
        #Dimensions are written last-to-first (width, height, frames) to
        #match the order of the data below, where the column index varies fastest
        for extent in reversed(dataDims):
            text.write(str(extent) + "\n")
        #Write data one frame at a time to keep memory use bounded
        for frame in block:
            data = numpy.load(fdir + frame)
            cells = []
            for i in range(imgSize[0]):
                for j in range(imgSize[1]):
                    cells.append("-1\n" if int(data[i][j]) == 0 else "1\n")
            text.writelines(cells)
def write_block_sparse ( files, output, m, n ):
    """
    - Write contents of 3D stack of frames to output
    required by perseus (sparse format)
    e.g. files = '/data/jberwald/wyss/data/Cells_Jesse/Old/frames/old_120125/'
    - Currently in beta
    - files is the directory holding the .npy frames
    - output is the path of the output file WITHOUT extension ('.txt' is appended)
    - m is index of frame to start from (index start at 0)
    - n is index of frame to end at (inclusive)
    - The file starts with the number of dimensions (3), followed by one
    line 'frame i j 1' per nonzero pixel, where frame is the index of the
    frame in the sorted frame list (not relative to m)
    - Prints a message and returns without writing when files is not a
    directory or when the block bounds are invalid
    Returns None.
    """
    if not os.path.isdir (files):
        print('Error - First argument not a directory!!')
        return
    #grab files, skip directories
    fdir = files + '/'
    frames = [f for f in os.listdir(fdir)
              if f.endswith('npy') and not os.path.isdir(fdir+f)]
    #Error handling for block bounds (applied to every m, n combination;
    #an empty directory is rejected here as well)
    if m < 0:
        print('Starting index out of bounds')
        return
    if n > (len(frames)-1):
        print('End index out of bounds')
        return
    if n < m:
        print('End index precedes starting index')
        return
    #Sort frames (key comes from rbc_npy2Perseus, as in write_Cell;
    #presumably a natural, number-aware ordering -- confirm against that module)
    frames.sort(key=n2p.natural_key)
    #dimension array
    numDim = 3
    #every frame is assumed to share the shape of the first frame of the block
    size = numpy.load(fdir + frames[m]).shape
    with open (output + ".txt", "w") as text:
        #Write number of dimensions
        text.write(str(numDim) + "\n")
        #Write one frame at a time to keep memory use bounded
        for ind in range(m, n + 1):
            data = numpy.load(fdir + frames[ind])
            cells = []
            for i in range(size[0]):
                for j in range(size[1]):
                    if int(data[i][j]) != 0:
                        pos = str(ind) + ' ' + str(i) + ' ' + str(j) + ' '
                        cells.append(pos + str(1) + "\n")
            text.writelines(cells)