-
Notifications
You must be signed in to change notification settings - Fork 5
/
ELViM.py
278 lines (246 loc) · 11.6 KB
/
ELViM.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 14 07:50:54 2023
@author: rafael
"""
import argparse
import numpy as np
from numba import njit, prange
import os
import shutil
import mdtraj as md
import force_scheme as force
@njit(parallel=True, fastmath=False) ####dissimilarity defautl, generalizar para diferentes sigma_0 e epsilon
def dissimilarity(coords, s0):
"""
Calculates the dissimilarity matrix using coordinates of C-alpha carbons.
Using other representations, such as all atoms or different number of
coarse-grain beads, requires adjusting the residue number.
Parameters
----------
coords : np.float32
C-alpha atomic coordinates in Angstrom [frames, atom_index, (x, y, z)]
s0 : float
Gaussian width. Can be changed to gauge the dissimilarity. Default value is one.
"""
epsilon=0.15
sigma_0=s0
size, atoms, _=coords.shape
total = int(size*(size+1)/2)
d=np.zeros(total, dtype=np.float32)
for k in prange (size):
for l in prange (k, size):
N=0
ep=0
for i in prange(atoms):
for j in prange (i+1,atoms):
xijk = (coords[k][i][0] - coords[k][j][0])
yijk = (coords[k][i][1] - coords[k][j][1])
zijk = (coords[k][i][2] - coords[k][j][2])
rijk = np.sqrt(xijk*xijk + yijk*yijk + zijk*zijk)
xijl = (coords[l][i][0] - coords[l][j][0])
yijl = (coords[l][i][1] - coords[l][j][1])
zijl = (coords[l][i][2] - coords[l][j][2])
rijl = np.sqrt(xijl*xijl + yijl*yijl + zijl*zijl)
dr = (rijk-rijl)**2
sigma = sigma_0*abs(i-j)**epsilon
ep = ep + np.exp(-dr/(2*sigma**2))
N = N + 1
d[int(total - ((size - k) * (size - k + 1) / 2) + (l - k))] = 1-ep/N
return d
@njit(parallel=True, fastmath=False) ####dissimilarity defautl, generalizar para diferentes sigma_0 e epsilon
def square_to_condensed(sqdmat):
"""
Convert the symmetrical square matrix to the required condensed form.
"""
size, _ = sqdmat.shape
total = size*(size+1)//2
d=np.zeros(total, dtype=np.float32)
for k in prange (size):
for l in prange (k, size):
d[int(total - ((size - k) * (size - k + 1) / 2) + (l - k))] = sqdmat[k,l]
return d
def calc_dmat(coords, s0, outdm):
# Calculate the dissimilarity matriz
dmat = dissimilarity(coords, s0)
if outdm!=None:
if os.path.exists(outdm):
backup_file(outdm)
np.save(outdm, dmat)
print("Dissimilarity matrix save in condensed form to the file {}".format(outdm))
return dmat
def get_coords(traj_file, top_file):
# Load the trajectory file
if traj_file[-4:] != '.pdb' and top_file==None:
raise RuntimeError("A topology file is needed!")
try:
if traj_file[-4:] == '.pdb':
traj = md.load(traj_file)
else:
traj=md.load(traj_file, top=top_file)
print("Trajectory succesfully loaded!")
except:
raise RuntimeError("Error loading trajectory.")
# Select C-alpha and get coordinates in Angstroms
ca_indices=traj.topology.select("name == CA")
coords=traj.xyz
coords=coords[:,ca_indices,:]*10 # Angstrom
return coords
def backup_file(file_path):
backup_path = file_path + '.bak'
shutil.move(file_path, backup_path)
print(f"Existing file '{file_path}' backed up to '{backup_path}'.")
def read_dmat(dmat_file):
try:
try:
dmat = np.load(dmat_file)
except:
dmat = np.loadtxt(dmat_file)
except:
raise RuntimeError("Failed to load the dissimilarity matrix from both binary .npy and text file formats.")
if len(dmat.shape)==1:
size = int(np.sqrt(2 * len(dmat)))
print("Loaded a condensed matrix for {} conformations".format(size))
elif len(dmat.shape)==2:
size, size2 =dmat.shape
if size!=size2:
raise RuntimeError("The dissimilarity matrix should be square (NxN)")
# Check if the loaded matrix is approximately symmetric
is_sym = np.allclose(dmat, dmat.T, atol=1e-3)
if not is_sym:
raise RuntimeError("The dissimilarity matrix should be symmetric")
dmat = square_to_condensed(dmat)
print("Loaded a square matrix for {} conformations".format(size))
else:
raise RuntimeError('''The dissimilarity matrix should be a symmetrical nxn or its condensed form.
See the tutorial for details.''')
return dmat, size
def main():
""" Read either a trajectory or a dissimilarity matrix and output the projection.
Args:
-f (filename): Pre-processed MD trajectory to be projected in any format recognized by MDtraj
-s (filename): Topology information (eg. frame.pdb) for loading the trajectory
-dm (filename): File containing a dissimilarity matrix (default: None)
-odm (filename): Name to save the dissimilarity matrix (default: dmat.npy)
-it (int): Maximum number of iteration (default: sqrt(n_frames))
-lr0 (int): Initial value for the learning_rate (default: 1/8)
-d (int): Exponential decay of the learning_rate (default: 0)
-s (int): random seed (default: None)
-op (filename): Name to save the ELViM projection coordinates file (default: projection.out)
Returns:
output (file): binary numpy file (.npy) containing the compressed dissimilarity matrix (default: coords.npy)
output (file): ELViM projection coordinates file (default: projection.out)
"""
message='''This program can:
1. Read the trajectory file (pdb or other formats +ref.pdb, calculate the dissimilarity matrix
and perform the ELViM projection. \n
2. Read a distance matrix in the symmetrical square or condensed format and perform the
ELViM projection.
* Either a trajectory or dissimilarity matrix should be provided.
* To save the dissimilarity matrix for other projections use -odm name.npy'''
parser = argparse.ArgumentParser(description=message)
parser.add_argument("-f", dest="trajectory_file",
action="store", type=str, default=None,
help="Name of the trajectory file")
parser.add_argument("-t", dest="topology_file",
action="store", type=str, default=None,
help="Name of the topology file")
parser.add_argument("-dm", dest="dmat",
action="store", type=str, default=None,
help="Name of the dissimilarity matrix file")
parser.add_argument("-c", dest="coords",
action="store", type=str, default=None,
help="Name of the coordinate matrix file (MDtraj format")
parser.add_argument("-s0", dest="sigma0",
action="store", type=float, default=1.0,
help="Value of sig0")
parser.add_argument("-it", dest="max_it",
action="store", type=int, default=None,
help="Maximum number of iteration (default sqrt(n_frames")
parser.add_argument("-lr0", dest="learning_rate0", action="store", type=float, default=0.5,
help="Initial learning rate")
parser.add_argument("-lrmin", dest="learning_rate_min", action="store", type=float, default=0.05,
help="Minimum value for learning rate")
parser.add_argument("-d", dest="decay", action="store", type=float, default=0.95,
help="Learnig rate exponential decay")
parser.add_argument("-tol", dest="tolerance", action="store", type=float, default=0,
help="tolerance to achieve convergence")
parser.add_argument("-o", dest="output_proj",
action="store", type=str, default="projection.out",
help="ELViM projection coordinates")
parser.add_argument("-odm", dest="output_dmat",
action="store", type=str, default=None,
help="Condensed dissimilarity matrix in binary file")
opt = parser.parse_args()
traj_file=opt.trajectory_file
top_file=opt.topology_file
dmat_file=opt.dmat
coords_file=opt.coords
s0 = opt.sigma0
max_it=opt.max_it
lr0=opt.learning_rate0
lrmin=opt.learning_rate_min
tol=opt.tolerance
decay=opt.decay
outdm=opt.output_dmat
outp=opt.output_proj
#### Check if only one of the possible files are parsed to avoid ambiguities between them.
files = [traj_file, dmat_file, coords_file]
nfiles = sum(files is not None for files in files)
if nfiles == 0:
raise RuntimeError('''One of the following files should be provided:
(1) A trajectory file (plus a topology)
(2) A dissimilarity matrix
(3) A MDtraj coordinates file
Try "python ELViM.py -h" to see the options. ''')
elif nfiles > 1:
raise RuntimeError('''Only one of the following files should be provided:
(1) A trajectory file (plus a topology)
(2) A dissimilarity matrix
(3) A MDtraj coordinates file).''')
#Read traj and calculate the dissimilarity matrix
if traj_file!=None:
coords=get_coords(traj_file, top_file)
dmat = calc_dmat(coords, s0, outdm)
size, atom, _=coords.shape
print('Dissimilaty matrix calculated considering {} conformations and {} alpha carbons'.format(size, atom))
elif dmat_file!=None:
dmat, size = read_dmat(dmat_file)
print('Dissimilaty matrix loaded. There are {} conformations'.format(size))
elif coords_file!=None:
try:
coords = np.load(coords_file)
print(''''
Coordinates matix loaded.
Make sure coordinates are for CA only and expressed in Angstroms.
''')
except:
raise RuntimeError('''Failed to load the coords file.
Make sure it is a binary numpy file (.npy),
and the matrix follows the MDtraj format trajectory.xyz*10''')
dmat = calc_dmat(coords, s0, outdm)
size, atom, _=coords.shape
print('Dissimilaty matrix calculated considering {} conformations and {} alpha carbons'.format(size, atom))
### Make the projection
### Initializing the projection with points randomly distributed
projection = np.random.random((size, 2))
if max_it==None:
max_it = int(np.sqrt(size))
### Running force scheme
nr_iterations, error, kstress = force.execute(dmat, projection, max_it, lr0, lrmin, decay, tol)
### write the projection coordinates file
if os.path.exists(outp):
backup_file(outp)
with open(outp, 'w') as f:
f.write("# ELViM Coordinates file. \n")
if dmat_file==None:
f.write("# Dissimilarity matrix calculated with sigma0={} \n".format(s0))
else:
f.write("# Dissimilarity matrix read from the file : {}\n".format(dmat_file))
f.write("# {} iterations with lr0 = {:5.3f}, lrmin = {:5.3f}, and decay = {:5.3f} \n".format(nr_iterations, lr0, lrmin, decay))
f.write("# Mean projection error = {:6.4f}, Stress = {:6.4f} \n".format(error[nr_iterations-1], kstress[nr_iterations-1]))
np.savetxt(f, projection, delimiter=" ", fmt="%.4f")
if __name__=="__main__":
main()
exit(0)