-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcgmap.py
381 lines (315 loc) · 15.7 KB
/
cgmap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
#!/usr/bin/python
import numpy as np
import pandas as pd
from mdtraj import Trajectory
from mdtraj import Topology
from mdtraj import element
from mdtraj.geometry import distance
mapping_options = {}
def map_forces(traj,atom_indices=None,use_pbc=True):
"""Compute the center of mass for each frame.
Parameters
----------
traj : Trajectory
Trajectory to sum forces on
atom_indices : array-like, dtype=int, shape=(n_atoms)
List of indices of atoms to use in computing com
Returns
-------
forces : np.ndarray, shape=(n_frames, 3)
Summed up forces on atom indices
"""
if atom_indices is not None and len(atom_indices)>0:
forces = traj.forces[:,atom_indices,:]
else:
forces = traj.forces
mapped_forces = forces.sum(axis=1)
return mapped_forces
def map_molecules(trj,selection_list,bead_label_list,molecule_types=None,molecule_type_order=False,*args,**kwargs):
""" This performs the mapping where each molecule has been assigned a type.
Specifying molecule_type_order means that the map will be reordered so that all molecules of type 0 come first, then 1, etc
"""
index_list = []
resSeq_list = []
label_list = []
n_molecule_types = len(selection_list)
if molecule_types is None:
molecule_types = [0 for idx in range(trj.top.n_residues)]
if not sorted(set(molecule_types))==range(n_molecule_types):
raise ValueError("Error in map molecules, molecule types list must contain only and all numbers from 0 to n_molecule_types-1")
if len(molecule_types) != trj.top.n_residues:
raise ValueError("Error in map molecules, molecule types list must have the same length as number of residues")
# get the first molecule with molecule type i
first_molecules = [molecule_types.index(i) for i in range(n_molecule_types)]
if len(selection_list) != len(bead_label_list):
raise ValueError("Error in map molecules, must submit selection list and bead label list of same length")
for i in range(n_molecule_types):
if len(selection_list[i]) != len(bead_label_list[i]):
raise ValueError("Error in map molecules, selection list %i and bead label list %i must be of same length"%(i,i))
first_indices = []
internal_indices_list = []
for i in range(n_molecule_types):
internal_indices_list.append([])
first_index = trj.top.select("(resid == %i)"%(first_molecules[i])).min()
for sel in selection_list[i]:
if sel.find("index")>-1 and sel.find("name")>-1:
raise ValueError("Error in map molecules, do not specify selection by index and by type")
elif sel.find("index")>-1:
# use atom selection language to parse selection string containing only indices on whole system, then offset later
internal_indices = trj.top.select("%s"%(sel))
elif sel.find("name")>-1:
# have to un-shift list because this will be added to current id later
internal_indices = trj.top.select("(resid == %i) and (%s)"%(first_molecules[i],sel)) - first_index
if len(internal_indices)==0:
raise ValueError("Error in map_molecules, selection string '%s' produced an empty list of atom indices"%sel)
internal_indices_list[i].append(internal_indices)
start_index = 0
# get list of type [ (0,r0), (1,r1) etc ]
if molecule_type_order is True:
residue_list = sorted( enumerate(trj.top.residues),key=lambda x: molecule_types[x[0]])
else:
residue_list = enumerate(trj.top.residues)
resSeq = 1
for ridx,r in residue_list:
molecule_type = molecule_types[ridx]
for bead_idx, internal_indices in enumerate(internal_indices_list[molecule_type]):
system_indices = internal_indices + start_index
index_list.append(system_indices)
resSeq_list.append(resSeq)
label_list.append(bead_label_list[molecule_type][bead_idx])
resSeq = resSeq+1
start_index = start_index + r.n_atoms
return cg_by_index(trj, index_list, label_list, *args, **kwargs)
def map_identical_molecules(trj,selection_list,bead_label_list,*args,**kwargs):
""" This performs the mapping assuming the entire system is a set of identical molecules"""
index_list = []
resSeq_list = []
label_list = []
internal_indices_list = []
if len(selection_list) != len(bead_label_list):
raise ValueError("Error in map_identical_molecules, must submit selection list and bead label list of same length")
for sel in selection_list:
internal_indices = trj.top.select("(resSeq == 1) and (%s)"%sel)
if len(internal_indices)==0:
raise ValueError("Error in map_identical_molecules, selection string '%s' produced an empty list of atom indices"%sel)
internal_indices_list.append(internal_indices)
start_index = 0
resSeq = 1
for r in trj.top.residues:
for bead_idx, internal_indices in enumerate(internal_indices_list):
system_indices = internal_indices + start_index
index_list.append(system_indices)
resSeq_list.append(resSeq)
label_list.append(bead_label_list[bead_idx])
resSeq = resSeq+1
start_index = start_index + r.n_atoms
return cg_by_index(trj, index_list, label_list, *args, **kwargs)
def compute_center(traj,atom_indices=None,use_pbc=True):
"""Compute the center of mass for each frame.
Parameters
----------
traj : Trajectory
Trajectory to compute center of mass for
atom_indices : array-like, dtype=int, shape=(n_atoms)
List of indices of atoms to use in computing center
Returns
-------
center : np.ndarray, shape=(n_frames, 3)
Coordinates of the mean position of atom_indices for each frame
"""
if atom_indices is not None and len(atom_indices)>0:
xyz = traj.xyz[:,atom_indices,:]
else:
xyz = traj.xyz
center = np.zeros((traj.n_frames, 3))
for i, x in enumerate(xyz):
# use periodic boundaries by centering relative to first xyz coordinate, then shift back
if use_pbc is True:
xyz0 = x[0,:]
shift = traj[i].unitcell_lengths*np.floor( (x - xyz0)/traj[i].unitcell_lengths + 0.5)
x = x - shift
center[i, :] = x.astype('float64').mean(axis=0)
return center
mapping_options['center'] = compute_center
def compute_com(traj,atom_indices=None,use_pbc=True):
"""Compute the center of mass for each frame.
Parameters
----------
traj : Trajectory
Trajectory to compute center of mass for
atom_indices : array-like, dtype=int, shape=(n_atoms)
List of indices of atoms to use in computing com
Returns
-------
com : np.ndarray, shape=(n_frames, 3)
Coordinates of the center of mass for each frame
"""
if atom_indices is not None and len(atom_indices)>0:
xyz = traj.xyz[:,atom_indices,:]
masses = np.array([a.mass for a in traj.top.atoms if a.index in atom_indices])
else:
xyz = traj.xyz
masses = np.array([a.mass for a in traj.top.atoms])
com = np.zeros((traj.n_frames, 3))
masses /= masses.sum()
for i, x in enumerate(xyz):
# use periodic boundaries by centering relative to first xyz coordinate, then shift back
if use_pbc is True:
xyz0 = x[0,:]
shift = traj[i].unitcell_lengths*np.floor( (x - xyz0)/traj[i].unitcell_lengths + 0.5)
x = x - shift
com[i, :] = x.astype('float64').T.dot(masses).flatten()
return com
mapping_options['com_slow'] = compute_com
mapping_options['center_of_mass_slow'] = compute_com
def compute_com_fast(xyz_i,xyz_all,atom_indices,masses,unitcell_lengths=None):
"""Compute the center of mass for each frame.
Parameters
----------
traj : Trajectory
Trajectory to compute center of mass for
atom_indices : array-like, dtype=int, shape=(n_atoms)
List of indices of atoms to use in computing com
Returns
-------
com : np.ndarray, shape=(n_frames, 3)
Coordinates of the center of mass for each frame
"""
#com = xyz_i
masses /= masses.sum()
xyz = xyz_all[:,atom_indices,:]
for i, x in enumerate(xyz):
# use periodic boundaries by centering relative to first xyz coordinate, then shift back
if unitcell_lengths is not None:
xyz0 = x[0,:]
shift = unitcell_lengths[i]*np.floor( (x - xyz0)/unitcell_lengths[i] + 0.5)
x = x - shift
xyz_i[i] = x.astype('float64').T.dot(masses).flatten()
return
# return com
mapping_options['com'] = compute_com_fast
mapping_options['center_of_mass'] = compute_com_fast
def cg_by_selection(trj, selection_string_list, *args, **kwargs):
"""Create a coarse grained (CG) trajectory from list of atom selections by
computing centers of mass of selected sets of atoms.
Parameters
----------
selection_string_list : list of strings in mdtraj selection language, shape=(n_beads,)
bead_label_list : list of maximum 4-letter strings to label CG sites
chain_list : optional list of chain id's to split resulting beads into separate chains
resSeq_list : optional list of residue sequence id's to assign cg residues
segment_id_list : optional list of segment id's to assign cg residues
inplace : bool, default=False
If ``True``, the operation is done inplace, modifying ``trj``.
Otherwise, a copy is returned with the sliced atoms, and
``trj`` is not modified.
bonds : array-like,dtype=int, shape=(n_bonds,2), default=None
If specified, sets these bonds in new topology
Returns
-------
traj : md.Trajectory
The return value is either ``trj``, or the new trajectory,
depending on the value of ``inplace``.
"""
atom_indices_list = []
for sel_string in selection_string_list:
atom_indices_list.append( trj.top.select(sel_string) )
if len(atom_indices_list[-1])==0:
print("Warning - selection string returns 0 atoms: '%s'"%sel_string)
return cg_by_index(trj, atom_indices_list, *args, **kwargs )
def cg_by_index(trj, atom_indices_list, bead_label_list, chain_list=None, segment_id_list=None, resSeq_list=None, inplace=False, bonds=None, mapping_function="com"):
"""Create a coarse grained (CG) trajectory from subsets of atoms by
computing centers of mass of selected sets of atoms.
Parameters
----------
atom_indices_list : list of array-like, dtype=int, shape=(n_beads,n_atoms)
List of indices of atoms to combine into CG sites
bead_label_list : list of maximum 4-letter strings to label CG sites
chain_list : optional list of chain id's to split resulting beads into separate chains
resSeq_list : optional list of residue sequence id's to assign cg residues
segment_id_list : optional list of segment id's to assign cg residues
inplace : bool, default=False
If ``True``, the operation is done inplace, modifying ``trj``.
Otherwise, a copy is returned with the sliced atoms, and
``trj`` is not modified.
bonds : array-like,dtype=int, shape=(n_bonds,2), default=None
If specified, sets these bonds in new topology
mapping_function: string, default='com': how to map xyz coordinates
options: %s
Note - If repeated resSeq values are used, as for a repeated motiff in a CG polymer,
those sections most be broken into separate chains or an incorrect topology will result
Returns
-------
traj : md.Trajectory
The return value is either ``trj``, or the new trajectory,
depending on the value of ``inplace``.
"""%mapping_options.keys()
if not len(atom_indices_list)==len(bead_label_list):
raise ValueError("Must supply a list of bead labels of the same length as a list of selected atom indices")
for bead_label in bead_label_list:
if not (type(bead_label) is str) or len(bead_label)>4 or len(bead_label)<1:
raise ValueError("Specified bead label '%s' is not valid, must be a string between 1 and 4 characters"%bead_label)
bead_label_list = [ bead_label.upper() for bead_label in bead_label_list ]
if mapping_function not in mapping_options:
raise ValueError("Must select a mapping function from: %s"%mapping_options.keys())
map_coords = mapping_options[mapping_function]
if chain_list is None:
chain_list = np.ones(len(atom_indices_list),dtype=int)
elif len(chain_list)!=len(atom_indices_list):
raise ValueError("Supplied chain_list must be of the same length as a list of selected atom indices")
if segment_id_list is not None and len(segment_id_list)!=len(atom_indices_list):
raise ValueError("Supplied segment_id_list must be of the same length as a list of selected atom indices")
if resSeq_list is not None and len(resSeq_list)!=len(atom_indices_list):
raise ValueError("Supplied resSeq_list must be of the same length as a list of selected atom indices")
n_beads = len(atom_indices_list)
xyz = np.zeros((trj.xyz.shape[0],n_beads,trj.xyz.shape[2]),dtype=trj.xyz.dtype,order='C')
forces = np.zeros((trj.xyz.shape[0],n_beads,trj.xyz.shape[2]),dtype=np.double,order='C')
columns = ["serial","name","element","resSeq","resName","chainID"]
masses = np.array([ np.sum([a.mass for a in trj.top.atoms if a.index in atom_indices]) for atom_indices in atom_indices_list],dtype=np.float64)
charges = np.array([ np.sum([a.charge for a in trj.top.atoms if a.index in atom_indices]) for atom_indices in atom_indices_list],dtype=np.float64)
topology_labels = []
element_label_dict = {}
xyz_i = np.zeros((trj.xyz.shape[0],trj.xyz.shape[2]),dtype=trj.xyz.dtype,order='C')
for i in range(n_beads):
atom_indices = atom_indices_list[i]
bead_label = bead_label_list[i]
#xyz_i = map_coords(trj,atom_indices)
masses_i = np.array([a.mass for a in trj.top.atoms if a.index in atom_indices_list[i]],dtype=np.float64)
map_coords(xyz_i,trj.xyz,atom_indices,masses_i,unitcell_lengths=trj.unitcell_lengths)
xyz[:,i,:] = xyz_i
if "forces" in trj.__dict__ and len(trj.forces)>0:
forces_i = map_forces(trj,atom_indices)
forces[:,i,:] = forces_i
if resSeq_list is not None:
resSeq = resSeq_list[i]
else:
resSeq = i + 1
#element_label='%4s'%('B%i'%(resSeq))
if not bead_label in element_label_dict:
element_label='%2s'%('B%i'%(len(element_label_dict)%10))
element_label_dict[bead_label] = element_label
else:
element_label = element_label_dict[bead_label]
if element_label.strip().upper() not in element.Element._elements_by_symbol:
element.Element(1000+resSeq, element_label, element_label, masses[i], 1.0)
topology_labels.append( [i,bead_label,element_label,resSeq,'%3s'%bead_label,chain_list[i]] )
df = pd.DataFrame(topology_labels,columns=columns)
topology = Topology.from_dataframe(df,bonds=bonds)
if segment_id_list is not None:
for beadidx,bead in enumerate(topology.atoms):
bead.residue.segment_id = segment_id_list[beadidx]
if inplace:
if trj._topology is not None:
trj._topology = topology
trj._xyz = xyz
return trj
unitcell_lengths = unitcell_angles = None
if trj._have_unitcell:
unitcell_lengths = trj._unitcell_lengths.copy()
unitcell_angles = trj._unitcell_angles.copy()
time = trj._time.copy()
new_trj = Trajectory(xyz=xyz, topology=topology, time=time,
unitcell_lengths=unitcell_lengths,
unitcell_angles=unitcell_angles)
new_trj.forces = forces
return new_trj