-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathion_dynamics.py
917 lines (717 loc) · 37.1 KB
/
ion_dynamics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
# Class to convert MD simulations to 3D trajectories for use in HDP-AR-HMM
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm
import warnings
import MDAnalysis as mda
from MDAnalysis.analysis.rdf import InterRDF
import MDAnalysis.transformations as trans
import multiprocessing
def plot_colored_line(x, y, c, ax, **lc_kwargs): # from https://matplotlib.org/stable/gallery/lines_bars_and_markers/multicolored_line.html
'''
Plot a line with a color specified along the line by a third value.
It does this by creating a collection of line segments. Each line segment is
made up of two straight lines each connecting the current (x, y) point to the
midpoints of the lines connecting the current point with its two neighbors.
This creates a smooth line with no gaps between the line segments.
Parameters
----------
x, y : array-like
The horizontal and vertical coordinates of the data points.
c : array-like
The color values, which should be the same size as x and y.
ax : Axes
Axis object on which to plot the colored line.
**lc_kwargs
Any additional arguments to pass to matplotlib.collections.LineCollection
constructor. This should not include the array keyword argument because
that is set to the color argument. If provided, it will be overridden.
Returns
-------
matplotlib.collections.LineCollection
The generated line collection representing the colored line.
'''
if "array" in lc_kwargs:
warnings.warn('The provided "array" keyword argument will be overridden')
# Default the capstyle to butt so that the line segments smoothly line up
default_kwargs = {"capstyle": "butt"}
default_kwargs.update(lc_kwargs)
# Compute the midpoints of the line segments. Include the first and last points
# twice so we don't need any special syntax later to handle them.
x = np.asarray(x)
y = np.asarray(y)
x_midpts = np.hstack((x[0], 0.5 * (x[1:] + x[:-1]), x[-1]))
y_midpts = np.hstack((y[0], 0.5 * (y[1:] + y[:-1]), y[-1]))
# Determine the start, middle, and end coordinate pair of each line segment.
# Use the reshape to add an extra dimension so each pair of points is in its
# own list. Then concatenate them to create:
# [
# [(x1_start, y1_start), (x1_mid, y1_mid), (x1_end, y1_end)],
# [(x2_start, y2_start), (x2_mid, y2_mid), (x2_end, y2_end)],
# ...
# ]
coord_start = np.column_stack((x_midpts[:-1], y_midpts[:-1]))[:, np.newaxis, :]
coord_mid = np.column_stack((x, y))[:, np.newaxis, :]
coord_end = np.column_stack((x_midpts[1:], y_midpts[1:]))[:, np.newaxis, :]
segments = np.concatenate((coord_start, coord_mid, coord_end), axis=1)
lc = LineCollection(segments, **default_kwargs)
lc.set_array(c) # set the colors of each segment
return ax.add_collection(lc)
def plot_xyz_trajectories(x, y, z, t, c=None, c_label=None, ax=None, lc_kwargs={}, cbar_kwargs={}):
'''
Plot x, y, and z trajectories as a time series colored by a separate variable c
Parameters
----------
x : array-like
x-coordinate of trajectory
y : array-like
y-coordinate of trajectory
z : array-like
z-coordinate of trajectory
t : array-like
Time values for the trajectory in ns. This should be the same size as x, y, and z.
c : array-like, optional
The color values, which should be the same size as x, y, and z. Default = None means use t.
c_label : str, optional
The label for the colorbar. Default = None
ax : matplotlib axis, optional
Axis to plot on. If None, a new figure will be created. Default = None
lc_kwargs : dict
Any additional arguments to pass to matplotlib.collections.LineCollection
constructor. This should not include the array keyword argument because
that is set to the color argument. If provided, it will be overridden.
cbar_kwargs : dict
Any additional arguments to pass to matplotlib.colorbar
Returns
-------
fig : matplotlib figure
Figure with the trajectories plotted
ax : matplotlib axis
Axis with the trajectories plotted
'''
if ax is None:
fig, ax = plt.subplots(3,1, figsize=(12,9), sharex=True)
if c is None:
c = t
traj = [x,y,z]
for d,dim in enumerate(['x','y','z']):
line = plot_colored_line(t, traj[d], c, ax[d], linewidth=2, **lc_kwargs)
ax[d].set_ylabel(f'{dim} coordinate ($\AA$)')
ax[d].set_ylim(traj[d].min(), traj[d].max())
fig.colorbar(line, ax=ax[:], location='right', label=c_label, fraction=0.046, pad=0.04, **cbar_kwargs)
ax[2].set_xlabel('time (ns)')
ax[2].set_xlim(0, t.max())
return ax
class IonDynamics:
def __init__(self, topology, trajectory, transform=True, njobs=1,
polymer_selection='resname PA*', water_selection='resname SOL',
cation_selection='resname NA', anion_selection='resname CL'):
'''
Class to handle ion trajectories from MD simulations of ions within a polymer membrane
Parameters
----------
topology : str
Topology file for the MDAnalysis Universe
trajectory : str, list of str
Trajectory file(s) for the MDAnalysis Universe
transform : bool, optional
Whether to perform the unwrapping transformations. Only set to False if the transforms
have already been applied to `trajectory`. Default = True
njobs : int, optional
Number of CPUs for the NoJump transformation. Default = 1. -1 means all available.
polymer_selection : str, optional
MDAnalysis selection language for all polymer atoms. Default = 'resname PA*'
water_selection : str, optional
MDAnalysis selection language for all water atoms. Default = 'resname SOL'
cation_selection : str, optional
MDAnalysis selection language for cations. Default = 'resname NA'
anion_selection : str, optional
MDAnalysis selection language for anions. Default = 'resname CL'
'''
# initialize Universe and AtomGroups
self.universe = mda.Universe(topology, trajectory)
self.polymer = self.universe.select_atoms(polymer_selection)
self.waters = self.universe.select_atoms(water_selection)
self.cations = self.universe.select_atoms(cation_selection)
self.anions = self.universe.select_atoms(anion_selection)
self.ions = self.cations + self.anions
for ag in (self.polymer, self.waters, self.cations, self.anions):
print(f'\t{ag[0].resname} group has {len(ag)} atoms')
# initialize some useful variables
self.n_frames = self.universe.trajectory.n_frames
self.dt = self.universe.trajectory[1].time - self.universe.trajectory[0].time # timestep (ps)
self.time = np.arange(self.universe.trajectory[0].time, self.universe.trajectory[-1].time+self.dt, self.dt)
self.state_sequence = None
# set up the trajectory transformations
if njobs == -1:
self.njobs = multiprocessing.cpu_count()
else:
self.njobs = njobs
if transform:
self.universe.trajectory[0]
workflow = [
trans.unwrap(self.universe.atoms),
trans.center_in_box(self.polymer, center='mass'),
trans.wrap(self.universe.atoms),
trans.nojump.NoJump(max_threads=self.njobs)
]
self.universe.trajectory.add_transformations(*workflow)
def __str__(self):
return f'<IonDynamics object with {self.cations.n_atoms} cations and {self.anions.n_atoms} anions over {self.dt*self.n_frames/1000} ns>'
def __repr__(self):
return self.__str__()
def extract_ion_trajectories(self, ions, filename='ion_trajectories.csv'):
'''
Extract the ion trajectories from the MD simulation and save as a csv file
Parameters
----------
ions : MDAnalysis AtomGroup
AtomGroup with the ions to extract
filename : str
Name of the file to save trajectories to. Should be a csv. Default = 'ion_trajectories.csv'
Returns
-------
ion_trajectories : pd.DataFrame
Pandas DataFrame with ion trajectories. Columns are the ion index in the simulation,
the ion type, and the x,y,z coordinates. Also stored as an attribute.
'''
# first, get the trajectories as np.ndarray
traj = self._trajectory_to_numpy(ions)
# pick only ions that are within the middle 50% of the polymer throughout the simulation
self.membrane_ions_traj = {}
idx = []
for a, atom in enumerate(ions):
below_ub = (traj[:,a,2] >= self.membrane_bounds[:,1]).all()
above_lb = (traj[:,a,2] <= self.membrane_bounds[:,3]).all()
if below_ub and above_lb:
self.membrane_ions_traj[atom.index] = traj[:,a,:]
idx.append(atom.index)
self.membrane_ions = self.universe.select_atoms('index ' + ' '.join(map(str, idx)))
# save as a pd.DataFrame and save as csv
data = [(ion_id, *coord) for ion_id, coords in self.membrane_ions_traj.items() for coord in coords]
self.ion_trajectories = pd.DataFrame(data, columns=['ion_index', 'x', 'y', 'z'])
ion_types = []
for idx in self.membrane_ions_traj:
ion = self.universe.select_atoms(f'index {idx}')[0]
[ion_types.append(t) for t in [ion.type]*self.n_frames]
self.ion_trajectories['ion_type'] = ion_types
self.ion_trajectories.to_csv(filename, index=False)
return self.ion_trajectories
def write_xtc(self, filename, atom_group=None, frames=None):
'''
Write an xtc file with MD trajectory
Parameters
----------
filename : str
Name of the output xtc file
atom_group : MDAnalysis AtomGroup, optional
Atom group to write to xtc. Default = None, means use all atoms.
frames : array-like, optional
Frames to write to the xtc file. Default = None, means write all frames.
'''
if atom_group is None:
atom_group = self.universe.atoms
if frames is None:
frames = np.arange(self.n_frames)
with mda.Writer(filename, atom_group.n_atoms) as w:
for ts in tqdm(self.universe.trajectory[frames]):
w.write(atom_group)
def write_gro(self, filename, atom_group=None):
'''
Write a gro file with the coordinates of the atom group
Parameters
----------
filename : str
Name of the output gro file
atom_group : MDAnalysis AtomGroup, optional
Atom group to write to gro. Default = None, means use all atoms.
'''
if atom_group is None:
atom_group = self.universe.atoms
atom_group.write(filename)
def load_state_sequence(self, state_sequence, molecules):
'''
Load the state sequence that was output from the HDP-AR-HMM. Saved as an attribute `self.state_sequence`
in the format {molecule_index: state_sequence}. Creates attributes `self.unique_states` (dict) that contains
the unique states for each molecule, `self.found_states` (np.ndarray) that contains all the states identified,
and `self.n_states` (int) with the number of `found_states`.
Parameters
----------
state_sequence : np.ndarray
State sequence, shape (n_frames,n_molecules). This can be directly loaded from hdphmm.hdphmm.InifiniteHMM.z.T,
i.e. the transpose of the state matrix.
molecules : array-like or MDAnalysis AtomGroup
Molecules to which the state sequence corresponds. This should be the same as the number of columns in
`state_sequence`. This can be a list of molecule indices or an MDAnalysis AtomGroup with the same number
of atoms as the number of columns in `state_sequence`. If a list of indices, the indices should be
the same as the indices in the MDAnalysis Universe.
Returns
-------
state_sequence : dict
State sequences for each molecule. The keys are the indices of the molecules in the MDAnalysis Universe
and the values are the state sequences for each molecule. The shape of each state sequence is (n_frames,).
'''
if len(molecules) != state_sequence.shape[1]:
raise ValueError(f'Number of molecules ({len(molecules)}) does not match the number of columns in state_sequence ({state_sequence.shape[1]})')
self._state_sequence_array = state_sequence
self.state_sequence = {}
self.unique_states = {}
for i,molecule in enumerate(molecules):
if isinstance(molecule, mda.core.groups.Atom):
molecule = molecule.index
elif isinstance(molecule, (int, np.integer)):
pass
else:
raise ValueError(f'molecules must be a list of indices or an MDAnalysis AtomGroup but received {type(molecules)}')
self.state_sequence[molecule] = state_sequence[:,i]
self.unique_states[molecule] = np.unique(state_sequence[:,i])
self.found_states = np.unique(state_sequence)
self.n_states = len(self.found_states)
return self.state_sequence
def project_trajectories(self, n_steps, parameters, n_ions=1):
'''
Project the ion dynamics described by `parameters` to arbitrary time scales (`n_steps`).
New dynamics are generated by the state-space VAR(1) model. First, we generate a realization of
Markov states using the transition matrix `parameters['T']`. Then, we generate the coordinates
with the VAR(1) model based on the sequence of Markov states.
x_t = A_z * x_(t-1) + e_(z,t), where A_z is the VAR(1) parameter matrix (`parameters['A']`) for
state z and e_(z,t) is the Gaussian noise with mean 0 and covariance matrix `parameters['sigma']`.
Parameters
----------
n_steps : int
Number of steps to project the dynamics for
parameters : dict
Parameters from the fit HDP-AR-HMM. Should be input directly as hdphmm.hdphmm.InfiniteHMM.converged_params[...][-1]
i.e. the final parameters from the fit. This should contain the following keys:
- 'A' : VAR(1) parameter matrix
- 'sigma' : covariance matrix for the noise
- 'T' : transition matrix, shape=(found_states,found_states)
- 'pi_init' : transition matrix, shape=(max_states,max_states)
- 'mu' : mean vector for the noise
n_ions : int, optional
Number of ions to project the dynamics for. Default = 1
Returns
-------
projected_trajectories : np.ndarray
Projected trajectories for the ions, shape=(n_steps,n_ions,3)
projected_states : np.ndarray
Projected states for the ions, shape=(n_steps,n_ions)
'''
import deeptime.markov as markov
if self.state_sequence is None:
raise ValueError('No state sequence loaded. Please load a state sequence first.')
# create a MSM using deeptime in order to generate the state sequence
state_sequence_idx = np.zeros((n_steps, n_ions), dtype=int)
state_sequence = np.zeros((n_steps, n_ions), dtype=int)
for ion in range(n_ions):
msm = markov.msm.MarkovStateModel(parameters['T'][0,...])
state_sequence_idx[:,ion] = msm.simulate(n_steps) # this will be in terms of the indices of ihmm.found_states
state_sequence[:,ion] = np.array([self.found_states[i] for i in state_sequence_idx[:,ion]])
projected_states = state_sequence
# project the trajectories using the VAR(1) model
# here, we shift the trajectory at each state change, so the particles actually move and are not
# centered at 0
projected_trajectory = np.zeros((n_steps,n_ions,3))
for ion in range(n_ions):
x_prev = np.zeros(3) # start at [0,0,0]
z_idx_prev = state_sequence_idx[0,ion]
shift = np.zeros(3) # no shift at the start
for i in tqdm(range(1,n_steps)):
z_idx = state_sequence_idx[i, ion]
A_z = parameters['A'][0,0,...,z_idx] # autoregressive matrix for this state
sigma_z = parameters['sigma'][0,...,z_idx] # error covariance matrix for this state
# if the state has changed, we need to shift the trajectory to the last
if z_idx != z_idx_prev:
shift = projected_trajectory[i-1,ion,:]
x_i = A_z @ x_prev + np.random.multivariate_normal(np.zeros(3), sigma_z)
projected_trajectory[i,ion,:] = x_i + shift
x_prev = x_i
z_idx_prev = z_idx
projected_trajectory = projected_trajectory[1:,...] # shift by the AR order (i.e. 1)
return projected_trajectory, projected_states
def partition_coefficient(self, ions, states, verbose=False):
'''
Calculate the partition coefficient for ions entering the membrane. The partition coefficient is
defined as the ratio of the rate of ions entering the membrane to the rate of ions exiting the
membrane.
This method fits a Markov state model for three states -- in solution, in the interface,
and in the bulk membrane. The rates are calculated using the Mean First Passage Time (MFPT) for going
from the solution to the bulk (entry) and from the bulk to the solution (exit).
k = 1 / MFPT
Parameters
----------
ions : MDAnalysis AtomGroup
AtomGroup with the ions to calculate the partition coefficient for
states : array-like
Time series states for the ions (n_frames,n_ions)
verbose : bool, optional
Whether to print the results. Default = False
Returns
-------
partition_coefficient : float
Partition coefficient for the ions.
'''
import deeptime.markov as markov
from deeptime.markov.tools.analysis import mfpt
# Fit a Markov model on the state data
# first, count the transitions between states
counter = markov.TransitionCountEstimator(lagtime=1, count_mode='effective', n_states=3) # note: lagtime=1 means I am using every timestep
counts = counter.fit_fetch(states)
if verbose:
print('Counts matrix:\n', counts.count_matrix)
# fit the Markov model
estimator = markov.msm.MaximumLikelihoodMSM(reversible=True, stationary_distribution_constraint=None)
msm = estimator.fit_fetch(counts)
if verbose:
print('\nEstimated transition matrix:\n', msm.transition_matrix)
print('\nEstimated stationary distribution:\n', msm.stationary_distribution)
# Estimate the rate at which ions go from the solution (state 0) to the bulk (state 2)
# to do this, we will calculate the mean first passage time (MFPT) from state 0 to state 2
# the mean first passage time is the expected number of steps it takes to read a particular state for the first time
# this value is then the inverse of the rate
# Calculate the rate an ion moves from state i to state j using the transition matrix P
P = msm.transition_matrix
solution_state = 0 # state index for solution
bulk_state = 2 # state index for bulk
k_entry = 1 / mfpt(P, target=bulk_state, origin=solution_state) * (self.dt/1000) # rate from solution to bulk in ns^-1
k_exit = 1 / mfpt(P, target=solution_state, origin=bulk_state) * (self.dt/1000) # rate from bulk to solution in ns^-1
if verbose:
print(f'Rate of entry into the membrane: {k_entry:.4f} ns^-1, which corresponds to a MFPT of {1/k_entry:.4f} ns')
print(f'Rate of exit from the membrane: {k_exit:.4f} ns^-1, which corresponds to a MFPT of {1/k_exit:.4f} ns')
print(f'Partition coefficient: {k_entry/k_exit:.4f}')
return k_entry / k_exit
def coordination_environment(self, ions, show_cutoffs=False, cation_radius=3.15):
'''
Get the coordination environment of a given ion. Uses SolvationAnalysis to estimate the coordination shells
and to speciate all the different groups. Saves the solvation_analysis.Solute object to `self.solute`.
For more information, look at their documentation: https://solvation-analysis.readthedocs.io/en/latest/index.html
Parameters
----------
ions : MDAnalaysis AtomGroup
Ions to calculate the coordination environment for
show_cutoffs : bool, optional
Whether to show the cutoffs for the different groups. Default = False
cation_radius : float, optional
Radius to determine cation-cation coordination. This will likely never happen, but it is here for completeness.
Default = 3.15 Angstroms is the Na-water minimum in the RDF.
Returns
-------
speciation : solvation_analysis.speciation.Speciation class
Speciation class with the coordination environment for each ion. Contains speciation data, speciation
fractions, etc.
'''
from solvation_analysis.solute import Solute
xlink_c = self.universe.select_atoms(f'(type c) and (bonded type n)')
cooh_c = self.universe.select_atoms(f'(type c) and (bonded type oh)')
coo_c = self.universe.select_atoms(f'(type c) and (not bonded type oh n)')
cooh_oh = self.universe.select_atoms(f'type oh')
amide_o = self.universe.select_atoms(f'(type o) and (bonded group xlink_c)', xlink_c=xlink_c)
coo_o = self.universe.select_atoms(f'(type o) and (bonded group coo_c)', coo_c=coo_c)
nh2 = self.universe.select_atoms(f'type nh')
cations = self.cations
anions = self.universe.select_atoms(f'resname CL')
waters = self.waters.select_atoms(f'type OW')
self.solute = Solute.from_atoms(ions,
{
'amide_o' : amide_o,
'cooh_oh' : cooh_oh,
'coo_o' : coo_o,
'nh2' : nh2,
'anions' : anions,
'cations' : cations,
'waters' : waters
}, solute_name='Ion',
radii = {'cations' : cation_radius})
self.solute.run()
if show_cutoffs:
# iterate through solvents
for solvent in self.solute.solvents.keys():
# plot the RDF!
fig = self.solute.plot_solvation_radius('Ion', solvent)
fig.show()
return self.solute.speciation
def density_profile(self, frame, atom_groups, bins=None, bin_width=0.5, dim='z', method='atom'):
'''
Calculate the partial density across the box for a given atom group
Parameters
----------
frame : int
Frame of the trajectory
atom_groups : list of MDAnalysis AtomGroups
List of AtomGroups to calculate the density profile for
bins : np.ndarray, optional
Bins to use for the density profile. Default = None, which means bins will be created
based on the bin_width
bin_width : float, optional
Width of the bins for the density profile. Default = 0.5 Angstroms
dim : str, optional
Dimension to calculate the density profile. Default = 'z'
method : str, optional
Method to calculate the density profile. Options are 'atom', 'molecule', 'mass', and 'charge'.
Default = 'atom' gives the number of atoms in each bin. 'molecule' gives the number of molecules in
each bin. 'mass' gives the mass density in each bin. 'charge' gives the charge density in each bin.
Returns
-------
bin_centers : np.ndarray
Array of bin centers, shape (n_bins,)
counts : np.ndarray
Array of counts per bin, shape (n_bins, n_atom_groups)
'''
self.universe.trajectory[frame]
dims = {'x': 0, 'y': 1, 'z': 2}
d = dims[dim]
box = self.universe.dimensions
if bins is None:
n_bins = int(box[d] / bin_width)
bins = np.linspace(0, box[d], num=n_bins)
else:
n_bins = len(bins)
counts = np.zeros((n_bins-1, len(atom_groups)))
for b in range(n_bins-1):
lb = bins[b]
ub = bins[b+1]
for ag,atom_group in enumerate(atom_groups):
bin_atoms = atom_group.select_atoms(f'(prop {dim} >= {lb}) and (prop {dim} <= {ub})')
if method == 'atom':
counts[b,ag] += len(bin_atoms)
elif method == 'molecule':
counts[b,ag] += bin_atoms.n_residues
elif method == 'mass':
dV = box[0] * box[1] * (ub-lb) * (10**-8)**3 # volume in cm^3
mass = bin_atoms.masses.sum() / 6.022 / 10**23
counts[b,ag] += mass / dV
elif method == 'charge':
counts[b,ag] += bin_atoms.charges.sum()
bin_centers = (bins[:-1] + bins[1:]) / 2
return bin_centers, counts
def rdfs_by_state(self, ion, bin_width=0.05, range=(0,20)):
'''
Calculate radial distribution functions for each state for `ion`. This method calculates the RDFs for
cation-water, cation-cation, cation-anion, cation-COOH, cation-COO-, cation-amideO, and cation-NH2
using MDAnalysis InterRDF. It saves the data in a dictionary `rdfs` with keys for each state.
Each state then corresponds to a dictionary of with the RDF types above.
Parameters
----------
ion : int, MDAnalysis Atom
Either index of the ion or an MDAnalysis Atom of the ion
bin_width : float
Width of the bins for the RDFs, default=0.05
range : array-like
Range over which to calculate the RDF, default=(0,20)
Returns
-------
rdfs : dict
Dictionary of dictionaries with the results for each state. The keys are the states and the values
are dictionaries with the RDF types. The keys of the inner dictionaries are the RDF types and the values
are the RDF results.
'''
# check for state sequence and proper types
if self.state_sequence is None:
raise ValueError('No state sequence loaded. Please load a state sequence first.')
if isinstance(ion, mda.core.groups.Atom):
ion = ion.index
elif isinstance(ion, (int, np.integer)):
pass
else:
raise ValueError(f'ion must be an index or an MDAnalysis Atom but received {type(ion)}')
# initialize bins and selections
nbins = int((range[1] - range[0]) / bin_width)
noH = self.universe.select_atoms(f'(not element H)') - self.universe.select_atoms(f'index {ion}')
xlink = self.universe.select_atoms(f'(type c) and (bonded type n)')
cooh_C = self.universe.select_atoms(f'(type c) and (bonded type oh)')
cooh_OH = self.universe.select_atoms(f'(type oh)')
amideO = self.universe.select_atoms(f'(type o) and (bonded group xlink)', xlink=xlink)
coo_O = self.universe.select_atoms(f'(type o) and (not bonded group cooh_C) and (not bonded group xlink)', cooh_C=cooh_C, xlink=xlink)
nh2 = self.universe.select_atoms(f'(type nh)')
waters = self.waters.select_atoms(f'(type OW)')
cations = self.cations - self.universe.select_atoms(f'index {ion}')
polymer = self.polymer.select_atoms('not element H')
# prepare the selections and their labels
selections = [waters, cations, self.anions, cooh_OH,
coo_O, amideO, nh2, polymer, noH]
rdf_types = ['cation-water', 'cation-cation', 'cation-anion', 'cation-COOH',
'cation-COO-', 'cation-amideO', 'cation-NH2', 'cation-polymer',
'cation-noH']
# generate the RDF data
rdfs = {}
for state in self.found_states:
idx = np.where(self.state_sequence[ion] == state)[0]
print(f'Calculating RDFs for state {state} ({len(idx)} frames)')
rdfs[state] = {}
for l,label in enumerate(rdf_types):
rdf = InterRDF(self.universe.select_atoms(f'index {ion}'),
selections[l],
range=range, norm='rdf', verbose=True)
rdf.run(frames=idx)
# calculate the renormalization factor
Na = 1 # since we are only looking at one ion
Nb = len(noH) # normalization should always be done with respect to the noH group
volume = rdf.volume_cum / rdf.n_frames
shell_vol = np.power(rdf.results.edges[1:], 3) - np.power(rdf.results.edges[:-1], 3)
shell_vol *= 4/3.0 * np.pi
norm = 1 / (Na * Nb / volume * shell_vol * rdf.n_frames)
# save the renormalized RDF to the results
rdf.results.renorm = rdf.results.count * norm
rdfs[state][label] = rdf.results
return rdfs
def plot_membrane(self, frame=None, cation_color='blue', anion_color='limegreen',
ydim='x', show_ions=True, show_zones=False, ax=None):
'''
Plot the membrane in the ydim-z plane
Parameters
----------
frame : int, None, optional
Frame of the trajectory to plot. If None, whatever frame the trajectory reader is at will be used.
Default = None
cation_color : str, optional
Color to plot cations. Default = 'blue'
anion_color : str, optional
Color to plot anions. Default = 'limegreen'
ydim : str, optional
Which dimension to plot on the y-axis. Options are 'x' and 'y'. Default = 'x'
show_ions : bool, optional
Whether to plot ions. Default = True
show_zones : bool, optional
Whether to plot geometric polymer zones. Default = False
ax : matplotlib axis, optional
Axis to plot on. If None, a new figure will be created. Default = None
Returns
-------
ax : matplotlib axis
Axis with the membrane plotted
'''
if ax is None:
fig, ax = plt.subplots(1,1, figsize=(8,4))
if frame is not None:
self.universe.trajectory[frame]
if ydim == 'x':
d = 0
elif ydim == 'y':
d = 1
# plot the polymer with a kernel density estimate
polymer = self._coordinates_as_dataframe(self.polymer)
sns.kdeplot(polymer, x='z', y=ydim, cmap='Greys', ax=ax, fill=True,
levels=10, thresh=0.05, cut=0, weights=self.polymer.masses)
# plot scatter points of ions
if show_ions:
ax.scatter(self.cations.positions[:,2], self.cations.positions[:,d], c=cation_color)
ax.scatter(self.anions.positions[:,2], self.anions.positions[:,d], c=anion_color)
# plot lines for geometric polymer regions
if show_zones:
for z in self.polymer_zones:
ax.axvline(z, ls='dashed', c='r')
# plot the periodic boundaries
p = plt.Rectangle((0,0), self.universe.dimensions[2], self.universe.dimensions[0], fill=False, lw=1)
ax.add_patch(p)
ax.set_aspect('equal', adjustable='box')
ax.set_xlim(0,self.universe.dimensions[2])
ax.set_ylim(0,self.universe.dimensions[d])
return ax
def plot_xyz_trajectories(self, x, y, z, c=None, c_label='time (ns)', ax=None, **lc_kwargs):
'''
Plot x, y, and z trajectories as a time series colored by a separate variable c
Parameters
----------
x : array-like
x-coordinate of trajectory
y : array-like
y-coordinate of trajectory
z : array-like
z-coordinate of trajectory
c : array-like, optional
The color values, which should be the same size as x, y, and z. Default = None means use time.
c_label : str, optional
The label for the colorbar. Default = 'time (ns)'
ax : matplotlib axis, optional
Axis to plot on. If None, a new figure will be created. Default = None
**lc_kwargs
Any additional arguments to pass to matplotlib.collections.LineCollection
constructor. This should not include the array keyword argument because
that is set to the color argument. If provided, it will be overridden.
Returns
-------
ax : matplotlib axis
Axis with the membrane plotted
'''
ax = plot_xyz_trajectories(x, y, z, self.time / 1000, c=c, c_label=c_label, ax=ax, **lc_kwargs)
return ax
def _classify_by_location(self, traj_array):
'''
Classify the ions by their location in the polymer zones. The classification is done by
checking the z-coordinate of each ion.
0 : in solution
1 : in interface (outer 25% of polymer)
2 : in bulk membrane (inner 50% of polymer)
Parameters
----------
traj_array : np.ndarray
Array of shape (n_frames, n_ions, 3). This is the trajectory array for the ions. Can be directly
input from `self._trajectory_to_numpy` or `self._traj_array`.
Returns
-------
states_array : np.ndarray
Array of shape (n_frames, n_ions). The classification of each ion at each frame as defined above.
'''
states_array = np.zeros((traj_array.shape[0], traj_array.shape[1]), dtype=int) # 0 = in solution, 1 = in interface (outer 25% polymer), 2 = in bulk (inner 50% polymer)
for t in range(traj_array.shape[0]):
idx = (traj_array[t,:,2] <= self.membrane_bounds[t,0]) | (traj_array[t,:,2] >= self.membrane_bounds[t,4]) # in solution on either side
states_array[t, idx] = 0
idx = (traj_array[t,:,2] > self.membrane_bounds[t,0]) & (traj_array[t,:,2] < self.membrane_bounds[t,1]) # in interface on left side
states_array[t, idx] = 1
idx = (traj_array[t,:,2] > self.membrane_bounds[t,3]) & (traj_array[t,:,2] < self.membrane_bounds[t,4]) # in interface on right side
states_array[t, idx] = 1
idx = (traj_array[t,:,2] >= self.membrane_bounds[t,1]) & (traj_array[t,:,2] <= self.membrane_bounds[t,3]) # in bulk
states_array[t, idx] = 2
return states_array
def _trajectory_to_numpy(self, atom_group):
'''
Convert the trajectories for atoms in a given atom group to a np.ndarray. Also, during each
iteration, saves the polymer zones for each frame to `self.membrane_bounds`, which has shape
(n_frames, 5)
Parameters
----------
atom_group : MDAnalysis AtomGroup
AtomGroup to convert to a DataFrame
Returns
-------
_traj_array : np.ndarray
Array of shape (n_frames, n_atoms, 3). Also, saved as attribute `self._traj_array`
'''
self._traj_array = np.zeros((self.universe.trajectory.n_frames, atom_group.n_atoms, 3))
self.membrane_bounds = np.zeros((self.universe.trajectory.n_frames, 5))
for i,ts in tqdm(enumerate(self.universe.trajectory)):
self.membrane_bounds[i,:] = self.polymer_zones
self._traj_array[i,:,:] = atom_group.positions
return self._traj_array
def _coordinates_as_dataframe(self, atom_group, frame=None):
'''
Convert the coordinates of a given atom group for a single frame to a pandas DataFrame
Parameters
----------
atom_group : MDAnalysis AtomGroup
AtomGroup to convert to a DataFrame
frame : int, None, optional
Frame of the trajectory. If None, whatever frame the trajectory reader is at will be used.
Default = None
Returns
-------
coords : pandas DataFrame
DataFrame with columns 'x', 'y', 'z' for each atom in the Universe
'''
if frame is not None:
self.universe.trajectory[frame]
coords = pd.DataFrame(atom_group.positions, columns=['x', 'y', 'z'])
return coords
@property
def polymer_zones(self):
'''
Get the polymer zones based on simple percentage of z-coordinate
Returns
-------
np.ndarray, shape (5,)
Polymer zones
'''
return np.linspace(self.polymer.positions[:,2].min(), self.polymer.positions[:,2].max(), num=5)