-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsolvation_shells_utils.py
2886 lines (2247 loc) · 109 KB
/
solvation_shells_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Hold functions and classes to analyze solvation shells
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle
import pymbar
import MDAnalysis as mda
from MDAnalysis.analysis import distances
from MDAnalysis.analysis.base import Results
from scipy.spatial import ConvexHull
from scipy.signal import find_peaks
from sklearn.decomposition import PCA
import subprocess
from textwrap import dedent
from glob import glob
from tqdm import tqdm
import time
from linear_algebra import *
def load_object(filename):
with open(filename, 'rb') as f:
return pickle.load(f)
def run(commands):
'''Run commands with subprocess'''
if not isinstance(commands, list):
commands = [commands]
for cmd in commands:
subprocess.run(cmd, shell=True)
def grompp(gro, mdp, top, tpr=None, gmx='gmx', flags={}, dry_run=False):
'''
Run grompp with mdp file on gro file with topology top
flags should be a dictionary containing any additional flags, e.g. flags = {'maxwarn' : 1}
'''
if tpr is None:
tpr = gro.split('.gro')[0] + '.tpr'
cmd = [f'{gmx} grompp -f {mdp} -p {top} -c {gro} -o {tpr}']
for f in flags:
cmd[0] += f' -{f} {flags[f]}'
if dry_run:
print(cmd)
else:
run(cmd)
return tpr
def mdrun(tpr, output=None, gmx='gmx', flags={}, dry_run=False):
'''
Run GROMACS with tpr file
flags should be a dictionary containing any additional flags, e.g. flags = {'maxwarn' : 1}
'''
if output is None:
output = tpr.split('.tpr')[0]
cmd = [f'{gmx} mdrun -s {tpr} -deffnm {output}']
for f in flags:
cmd[0] += f' -{f} {flags[f]}'
if dry_run:
print(cmd)
else:
run(cmd)
return output + '.gro'
def write_packmol(C, cation, anion, cation_charge=1, anion_charge=-1, n_waters=1000, water='water.pdb', filename='solution.inp', packmol_options=None):
'''
Write packmol input file for standard MD simulation
Parameters
----------
C : float
Concentration of the solution
cation : str
Filename of the cation in solution
anion : str
Filename of the anion in solution
cation_charge : int
Charge on the cation, default=+1
anion_charge : int
Charge on the anion, default=-1
n_waters : int
Number of water molecules to pack in system, default=1000
water : str
Filename of the water molecule in solution, default='water.pdb'
filename : str
Filename for the packmol input file
packmol_options : dict
Additional options to put in the packmol input file, default=None uses some presets.
If specified, should have 'seed', 'tolerance', 'filetype', 'output', 'box'.
Returns
-------
filename : str
Filename of the packmol input file
'''
if packmol_options is None:
packmol_options = {
'seed' : 123456,
'tolerance' : 2.0,
'filetype' : 'pdb',
'output' : filename.split('.inp')[0],
'box' : 32
}
# (n_cations / salt molecule) * (N_A salt molecules / mol salt) * (C mol salt / L water) * (L water / rho g water) * (18.02 g / mol) * (mol / N_A molecules) * (n_water molecules)
n_cations = (-cation_charge/anion_charge) * C / 997 * 18.02 * n_waters
n_anions = (-anion_charge/cation_charge) * C / 997 * 18.02 * n_waters
print(f'For a box of {n_waters} waters, would need {n_cations} cations and {n_anions} anions.')
n_cations = round(n_cations)
n_anions = round(n_anions)
print(f'So, adding {n_cations} cations and {n_anions} anions...')
f = dedent(f'''\
#
# A mixture of water and salt
#
# All the atoms from diferent molecules will be separated at least {packmol_options['tolerance']}
# Anstroms at the solution.
seed {packmol_options['seed']}
tolerance {packmol_options['tolerance']}
filetype {packmol_options['filetype']}
# The name of the output file
output {packmol_options['output']}
# {n_waters} water molecules and {n_cations} cations, {n_anions} anions will be put in a box
# defined by the minimum coordinates x, y and z = 0. 0. 0. and maximum
# coordinates {packmol_options['box']}. {packmol_options['box']}. {packmol_options['box']}. That is, they will be put in a cube of side
# {packmol_options['box']} Angstroms. (the keyword "inside cube 0. 0. 0. {packmol_options['box']}.") could be used as well.
structure {water}
number {n_waters}
inside box 0. 0. 0. {packmol_options['box']}. {packmol_options['box']}. {packmol_options['box']}.
end structure
structure {cation}
number {n_cations}
inside box 0. 0. 0. {packmol_options['box']}. {packmol_options['box']}. {packmol_options['box']}.
end structure
structure {anion}
number {n_anions}
inside box 0. 0. 0. {packmol_options['box']}. {packmol_options['box']}. {packmol_options['box']}.
end structure
''')
out = open(filename, 'w')
out.write(f)
out.close()
return filename
def write_plumed_metad(options, filename='plumed.dat'):
'''Write plumed.dat file for metadynamics simulation'''
f = dedent(f'''\
water_group: GROUP ATOMS=1-3000:3 # oxygen atom of the water molecules
n: COORDINATION GROUPA=3001 GROUPB=water_group SWITCH={{Q REF={options['R_0']} BETA={options['a']} LAMBDA=1 R_0={options['R_0']}}}
t: MATHEVAL ARG=n FUNC=1000-x PERIODIC=NO
PRINT STRIDE=10 ARG=* FILE=COLVAR
''')
out = open(filename, 'w')
out.write(f)
out.close()
return filename
def write_plumed_umbrella(options, filename='plumed.dat'):
'''Write plumed input file for umbrella sampling simulation'''
f = dedent(f'''\
water_group: GROUP ATOMS=1-{options['N_WATERS']*3}:3 # oxygen atom of the water molecules
n: COORDINATION GROUPA={options['N_WATERS']*3+1} GROUPB=water_group SWITCH={{Q REF={options['R_0']} BETA=-21.497624558253246 LAMBDA=1 R_0={options['R_0']}}}
t: MATHEVAL ARG=n FUNC={options['N_WATERS']}-x PERIODIC=NO
r: RESTRAINT ARG=t KAPPA={options['KAPPA']} AT={options['AT']} # apply a harmonic restraint at CN=AT with force constant = KAPPA kJ/mol
PRINT STRIDE={options['STRIDE']} ARG=* FILE={options['FILE']}
''')
out = open(filename, 'w')
out.write(f)
out.close()
return filename
def write_plumed_decoordination(options, filename='plumed.dat'):
'''Write plumed input file for umbrella sampling simulation biased in total coordination'''
f = dedent(f'''\
ion: GROUP NDX_FILE={options['ndx']} NDX_GROUP={options['ion_group']}
not_ion: GROUP NDX_FILE={options['ndx']} NDX_GROUP={options['not_ion_group']}
n: COORDINATION GROUPA=ion GROUPB=not_ion SWITCH={{Q REF={options['R_0']} BETA=-{options['a']} LAMBDA=1 R_0={options['R_0']}}}
t: MATHEVAL ARG=n FUNC={options['n_group']}-x PERIODIC=NO
r: RESTRAINT ARG=t KAPPA={options['KAPPA']} AT={options['AT']} # apply a harmonic restraint at CN=AT with force constant = KAPPA kJ/mol
PRINT STRIDE={options['STRIDE']} ARG=* FILE={options['FILE']}
''')
out = open(filename, 'w')
out.write(f)
out.close()
return filename
def write_sbatch_umbrella(options, filename='submit.job'):
'''Write SLURM submission script to run an individual umbrella simulation'''
f = dedent(f'''\
#!/bin/bash
#SBATCH -A chm230020p
#SBATCH -N 1
#SBATCH --ntasks-per-node={options['ntasks']}
#SBATCH -t {options['time']}
#SBATCH -p RM-shared
#SBATCH -J '{options['job']}_{options['sim_num']}'
#SBATCH -o '%x.out'
#SBATCH --mail-type=END
#SBATCH [email protected]
module load gcc
module load openmpi/4.0.2-gcc8.3.1
source /jet/home/schwinns/.bashrc
source /jet/home/schwinns/pkgs/gromacs-plumed/bin/GMXRC
# run umbrella sampling simulations and analysis
python run_umbrella_sim.py -N {options['sim_num']} -g {options['gro']} -m {options['mdp']} -p {options['top']} -n {options['ntasks']}
''')
out = open(filename, 'w')
out.write(f)
out.close()
return filename
def run_plumed(plumed_input, traj, dt=0.002, stride=250, output='COLVAR'):
'''Run plumed driver on plumed input file for a given trajectory (as an xtc) and read output COLVAR'''
cmd = f'plumed driver --plumed {plumed_input} --ixtc {traj} --timestep {dt} --trajectory-stride {stride}'
run(cmd)
COLVAR = np.loadtxt(output, comments='#')
return COLVAR
def get_dehydration_energy(bins, fes, cn1, cn2):
'''
Calculate the dehydration energy from cn1 to cn2. This function fits a spline to the free energy surface
and estimates the energies as the spline evaluated at cn1 and cn2. For positive free energy, corresponding to
how much free energy is needed to strip a coordinated water, cn1 should be the higher energy coordination state.
Parameters
----------
bins : np.array
Bins for the free energy surface in coordination number
fes : np.array
Free energy surface (kJ/mol)
cn1 : float
Coordination number of state 1 to calculate dG = G_1 - G_2
cn2 : float
Coordination number of state 2 to calculate dG = G_1 - G_2
Returns
-------
dG : float
Free energy difference between cn1 and cn2
'''
from scipy.interpolate import UnivariateSpline
spline = UnivariateSpline(bins, fes, k=4, s=0)
dG = spline(cn1) - spline(cn2)
return dG
class vdW_radii:
def __init__(self):
self.dict = {
"H": 1.20, # Hydrogen
"He": 1.40, # Helium
"Li": 1.82, # Lithium
"Be": 1.53, # Beryllium
"B": 1.92, # Boron
"C": 1.70, # Carbon
"N": 1.55, # Nitrogen
"O": 1.52, # Oxygen
"F": 1.47, # Fluorine
"Ne": 1.54, # Neon
"Na": 2.27, # Sodium
"Mg": 1.73, # Magnesium
"Al": 1.84, # Aluminum
"Si": 2.10, # Silicon
"P": 1.80, # Phosphorus
"S": 1.80, # Sulfur
"Cl": 1.75, # Chlorine
"Ar": 1.88, # Argon
"K": 2.75, # Potassium
"Ca": 2.31, # Calcium
"Sc": 2.11, # Scandium
"Ti": 2.00, # Titanium
"V": 2.00, # Vanadium
"Cr": 2.00, # Chromium
"Mn": 2.00, # Manganese
"Fe": 2.00, # Iron
"Co": 2.00, # Cobalt
"Ni": 1.63, # Nickel
"Cu": 1.40, # Copper
"Zn": 1.39, # Zinc
"Ga": 1.87, # Gallium
"Ge": 2.11, # Germanium
"As": 1.85, # Arsenic
"Se": 1.90, # Selenium
"Br": 1.85, # Bromine
"Kr": 2.02, # Krypton
"Rb": 3.03, # Rubidium
"Sr": 2.49, # Strontium
"Y": 2.30, # Yttrium
"Zr": 2.15, # Zirconium
"Nb": 2.00, # Niobium
"Mo": 2.00, # Molybdenum
"Tc": 2.00, # Technetium
"Ru": 2.00, # Ruthenium
"Rh": 2.00, # Rhodium
"Pd": 1.63, # Palladium
"Ag": 1.72, # Silver
"Cd": 1.58, # Cadmium
"In": 1.93, # Indium
"Sn": 2.17, # Tin
"Sb": 2.06, # Antimony
"Te": 2.06, # Tellurium
"I": 1.98, # Iodine
"Xe": 2.16, # Xenon
"Cs": 3.43, # Cesium
"Ba": 2.68, # Barium
"La": 2.50, # Lanthanum
"Ce": 2.48, # Cerium
"Pr": 2.47, # Praseodymium
"Nd": 2.45, # Neodymium
"Pm": 2.43, # Promethium
"Sm": 2.42, # Samarium
"Eu": 2.40, # Europium
"Gd": 2.38, # Gadolinium
"Tb": 2.37, # Terbium
"Dy": 2.35, # Dysprosium
"Ho": 2.33, # Holmium
"Er": 2.32, # Erbium
"Tm": 2.30, # Thulium
"Yb": 2.28, # Ytterbium
"Lu": 2.27, # Lutetium
"Hf": 2.25, # Hafnium
"Ta": 2.20, # Tantalum
"W": 2.10, # Tungsten
"Re": 2.05, # Rhenium
"Os": 2.00, # Osmium
"Ir": 2.00, # Iridium
"Pt": 1.75, # Platinum
"Au": 1.66, # Gold
"Hg": 1.55, # Mercury
"Tl": 1.96, # Thallium
"Pb": 2.02, # Lead
"Bi": 2.07, # Bismuth
"Po": 1.97, # Polonium
"At": 2.02, # Astatine
"Rn": 2.20, # Radon
"Fr": 3.48, # Francium
"Ra": 2.83, # Radium
"Ac": 2.60, # Actinium
"Th": 2.40, # Thorium
"Pa": 2.00, # Protactinium
"U": 1.86, # Uranium
"Np": 2.00, # Neptunium
"Pu": 2.00, # Plutonium
"Am": 2.00, # Americium
"Cm": 2.00, # Curium
"Bk": 2.00, # Berkelium
"Cf": 2.00, # Californium
"Es": 2.00, # Einsteinium
"Fm": 2.00, # Fermium
"Md": 2.00, # Mendelevium
"No": 2.00, # Nobelium
"Lr": 2.00 # Lawrencium
}
def get_dict(self):
return self.dict
class EquilibriumAnalysis:
def __init__(self, top, traj, water='type OW', cation='resname NA', anion='resname CL'):
'''
Initialize the equilibrium analysis object with a topology and a trajectory from
a production simulation with standard MD
Parameters
----------
top : str
Name of the topology file (e.g. tpr, gro, pdb)
traj : str or list of str
Name(s) of the trajectory file(s) (e.g. xtc)
water : str
MDAnalysis selection language for the water oxygen, default='type OW'
cation : str
MDAnalysis selection language for the cation, default='resname NA'
anion : str
MDAnalysis selection language for the anion, default='resname CL'
'''
self.universe = mda.Universe(top, traj)
self.n_frames = len(self.universe.trajectory)
self.waters = self.universe.select_atoms(water)
self.cations = self.universe.select_atoms(cation)
self.anions = self.universe.select_atoms(anion)
if len(self.waters) == 0:
raise ValueError(f'No waters found with selection {water}')
if len(self.cations) == 0:
raise ValueError(f'No cations found with selection {cation}')
if len(self.anions) == 0:
raise ValueError(f'No anions found with selection {anion}')
self.vdW_radii = vdW_radii().get_dict()
def __repr__(self):
return f'EquilibriumAnalysis object with {len(self.waters)} waters, {len(self.cations)} cations, and {len(self.anions)} anions over {self.n_frames} frames'
def _find_peaks_wrapper(self, bins, data, **kwargs):
'''Wrapper for scipy.signal.find_peaks to use with SolvationAnalysis to find cutoff'''
peaks, _ = find_peaks(-data, **kwargs)
radii = bins[peaks[0]]
return radii
def initialize_Solutes(self, step=1, **kwargs):
'''
Initialize the Solute objects from SolvationAnalysis for the ions. Saves the solutes
in attributes `solute_ci` (cation) and `solute_ai` (anion).
Parameters
----------
step : int
Trajectory step for which to run the Solute
'''
from solvation_analysis.solute import Solute
self.solute_ci = Solute.from_atoms(self.cations, {'water' : self.waters, 'coion' : self.anions},
solute_name='Cation', rdf_kernel=self._find_peaks_wrapper,
kernel_kwargs={'distance':5}, **kwargs)
self.solute_ai = Solute.from_atoms(self.anions, {'water' : self.waters, 'coion' : self.cations},
solute_name='Anion', rdf_kernel=self._find_peaks_wrapper,
kernel_kwargs={'distance':5}, **kwargs)
self.solute_ci.run(step=step)
self.solute_ai.run(step=step)
print(f"\nHydration shell cutoff for cation-water = {self.solute_ci.radii['water']:.6f}")
print(f"Hydration shell cutoff for anion-water = {self.solute_ai.radii['water']:.6f}")
def determine_ion_pairing_cutoffs(self, find_peaks_kwargs={'distance' : 5, 'height' : -1.1}, plot=True):
'''
Calculate the cation-anion radial distributions using SolvationAnalysis and identify the cutoffs for
ion pairing events. Should plot to ensure the cutoff regimes visually look correct, since these are
sensitive to the peak detection algorithm.
Parameters
----------
find_peak_kwargs : dict
Keyword arguments for `scipy.find_peaks` used to find the first 3 minima in the cation-anion RDF,
default={'distance' : 5, 'height' : -1.1} worked well for NaCl at 0.6 M with OPC3 water
plot : bool
Whether to plot the RDF with the regions shaded, default=True
'''
try:
self.solute_ci
except NameError:
print('Solutes not initialized. Try `initialize_Solutes()` first')
r = self.solute_ci.rdf_data['Cation']['coion'][0]
rdf = self.solute_ci.rdf_data['Cation']['coion'][1]
mins, min_props = find_peaks(-rdf, **find_peaks_kwargs)
self.ion_pairs = Results()
self.ion_pairs['CIP'] = (0,r[mins[0]])
self.ion_pairs['SIP'] = (r[mins[0]],r[mins[1]])
self.ion_pairs['DSIP'] = (r[mins[1]],r[mins[2]])
self.ion_pairs['FI'] = (r[mins[2]],np.inf)
if plot:
fig, ax = plt.subplots(1,1)
ax.plot(r, rdf, color='k')
le = 2
for i,m in enumerate(mins[:3]):
ax.fill_betweenx(np.linspace(0,10), le, r[m], alpha=0.25)
ax.text((le+r[m]) / 2, 8, list(self.ion_pairs.keys())[i], ha='center')
le = r[m]
ax.fill_betweenx(np.linspace(0,10), le, 10, alpha=0.25)
ax.text((le+10) / 2, 8, list(self.ion_pairs.keys())[-1])
ax.set_xlabel('r ($\mathrm{\AA}$)')
ax.set_ylabel('g(r)')
ax.set_xlim(2,10)
ax.set_ylim(0,9)
fig.savefig('ion_pairing_cutoffs.png')
plt.show()
return self.ion_pairs
def generate_rdfs(self, bin_width=0.05, range=(0,20), step=1, filename=None, njobs=1):
'''
Calculate radial distributions for the solution. This method calculates the RDFs for cation-water,
anion-water, water-water, and cation-anion using MDAnalysis InterRDF. It saves the data in a
dictionary attribute `rdfs` with keys 'ci-w', 'ai-w', 'w-w', and 'ci-ai'.
Parameters
----------
bin_width : float
Width of the bins for the RDFs, default=0.05 Angstroms
range : array-like
Range over which to calculate the RDF, default=(0,20)
step : int
Trajectory step for which to calculate the RDF, default=1
filename : str
Filename to save RDF data, default=None means do not save to file
njobs : int
Number of CPUs to run on, default=1
Returns
-------
rdfs : dict
Dictionary with all the results from InterRDF
'''
from ParallelMDAnalysis import ParallelInterRDF as InterRDF
nbins = int((range[1] - range[0]) / bin_width)
self.rdfs = {}
print('\nCalculating cation-water RDF...')
ci_w = InterRDF(self.cations, self.waters, nbins=nbins, range=range, norm='rdf', verbose=True)
ci_w.run(step=step, njobs=njobs)
self.rdfs['ci-w'] = ci_w.results
print('\nCalculating anion-water RDF...')
ai_w = InterRDF(self.anions, self.waters, nbins=nbins, range=range, norm='rdf', verbose=True)
ai_w.run(step=step, njobs=njobs)
self.rdfs['ai-w'] = ai_w.results
print('\nCalculating water-water RDF...')
w_w = InterRDF(self.waters, self.waters, nbins=nbins, range=range, norm='rdf', verbose=True)
w_w.run(step=step, njobs=njobs)
self.rdfs['w-w'] = w_w.results
print('\nCalculating cation-anion RDF...')
ci_ai = InterRDF(self.cations, self.anions, nbins=nbins, range=range, norm='rdf', verbose=True)
ci_ai.run(step=step, njobs=njobs)
self.rdfs['ci-ai'] = ci_ai.results
if filename is not None:
data = np.vstack([ci_w.results.bins, ci_w.results.rdf, ai_w.results.rdf, w_w.results.rdf, ci_ai.results.rdf]).T
np.savetxt(filename, data, header='r (Angstroms), cation-water g(r), anion-water g(r), water-water g(r), cation-anion g(r)')
return self.rdfs
def get_coordination_numbers(self, step=1):
'''
Calculate the water coordination number as a function of time for both cations and anions.
Parameters
----------
step : int
Trajectory step for which to calculate coordination numbers
Returns
-------
avg_CN : np.array
Average coordination number over the trajectory for [cations, anions]
'''
try:
self.solute_ci
except NameError:
print('Solutes not initialized. Try `initialize_Solutes()` first')
# initialize coordination number as a function of time
self.coordination_numbers = np.zeros((2,len(self.universe.trajectory[::step])))
for i,ts in enumerate(self.universe.trajectory[::step]):
# first for cations
d = distances.distance_array(self.cations, self.waters, box=ts.dimensions)
n_coordinating = (d <= self.solute_ci.radii['water']).sum()
self.coordination_numbers[0,i] = n_coordinating / len(self.cations)
# then for anions
d = distances.distance_array(self.anions, self.waters, box=ts.dimensions)
n_coordinating = (d <= self.solute_ai.radii['water']).sum()
self.coordination_numbers[1,i] = n_coordinating / len(self.anions)
return self.coordination_numbers.mean(axis=1)
def shell_probabilities(self, plot=False):
'''
Calculate the shell probabilities for each ion. Must first initialize the SolvationAnalysis Solutes.
Parameters
----------
plot : bool
Whether to plot the distributions of shells, default=False
'''
try:
self.solute_ci
except NameError:
print('Solutes not initialized. Try `initialize_Solutes()` first')
df1 = self.solute_ci.speciation.speciation_fraction
shell = []
for i in range(df1.shape[0]):
row = df1.iloc[i]
shell.append(f'{row.coion:.0f}-{row.water:.0f}')
df1['shell'] = shell
self.cation_shells = df1
df2 = self.solute_ai.speciation.speciation_fraction
shell = []
for i in range(df2.shape[0]):
row = df2.iloc[i]
shell.append(f'{row.coion:.0f}-{row.water:.0f}')
df2['shell'] = shell
self.anion_shells = df2
if plot:
df = df1.merge(df2, on='shell', how='outer')
df.plot(x='shell', y=['count_x', 'count_y'], kind='bar', legend=False)
plt.legend(['Cation', 'Anion'])
plt.ylabel('probability')
plt.savefig('shell_probabilities.png')
plt.show()
def water_dipole_distribution(self, ion='cation', radius=None, step=1):
'''
Calculate the distribution of angles between the water dipole and the oxygen-ion vector
Parameters
----------
ion : str
Ion to calculate the distribution for. Options are 'cation' and 'anion'. default='cation'
radius : float
Hydration shell cutoff in Angstroms to select waters within hydration shell only, default=None
means pull from SolvationAnalysis.solute.Solute
step : int
Step to iterate the trajectory when running the analysis, default=10
Returns
-------
angles : np.array
Angles for all waters coordinated with all ions, averaged over the number of frames
'''
# parse arguments
if ion == 'cation':
ions = self.cations
elif ion == 'anion':
ions = self.anions
else:
raise NameError("Options for kwarg ion are 'cation' or 'anion'")
if radius is None:
if ion == 'cation':
radius = self.solute_ci.radii['water']
elif ion == 'anion':
radius = self.solute_ai.radii['water']
# loop through frames and ions to get angle distributions
angles = []
for i, ts in enumerate(self.universe.trajectory[::step]):
for ci in ions:
my_atoms = self.universe.select_atoms(f'sphzone {radius} index {ci.index}') - ci
my_waters = my_atoms & self.waters # intersection operator to get the OW from my_atoms
for ow in my_waters:
dist = ci.position - ow.position
# if the water is on the other side of the box, move it back
for d in range(3):
v = np.array([0,0,0])
v[d] = 1
if dist[d] >= ts.dimensions[d]/2:
ow.residue.atoms.translate(v*ts.dimensions[d])
elif dist[d] <= -ts.dimensions[d]/2:
ow.residue.atoms.translate(-v*ts.dimensions[d])
# calculate and save angles
pos = ow.position
bonded_Hs = ow.bonded_atoms
tmp_pt = bonded_Hs.positions.mean(axis=0)
v1 = ci.position - pos
v2 = pos - tmp_pt
ang = get_angle(v1, v2)*180/np.pi
angles.append(ang)
return np.array(angles)
def angular_water_distribution(self, ion='cation', r_range=(2,5), bin_width=0.05, start=0, step=10):
'''
Calculate the angular distributions of water around the ions as a function of the radius.
The angles are theta (polar angle) and phi (azimuthal angle) from spherical coordinates
centered at the ion location. The polar angle defines how far the water molecule is from
the z-axis, and the azimuthal angle defines where in the orbit the water molecule is around
the ion. Saves images to '{ion}_theta_distribution.png' and '{ion}_phi_distribution.png'.
Parameters
----------
ion : str
Ion to calculate the distributions for. Options are 'cation' and 'anion'. default='cation'
r_range : array-like
Range for the radius to bin the histogram in Angstroms, default=(2,5)
bin_width : float
Width of the bin for the radius histogram in Angstroms, default=0.05
start : int
Starting frame index for the trajectory to run, default=0
step : int
Step to iterate the trajectory when running the analysis, default=10
'''
if ion == 'cation':
ions = self.cations
elif ion == 'anion':
ions = self.anions
# get the bins for the histograms
nbins = int((r_range[1] - r_range[0]) / bin_width)
rbins = np.linspace(r_range[0], r_range[1], nbins)
thbins = np.linspace(0,180, nbins)
phbins = np.linspace(-180,180, nbins)
# initialize the histograms
th_hist,th_x,th_y = np.histogram2d([], [], bins=[rbins,thbins])
ph_hist,ph_x,ph_y = np.histogram2d([], [], bins=[rbins,phbins])
# ang_hist,ang_ph,ang_th = np.histogram2d([], [], bins=[phbins,thbins])
for i, ts in enumerate(self.universe.trajectory[start::step]):
for ci in ions:
self.universe.atoms.translate(-ci.position) # set the ion as the origin
my_waters = self.waters.select_atoms(f'point 0 0 0 {r_range[1]}') # select only waters near the ion
# convert to spherical coordinates, centered at the ion
r = np.sqrt(my_waters.positions[:,0]**2 + my_waters.positions[:,1]**2 + my_waters.positions[:,2]**2)
th = np.degrees(np.arccos(my_waters.positions[:,2] / r))
ph = np.degrees(np.arctan2(my_waters.positions[:,1], my_waters.positions[:,0]))
# histogram to get the probability density
h1,_,_ = np.histogram2d(r, th, bins=[rbins,thbins], density=True)
h2,_,_ = np.histogram2d(r, ph, bins=[rbins,phbins], density=True)
# h3,_,_ = np.histogram2d(ph, th, bins=[phbins,thbins], density=True)
th_hist += h1
ph_hist += h2
# ang_hist += h3
th_hist = th_hist / len(self.universe.trajectory[start::step]) / len(ions)
ph_hist = ph_hist / len(self.universe.trajectory[start::step]) / len(ions)
# ang_hist = ang_hist / len(self.universe.trajectory[start::step])
# for the theta distribution, scale by the differential area of a strip around sphere
s = np.zeros(th_hist.shape)
dth = (th_y[1]-th_y[0])*np.pi/180
th_centers = (th_y[:-1] + th_y[1:])/2
r_centers = (th_x[:-1] + th_x[1:])/2
for i,t in enumerate(th_centers):
for j,r in enumerate(r_centers):
s[j,i] = 2*np.pi * r**2 * np.sin(t*np.pi/180) * dth
th_hist = th_hist/s
# for the phi distribution, scale by the differential area of a strip from pole to pole
s = np.zeros(ph_hist.shape)
dph = (ph_y[1]-ph_y[0])*np.pi/180
ph_centers = (ph_y[:-1] + ph_y[1:])/2
r_centers = (ph_x[:-1] + ph_x[1:])/2
for i,t in enumerate(ph_centers):
for j,r in enumerate(r_centers):
s[j,i] = 2*r**2 * dph
ph_hist = ph_hist/s
## TODO
# for the angle distribution, scale by the differential area of a square on the surface of the sphere
# s = np.zeros(ang_hist.shape)
# dth = (ang_th[1]-ang_th[0])*np.pi/180
# dph = (ang_ph[1]-ang_ph[0])*np.pi/180
# th_centers = (ang_th[:-1] + ang_th[1:])/2
# ph_centers = (ang_ph[:-1] + ang_ph[1:])/2
# for i,t in enumerate(th_centers):
# for j,r in enumerate(ph_centers):
# s[j,i] =
# ang_hist = ang_hist/s
fig1, ax1 = plt.subplots(1,1)
# c1 = ax1.pcolor(th_x, th_y, th_hist.T, vmin=0, vmax=0.006)
c1 = ax1.pcolor(th_x, th_y, th_hist.T)
ax1.set_xlim(r_range[0],r_range[1])
ax1.set_ylim(0,180)
ax1.set_yticks(np.arange(0,180+30,30))
ax1.set_ylabel('$\\theta$ (degrees)')
ax1.set_xlabel('r ($\mathrm{\AA}$)')
fig1.colorbar(c1,label='probability')
fig1.savefig(f'{ion}_theta_distribution.png')
fig2, ax2 = plt.subplots(1,1)
# c2 = ax2.pcolor(ph_x, ph_y, ph_hist.T, vmin=0, vmax=0.003)
c2 = ax2.pcolor(ph_x, ph_y, ph_hist.T)
ax2.set_ylabel('$\phi$ (degrees)')
ax2.set_xlabel('r ($\mathrm{\AA}$)')
ax2.set_xlim(r_range[0],r_range[1])
ax2.set_ylim(-180,180)
plt.yticks(np.arange(-180,200,45))
fig2.colorbar(c2, label='probability')
fig2.savefig(f'{ion}_phi_distribution.png')
plt.show()
return th_hist, (th_x, th_y), ph_hist, (ph_x, ph_y)
def spatial_density(self, group='type OW', ion='cation', r_max=5, grid_pts=20, step=1):
'''
Plot a 3D spatial density for the locations of `group` around the ion. Creates an interactive
plot using `plotly`.
Parameters
----------
group : str
MDAnalysis atom selection language describing the group whose density will be plotted, default=`type OW`
are the water oxygens
ion : str
Ion to calculate the distributions for. Options are 'cation' and 'anion'. default='cation'
r_max : float
Radial distance (Angstroms) to go out around the ion, default=5
grid_pts : int
Number of grid points for the 3D meshgrid, default=20
step : int
Step to iterate the trajectory when running the analysis, default=1
Returns
-------
hist : np.array
Occupation density around the ion. This is the counts over the whole trajectory divided by the maximum
value to scale between 0 and 1.
edges : tuple
The (X,Y,Z) meshgrid necessary for plotting `hist` in 3D space.
'''
import plotly.graph_objects as go
# initialize grid space, bins, and histogram
X, Y, Z = np.meshgrid(np.linspace(-r_max,r_max,grid_pts), np.linspace(-r_max,r_max,grid_pts), np.linspace(-r_max,r_max,grid_pts))
bins = np.linspace(-r_max, r_max, grid_pts+1)
init_sample = np.array([[0,0,0],
[0,0,0]])
hist, edges = np.histogramdd(init_sample, bins=(bins,bins,bins))
hist[hist > 0] = 0 # set the initialized histogram to 0
for i,ts in enumerate(self.universe.trajectory[::step]):
for ci in self.cations:
self.universe.atoms.translate(-ci.position)
my_atoms = self.universe.select_atoms(f'sphzone {r_max} index {ci.index}') - ci
my_selection = my_atoms.select_atoms(group)
h,_ = np.histogramdd(my_selection.positions, bins=(bins,bins,bins))
hist += h
# hist = hist / hist.max()
fig = go.Figure(data=go.Volume(
x=X.flatten(),
y=Y.flatten(),
z=Z.flatten(),
value=hist.flatten(),
isomin=0,
isomax=hist.max(),
opacity=0.05, # needs to be small to see through all surfaces
surface_count=20, # needs to be a large number for good volume rendering
colorscale='jet'
))
fig.show()
return hist, (X,Y,Z)
def polyhedron_size(self, ion='cation', r0=None, njobs=1, step=1):
'''
Construct a polyhedron from the atoms in a hydration shell and calculate the volume of the polyhedron
and the maximum cross-sectional area of the polyhedron. The cross-sections are taken along the first
principal component of the vertices of the polyhedron.
Parameters
----------
ion : str
Whether to calculate the volumes and areas for the cations or anions, options are `cation` and `anion`
r0 : float
Hydration shell cutoff in Angstroms, default=None means will calculate using `self.initialize_Solutes()`
njobs : int
How many processors to run the calculation with, default=1. If greater than 1, use multiprocessing to
distribute the analysis. If -1, use all available processors.
step : int
Trajectory step for analysis
Returns
-------
results : MDAnalysis.analysis.base.Results object
Volume and area time series, saved in `volumes` and `areas` attributes
'''
if ion == 'cation':
ions = self.cations
if r0 is None:
try:
r0 = self.solute_ci.radii['water']
except NameError:
print('Solutes not initialized. Try `initialize_Solutes()` first')
elif ion == 'anion':
ions = self.anions
if r0 is None:
try:
r0 = self.solute_ai.radii['water']
except NameError:
print('Solutes not initialized. Try `initialize_Solutes()` first')
else:
raise NameError("Options for kwarg ion are 'cation' or 'anion'")
# Prepare the Results object
results = Results()
results.areas = np.zeros((len(ions), len(self.universe.trajectory[::step])))
results.volumes = np.zeros((len(ions), len(self.universe.trajectory[::step])))