MSD and diffusion coefficient#

Import packages#

from fishmol import trj, msd
from cage_data import cage1_info
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from fishmol import style

Import information of the simulated system#

# The simulation box
cell = cage1_info.cell
# Waters is a dict contains the indices of atoms of water molcules
waters = cage1_info.waters
water_indices = [[*water.values()] for water in waters]
print(water_indices)
[[14, 15, 16], [17, 18, 19], [143, 144, 145], [146, 147, 148], [272, 273, 274], [275, 276, 277], [401, 402, 403], [404, 405, 406]]

Read trajectory data file#

%%time
# The trajectory is a large file (517 MB)
traj = trj.Trajectory(timestep = 5, data = "/nobackup/rhtp48/data_ana/fishmol_examples/cage_data/cage1-500K.xyz", index = ":", cell = cell)
CPU times: user 45.4 s, sys: 1.47 s, total: 46.9 s
Wall time: 47.1 s

You can retrieve and confirm the simulation cell info

traj.cell
[[21.2944, 0.0, 0.0],
 [-4.6030371123, 20.7909480472, 0.0],
 [-0.9719093466, -1.2106211379, 15.1054299403]]

A test of calc_com() function is working

traj.frames[0][water_indices[0]].calc_com()
array([10.58631557, 19.90213351, 10.38000923])

Calculate the centre of masses (CoMs) for water molecules#

from fishmol.utils import update_progress # import the progress bar function

water_coms = np.zeros((traj.nframes, 3*len(water_indices)))

for i, frame in enumerate(traj.frames):
    # Calculate com
    coms = [frame[water_idx].calc_com() for water_idx in water_indices]
    water_coms[i] = np.asarray(coms).flatten()
    update_progress(i/traj.nframes)
    
# Add data to dateframe
columns=[]
for i in range(1, len(water_indices) + 1):
    columns += [f"water{i}_x", f"water{i}_y", f"water{i}_z"]

water_com_df = pd.DataFrame(columns=columns, data = water_coms)

# Write data to excel file
water_com_df.to_excel("test/cage1-500K-water-com.xlsx")
update_progress(1)
Progress: [■■■■■■■■■■■■■■■■■■■■] 100.0%
water_com_df.head()
water1_x water1_y water1_z water2_x water2_y water2_z water3_x water3_y water3_z water4_x ... water5_z water6_x water6_y water6_z water7_x water7_y water7_z water8_x water8_y water8_z
0 10.586316 19.902134 10.380009 0.624155 10.923925 8.298079 -1.967392 4.261146 12.241481 4.693672 ... 4.725172 15.090375 8.649502 6.883228 17.759366 15.321196 2.865725 11.019290 3.578953 0.703805
1 10.597616 19.945151 10.393126 0.617881 10.942585 8.312331 -1.961926 4.253910 12.205821 4.689873 ... 4.745156 15.076014 8.640429 6.849210 17.749958 15.278667 2.889409 11.049146 3.587807 0.663123
2 10.610208 20.003821 10.408405 0.610859 10.967922 8.330438 -1.953047 4.243309 12.156421 4.687198 ... 4.767198 15.058083 8.628816 6.804086 17.738720 15.225162 2.920655 11.086905 3.600664 0.608757
3 10.622718 20.062112 10.416761 0.604688 10.993416 8.349495 -1.945208 4.229969 12.106099 4.687512 ... 4.786539 15.040753 8.617851 6.760128 17.728982 15.181476 2.948253 11.122864 3.614650 0.558271
4 10.636664 20.122749 10.416813 0.597829 11.020753 8.373153 -1.941190 4.211549 12.054352 4.691621 ... 4.808036 15.020218 8.605213 6.714651 17.717884 15.151479 2.970079 11.155729 3.629731 0.511643

5 rows × 24 columns

del water_coms

Calculate the mean squared displacement of water molecules#

Create the time data#

start_idx = 0
t0 = start_idx * 5
t_end = t0 + 5*(len(water_com_df)-1) # 5 fs is the interval of traj
t = np.linspace(t0, t_end, num = len(water_com_df))
msd_d_df = pd.DataFrame()
msd_d_df["t"] = t[1:]

Calculate the MSD and D#

Define a function to calculate the mean squre displacements (MSDs).

The mean squared displacement (MSD) measures how much particles move over time. There are a number of definitions for the mean squared displacement. The MSDs here are calculated from the most common form, which is defined as: $\( MSD(m) = \frac{1}{N_{particles}} \sum_{i=1}^{N_{particles}} \frac{1}{N-m} \sum_{k=0}^{N-m-1} (\vec{r}_i(k+m) - \vec{r}_i(k))^2 \)\( where \)r_i(k)\( is the position of particle \)i\( in frame \)k\(. The mean squared displacement is averaged displacements over all windows of length \)m$. The algorithm used is described in calandrini2011nmoldyn which can be realised by the code in this StackOverflow thread.

for i in range(len(water_indices)):
    temp = np.array(water_com_df.iloc[:, 3*i:3*i+3])
    msds = msd.msd_fft(temp)
    msd_d_df[f"water{i}_MSD"] = msds[1:]
    msd_d_df[f"water{i}_D"] = msds[1:] * 1E-16 / (6*t[1:]*1E-15)
msd_df = msd_d_df.iloc[:, 1:16:2]
d_df = msd_d_df.iloc[:, 2:17:2]

Calculate average values of MSD and D#

msd_d_df["Mean_MSD"] = msd_df.mean(axis=1)
msd_d_df["MSD_error"] = msd_df.std(axis=1) / 8**0.5
msd_d_df["Mean_D"] = d_df.mean(axis=1)
msd_d_df["D_error"] = d_df.std(axis=1) / 8**0.5
# Save the data
msd_d_df.to_excel("test/cage1-H2O-MSD-D-500K.xlsx")

Plot the data#

# skip the first 5 ps, that the system is still equilibrating
msd_d_df = msd_d_df.iloc[1000:,:]
msd_d_df["t"] = (msd_d_df["t"] -5000)
msd_d_df.head()
t water0_MSD water0_D water1_MSD water1_D water2_MSD water2_D water3_MSD water3_D water4_MSD ... water5_MSD water5_D water6_MSD water6_D water7_MSD water7_D Mean_MSD MSD_error Mean_D D_error
1000 5.0 4.093919 0.000014 19.722342 0.000066 2.003004 0.000007 33.566337 0.000112 1.458750 ... 19.347635 0.000064 2.614106 0.000009 19.172680 0.000064 12.747347 4.198995 0.000042 0.000014
1001 10.0 4.095739 0.000014 19.746275 0.000066 2.003836 0.000007 33.597794 0.000112 1.458538 ... 19.358398 0.000064 2.613060 0.000009 19.178147 0.000064 12.756473 4.202892 0.000042 0.000014
1002 15.0 4.097586 0.000014 19.770177 0.000066 2.004774 0.000007 33.629258 0.000112 1.458261 ... 19.369117 0.000064 2.612067 0.000009 19.183779 0.000064 12.765627 4.206788 0.000042 0.000014
1003 20.0 4.099463 0.000014 19.794038 0.000066 2.005816 0.000007 33.660730 0.000112 1.457916 ... 19.379795 0.000064 2.611117 0.000009 19.189575 0.000064 12.774806 4.210683 0.000042 0.000014
1004 25.0 4.101374 0.000014 19.817852 0.000066 2.006957 0.000007 33.692212 0.000112 1.457499 ... 19.390432 0.000064 2.610204 0.000009 19.195537 0.000064 12.784008 4.214578 0.000042 0.000014

5 rows × 21 columns

fig = plt.figure(figsize=(4.6,3.6))
ax  = fig.add_axes([0.16, 0.16, 0.685, 0.75])

color = "#08519c"
ax.plot(msd_d_df["t"]/1000, msd_d_df["Mean_MSD"], color = "#525252")

ax.plot(msd_d_df["t"]/1000, msd_d_df["Mean_MSD"] - msd_d_df["MSD_error"], linewidth = 1, color = "#bdbdbd")
ax.plot(msd_d_df["t"]/1000, msd_d_df["Mean_MSD"] + msd_d_df["MSD_error"], linewidth = 1, color = "#bdbdbd")

ax.fill_between(msd_d_df["t"]/1000, msd_d_df["Mean_MSD"] + msd_d_df["MSD_error"], msd_d_df["Mean_MSD"] - msd_d_df["MSD_error"], color= "#d9d9d9", alpha = 0.2)

linear_model = np.polyfit(msd_d_df["t"]/1000, msd_d_df["Mean_MSD"],1)  
linear_model_fn = np.poly1d(linear_model) 

x_s = np.arange(msd_d_df["t"].min()/1000, msd_d_df["t"].max()/1000)  
ax.plot(x_s,linear_model_fn(x_s), color="k", ls ="-.")

ax1 = ax.twinx()
ax1.plot(msd_d_df["t"]/1000, msd_d_df["Mean_D"], color = color)
ax1.plot(msd_d_df["t"]/1000, msd_d_df["Mean_D"] - msd_d_df["D_error"], linewidth = 1, color = "#6baed6")
ax1.plot(msd_d_df["t"]/1000, msd_d_df["Mean_D"] + msd_d_df["D_error"], linewidth = 1, color = "#6baed6")

ax1.fill_between(msd_d_df["t"]/1000, msd_d_df["Mean_D"] + msd_d_df["D_error"], msd_d_df["Mean_D"] - msd_d_df["D_error"], color = "#9ecae1", alpha = 0.2)

ax.annotate("", xy=(0.7, 0.68), xycoords = "axes fraction",
            xytext=(0.85, 0.68), textcoords='axes fraction',
            arrowprops=dict(arrowstyle="->", color = "#525252"))

ax.annotate("", xy=(0.23, 0.40), xycoords = "axes fraction",
            xytext=(0.08, 0.40), textcoords='axes fraction',
            arrowprops=dict(arrowstyle="->", color = color))

xmin, xmax = ax.get_xlim()
ymin, ymax = ax.get_ylim()

ax.text(xmin + 0.35*(xmax-xmin), ymin + 0.92*(ymax - ymin), "%.2e cm$^2$ s$^{-1}$"%(linear_model[0]/60000), ha = "center", va = "center")

ax.set_ylabel('MSD (Å$^2$)')
ax.set_xlabel('$t$ (ps)')

ax1.set_ylabel('$D$ (cm$^2$ s$^{-1}$)', color = color)
ax1.tick_params(axis='y', color = color, labelcolor = color)
ax1.spines['right'].set_color(color)
ax1.ticklabel_format(axis='y', style='sci', scilimits=[-4,4], useMathText=True)

plt.savefig("test/cage1-ave_msd-d-500K.jpg", dpi =600)

plt.show()
_images/6745dcb549498ff99adabb8c62e14e0e8b045b92a5a2fcd25c1e0dba0e605946.png