Update 2D_multipole_inv.ipynb

apoorv-kushwaha · apoorv-kushwaha · commit 365d28e42799 · 2023-07-01T12:04:00.000+05:30
diff --git a/2D_multipole_inv.ipynb b/2D_multipole_inv.ipynb
@@ -27,13 +27,23 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "# Import important libraries\n",
     "import math\n",
     "import numpy as np\n",
     "import pandas as pd\n",
     "from scipy.special import legendre\n",
     "import matplotlib.pyplot as plt"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "f14eaf97",
+   "metadata": {},
+   "source": [
+    "#### Read the input file (PES) and specify the column separator (remove any header line such as r, theta, phi, etc.)\n",
+    "*The code assumes the first column is R (radial coordinate), the second is theta (angular coordinate) and the third is E (potential energy).*\n"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 164,
@@ -42,8 +52,7 @@
    "outputs": [],
    "source": [
     "df_inp = pd.read_csv('nccn_he_NN.dat',header=None,sep='\\s+')  # import file\n",
-    "\n",
-    "lm = 10  # lambda max "
+    "lm = 10  # Define number of lambda (Legendre) terms in the expansion "
    ]
   },
   {
@@ -53,9 +62,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "df_inp.sort_values(by = [ 0,1], inplace=True, ascending = True)    # sort by R, theta\n",
-    "df_inp[2] = (df_inp[2]+188.31099452)*219474.63                     # convert to cm-1\n",
-    "df_inp.reset_index(inplace=True, drop = True)"
+    "df_inp.sort_values(by = [ 0,1], inplace=True, ascending = True)    # sort by (R, theta) in ascending order\n",
+    "E_inf = -188.31099452                                         # asymptotic energy E(R -> infinity); negative, so that (E - E_inf) equals the previous (E + 188.31099452)\n",
+    "df_inp[2] = (df_inp[2] - E_inf)*219474.63                     # subtract E_inf, then convert to cm-1\n",
+    "df_inp.reset_index(inplace=True, drop = True)                 # reset index after sorting"
    ]
   },
   {
@@ -65,13 +75,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "nc=80\n",
-    "ngm = 91\n",
-    "px = np.zeros((ngm,lm))          # stores legendre polynomial\n",
-    "f = np.zeros(ngm)           # Ab initio energy \n",
-    "R = np.zeros(nc)            # distance R\n",
-    "E = np.zeros(nc)            # multipole expanded potentials\n",
-    "df_out = pd.DataFrame()      # dataframe stores V lambda"
+    "nc=80                       # number of radial coordinates (must be the same for all angles)\n",
+    "ngm = 91                    # number of angular points (0-90 degrees in 1 degree steps for a symmetric molecule)\n",
+    "px = np.zeros((ngm,lm))     # matrix that stores Legendre polynomial values for each (angle, lambda)\n",
+    "f = np.zeros(ngm)           # array to store the ab initio energies for one R (all angles) \n",
+    "R = np.zeros(nc)            # array for distance R\n",
+    "E = np.zeros(nc)            # array to store multipole expanded potentials\n",
+    "df_out = pd.DataFrame()     # dataframe that stores final V lambda (radial coefficients)"
    ]
   },
   {
@@ -81,8 +91,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "V_nf= np.zeros((nc,lm)) \n",
-    "V_n= np.zeros(lm)"
+    "V_nf= np.zeros((nc,lm))     # Numpy 2D array to store V_lambdas as they are calculated for each radial term\n",
+    "V_n= np.zeros(lm)           # Stores V_lambda for one radial term (deprecated part of code, no longer used!)\n",
+    "symmetric = True            # Verify if rigid rotor is symmetric (else put False)\n",
+    "if symmetric:\n",
+    "    sym = 2\n",
+    "else:\n",
+    "    sym = 1"
    ]
   },
   {
@@ -92,11 +107,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "for j2 in range (ngm):      # angle\n",
-    "    for j3 in range (lm):      # legendre\n",
-    "        pxc = legendre(j3*2)      # *2 for symmetric molecule (data upto 90 degree); *1 otherwise\n",
-    "        ang = math.radians(j2)\n",
-    "        px[j2,j3]= pxc(math.cos(ang))        "
+    "for j2 in range (ngm):      # loop over angular points (j2 is the angle in degrees: 0-90 with 1 degree interval)\n",
+    "    for j3 in range (lm):      # loop over Legendre terms\n",
+    "        pxc = legendre(j3*sym)         # uses j3*2 for symmetric molecule (only even V_lambdas); j3*1 otherwise\n",
+    "        ang = math.radians(j2)         # convert angle from degrees to radians\n",
+    "        px[j2,j3]= pxc(math.cos(ang))  # store Legendre polynomial value for the corresponding angle and lambda (2D)"
    ]
   },
   {
@@ -106,7 +121,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "A_inv = np.linalg.pinv(px)"
+    "A_inv = np.linalg.pinv(px)             # take pseudo-inverse of px matrix (equivalent to a least-squares fit)"
    ]
   },
   {
@@ -334,13 +349,13 @@
    ],
    "source": [
     "for i in range (nc):           # loop over all R   \n",
-    "    ct = i*ngm\n",
-    "    f = df_inp[2][ct:ct+ngm]   # sorted by R, theta (extracting V for one R at a time)\n",
-    "    V_n1 = A_inv.dot(f)\n",
-    "    V_nf[i,:] = V_n1\n",
-    "a12 = np.arange(lm)\n",
-    "df_Vnf = pd.DataFrame(V_nf, columns = a12*2)\n",
-    "df_Vnf[8:]"
+    "    ct = i*ngm                 # start row for this R (input dataframe is sorted by R, then theta)\n",
+    "    f = df_inp[2][ct:ct+ngm]   # potentials (V) are extracted for one R value at a time \n",
+    "    V_n1 = A_inv.dot(f)        # A_inv * V gives radial coefficients\n",
+    "    V_nf[i,:] = V_n1           # radial coefficients stored in 2D matrix\n",
+    "a12 = np.arange(lm)            # creates header for lambda terms\n",
+    "df_Vnf = pd.DataFrame(V_nf, columns = a12*sym) # saves final matrix into dataframe with appropriate header\n",
+    "df_Vnf[8:]                     # displays rows from index 8 onward (the first 8 R values are skipped)"
    ]
   },
   {
@@ -361,27 +376,9 @@
     }
    ],
    "source": [
-    "min(df_Vnf[0])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 186,
-   "id": "4a3b7cb3",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#df_Vnf = df_Vnf[8:][:]"
+    "min(df_Vnf[0])   # minimum of the isotropic (lambda = 0) term"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "301d19e0",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "code",
    "execution_count": 190,
@@ -400,7 +397,7 @@
     }
    ],
    "source": [
-    "x_dummy = np.arange(2.5,10.5,0.1)\n",
+    "x_dummy = np.arange(2.5,10.5,0.1) # Radial coordinates for plotting data\n",
     "len(x_dummy)"
    ]
   },
@@ -424,7 +421,7 @@
     }
    ],
    "source": [
-    "#Spline100\n",
+    "# Plot raw data\n",
     "for i in range(0,lm):\n",
-    "    y_dummy = df_Vnf[i*2]\n",
+    "    y_dummy = df_Vnf[i*sym]   # columns are labelled lambda = i*sym, so this also works when sym = 1\n",
     "    # Plot the noisy exponential data\n",
@@ -442,8 +439,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# saving 2 datasets 1 from 2.5 and other from 3.3\n",
-    "# add data points to check for wobbes/kinks\n",
+    "# adding more data points to radial terms for extrapolation\n",
     "x_22=np.arange(2,2.5,0.1)\n",
     "x_3=np.append(x_22,x_dummy)\n",
     "x_2=np.array([12.5,12.6,12.7,13,15,20,50])\n",
@@ -457,8 +453,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "# function to fit V_lambdas into series of slater functions A*exp(B*(-x))\n",
+    "# 2 terms may underfit, 3 is enough (search good coefficients for B), 4 may overfit the data\n",
     "from scipy.optimize import curve_fit\n",
-    "a,b,c,d,e,f,rmsx = np.zeros(20),np.zeros(20),np.zeros(20),np.zeros(20),np.zeros(20),np.zeros(20),np.zeros(20)\n",
+    "a,b,c,d,e,f,rmsx = np.zeros(lm),np.zeros(lm),np.zeros(lm),np.zeros(lm),np.zeros(lm),np.zeros(lm),np.zeros(lm)\n",
     "def exp_fit(x, a,b,c):\n",
     "    return  a*np.exp(-1*x)+b*np.exp(-2*x)+c*np.exp(-3*x)#+ \\\n",
     "            #d*np.exp(-4*x)#+e*np.exp(-5*x)+f*np.exp(-6*x)"
@@ -500,27 +498,33 @@
     }
    ],
    "source": [
+    "# change x and y limits as needed!\n",
+    "x_i, x_f = 2.5, 8      # x-axis (R) limits of the plots\n",
+    "y_i, y_f = -15, 10     # y-axis (energy) limits of the plots\n",
+    "# select initial range for analytical fit (leave out high energy points for better fit)!\n",
+    "ini_val = 21 # starting point for fitting into function\n",
     "\n",
-    "for i in range(0,lm):\n",
-    "    j=int(i)\n",
-    "    y_dummy = df_Vnf[j*2]\n",
-    "    parsx, covx = curve_fit(f=exp_fit, xdata=x_dummy[21:], ydata=y_dummy[21:], p0=[0,0,1000])\n",
+    "# Fitting data into analytic function and plotting for visualization\n",
+    "for j in range(0,lm):\n",
+    "    y_dummy = df_Vnf[j*sym]\n",
+    "    parsx, covx = curve_fit(f=exp_fit, xdata=x_dummy[ini_val:], ydata=y_dummy[ini_val:], p0=[0,0,1000])\n",
     "    a[j],b[j],c[j] = parsx\n",
     "    print(\"[a b c] coefficients: \", parsx)\n",
-    "    # Plot the fit data as an overlay on the scatter data\n",
+    "    # Scatter plot for radial coefficients (raw data points)\n",
     "    plt.scatter(x_dummy, y_dummy,s=20, color='#00b3b3',label = 'no fit')\n",
+    "    # fitted curve (extended R range) as an overlay on the scatter (raw) data\n",
     "    plt.plot(x_3, exp_fit(x_3, *parsx), linestyle='-.', linewidth=2, color='black', label = 'exp fit')\n",
     "    plt.legend(loc=\"upper right\")\n",
     "    plt.ylabel(\"Energy (cm^(-1))\")\n",
     "    plt.xlabel(\"R (Ang)\")\n",
     "    plt.axhline(y=0, color='grey', linestyle=':')\n",
-    "    plt.title(\"V_lambda = %d\" %(i))\n",
-    "    plt.ylim(-15, 10)\n",
-    "    plt.xlim(2.5, 8)\n",
+    "    plt.title(\"V_lambda = %d\" %(j*sym))\n",
+    "    plt.ylim(y_i, y_f)\n",
+    "    plt.xlim(x_i, x_f)\n",
     "    plt.show()\n",
-    "    print('Double exponential RMSE = ',np.sqrt(np.average(np.power((exp_fit(x_dummy[21:], *parsx) \n",
-    "                                                                    - y_dummy[21:]),2))))\n",
-    "    rmsx[j]=np.sqrt(np.average(np.power((exp_fit(x_dummy[21:], *parsx) - y_dummy[21:]),2)))"
+    "    resid = exp_fit(x_dummy[ini_val:], *parsx) - y_dummy[ini_val:]   # residuals over the fitted range only\n",
+    "    rmsx[j] = np.sqrt(np.mean(resid**2))                              # RMSE of the three-term exponential fit\n",
+    "    print('Three-term exponential RMSE = ', rmsx[j])"
    ]
   },
   {
@@ -552,13 +556,12 @@
     }
    ],
    "source": [
-    "#spline100 data\n",
     "# save output for each V lambdas as required by molscat!\n",
-    "print('LAMBDA =  0,2,4,6,8,10,12,14,16,18,')\n",
-    "print('NTERM  = ', '3,'*10)\n",
-    "print('NPOWER = ', '0,'*30)\n",
+    "print('LAMBDA = ', ''.join('%d,' % (j*sym) for j in range(lm)))  # lambda list generated from lm and sym\n",
+    "print('NTERM  = ', '3,'*lm)       # three exponential terms per lambda\n",
+    "print('NPOWER = ', '0,'*(3*lm))   # one entry per exponential term\n",
     "print('A      = ')\n",
-    "for j in range (10):\n",
+    "for j in range (lm):\n",
     "    print(a[j],',',b[j],',',c[j],',')\n",
-    "print('E      =', '-1,-2,-3,'*10)"
+    "print('E      =', '-1,-2,-3,'*lm)"
    ]