diff --git a/Activity4_Data1.xlsx b/Activity4_Data1.xlsx
new file mode 100644
index 0000000..14c7631
Binary files /dev/null and b/Activity4_Data1.xlsx differ
diff --git a/Activity4_Data2.xlsx b/Activity4_Data2.xlsx
new file mode 100644
index 0000000..dd4b9e7
Binary files /dev/null and b/Activity4_Data2.xlsx differ
diff --git a/Activity_1_Data.xlsx b/Activity_1_Data.xlsx
index 7327af2..f6e8b07 100644
Binary files a/Activity_1_Data.xlsx and b/Activity_1_Data.xlsx differ
diff --git a/Activity_2_Data.xlsx b/Activity_2_Data.xlsx
new file mode 100644
index 0000000..762ba9d
Binary files /dev/null and b/Activity_2_Data.xlsx differ
diff --git a/Activity_3_Data.xlsx b/Activity_3_Data.xlsx
new file mode 100644
index 0000000..ea77478
Binary files /dev/null and b/Activity_3_Data.xlsx differ
diff --git a/Fit_1st_order_curve(straight_line).py b/Fit_1st_order_curve(straight_line).py
new file mode 100644
index 0000000..cb2e921
--- /dev/null
+++ b/Fit_1st_order_curve(straight_line).py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Feb 26 14:33:05 2018
+
+@author: sanath
+"""
+
+#fit a 1st order curve (straight line) y = a0 + a1*x to x and y
+#using the closed-form (formula) method
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+
+def a1(x,y):
+    #slope: a1 = (n*sum(x*y) - sum(x)*sum(y)) / (n*sum(x^2) - (sum(x))^2)
+    x=np.array(x)
+    y=np.array(y)
+    val=((len(x)*sum(x*y))-(sum(x)*sum(y)))/((len(x)*sum(x*x))-(sum(x)*sum(x)))
+    return val
+
+def a0(x,y,res_a1):
+    #intercept: a0 = mean(y) - a1*mean(x)
+    x=np.array(x)
+    y=np.array(y)
+    val=(sum(y)/len(y))-((res_a1*sum(x))/len(x))
+    return val
+
+dfs=pd.read_excel("Activity_2_Data.xlsx",sheet_name="Sheet1")
+x=np.array(dfs.iloc[0:,0])
+y=np.array(dfs.iloc[0:,4])
+
+if len(x)!=len(y):
+    print("Enter equal number of samples for both x and y")
+    quit()
+
+plt.figure(1)
+plt.plot(x,y)
+plt.show()
+
+res_a1=a1(x,y)
+res_a0=a0(x,y,res_a1)
+print("y =",res_a0,'+',res_a1,'x')
+
+#evaluate the fitted line on a fresh range and plot it over the data
+x_new=np.arange(1,101)
+y_new=[]
+for ele in x_new:
+    y_new.append(res_a0+(res_a1*ele))
+
+plt.figure(2)
+plt.plot(x,y)
+plt.plot(x_new,y_new)
+plt.show()
+
+pred_x=float(input("Enter the value to be predicted:- "))
+pred_y=res_a0+(res_a1*pred_x)
+print("Predicted value for ",pred_x," is ",pred_y)
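A quick way to sanity-check the closed-form coefficients above is to compare a1 and a0 against numpy.polyfit on data with a known slope and intercept. This is a minimal sketch, assuming a small synthetic dataset; the values and noise level are illustrative, not taken from Activity_2_Data.xlsx:

import numpy as np

# synthetic line y = 2 + 3x with a little noise (illustrative values)
rng = np.random.default_rng(0)
x = np.arange(1, 21, dtype=float)
y = 2.0 + 3.0 * x + rng.normal(0, 0.1, x.size)

n = len(x)
# same normal-equation formulas as the script above
a1 = (n * np.sum(x * y) - np.sum(x) * np.sum(y)) / (n * np.sum(x * x) - np.sum(x) ** 2)
a0 = np.mean(y) - a1 * np.mean(x)

# np.polyfit with deg=1 returns [slope, intercept]; the pairs should agree closely
slope, intercept = np.polyfit(x, y, 1)
print(a1, slope)       # both close to 3.0
print(a0, intercept)   # both close to 2.0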
diff --git a/Gradient_descent_with_bias.py b/Gradient_descent_with_bias.py
new file mode 100644
index 0000000..7672482
--- /dev/null
+++ b/Gradient_descent_with_bias.py
@@ -0,0 +1,77 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Mar 12 17:17:17 2018
+
+@author: sanath
+"""
+
+#gradient descent for a 1st order curve (straight line) with a bias term
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+
+dfs=pd.read_excel("Activity_2_Data.xlsx",sheet_name="Sheet1")
+x=np.array(dfs.iloc[0:,0])
+y=np.array(dfs.iloc[0:,4])
+plt.figure(1)
+plt.xlabel("X")
+plt.ylabel("Y")
+plt.plot(x,y)
+plt.show()
+
+X=np.array(x)
+Y=np.array(y)
+m=len(X)
+
+#initial guesses for the parameters and the learning rate
+theta0=100
+init_theta0=theta0
+theta1=25
+init_theta1=theta1
+alpha=0.0001
+
+print("Learning rate is ",alpha)
+print("Number of samples or data is",m)
+
+#batch gradient descent: accumulate both gradients over all samples,
+#then update theta0 and theta1 simultaneously until both updates
+#fall below the tolerance
+count=0
+while(True):
+    cost_J0=0
+    cost_J1=0
+    for i in range(m):
+        h_x=theta0+theta1*X[i]
+        cost_J0=cost_J0+(h_x-Y[i])
+        cost_J1=cost_J1+((h_x-Y[i])*X[i])
+    count+=1
+    #uncomment to see the variation of theta
+    #print("iteration",count,"theta0",theta0,"theta1",theta1)
+
+    temp0=theta0-((alpha/m)*cost_J0)
+    temp1=theta1-((alpha/m)*cost_J1)
+
+    if((np.abs(temp0-theta0)<0.00001)and(np.abs(temp1-theta1)<0.00001)):
+        break
+    theta0=temp0
+    theta1=temp1
+
+#plot the fitted hypothesis over the data
+h=[]
+for i in range(m):
+    h.append(theta1*X[i]+theta0)
+h=np.array(h)
+plt.figure(2)
+plt.xlabel("X")
+plt.ylabel("Hypothesis")
+plt.plot(X,Y)
+plt.plot(X,h)
+plt.show()
+
+print("Assumed theta0 is ",init_theta0,"predicted theta0 is ",theta0)
+print("Assumed theta1 is ",init_theta1,"predicted theta1 is ",theta1)
+test_x=float(input("Enter a test sample:"))
+predicted_y=(test_x*theta1)+theta0
+print("The value of predicted y is",predicted_y)
+
+print("Note the importance of feature scaling: it took",count,"iterations to compute theta0 and theta1. Scaling x and y down to a common range (normalisation) reduces the iteration count, as does starting from theta0 and theta1 chosen close to the final fit by inspecting the plot.")
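The per-sample loop above can be collapsed into NumPy array operations without changing the result. Below is a minimal vectorized sketch of the same simultaneous update rule; fit_line_gd, the synthetic X and Y, and the tolerance values are illustrative assumptions, not part of the original script:

import numpy as np

def fit_line_gd(X, Y, alpha=1e-4, tol=1e-7, max_iter=1_000_000):
    # batch gradient descent for y = theta0 + theta1*x,
    # updating both parameters simultaneously on each pass
    theta0, theta1 = 0.0, 0.0
    m = len(X)
    for _ in range(max_iter):
        err = theta0 + theta1 * X - Y            # residuals for all samples at once
        temp0 = theta0 - (alpha / m) * err.sum()
        temp1 = theta1 - (alpha / m) * (err * X).sum()
        if abs(temp0 - theta0) < tol and abs(temp1 - theta1) < tol:
            break
        theta0, theta1 = temp0, temp1
    return theta0, theta1

# on a noiseless line the parameters should approach the true values
X = np.arange(1, 51, dtype=float)
Y = 5.0 + 0.8 * X
print(fit_line_gd(X, Y))   # roughly (5.0, 0.8)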
diff --git a/SSD_of_array.py b/SSD_of_array.py
new file mode 100644
index 0000000..58cfe98
--- /dev/null
+++ b/SSD_of_array.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Feb 27 14:52:23 2018
+
+@author: sanath
+"""
+
+#SSD (sum of squared differences) cost implementation for theta0=0:
+#grid-search the slope theta1, then refine it with gradient descent
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+
+dfs=pd.read_excel("Activity_2_Data.xlsx",sheet_name="Sheet1")
+x=np.array(dfs.iloc[0:,0])
+y=np.array(dfs.iloc[0:,4])
+plt.figure(1)
+plt.plot(x,y)
+plt.show()
+
+#evaluate the cost J(theta1) on a grid of candidate slopes
+range_of_theta=[-10,10]
+number_of_samples=50
+theta=np.linspace(range_of_theta[0],range_of_theta[1],number_of_samples)
+min_cost=999999
+cost_array=[]
+m=len(y)
+for i in range(number_of_samples):
+    cost_j=0
+    for j in range(m):
+        cost_j=cost_j+(theta[i]*x[j]-y[j])**2
+    cost_j=1/(2.0*m)*cost_j
+    cost_array.append(cost_j)
+    if cost_j<min_cost:
+        min_cost=cost_j
+        best_theta=theta[i]
+
+plt.figure(2)
+plt.xlabel("theta1")
+plt.ylabel("Cost J")
+plt.plot(theta,cost_array)
+plt.show()
+
+#refine the best grid slope with gradient descent, theta0 fixed at 0
+X=np.array(x)
+Y=np.array(y)
+theta0=0
+theta1=best_theta
+init_theta1=theta1
+alpha=0.0001            #learning rate, assumed to match Gradient_descent_with_bias.py
+count=0
+theta_prev=theta1+1     #ensure the loop body runs at least once
+while(np.abs(theta1-theta_prev)>0.001):
+    cost_J=0
+    for i in range(m):
+        cost_J=cost_J+(((theta1*X[i])-Y[i])*X[i])
+    count+=1
+    temp=theta1-(alpha/m)*cost_J
+    theta_prev=theta1
+    theta1=temp
+
+h=[]
+for i in range(m):
+    h.append(theta1*X[i]+theta0)
+h=np.array(h)
+plt.figure(3)
+plt.xlabel("X")
+plt.ylabel("Hypothesis")
+plt.plot(X,Y)
+plt.plot(X,h)
+plt.show()
+
+print("number of iterations it took to converge is ",count)
+test_x=float(input("Enter a test sample:"))
+predicted_y=(test_x*theta1)+theta0
+print("theta0",theta0,"theta1",theta1)
+print("Assumed theta1 ",init_theta1,"new theta1 is",theta1)
+print("The value of predicted y is",predicted_y)
+print(" ")
+print("Note the importance of feature scaling: it took",count,"iterations to compute theta1. Scaling x and y down to a common range (normalisation) reduces the iteration count, as does starting from a slope already close to the final fit, as the grid search here does.")
diff --git a/standard_deviation.py b/standard_deviation.py
new file mode 100644
index 0000000..1de53fd
--- /dev/null
+++ b/standard_deviation.py
@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Apr 5 18:45:01 2018
+
+@author: sanath
+"""
+
+import math
+
+def mean(arr,n):
+    return sum(arr)/n
+
+def standard_deviation(arr,n):
+    #population standard deviation: sqrt of the mean squared deviation
+    u=mean(arr,n)
+    temp=0
+    for i in range(n):
+        temp=temp+((arr[i]-u)**2)
+    temp=temp/n
+    return math.sqrt(temp)
+
+n=int(input())
+arr=[int(x) for x in input().split()]
+print("{0:.1f}".format(standard_deviation(arr,n)))
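Because standard_deviation divides by n rather than n-1, it computes the population standard deviation. A quick check against the standard library, using an illustrative input list (statistics.pstdev also divides by n, so the two should agree):

import statistics

arr = [2, 4, 4, 4, 5, 5, 7, 9]   # illustrative data: mean 5, variance 4
n = len(arr)
u = sum(arr) / n
pop_sd = (sum((v - u) ** 2 for v in arr) / n) ** 0.5
print(pop_sd)                    # 2.0
print(statistics.pstdev(arr))    # 2.0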