Skip to content

Commit 056ec59

Browse files
author
bhanu_mittal
committed
Added code for linear regression
0 parents  commit 056ec59

6 files changed

+970
-0
lines changed

ML_grad_desc.py

+99
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
#!/usr/bin/env python2
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Created on Tue Mar 13 15:29:14 2018
5+
6+
@author: bh387886
7+
"""
8+
9+
import pandas as pd
10+
import numpy as np
11+
import random
12+
import matplotlib.pyplot as plt
13+
14+
15+
16+
def Grad_desc(X, y, m=0, iters=10000, alpha=0.001, c=None):
    """Fit y ~ m*x + c by batch gradient descent on the half-MSE cost.

    Parameters
    ----------
    X, y : equal-length sequences of floats.
    m : initial slope (default 0).
    iters : number of gradient-descent iterations.
    alpha : learning rate.
    c : initial intercept.  When None (the default, matching the original
        behaviour) a random value in {0.00, 0.01, ..., 0.30} is used,
        which makes repeated runs non-reproducible -- pass an explicit c
        for deterministic results.

    Returns
    -------
    [m, c, cost] where cost = sum((y - (m*x + c))**2) / (2*N), the final
    half-mean-squared error.

    Raises
    ------
    ValueError if y is empty (the original divided by zero).
    """
    N = len(y)
    if N == 0:
        raise ValueError('X and y must be non-empty')
    print('Iterations: %s, Learning Rate: %s' % (iters, alpha))
    if c is None:
        # Original behaviour: random intercept in [0.00, 0.30].
        c = random.randint(0, 30) / 100.0
    for _ in range(iters):
        # Batch gradients of the half-MSE cost w.r.t. slope and intercept.
        m_grad = sum(-(y[j] - (m * X[j] + c)) * X[j] for j in range(N)) / N
        c_grad = sum(-(y[j] - (m * X[j] + c)) for j in range(N)) / N
        m -= alpha * m_grad
        c -= alpha * c_grad
    cost = sum((y[j] - (m * X[j] + c)) ** 2 for j in range(N))
    return [m, c, cost / (2 * N)]
33+
34+
35+
36+
37+
headers = ['cylinders', 'displacement', 'horsepower', 'weight',
           'acceleration', 'model year', 'origin']

mpg = []       # Dependent variable: mpg (first whitespace-separated column)
features = []  # Independent variables: columns 1-7 (trailing car name dropped)

# Preparing the data.  auto-mpg.data.txt rows are whitespace-separated:
# mpg, cylinders, displacement, horsepower, weight, acceleration,
# model year, origin, car name.
with open('auto-mpg.data.txt') as raw:  # 'with' closes the file even on error
    for line in raw:
        var = []
        for i, tok in enumerate(line.split()):
            if i == 0:
                mpg.append([float(tok)])
            elif i < 8:
                if tok == '?':  # handling missing horsepower values
                    tok = '0'   # (set to 0; patched to the mean below)
                if i == 4:
                    # weight carries a trailing '.' in the raw file; strip it
                    var.append(float(tok.replace('.', '')))
                else:
                    var.append(float(tok))
        features.append(var)

df_x = pd.DataFrame.from_records(features, columns=headers)
df_y = pd.DataFrame.from_records(mpg, columns=['mpg'])

# Replace 0 (missing) horsepower with the average of the known values.
# The original hard-coded 6 missing rows; count them instead so the code
# survives a data file with a different number of '?' entries.
n_missing = int((df_x['horsepower'] == 0).sum())
if 0 < n_missing < len(df_x):
    avg_bhp = (np.average(df_x['horsepower']) * len(df_x)) / (len(df_x) - n_missing)
    df_x['horsepower'] = df_x['horsepower'].replace(0, avg_bhp)
72+
73+
x_org = df_x['horsepower']  # change this if you want another independent var
y_org = df_y['mpg']

# Feature scaling: min-max normalise both variables into [0, 1] so the
# fixed learning rate in Grad_desc behaves well.
x_range = x_org.max() - x_org.min()
y_range = y_org.max() - y_org.min()
x_scaled = (x_org - x_org.min()) / x_range
y_scaled = (y_org - y_org.min()) / y_range

ans = Grad_desc(x_scaled.tolist(), y_scaled.tolist())

# Predictions mapped back to the original (unscaled) units.
y_ans = ans[0] * x_scaled + ans[1]
y_ans = y_ans * y_range + y_org.min()

# De-scale the fitted line analytically.  From
#   y = yr*(m*(x - xmin)/xr + c) + ymin
# the unscaled slope is m*yr/xr and the intercept is
# c*yr + ymin - slope*xmin.  The original computed the intercept as
# c*yr + ymin, omitting the -slope*xmin term, and derived the slope from
# two predicted points (dividing by x_org[1]-x_org[0], which can be 0).
ans[0] = ans[0] * y_range / x_range
ans[1] = ans[1] * y_range + y_org.min() - ans[0] * x_org.min()

print('ERROR = %s' % ans[2])
print('Equation of best-fit-line: y=%sx + %s' % (ans[0], ans[1]))

plt.scatter(x_org, df_y, color='black')
plt.plot(x_org, y_ans, color='blue', linewidth=3)
plt.show()  # required when run as a plain script (no interactive backend)

ML_linear_reg.py

+115
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
#!/usr/bin/env python2
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Created on Tue Mar 13 10:27:11 2018
5+
6+
@author: bh387886
7+
"""
8+
9+
10+
import pandas as pd
11+
import numpy as np
12+
import matplotlib.pyplot as plt
13+
import sklearn
14+
from sklearn.linear_model import LinearRegression
15+
16+
headers = ['cylinders', 'displacement', 'horsepower', 'weight',
           'acceleration', 'model year', 'origin']

mpg = []       # Dependent variable: mpg (first whitespace-separated column)
features = []  # Independent variables: columns 1-7 (trailing car name dropped)

# Preparing the data.  auto-mpg.data.txt rows are whitespace-separated:
# mpg, cylinders, displacement, horsepower, weight, acceleration,
# model year, origin, car name.
with open('auto-mpg.data.txt') as raw:  # 'with' closes the file even on error
    for line in raw:
        var = []
        for i, tok in enumerate(line.split()):
            if i == 0:
                mpg.append([float(tok)])
            elif i < 8:
                if tok == '?':  # handling missing horsepower values
                    tok = '0'   # (set to 0; patched to the mean below)
                if i == 4:
                    # weight carries a trailing '.' in the raw file; strip it
                    var.append(float(tok.replace('.', '')))
                else:
                    var.append(float(tok))
        features.append(var)

df_x = pd.DataFrame.from_records(features, columns=headers)
df_y = pd.DataFrame.from_records(mpg, columns=['mpg'])

# Replace 0 (missing) horsepower with the average of the known values.
# The original hard-coded 6 missing rows; count them instead so the code
# survives a data file with a different number of '?' entries.
n_missing = int((df_x['horsepower'] == 0).sum())
if 0 < n_missing < len(df_x):
    avg_bhp = (np.average(df_x['horsepower']) * len(df_x)) / (len(df_x) - n_missing)
    df_x['horsepower'] = df_x['horsepower'].replace(0, avg_bhp)
51+
52+
lm = LinearRegression()
lm1 = LinearRegression()  # reused below for the single-variable fit

# 'import sklearn' alone does not bind the model_selection submodule as an
# attribute of the sklearn package; import it explicitly or the
# sklearn.model_selection lookup below raises AttributeError.
import sklearn.model_selection

X_train, X_test, Y_train, Y_test = sklearn.model_selection.train_test_split(
    df_x, df_y, test_size=0.2, random_state=5)

######################## USING ALL INDEPENDENT VARS ###########################

lm.fit(X_train, Y_train)
pred_test = lm.predict(X_test)

print('coefficients :(all vars)')
print(lm.coef_)

print('Intercept :(all vars)')
print(lm.intercept_)

# NOTE: LinearRegression.score returns the R^2 statistic of the held-out
# split, not a classification accuracy.
print('Accuracy Score(all vars): %s' % lm.score(X_test, Y_test))

residues = (Y_test - pred_test)  # kept for inspection in an interactive run

###############################################################################

print('####################################################')
83+
##################### USING ONLY ONE OF THE VARS (horsepower) #################

# Keep the column as a 2-D (n, 1) DataFrame: sklearn estimators expect a
# 2-D X.  The original indexed the pandas Series with [:, np.newaxis],
# which modern pandas no longer supports (multidimensional Series
# indexing was removed).
df_new = df_x[['horsepower']]

# 'import sklearn' alone does not expose sklearn.model_selection; import
# the submodule explicitly (idempotent if already imported above).
import sklearn.model_selection

X_train1, X_test1, Y_train1, Y_test1 = sklearn.model_selection.train_test_split(
    df_new, df_y, test_size=0.2, random_state=5)

lm1.fit(X_train1, Y_train1)
pred_test1 = lm1.predict(X_test1)

print('coefficients :(single var)')
print(lm1.coef_)

print('Intercept :(single var)')
print(lm1.intercept_)

# NOTE: this is the R^2 statistic of the held-out split.
print('Accuracy Score(single vars): %s' % lm1.score(X_test1, Y_test1))

residues = (Y_test1 - pred_test1)  # kept for inspection in an interactive run

plt.scatter(X_test1, Y_test1, color='black')
plt.plot(X_test1, pred_test1, color='blue', linewidth=3)
plt.show()  # required when run as a plain script (no interactive backend)

###############################################################################

ReadMe.docx

92.1 KB
Binary file not shown.

0 commit comments

Comments
 (0)