More ex6 fixes (plotting fixes, spam model training, important words)
jtlowery committed Apr 17, 2017
1 parent df95336 commit 1aa29cb
Showing 6 changed files with 26 additions and 18 deletions.
17 changes: 13 additions & 4 deletions ex6/ex6.py
@@ -20,10 +20,13 @@
import numpy as np
import scipy.io
from sklearn import svm

from show import show
from dataset3Params import dataset3Params
from plotData import plotData
from visualizeBoundary import visualizeBoundary
from visualizeBoundaryLinear import visualizeBoundaryLinear
from gaussianKernel import gaussianKernel

# =============== Part 1: Loading and Visualizing Data ================
# We start the exercise by first loading and visualizing the dataset.
@@ -40,6 +43,7 @@

# Plot training data
plotData(X, y)
show()

input('Program paused. Press Enter to continue...')

@@ -62,6 +66,7 @@
clf = svm.SVC(C=C, kernel='linear', tol=1e-3, max_iter=20)
model = clf.fit(X, y)
visualizeBoundaryLinear(X, y, model)
show()

input('Program paused. Press Enter to continue...')

@@ -74,10 +79,10 @@
x1 = np.array([1, 2, 1])
x2 = np.array([0, 4, -1])
sigma = 2
# sim = gaussianKernel(x1, x2, sigma)
#
# print 'Gaussian Kernel between x1 = [1 2 1], x2 = [0 4 -1], sigma = %0.5f : ' \
# '\t%f\n(this value should be about 0.324652)\n' % (sigma, sim)
sim = gaussianKernel(x1, x2, sigma)

print('Gaussian Kernel between x1 = [1 2 1], x2 = [0 4 -1], sigma = %0.5f : '
'\t%f\n(this value should be about 0.324652)\n' % (sigma, sim))

input('Program paused. Press Enter to continue...')
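
For reference, the check enabled above exercises the student-implemented gaussianKernel. A minimal sketch of the RBF similarity it is expected to compute (not necessarily the repository's exact implementation; the function name here is illustrative):

import numpy as np

def gaussian_kernel_sketch(x1, x2, sigma):
    # RBF similarity: exp(-||x1 - x2||^2 / (2 * sigma^2))
    diff = x1 - x2
    return np.exp(-diff.dot(diff) / (2.0 * sigma ** 2))

# exp(-9 / 8) ~= 0.324652 for the vectors used above
print(gaussian_kernel_sketch(np.array([1, 2, 1]), np.array([0, 4, -1]), 2))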

@@ -95,6 +100,7 @@

# Plot training data
plotData(X, y)
show()

input('Program paused. Press Enter to continue...')

@@ -122,6 +128,7 @@
clf = svm.SVC(C=C, kernel='rbf', tol=1e-3, max_iter=200, gamma=gamma)
model = clf.fit(X, y)
visualizeBoundary(X, y, model)
show()

input('Program paused. Press Enter to continue...')

@@ -139,6 +146,7 @@

# Plot training data
plotData(X, y)
show()

input('Program paused. Press Enter to continue...')

@@ -160,5 +168,6 @@
clf = svm.SVC(C=C, kernel='rbf', tol=1e-3, max_iter=200, gamma=gamma)
model = clf.fit(X, y)
visualizeBoundary(X, y, model)
show()

input('Program paused. Press Enter to continue...')
15 changes: 7 additions & 8 deletions ex6/ex6_spam.py
@@ -77,12 +77,12 @@
print('(this may take 1 to 2 minutes) ...')

C = 0.1
clf = svm.SVC(C=C, kernel='linear', tol=1e-3, max_iter=200)
clf = svm.SVC(C=C, kernel='linear', tol=1e-4, max_iter=2000)
model = clf.fit(X, y)

p = model.predict(X)

print('Training Accuracy: %f', np.mean(np.double(p == y)) * 100)
print('Training Accuracy: %f\n' % (np.mean(np.double(p == y)) * 100))
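
Besides raising tol and max_iter, the print above switches from passing the accuracy as a second argument to print (which in Python 3 emits the literal format string followed by the value) to proper %-formatting. A small illustration with a made-up accuracy value:

acc = 99.825  # hypothetical value
print('Training Accuracy: %f', acc)     # -> Training Accuracy: %f 99.825
print('Training Accuracy: %f\n' % acc)  # -> Training Accuracy: 99.825000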

# =================== Part 4: Test Spam Classification ================
# After training the classifier, we can evaluate it on a test set. We have
@@ -92,13 +92,13 @@
# You will have Xtest, ytest in your environment
data = scipy.io.loadmat('spamTest.mat')
Xtest = data['Xtest']
ytest = data['ytest']
ytest = data['ytest'].flatten()

print('Evaluating the trained Linear SVM on a test set ...')

p = model.predict(Xtest)

print('Test Accuracy: %f', np. mean(np.double(p == ytest)) * 100)
print('Test Accuracy: %f\n' % (np.mean(np.double(p == ytest)) * 100))
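
The .flatten() added to ytest above is what makes this comparison meaningful: model.predict returns a 1-D array, while scipy.io.loadmat yields column vectors, and comparing shape (n,) against shape (n, 1) broadcasts to an (n, n) matrix, silently distorting the accuracy. A small illustration with made-up labels:

import numpy as np

p = np.array([1, 0, 1])            # shape (3,), like model.predict output
y_col = np.array([[1], [0], [0]])  # shape (3, 1), like loadmat output

print(np.mean(p == y_col))            # 0.444..., broadcast to a (3, 3) comparison
print(np.mean(p == y_col.flatten()))  # 0.666..., the intended element-wise accuracy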

# ================= Part 5: Top Predictors of Spam ====================
# Since the model we are training is a linear SVM, we can inspect the
@@ -108,16 +108,15 @@
# 'thinks' that these words are the most likely indicators of spam.

# Sort the weights and obtain the vocabulary list

t = sorted(list(enumerate(model.coef_[0])), key=lambda e: e[1], reverse=True)
d = OrderedDict(t)
idx = d.keys()
weight = d.values()
idx = list(d.keys())
weight = list(d.values())
vocabList = getVocabList()

print('Top predictors of spam: ')
for i in range(15):
print(' %-15s (%f)' %(vocabList[idx[i]], weight[i]))
print(' %-15s (%f)' % (vocabList[idx[i]], weight[i]))

print('Program paused. Press enter to continue.')
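
An equivalent way to get the same ranking without the OrderedDict detour, assuming model.coef_[0] holds one weight per vocabulary index and vocabList is the matching word list from getVocabList (both defined in the script above), would be roughly:

import numpy as np

weights = model.coef_[0]              # linear SVM weight per vocabulary word
top = np.argsort(weights)[::-1][:15]  # indices of the 15 largest weights
for i in top:
    print(' %-15s (%f)' % (vocabList[i], weights[i]))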

1 change: 0 additions & 1 deletion ex6/plotData.py
@@ -18,5 +18,4 @@ def plotData(X, y):
# Plot Examples
plt.plot(X[pos, 0], X[pos, 1], 'k+', linewidth=1, markersize=7)
plt.plot(X[neg, 0], X[neg, 1], 'ko', color='y', markersize=7)
show()
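
Removing show() from plotData means the helper only draws the scatter, and each driver script decides when to display the figure; this is why ex6.py above now calls show() explicitly after every plotData or visualizeBoundary call, so a boundary can be composed on top of the data before the window appears. The resulting helper presumably reads roughly as follows; the pos/neg computation is a guess, since it sits outside this hunk:

import matplotlib.pyplot as plt

def plotData(X, y):
    pos = y == 1  # boolean masks; assumed, not shown in this diff
    neg = y == 0
    plt.plot(X[pos, 0], X[pos, 1], 'k+', linewidth=1, markersize=7)
    plt.plot(X[neg, 0], X[neg, 1], 'ko', color='y', markersize=7)
    # no plt.show() here: callers overlay boundaries and then call show()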

2 changes: 1 addition & 1 deletion ex6/submit.py
@@ -26,7 +26,7 @@ def output(part_id):
x1 = np.sin(np.arange(1, 11))
x2 = np.cos(np.arange(1, 11))
ec = 'the quick brown fox jumped over the lazy dog'
wi = np.abs(np.round(x1 * 1863))
wi = np.array(np.abs(np.round(x1 * 1863)), dtype=int)
wi = np.hstack((wi, wi))

fname = srcs[part_id - 1].rsplit('.', 1)[0]
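
The dtype=int cast matters if wi is later used to index into arrays or the vocabulary (that usage is outside this hunk): NumPy rejects float arrays as indices. A toy illustration:

import numpy as np

vocab = np.array(['a', 'b', 'c', 'd'])
wi_float = np.abs(np.round(np.sin(np.arange(1, 5)) * 3))  # float dtype: [3., 3., 0., 2.]
# vocab[wi_float] raises IndexError (non-integer index array)
print(vocab[wi_float.astype(int)])                        # ['d' 'd' 'a' 'c']
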
7 changes: 4 additions & 3 deletions ex6/visualizeBoundary.py
@@ -1,7 +1,8 @@
import numpy as np
from plotData import plotData
from matplotlib import pyplot as plt

from plotData import plotData


def visualizeBoundary(X, y, model):
"""plots a non-linear decision boundary learned by the
@@ -21,5 +22,5 @@ def visualizeBoundary(X, y, model):
vals[:, i] = model.predict(this_X)

# Plot the SVM boundary
#contour(X1, X2, vals, [0 0], 'Color', 'b')
plt.contour(X1, X2, vals, levels=[0.0, 0.0])
# contour(X1, X2, vals, [0 0], 'Color', 'b')
plt.contour(X1, X2, vals, color='b', lw=0.5, levels=[0])
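
For context, visualizeBoundary evaluates the classifier over a grid and contours the predictions. A compact, self-contained sketch of that pattern (not the file's exact code; level 0.5 is used here as a common choice when the predictions are 0/1 labels):

import numpy as np
from matplotlib import pyplot as plt

def boundary_sketch(X, model, n=100):
    # Grid over the data range; predictions reshaped back to the grid.
    x1 = np.linspace(X[:, 0].min(), X[:, 0].max(), n)
    x2 = np.linspace(X[:, 1].min(), X[:, 1].max(), n)
    X1, X2 = np.meshgrid(x1, x2)
    vals = model.predict(np.c_[X1.ravel(), X2.ravel()]).reshape(X1.shape)
    plt.contour(X1, X2, vals, levels=[0.5], colors='b', linewidths=0.5)
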
2 changes: 1 addition & 1 deletion ex6/visualizeBoundaryLinear.py
@@ -1,5 +1,6 @@
import matplotlib.pyplot as plt
import numpy as np

from plotData import plotData


@@ -14,4 +15,3 @@ def visualizeBoundaryLinear(X, y, model):
yp = -(w[0] * xp + b) / w[1]
plotData(X, y)
plt.plot(xp, yp, '-b')
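
For a linear-kernel SVC the boundary is the line w[0]*x + w[1]*y + b = 0, which is exactly what the xp/yp lines above compute. A self-contained sketch of the same idea, assuming a fitted sklearn SVC with kernel='linear':

import numpy as np
import matplotlib.pyplot as plt

def linear_boundary_sketch(X, model):
    w = model.coef_[0]            # weight vector of the fitted linear SVM
    b = model.intercept_[0]
    xp = np.linspace(X[:, 0].min(), X[:, 0].max(), 100)
    yp = -(w[0] * xp + b) / w[1]  # solve w . x + b = 0 for the second coordinate
    plt.plot(xp, yp, '-b')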
