More ex6 fixes (plotting fixes, spam model training, important words)
jtlowery committed Apr 17, 2017
1 parent df95336 commit 1aa29cb
Showing 6 changed files with 26 additions and 18 deletions.
17 changes: 13 additions & 4 deletions ex6/ex6.py
@@ -20,10 +20,13 @@
import numpy as np
import scipy.io
from sklearn import svm

from show import show
from dataset3Params import dataset3Params
from plotData import plotData
from visualizeBoundary import visualizeBoundary
from visualizeBoundaryLinear import visualizeBoundaryLinear
from gaussianKernel import gaussianKernel

# =============== Part 1: Loading and Visualizing Data ================
# We start the exercise by first loading and visualizing the dataset.
@@ -40,6 +43,7 @@

# Plot training data
plotData(X, y)
show()

input('Program paused. Press Enter to continue...')

@@ -62,6 +66,7 @@
clf = svm.SVC(C=C, kernel='linear', tol=1e-3, max_iter=20)
model = clf.fit(X, y)
visualizeBoundaryLinear(X, y, model)
show()

input('Program paused. Press Enter to continue...')

@@ -74,10 +79,10 @@
x1 = np.array([1, 2, 1])
x2 = np.array([0, 4, -1])
sigma = 2
# sim = gaussianKernel(x1, x2, sigma)
#
# print 'Gaussian Kernel between x1 = [1 2 1], x2 = [0 4 -1], sigma = %0.5f : ' \
# '\t%f\n(this value should be about 0.324652)\n' % (sigma, sim)
sim = gaussianKernel(x1, x2, sigma)

print('Gaussian Kernel between x1 = [1 2 1], x2 = [0 4 -1], sigma = %0.5f : '
'\t%f\n(this value should be about 0.324652)\n' % (sigma, sim))

input('Program paused. Press Enter to continue...')
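
For reference, the check enabled above exercises the student-implemented gaussianKernel. A minimal sketch of the RBF similarity it is expected to compute (not necessarily the repository's exact implementation; the function name here is illustrative):

import numpy as np

def gaussian_kernel_sketch(x1, x2, sigma):
    # RBF similarity: exp(-||x1 - x2||^2 / (2 * sigma^2))
    diff = x1 - x2
    return np.exp(-diff.dot(diff) / (2.0 * sigma ** 2))

# exp(-9 / 8) ~= 0.324652 for the vectors used above
print(gaussian_kernel_sketch(np.array([1, 2, 1]), np.array([0, 4, -1]), 2))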

@@ -95,6 +100,7 @@

# Plot training data
plotData(X, y)
show()

input('Program paused. Press Enter to continue...')

@@ -122,6 +128,7 @@
clf = svm.SVC(C=C, kernel='rbf', tol=1e-3, max_iter=200, gamma=gamma)
model = clf.fit(X, y)
visualizeBoundary(X, y, model)
show()

input('Program paused. Press Enter to continue...')

@@ -139,6 +146,7 @@

# Plot training data
plotData(X, y)
show()

input('Program paused. Press Enter to continue...')

@@ -160,5 +168,6 @@
clf = svm.SVC(C=C, kernel='rbf', tol=1e-3, max_iter=200, gamma=gamma)
model = clf.fit(X, y)
visualizeBoundary(X, y, model)
show()

input('Program paused. Press Enter to continue...')
15 changes: 7 additions & 8 deletions ex6/ex6_spam.py
@@ -77,12 +77,12 @@
print('(this may take 1 to 2 minutes) ...')

C = 0.1
clf = svm.SVC(C=C, kernel='linear', tol=1e-3, max_iter=200)
clf = svm.SVC(C=C, kernel='linear', tol=1e-4, max_iter=2000)
model = clf.fit(X, y)

p = model.predict(X)

print('Training Accuracy: %f', np.mean(np.double(p == y)) * 100)
print('Training Accuracy: %f\n' % (np.mean(np.double(p == y)) * 100))
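
Besides raising tol and max_iter, the print above switches from passing the accuracy as a second argument to print (which in Python 3 emits the literal format string followed by the value) to proper %-formatting. A small illustration with a made-up accuracy value:

acc = 99.825  # hypothetical value
print('Training Accuracy: %f', acc)     # -> Training Accuracy: %f 99.825
print('Training Accuracy: %f\n' % acc)  # -> Training Accuracy: 99.825000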

# =================== Part 4: Test Spam Classification ================
# After training the classifier, we can evaluate it on a test set. We have
@@ -92,13 +92,13 @@
# You will have Xtest, ytest in your environment
data = scipy.io.loadmat('spamTest.mat')
Xtest = data['Xtest']
ytest = data['ytest']
ytest = data['ytest'].flatten()

print('Evaluating the trained Linear SVM on a test set ...')

p = model.predict(Xtest)

print('Test Accuracy: %f', np. mean(np.double(p == ytest)) * 100)
print('Test Accuracy: %f\n' % (np.mean(np.double(p == ytest)) * 100))
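
The .flatten() added to ytest above is what makes this comparison meaningful: model.predict returns a 1-D array, while scipy.io.loadmat yields column vectors, and comparing shape (n,) against shape (n, 1) broadcasts to an (n, n) matrix, silently distorting the accuracy. A small illustration with made-up labels:

import numpy as np

p = np.array([1, 0, 1])            # shape (3,), like model.predict output
y_col = np.array([[1], [0], [0]])  # shape (3, 1), like loadmat output

print(np.mean(p == y_col))            # 0.444..., broadcast to a (3, 3) comparison
print(np.mean(p == y_col.flatten()))  # 0.666..., the intended element-wise accuracy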

# ================= Part 5: Top Predictors of Spam ====================
# Since the model we are training is a linear SVM, we can inspect the
@@ -108,16 +108,15 @@
# 'thinks' that these words are the most likely indicators of spam.

# Sort the weights and obtain the vocabulary list

t = sorted(list(enumerate(model.coef_[0])), key=lambda e: e[1], reverse=True)
d = OrderedDict(t)
idx = d.keys()
weight = d.values()
idx = list(d.keys())
weight = list(d.values())
vocabList = getVocabList()

print('Top predictors of spam: ')
for i in range(15):
print(' %-15s (%f)' %(vocabList[idx[i]], weight[i]))
print(' %-15s (%f)' % (vocabList[idx[i]], weight[i]))

print('Program paused. Press enter to continue.')
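
An equivalent way to get the same ranking without the OrderedDict detour, assuming model.coef_[0] holds one weight per vocabulary index and vocabList is the matching word list from getVocabList (both defined in the script above), would be roughly:

import numpy as np

weights = model.coef_[0]              # linear SVM weight per vocabulary word
top = np.argsort(weights)[::-1][:15]  # indices of the 15 largest weights
for i in top:
    print(' %-15s (%f)' % (vocabList[i], weights[i]))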

1 change: 0 additions & 1 deletion ex6/plotData.py
@@ -18,5 +18,4 @@ def plotData(X, y):
# Plot Examples
plt.plot(X[pos, 0], X[pos, 1], 'k+', linewidth=1, markersize=7)
plt.plot(X[neg, 0], X[neg, 1], 'ko', color='y', markersize=7)
show()
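
Removing show() from plotData means the helper only draws the scatter, and each driver script decides when to display the figure; this is why ex6.py above now calls show() explicitly after every plotData or visualizeBoundary call, so a boundary can be composed on top of the data before the window appears. The resulting helper presumably reads roughly as follows; the pos/neg computation is a guess, since it sits outside this hunk:

import matplotlib.pyplot as plt

def plotData(X, y):
    pos = y == 1  # boolean masks; assumed, not shown in this diff
    neg = y == 0
    plt.plot(X[pos, 0], X[pos, 1], 'k+', linewidth=1, markersize=7)
    plt.plot(X[neg, 0], X[neg, 1], 'ko', color='y', markersize=7)
    # no plt.show() here: callers overlay boundaries and then call show()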

2 changes: 1 addition & 1 deletion ex6/submit.py
@@ -26,7 +26,7 @@ def output(part_id):
x1 = np.sin(np.arange(1, 11))
x2 = np.cos(np.arange(1, 11))
ec = 'the quick brown fox jumped over the lazy dog'
wi = np.abs(np.round(x1 * 1863))
wi = np.array(np.abs(np.round(x1 * 1863)), dtype=int)
wi = np.hstack((wi, wi))

fname = srcs[part_id - 1].rsplit('.', 1)[0]
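
The dtype=int cast matters if wi is later used to index into arrays or the vocabulary (that usage is outside this hunk): NumPy rejects float arrays as indices. A toy illustration:

import numpy as np

vocab = np.array(['a', 'b', 'c', 'd'])
wi_float = np.abs(np.round(np.sin(np.arange(1, 5)) * 3))  # float dtype: [3., 3., 0., 2.]
# vocab[wi_float] raises IndexError (non-integer index array)
print(vocab[wi_float.astype(int)])                        # ['d' 'd' 'a' 'c']
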
7 changes: 4 additions & 3 deletions ex6/visualizeBoundary.py
@@ -1,7 +1,8 @@
import numpy as np
from plotData import plotData
from matplotlib import pyplot as plt

from plotData import plotData


def visualizeBoundary(X, y, model):
"""plots a non-linear decision boundary learned by the
@@ -21,5 +22,5 @@ def visualizeBoundary(X, y, model):
vals[:, i] = model.predict(this_X)

# Plot the SVM boundary
#contour(X1, X2, vals, [0 0], 'Color', 'b')
plt.contour(X1, X2, vals, levels=[0.0, 0.0])
# contour(X1, X2, vals, [0 0], 'Color', 'b')
plt.contour(X1, X2, vals, color='b', lw=0.5, levels=[0])
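
For context, visualizeBoundary evaluates the classifier over a grid and contours the predictions. A compact, self-contained sketch of that pattern (not the file's exact code; level 0.5 is used here as a common choice when the predictions are 0/1 labels):

import numpy as np
from matplotlib import pyplot as plt

def boundary_sketch(X, model, n=100):
    # Grid over the data range; predictions reshaped back to the grid.
    x1 = np.linspace(X[:, 0].min(), X[:, 0].max(), n)
    x2 = np.linspace(X[:, 1].min(), X[:, 1].max(), n)
    X1, X2 = np.meshgrid(x1, x2)
    vals = model.predict(np.c_[X1.ravel(), X2.ravel()]).reshape(X1.shape)
    plt.contour(X1, X2, vals, levels=[0.5], colors='b', linewidths=0.5)
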
2 changes: 1 addition & 1 deletion ex6/visualizeBoundaryLinear.py
@@ -1,5 +1,6 @@
import matplotlib.pyplot as plt
import numpy as np

from plotData import plotData


@@ -14,4 +15,3 @@ def visualizeBoundaryLinear(X, y, model):
yp = -(w[0] * xp + b) / w[1]
plotData(X, y)
plt.plot(xp, yp, '-b')
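
For a linear-kernel SVC the boundary is the line w[0]*x + w[1]*y + b = 0, which is exactly what the xp/yp lines above compute. A self-contained sketch of the same idea, assuming a fitted sklearn SVC with kernel='linear':

import numpy as np
import matplotlib.pyplot as plt

def linear_boundary_sketch(X, model):
    w = model.coef_[0]            # weight vector of the fitted linear SVM
    b = model.intercept_[0]
    xp = np.linspace(X[:, 0].min(), X[:, 0].max(), 100)
    yp = -(w[0] * xp + b) / w[1]  # solve w . x + b = 0 for the second coordinate
    plt.plot(xp, yp, '-b')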
