From 1aa29cb1780aa68aee8c4d11758bd4a5e3945dc2 Mon Sep 17 00:00:00 2001 From: Joel Lowery Date: Mon, 17 Apr 2017 01:49:43 -0500 Subject: [PATCH] More ex6 fixes (plotting fixes, spam model training, important words) --- ex6/ex6.py | 17 +++++++++++++---- ex6/ex6_spam.py | 15 +++++++-------- ex6/plotData.py | 1 - ex6/submit.py | 2 +- ex6/visualizeBoundary.py | 7 ++++--- ex6/visualizeBoundaryLinear.py | 2 +- 6 files changed, 26 insertions(+), 18 deletions(-) diff --git a/ex6/ex6.py b/ex6/ex6.py index 716794c..3e93b45 100644 --- a/ex6/ex6.py +++ b/ex6/ex6.py @@ -20,10 +20,13 @@ import numpy as np import scipy.io from sklearn import svm + +from show import show from dataset3Params import dataset3Params from plotData import plotData from visualizeBoundary import visualizeBoundary from visualizeBoundaryLinear import visualizeBoundaryLinear +from gaussianKernel import gaussianKernel # =============== Part 1: Loading and Visualizing Data ================ # We start the exercise by first loading and visualizing the dataset. @@ -40,6 +43,7 @@ # Plot training data plotData(X, y) +show() input('Program paused. Press Enter to continue...') @@ -62,6 +66,7 @@ clf = svm.SVC(C=C, kernel='linear', tol=1e-3, max_iter=20) model = clf.fit(X, y) visualizeBoundaryLinear(X, y, model) +show() input('Program paused. Press Enter to continue...') @@ -74,10 +79,10 @@ x1 = np.array([1, 2, 1]) x2 = np.array([0, 4, -1]) sigma = 2 -# sim = gaussianKernel(x1, x2, sigma) -# -# print 'Gaussian Kernel between x1 = [1 2 1], x2 = [0 4 -1], sigma = %0.5f : ' \ -# '\t%f\n(this value should be about 0.324652)\n' % (sigma, sim) +sim = gaussianKernel(x1, x2, sigma) + +print('Gaussian Kernel between x1 = [1 2 1], x2 = [0 4 -1], sigma = %0.5f : ' + '\t%f\n(this value should be about 0.324652)\n' % (sigma, sim)) input('Program paused. Press Enter to continue...') @@ -95,6 +100,7 @@ # Plot training data plotData(X, y) +show() input('Program paused. Press Enter to continue...') @@ -122,6 +128,7 @@ clf = svm.SVC(C=C, kernel='rbf', tol=1e-3, max_iter=200, gamma=gamma) model = clf.fit(X, y) visualizeBoundary(X, y, model) +show() input('Program paused. Press Enter to continue...') @@ -139,6 +146,7 @@ # Plot training data plotData(X, y) +show() input('Program paused. Press Enter to continue...') @@ -160,5 +168,6 @@ clf = svm.SVC(C=C, kernel='rbf', tol=1e-3, max_iter=200, gamma=gamma) model = clf.fit(X, y) visualizeBoundary(X, y, model) +show() input('Program paused. Press Enter to continue...') diff --git a/ex6/ex6_spam.py b/ex6/ex6_spam.py index ebd523b..66ecd04 100644 --- a/ex6/ex6_spam.py +++ b/ex6/ex6_spam.py @@ -77,12 +77,12 @@ print('(this may take 1 to 2 minutes) ...') C = 0.1 -clf = svm.SVC(C=C, kernel='linear', tol=1e-3, max_iter=200) +clf = svm.SVC(C=C, kernel='linear', tol=1e-4, max_iter=2000) model = clf.fit(X, y) p = model.predict(X) -print('Training Accuracy: %f', np.mean(np.double(p == y)) * 100) +print('Training Accuracy: %f\n' % (np.mean(np.double(p == y)) * 100)) # =================== Part 4: Test Spam Classification ================ # After training the classifier, we can evaluate it on a test set. We have @@ -92,13 +92,13 @@ # You will have Xtest, ytest in your environment data = scipy.io.loadmat('spamTest.mat') Xtest = data['Xtest'] -ytest = data['ytest'] +ytest = data['ytest'].flatten() print('Evaluating the trained Linear SVM on a test set ...') p = model.predict(Xtest) -print('Test Accuracy: %f', np. mean(np.double(p == ytest)) * 100) +print('Test Accuracy: %f\n' % (np.mean(np.double(p == ytest)) * 100)) # ================= Part 5: Top Predictors of Spam ==================== # Since the model we are training is a linear SVM, we can inspect the @@ -108,16 +108,15 @@ # 'thinks' that these words are the most likely indicators of spam. # Sort the weights and obtain the vocabulary list - t = sorted(list(enumerate(model.coef_[0])), key=lambda e: e[1], reverse=True) d = OrderedDict(t) -idx = d.keys() -weight = d.values() +idx = list(d.keys()) +weight = list(d.values()) vocabList = getVocabList() print('Top predictors of spam: ') for i in range(15): - print(' %-15s (%f)' %(vocabList[idx[i]], weight[i])) + print(' %-15s (%f)' % (vocabList[idx[i]], weight[i])) print('Program paused. Press enter to continue.') diff --git a/ex6/plotData.py b/ex6/plotData.py index 0a28a03..6554975 100644 --- a/ex6/plotData.py +++ b/ex6/plotData.py @@ -18,5 +18,4 @@ def plotData(X, y): # Plot Examples plt.plot(X[pos, 0], X[pos, 1], 'k+', linewidth=1, markersize=7) plt.plot(X[neg, 0], X[neg, 1], 'ko', color='y', markersize=7) - show() diff --git a/ex6/submit.py b/ex6/submit.py index 59b1710..ff65c0a 100644 --- a/ex6/submit.py +++ b/ex6/submit.py @@ -26,7 +26,7 @@ def output(part_id): x1 = np.sin(np.arange(1, 11)) x2 = np.cos(np.arange(1, 11)) ec = 'the quick brown fox jumped over the lazy dog' - wi = np.abs(np.round(x1 * 1863)) + wi = np.array(np.abs(np.round(x1 * 1863)), dtype=int) wi = np.hstack((wi, wi)) fname = srcs[part_id - 1].rsplit('.', 1)[0] diff --git a/ex6/visualizeBoundary.py b/ex6/visualizeBoundary.py index c883a07..018f380 100644 --- a/ex6/visualizeBoundary.py +++ b/ex6/visualizeBoundary.py @@ -1,7 +1,8 @@ import numpy as np -from plotData import plotData from matplotlib import pyplot as plt +from plotData import plotData + def visualizeBoundary(X, y, model): """plots a non-linear decision boundary learned by the @@ -21,5 +22,5 @@ def visualizeBoundary(X, y, model): vals[:, i] = model.predict(this_X) # Plot the SVM boundary - #contour(X1, X2, vals, [0 0], 'Color', 'b') - plt.contour(X1, X2, vals, levels=[0.0, 0.0]) + # contour(X1, X2, vals, [0 0], 'Color', 'b') + plt.contour(X1, X2, vals, color='b', lw=0.5, levels=[0]) diff --git a/ex6/visualizeBoundaryLinear.py b/ex6/visualizeBoundaryLinear.py index 1477517..77a443d 100644 --- a/ex6/visualizeBoundaryLinear.py +++ b/ex6/visualizeBoundaryLinear.py @@ -1,5 +1,6 @@ import matplotlib.pyplot as plt import numpy as np + from plotData import plotData @@ -14,4 +15,3 @@ def visualizeBoundaryLinear(X, y, model): yp = -(w[0] * xp + b) / w[1] plotData(X, y) plt.plot(xp, yp, '-b') -