From 14669517d8abf8b615f9486d09d2ec7f63387416 Mon Sep 17 00:00:00 2001 From: Joel Lowery Date: Sat, 22 Apr 2017 23:25:10 -0500 Subject: [PATCH] Numerous ex7 fixes - More consistent display of plots - Improved coloring of clusters in ex7_pca - Dropping unused return 'val' from findClosestCentroids - Specifying S must be a diagonal matrix --- ex7/ex7.py | 8 ++++---- ex7/ex7_pca.py | 31 ++++++++++++++++++++----------- ex7/findClosestCentroids.py | 5 +++-- ex7/pca.py | 1 + ex7/plotDataPoints.py | 10 ++++------ ex7/plotProgresskMeans.py | 11 +++++------ ex7/runkMeans.py | 19 ++++++++++++------- 7 files changed, 49 insertions(+), 36 deletions(-) diff --git a/ex7/ex7.py b/ex7/ex7.py index ea2f423..4f96e1c 100644 --- a/ex7/ex7.py +++ b/ex7/ex7.py @@ -48,7 +48,7 @@ # Find the closest centroids for the examples using the # initial_centroids -val, idx = findClosestCentroids(X, initial_centroids) +idx = findClosestCentroids(X, initial_centroids) print('Closest centroids for the first 3 examples:') print(idx[0:3].tolist()) @@ -155,7 +155,7 @@ print('Applying K-Means to compress an image.') # Find closest cluster members -_, idx = findClosestCentroids(X, centroids) +idx = findClosestCentroids(X, centroids) # Essentially, now we have represented the image X as in terms of the # indices in idx. @@ -168,10 +168,10 @@ X_recovered = X_recovered.reshape(img_size[0], img_size[1], 3) # Display the original image +plt.figure() plt.subplot(1, 2, 1) plt.imshow(A) plt.title('Original') -show() # Display compressed image side by side plt.subplot(1, 2, 2) @@ -179,4 +179,4 @@ plt.title('Compressed, with %d colors.' % K) show() -input('Program paused. Press Enter to continue...') \ No newline at end of file +input('Program paused. Press Enter to continue...') diff --git a/ex7/ex7_pca.py b/ex7/ex7_pca.py index 5eb85b1..8e6a52c 100644 --- a/ex7/ex7_pca.py +++ b/ex7/ex7_pca.py @@ -46,6 +46,7 @@ X = data['X'] # Visualize the example dataset +plt.figure() plt.scatter(X[:, 0], X[:, 1], marker='o', color='b', facecolors='none', lw=1.0) plt.axis([0.5, 6.5, 2, 8]) plt.axis('equal') @@ -70,12 +71,16 @@ # Draw the eigenvectors centered at mean of data. These lines show the # directions of maximum variations in the dataset. mu2 = mu + 1.5 * S.dot(U.T) +plt.figure() +plt.scatter(X[:, 0], X[:, 1], marker='o', color='b', facecolors='none', lw=1.0) +plt.axis([0.5, 6.5, 2, 8]) +plt.axis('equal') plt.plot([mu[0], mu2[0, 0]], [mu[1], mu2[0, 1]], '-k', lw=2) plt.plot([mu[0], mu2[1, 0]], [mu[1], mu2[1, 1]], '-k', lw=2) show() print('Top eigenvector: ') -print(' U(:,1) = %f %f ', U[0,0], U[1,0]) +print(' U(:,1) = %f %f ' % (U[0, 0], U[1, 0])) print('(you should expect to see -0.707107 -0.707107)') input('Program paused. Press Enter to continue...') @@ -96,16 +101,15 @@ color='b', facecolors='none', lw=1.0) plt.axis([-4, 3, -4, 3]) # axis square plt.axis('equal') -show() # Project the data onto K = 1 dimension K = 1 Z = projectData(X_norm, U, K) -print('Projection of the first example: %f', Z[0]) +print('Projection of the first example: %f' % Z[0]) print('(this value should be about 1.481274)') X_rec = recoverData(Z, U, K) -print('Approximation of the first example: %f %f'% (X_rec[0, 0], X_rec[0, 1])) +print('Approximation of the first example: %f %f' % (X_rec[0, 0], X_rec[0, 1])) print('(this value should be about -1.047419 -1.047419)') # Draw lines connecting the projected points to the original points @@ -113,8 +117,8 @@ color='r', facecolor='none', lw=1.0) for i in range(len(X_norm)): plt.plot([X_norm[i, 0], X_rec[i, 0]], [X_norm[i, 1], X_rec[i, 1]], '--k') - show() + input('Program paused. Press Enter to continue...') # =============== Part 4: Loading and Visualizing Face Data ============= @@ -128,6 +132,7 @@ X = data['X'] # Display the first 100 faces in the dataset +plt.figure() displayData(X[0:100, :]) input('Program paused. Press Enter to continue...') @@ -146,6 +151,7 @@ U, S, V = pca(X_norm) # Visualize the top 36 eigenvectors found +plt.figure() displayData(U[:, 1:36].T) input('Program paused. Press Enter to continue...') @@ -174,6 +180,7 @@ X_rec = recoverData(Z, U, K) # Display normalized data +plt.figure() plt.subplot(1, 2, 1) displayData(X_norm[:100,:]) plt.title('Original faces') @@ -211,7 +218,8 @@ # Sample 1000 random indexes (since working with all the data is # too expensive. If you have a fast computer, you may increase this. -sel = np.floor(np.random.random(1000) * len(X)) + 1 +sel = np.floor(np.random.random(1000) * len(X)) +sel = sel.astype(int) # Setup Color Palette @@ -223,9 +231,11 @@ ys = Xs[:, 1] zs = Xs[:, 2] cmap = plt.get_cmap("jet") -idxn = sel.astype('float') / max(sel.astype('float')) +idxn = idx[sel] +idxn = idxn.astype('float') / max(idxn.astype('float')) colors = cmap(idxn) -# ax = Axes3D(fig) + +ax = Axes3D(fig) ax.scatter3D(xs, ys, zs=zs, edgecolors=colors, marker='o', facecolors='none', lw=0.4, s=10) @@ -246,10 +256,9 @@ # Plot in 2D plt.figure() zs = np.array([Z[s] for s in sel]) -idxs = np.array([idx[s] for s in sel]) -# plt.scatter(zs[:,0], zs[:,1]) -plotDataPoints(zs, idxs) +plotDataPoints(zs, idxn) plt.title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction') show() + input('Program paused. Press Enter to continue...') diff --git a/ex7/findClosestCentroids.py b/ex7/findClosestCentroids.py index f4841e3..a3bce51 100644 --- a/ex7/findClosestCentroids.py +++ b/ex7/findClosestCentroids.py @@ -11,7 +11,7 @@ def findClosestCentroids(X, centroids): K = len(centroids) # You need to return the following variables correctly. - idx = np.zeros(X.shape[0]) + idx = np.zeros(X.shape[0], dtype=int) # ====================== YOUR CODE HERE ====================== # Instructions: Go over every example, find its closest centroid, and store @@ -21,7 +21,8 @@ def findClosestCentroids(X, centroids): # range 1..K # # Note: You can use a for-loop over the examples to compute this. + # # ============================================================= - return val, idx + return idx diff --git a/ex7/pca.py b/ex7/pca.py index 1239e9d..64c490b 100644 --- a/ex7/pca.py +++ b/ex7/pca.py @@ -10,6 +10,7 @@ def pca(X): m, n = X.shape # You need to return the following variables correctly. + # S must be a diagonal matrix. # ====================== YOUR CODE HERE ====================== # Instructions: You should first compute the covariance matrix. Then, you diff --git a/ex7/plotDataPoints.py b/ex7/plotDataPoints.py index 2866feb..520ffa7 100644 --- a/ex7/plotDataPoints.py +++ b/ex7/plotDataPoints.py @@ -12,10 +12,8 @@ def plotDataPoints(X, idx): # # # Plot the data - # c = dict(enumerate(np.eye(3))) - # colors=idx - map = plt.get_cmap("jet") + cmap = plt.get_cmap("jet") idxn = idx.astype('float') / max(idx.astype('float')) - colors = map(idxn) - plt.scatter(X[:, 0], X[:, 1], 15, edgecolors=colors, marker='o', facecolors='none', lw=0.5) - show() + colors = cmap(idxn) + plt.scatter(X[:, 0], X[:, 1], 15, edgecolors=colors, + marker='o', facecolors='none', lw=0.5) diff --git a/ex7/plotProgresskMeans.py b/ex7/plotProgresskMeans.py index 0fddb3c..a0927f2 100644 --- a/ex7/plotProgresskMeans.py +++ b/ex7/plotProgresskMeans.py @@ -4,7 +4,7 @@ from show import show -def plotProgresskMeans(X, centroids, previous, idx, K, i, color): +def plotProgresskMeans(X, centroids, previous, idx, K, i, color, ax): """plots the data points with colors assigned to each centroid. With the previous centroids, it also plots a line between the previous locations and @@ -15,16 +15,15 @@ def plotProgresskMeans(X, centroids, previous, idx, K, i, color): plotDataPoints(X, idx) # Plot the centroids as black x's - plt.scatter(centroids[:, 0], centroids[:, 1], - marker='x', s=60, lw=3, edgecolor='k') + ax.scatter(centroids[:, 0], centroids[:, 1], + marker='x', s=60, lw=3, color='black') # Plot the history of the centroids with lines for j in range(len(centroids)): - plt.plot([centroids[j, 0], previous[j, 0]], - [centroids[j, 1], previous[j, 1]], c=color) + ax.plot([centroids[j, 0], previous[j, 0]], + [centroids[j, 1], previous[j, 1]], color='black') # Title plt.title('Iteration number %d' % i) - show() input('Program paused. Press Enter to continue...') diff --git a/ex7/runkMeans.py b/ex7/runkMeans.py index ad2e997..89dd420 100644 --- a/ex7/runkMeans.py +++ b/ex7/runkMeans.py @@ -1,10 +1,12 @@ -from computeCentroids import computeCentroids -from plotProgresskMeans import plotProgresskMeans -from findClosestCentroids import findClosestCentroids import matplotlib.pyplot as plt import numpy as np import itertools +from computeCentroids import computeCentroids +from plotProgresskMeans import plotProgresskMeans +from findClosestCentroids import findClosestCentroids +from show import show + def runkMeans(X, initial_centroids, max_iters, plot_progress=False): """runs the K-Means algorithm on data matrix X, where each @@ -19,7 +21,8 @@ def runkMeans(X, initial_centroids, max_iters, plot_progress=False): # Plot the data if we are plotting progress if plot_progress: - plt.figure() + fig = plt.figure() + ax = plt.gca() # Initialize values m, n = X.shape @@ -37,15 +40,17 @@ def runkMeans(X, initial_centroids, max_iters, plot_progress=False): print('K-Means iteration %d/%d...' % (i, max_iters)) # For each example in X, assign it to the closest centroid - _, idx = findClosestCentroids(X, centroids) + idx = findClosestCentroids(X, centroids) # Optionally, plot progress here if plot_progress: color = rgb[int(next(c))] plotProgresskMeans(X, np.array(centroids), - np.array(previous_centroids), idx, K, i, color) + np.array(previous_centroids), + idx, K, i, color, ax) previous_centroids = centroids - # raw_input("Press Enter to continue...") + show() + fig.canvas.draw() # Given the memberships, compute new centroids centroids = computeCentroids(X, idx, K)