Numerous ex7 fixes
- More consistent display of plots
- Improved coloring of clusters in ex7_pca
- Dropping unused return 'val' from findClosestCentroids
- Specifying S must be a diagonal matrix
jtlowery committed Apr 23, 2017
1 parent cafcfbb commit 1466951
Showing 7 changed files with 49 additions and 36 deletions.
8 changes: 4 additions & 4 deletions ex7/ex7.py
@@ -48,7 +48,7 @@

# Find the closest centroids for the examples using the
# initial_centroids
-val, idx = findClosestCentroids(X, initial_centroids)
+idx = findClosestCentroids(X, initial_centroids)

print('Closest centroids for the first 3 examples:')
print(idx[0:3].tolist())
@@ -155,7 +155,7 @@
print('Applying K-Means to compress an image.')

# Find closest cluster members
-_, idx = findClosestCentroids(X, centroids)
+idx = findClosestCentroids(X, centroids)

# Essentially, we have now represented the image X in terms of the
# indices in idx.
@@ -168,15 +168,15 @@
X_recovered = X_recovered.reshape(img_size[0], img_size[1], 3)

# Display the original image
+plt.figure()
plt.subplot(1, 2, 1)
plt.imshow(A)
plt.title('Original')
-show()

# Display compressed image side by side
plt.subplot(1, 2, 2)
plt.imshow(X_recovered)
plt.title('Compressed, with %d colors.' % K)
show()

-input('Program paused. Press Enter to continue...')
+input('Program paused. Press Enter to continue...')
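For context on the two call-site changes above: after this commit findClosestCentroids returns only the index array, so the image-compression step reduces to an integer lookup into the centroid table. A minimal sketch of that pipeline, assuming X is the (num_pixels, 3) pixel array, centroids has shape (K, 3), and img_size holds the original image dimensions (names follow ex7.py; this is an illustration, not a line-for-line excerpt):

from findClosestCentroids import findClosestCentroids

idx = findClosestCentroids(X, centroids)   # (num_pixels,) integer assignments
X_recovered = centroids[idx, :]            # replace each pixel with its centroid color
X_recovered = X_recovered.reshape(img_size[0], img_size[1], 3)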
31 changes: 20 additions & 11 deletions ex7/ex7_pca.py
@@ -46,6 +46,7 @@
X = data['X']

# Visualize the example dataset
+plt.figure()
plt.scatter(X[:, 0], X[:, 1], marker='o', color='b', facecolors='none', lw=1.0)
plt.axis([0.5, 6.5, 2, 8])
plt.axis('equal')
@@ -70,12 +71,16 @@
# Draw the eigenvectors centered at mean of data. These lines show the
# directions of maximum variations in the dataset.
mu2 = mu + 1.5 * S.dot(U.T)
+plt.figure()
+plt.scatter(X[:, 0], X[:, 1], marker='o', color='b', facecolors='none', lw=1.0)
+plt.axis([0.5, 6.5, 2, 8])
+plt.axis('equal')
plt.plot([mu[0], mu2[0, 0]], [mu[1], mu2[0, 1]], '-k', lw=2)
plt.plot([mu[0], mu2[1, 0]], [mu[1], mu2[1, 1]], '-k', lw=2)
show()

print('Top eigenvector: ')
-print(' U(:,1) = %f %f ', U[0,0], U[1,0])
+print(' U(:,1) = %f %f ' % (U[0, 0], U[1, 0]))
print('(you should expect to see -0.707107 -0.707107)')

input('Program paused. Press Enter to continue...')
@@ -96,25 +101,24 @@
color='b', facecolors='none', lw=1.0)
plt.axis([-4, 3, -4, 3]) # axis square
plt.axis('equal')
-show()

# Project the data onto K = 1 dimension
K = 1
Z = projectData(X_norm, U, K)
-print('Projection of the first example: %f', Z[0])
+print('Projection of the first example: %f' % Z[0])
print('(this value should be about 1.481274)')

X_rec = recoverData(Z, U, K)
-print('Approximation of the first example: %f %f'% (X_rec[0, 0], X_rec[0, 1]))
+print('Approximation of the first example: %f %f' % (X_rec[0, 0], X_rec[0, 1]))
print('(this value should be about -1.047419 -1.047419)')

# Draw lines connecting the projected points to the original points
plt.scatter(X_rec[:, 0], X_rec[:, 1], marker='o',
color='r', facecolor='none', lw=1.0)
for i in range(len(X_norm)):
plt.plot([X_norm[i, 0], X_rec[i, 0]], [X_norm[i, 1], X_rec[i, 1]], '--k')

show()

input('Program paused. Press Enter to continue...')

# =============== Part 4: Loading and Visualizing Face Data =============
@@ -128,6 +132,7 @@
X = data['X']

# Display the first 100 faces in the dataset
+plt.figure()
displayData(X[0:100, :])

input('Program paused. Press Enter to continue...')
@@ -146,6 +151,7 @@
U, S, V = pca(X_norm)

# Visualize the top 36 eigenvectors found
+plt.figure()
displayData(U[:, 1:36].T)

input('Program paused. Press Enter to continue...')
@@ -174,6 +180,7 @@
X_rec = recoverData(Z, U, K)

# Display normalized data
+plt.figure()
plt.subplot(1, 2, 1)
displayData(X_norm[:100,:])
plt.title('Original faces')
@@ -211,7 +218,8 @@

# Sample 1000 random indexes (since working with all the data is
# too expensive). If you have a fast computer, you may increase this.
-sel = np.floor(np.random.random(1000) * len(X)) + 1
+sel = np.floor(np.random.random(1000) * len(X))
+sel = sel.astype(int)

# Setup Color Palette

@@ -223,9 +231,11 @@
ys = Xs[:, 1]
zs = Xs[:, 2]
cmap = plt.get_cmap("jet")
-idxn = sel.astype('float') / max(sel.astype('float'))
+idxn = idx[sel]
+idxn = idxn.astype('float') / max(idxn.astype('float'))
colors = cmap(idxn)
-# ax = Axes3D(fig)
+ax = Axes3D(fig)
ax.scatter3D(xs, ys, zs=zs, edgecolors=colors,
marker='o', facecolors='none', lw=0.4, s=10)

@@ -246,10 +256,9 @@
# Plot in 2D
plt.figure()
zs = np.array([Z[s] for s in sel])
-idxs = np.array([idx[s] for s in sel])

# plt.scatter(zs[:,0], zs[:,1])
-plotDataPoints(zs, idxs)
+plotDataPoints(zs, idxn)
plt.title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction')
show()

input('Program paused. Press Enter to continue...')
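The projection and recovery calls exercised above (projectData, recoverData) are not part of this diff, but both reduce to matrix products with the top K columns of U. A sketch of the two helpers under the exercise's conventions (columns of U are unit eigenvectors; X_norm is mean-normalized) — an illustration of the math, not necessarily the repository's exact bodies:

def projectData(X_norm, U, K):
    # project each normalized example onto the first K eigenvectors -> (m, K)
    return X_norm.dot(U[:, :K])

def recoverData(Z, U, K):
    # map K-dimensional projections back into the original space -> (m, n)
    return Z.dot(U[:, :K].T)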
5 changes: 3 additions & 2 deletions ex7/findClosestCentroids.py
@@ -11,7 +11,7 @@ def findClosestCentroids(X, centroids):
K = len(centroids)

# You need to return the following variables correctly.
-idx = np.zeros(X.shape[0])
+idx = np.zeros(X.shape[0], dtype=int)

# ====================== YOUR CODE HERE ======================
# Instructions: Go over every example, find its closest centroid, and store
@@ -21,7 +21,8 @@ def findClosestCentroids(X, centroids):
# range 1..K
#
# Note: You can use a for-loop over the examples to compute this.
#
# =============================================================

-return val, idx
+return idx
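The function body itself is left as a YOUR CODE HERE exercise; what this commit pins down is the contract: a single integer array of shape (m,). One way to satisfy it — a vectorized sketch using NumPy broadcasting, offered only as an illustration, not the graded solution:

import numpy as np

def findClosestCentroids(X, centroids):
    # (m, 1, n) minus (1, K, n) broadcasts to (m, K, n); summing over the
    # feature axis gives squared distances from every example to every centroid
    dists = ((X[:, None, :] - centroids[None, :, :]) ** 2).sum(axis=2)
    # 0-based index of the nearest centroid for each example
    return dists.argmin(axis=1)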

1 change: 1 addition & 0 deletions ex7/pca.py
@@ -10,6 +10,7 @@ def pca(X):
m, n = X.shape

# You need to return the following variables correctly.
+# S must be a diagonal matrix.

# ====================== YOUR CODE HERE ======================
# Instructions: You should first compute the covariance matrix. Then, you
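The added comment pins down an easy-to-miss interface detail: callers such as ex7_pca.py compute S.dot(U.T), so S must come back as an (n, n) diagonal matrix rather than the 1-D array of singular values that np.linalg.svd returns. A body consistent with that contract, assuming X is already normalized — a sketch only, since the solution code is elided from this diff:

import numpy as np

def pca(X):
    m, n = X.shape
    Sigma = X.T.dot(X) / m           # (n, n) covariance matrix
    U, s, V = np.linalg.svd(Sigma)   # s comes back as a 1-D array
    S = np.diag(s)                   # promote to the diagonal matrix callers expect
    return U, S, V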
10 changes: 4 additions & 6 deletions ex7/plotDataPoints.py
@@ -12,10 +12,8 @@ def plotDataPoints(X, idx):
#
# # Plot the data

-# c = dict(enumerate(np.eye(3)))
-# colors=idx
-map = plt.get_cmap("jet")
+cmap = plt.get_cmap("jet")
idxn = idx.astype('float') / max(idx.astype('float'))
-colors = map(idxn)
-plt.scatter(X[:, 0], X[:, 1], 15, edgecolors=colors, marker='o', facecolors='none', lw=0.5)
-show()
+colors = cmap(idxn)
+plt.scatter(X[:, 0], X[:, 1], 15, edgecolors=colors,
+            marker='o', facecolors='none', lw=0.5)
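Besides fixing the stale colors = map(idxn) call, the rename from map to cmap stops shadowing Python's built-in map, and dropping the trailing show() lets callers place the scatter inside their own figures and subplots. A hypothetical usage sketch (the random X and idx here are made-up demo data, not from the exercise):

import matplotlib.pyplot as plt
import numpy as np
from plotDataPoints import plotDataPoints

X = np.random.rand(50, 2)          # hypothetical 2-D points
idx = np.random.randint(0, 3, 50)  # hypothetical cluster labels 0..2
plt.figure()
plotDataPoints(X, idx)
plt.show()                         # display is now the caller's job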
11 changes: 5 additions & 6 deletions ex7/plotProgresskMeans.py
@@ -4,7 +4,7 @@
from show import show


-def plotProgresskMeans(X, centroids, previous, idx, K, i, color):
+def plotProgresskMeans(X, centroids, previous, idx, K, i, color, ax):
"""plots the data
points with colors assigned to each centroid. With the previous
centroids, it also plots a line between the previous locations and
@@ -15,16 +15,15 @@ def plotProgresskMeans(X, centroids, previous, idx, K, i, color):
plotDataPoints(X, idx)

# Plot the centroids as black x's
-plt.scatter(centroids[:, 0], centroids[:, 1],
-            marker='x', s=60, lw=3, edgecolor='k')
+ax.scatter(centroids[:, 0], centroids[:, 1],
+           marker='x', s=60, lw=3, color='black')

# Plot the history of the centroids with lines
for j in range(len(centroids)):
-plt.plot([centroids[j, 0], previous[j, 0]],
-         [centroids[j, 1], previous[j, 1]], c=color)
+ax.plot([centroids[j, 0], previous[j, 0]],
+        [centroids[j, 1], previous[j, 1]], color='black')

# Title
plt.title('Iteration number %d' % i)
-show()
input('Program paused. Press Enter to continue...')

19 changes: 12 additions & 7 deletions ex7/runkMeans.py
@@ -1,10 +1,12 @@
-from computeCentroids import computeCentroids
-from plotProgresskMeans import plotProgresskMeans
-from findClosestCentroids import findClosestCentroids
import matplotlib.pyplot as plt
import numpy as np
import itertools

+from computeCentroids import computeCentroids
+from plotProgresskMeans import plotProgresskMeans
+from findClosestCentroids import findClosestCentroids
from show import show


def runkMeans(X, initial_centroids, max_iters, plot_progress=False):
"""runs the K-Means algorithm on data matrix X, where each
@@ -19,7 +21,8 @@ def runkMeans(X, initial_centroids, max_iters, plot_progress=False):

# Plot the data if we are plotting progress
if plot_progress:
-plt.figure()
+fig = plt.figure()
+ax = plt.gca()

# Initialize values
m, n = X.shape
@@ -37,15 +40,17 @@
print('K-Means iteration %d/%d...' % (i, max_iters))

# For each example in X, assign it to the closest centroid
-_, idx = findClosestCentroids(X, centroids)
+idx = findClosestCentroids(X, centroids)

# Optionally, plot progress here
if plot_progress:
color = rgb[int(next(c))]
plotProgresskMeans(X, np.array(centroids),
-                   np.array(previous_centroids), idx, K, i, color)
+                   np.array(previous_centroids),
+                   idx, K, i, color, ax)
previous_centroids = centroids
# raw_input("Press Enter to continue...")
-show()
+fig.canvas.draw()

# Given the memberships, compute new centroids
centroids = computeCentroids(X, idx, K)
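Creating the figure once, passing ax down to plotProgresskMeans, and refreshing with fig.canvas.draw() draws every K-Means iteration onto a single window instead of spawning a new plot per call. A minimal standalone sketch of that redraw pattern (illustrative only; the repository routes display through its show helper rather than plt.pause):

import matplotlib.pyplot as plt
import numpy as np

plt.ion()                    # interactive mode: draw without blocking
fig = plt.figure()
ax = plt.gca()
for i in range(5):
    pts = np.random.rand(20, 2)     # stand-in for one iteration's state
    ax.scatter(pts[:, 0], pts[:, 1])
    fig.canvas.draw()               # repaint the same figure each pass
    plt.pause(0.3)                  # brief pause so the update is visible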
