
Update ex7
jtlowery committed Apr 22, 2017
1 parent 1aa29cb commit cafcfbb
Showing 9 changed files with 88 additions and 89 deletions.
28 changes: 13 additions & 15 deletions ex7/computeCentroids.py
@@ -2,7 +2,8 @@


def computeCentroids(X, idx, K):
"""returns the new centroids by
"""
returns the new centroids by
computing the means of the data points assigned to each centroid. It is
given a dataset X where each row is a single data point, a vector
idx of centroid assignments (i.e. each entry in range [1..K]) for each
@@ -11,23 +12,20 @@ def computeCentroids(X, idx, K):
assigned to it.
"""

# Useful variables
# Useful variables
m, n = X.shape

# You need to return the following variables correctly.
# You need to return the following variables correctly.
centroids = []


# ====================== YOUR CODE HERE ======================
# Instructions: Go over every centroid and compute mean of all points that
# belong to it. Concretely, the row vector centroids(i, :)
# should contain the mean of the data points assigned to
# centroid i.
#
# Note: You can use a for-loop over the centroids to compute this.
#


# =============================================================
# ====================== YOUR CODE HERE ======================
# Instructions: Go over every centroid and compute mean of all points that
# belong to it. Concretely, the row vector centroids(i, :)
# should contain the mean of the data points assigned to
# centroid i.
#
# Note: You can use a for-loop over the centroids to compute this.
#
# =============================================================

return centroids
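
For reference, one minimal way the YOUR CODE HERE block above could be filled in. This is only a sketch, not the committed solution; it assumes idx holds 0-based centroid indices (if the 1-based convention from the docstring is kept, compare against k + 1 instead).

```python
import numpy as np

def computeCentroids(X, idx, K):
    # Average the data points currently assigned to each centroid.
    m, n = X.shape
    centroids = np.zeros((K, n))
    for k in range(K):
        points = X[idx == k]              # examples assigned to centroid k
        if points.size > 0:               # guard against an empty cluster
            centroids[k] = points.mean(axis=0)
    return centroids
```
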
25 changes: 12 additions & 13 deletions ex7/ex7.py
@@ -1,4 +1,4 @@
## Machine Learning Online Class
# Machine Learning Online Class
# Exercise 7 | Principal Component Analysis and K-Means Clustering
#
# Instructions
@@ -16,13 +16,12 @@
#
# For this exercise, you will not need to change any code in this file,
# or any other files other than those mentioned above.
#

## ================= Part 1: Find Closest Centroids ====================
# ================= Part 1: Find Closest Centroids ====================
# To help you implement K-Means, we have divided the learning algorithm
# into two functions -- findClosestCentroids and computeCentroids. In this
# part, you should complete the code in the findClosestCentroids function.
#

from matplotlib import use, cm
use('TkAgg')
import numpy as np
@@ -36,6 +35,7 @@
from kMeansInitCentroids import kMeansInitCentroids
from show import show


print('Finding closest centroids.')

# Load an example dataset that we will be using
@@ -56,13 +56,13 @@

input('Program paused. Press Enter to continue...')

## ===================== Part 2: Compute Means =========================
# ===================== Part 2: Compute Means =========================
# After implementing the closest centroids function, you should now
# complete the computeCentroids function.
#

print('Computing centroids means.')

# Compute means based on the closest centroids found in the previous part.
# Compute means based on the closest centroids found in the previous part.
centroids = computeCentroids(X, idx, K)

print('Centroids computed after initial finding of closest centroids:')
@@ -77,12 +77,12 @@
input('Program paused. Press Enter to continue...')


## =================== Part 3: K-Means Clustering ======================
# =================== Part 3: K-Means Clustering ======================
# After you have completed the two functions computeCentroids and
# findClosestCentroids, you have all the necessary pieces to run the
# kMeans algorithm. In this part, you will run the K-Means algorithm on
# the example dataset we have provided.
#

print('Running K-Means clustering on example dataset.')

# Load an example dataset
@@ -106,17 +106,16 @@

input('Program paused. Press Enter to continue...')

## ============= Part 4: K-Means Clustering on Pixels ===============
# ============= Part 4: K-Means Clustering on Pixels ===============
# In this exercise, you will use K-Means to compress an image. To do this,
# you will first run K-Means on the colors of the pixels in the image and
# then you will map each pixel onto its closest centroid.
#
# You should now complete the code in kMeansInitCentroids.py
#

print('Running K-Means clustering on pixels from an image.')

# Load an image of a bird
# Load an image of a bird
A = scipy.misc.imread('bird_small.png')

# If imread does not work for you, you can try instead
@@ -148,7 +147,7 @@
input('Program paused. Press Enter to continue...')


## ================= Part 5: Image Compression ======================
# ================= Part 5: Image Compression ======================
# In this part of the exercise, you will use the clusters of K-Means to
# compress an image. To do this, we first find the closest clusters for
# each example. After that, we
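
Parts 3-5 of ex7.py run K-Means on the pixel colors of bird_small.png and then rebuild the image from the learned centroid colors. A rough illustration of that pipeline follows; it is not the repository's code. The initialization shown is the usual "pick K random examples" approach, the helper compress_image is hypothetical, and idx is assumed to be 0-based.

```python
import numpy as np

def kMeansInitCentroids(X, K):
    # Common initialization: K distinct examples chosen at random as starting centroids.
    rand_idx = np.random.permutation(X.shape[0])[:K]
    return X[rand_idx]

def compress_image(A, centroids, idx):
    # Hypothetical helper: rebuild the image using only the K centroid colors.
    # A is an (h, w, 3) image scaled to [0, 1]; idx assigns each flattened pixel
    # (row of A.reshape(-1, 3)) to a centroid, 0-based.
    h, w, _ = A.shape
    X_recovered = centroids[idx]          # each pixel replaced by its centroid's color
    return X_recovered.reshape(h, w, 3)
```
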
82 changes: 42 additions & 40 deletions ex7/ex7_pca.py
@@ -1,4 +1,4 @@
## Machine Learning Online Class
# Machine Learning Online Class
# Exercise 7 | Principal Component Analysis and K-Means Clustering
#
# Instructions
@@ -35,40 +35,40 @@
from ex3.displayData import displayData
from show import show

## ================== Part 1: Load Example Dataset ===================
# ================== Part 1: Load Example Dataset ===================
# We start this exercise by using a small dataset that is easy to
# visualize

print('Visualizing example dataset for PCA.')
# The following command loads the dataset. You should now have the
# variable X in your environment
# The following command loads the dataset. You should now have the
# variable X in your environment
data = scipy.io.loadmat('ex7data1.mat')
X = data['X']

# Visualize the example dataset
# Visualize the example dataset
plt.scatter(X[:, 0], X[:, 1], marker='o', color='b', facecolors='none', lw=1.0)
plt.axis([0.5, 6.5, 2, 8])
plt.axis('equal')
show()

input('Program paused. Press Enter to continue...')

## =============== Part 2: Principal Component Analysis ===============
# =============== Part 2: Principal Component Analysis ===============
# You should now implement PCA, a dimension reduction technique. You
# should complete the code in pca.py
#

print('Running PCA on example dataset.')

# Before running PCA, it is important to first normalize X
X_norm, mu, sigma = featureNormalize(X)

# Run PCA
# Run PCA
U, S, V = pca(X_norm)

# Compute mu, the mean of each feature
# Compute mu, the mean of each feature

# Draw the eigenvectors centered at mean of data. These lines show the
# directions of maximum variations in the dataset.
# Draw the eigenvectors centered at mean of data. These lines show the
# directions of maximum variations in the dataset.
mu2 = mu + 1.5 * S.dot(U.T)
plt.plot([mu[0], mu2[0, 0]], [mu[1], mu2[0, 1]], '-k', lw=2)
plt.plot([mu[0], mu2[1, 0]], [mu[1], mu2[1, 1]], '-k', lw=2)
@@ -80,65 +80,66 @@

input('Program paused. Press Enter to continue...')


## =================== Part 3: Dimension Reduction ===================
# =================== Part 3: Dimension Reduction ===================
# You should now implement the projection step to map the data onto the
# first k eigenvectors. The code will then plot the data in this reduced
# dimensional space. This will show you what the data looks like when
# using only the corresponding eigenvectors to reconstruct it.
#
# You should complete the code in projectData.py
#

print('Dimension reduction on example dataset.')

# Plot the normalized dataset (returned from pca)
# Plot the normalized dataset (returned from pca)
plt.figure()
plt.scatter(X_norm[:, 0], X_norm[:, 1], marker='o', color='b', facecolors='none', lw=1.0)
plt.axis([-4, 3, -4, 3]) #axis square
plt.scatter(X_norm[:, 0], X_norm[:, 1], marker='o',
color='b', facecolors='none', lw=1.0)
plt.axis([-4, 3, -4, 3]) # axis square
plt.axis('equal')
show()

# Project the data onto K = 1 dimension
# Project the data onto K = 1 dimension
K = 1
Z = projectData(X_norm, U, K)
print('Projection of the first example: %f', Z[0])
print('(this value should be about 1.481274)')

X_rec = recoverData(Z, U, K)
X_rec = recoverData(Z, U, K)
print('Approximation of the first example: %f %f'% (X_rec[0, 0], X_rec[0, 1]))
print('(this value should be about -1.047419 -1.047419)')

# Draw lines connecting the projected points to the original points
plt.scatter(X_rec[:, 0], X_rec[:, 1], marker='o', color='r', facecolor='none', lw=1.0)
plt.scatter(X_rec[:, 0], X_rec[:, 1], marker='o',
color='r', facecolor='none', lw=1.0)
for i in range(len(X_norm)):
plt.plot([X_norm[i, 0], X_rec[i, 0]], [X_norm[i, 1], X_rec[i, 1]], '--k')

show()
input('Program paused. Press Enter to continue...')

## =============== Part 4: Loading and Visualizing Face Data =============
# =============== Part 4: Loading and Visualizing Face Data =============
# We start the exercise by first loading and visualizing the dataset.
# The following code will load the dataset into your environment
#

print('Loading face dataset.')

# Load Face dataset
# Load Face dataset
data = scipy.io.loadmat('ex7faces.mat')
X = data['X']

# Display the first 100 faces in the dataset
# Display the first 100 faces in the dataset
displayData(X[0:100, :])

input('Program paused. Press Enter to continue...')

## =========== Part 5: PCA on Face Data: Eigenfaces ===================
# =========== Part 5: PCA on Face Data: Eigenfaces ===================
# Run PCA and visualize the eigenvectors which are in this case eigenfaces
# We display the first 36 eigenfaces.
#

print('Running PCA on face dataset.\n(this might take a minute or two ...)\n\n')

# Before running PCA, it is important to first normalize X by subtracting
# the mean value from each feature
# Before running PCA, it is important to first normalize X by subtracting
# the mean value from each feature
X_norm, mu, sigma = featureNormalize(X)

# Run PCA
@@ -149,7 +150,7 @@

input('Program paused. Press Enter to continue...')

## ============= Part 6: Dimension Reduction for Faces =================
# ============= Part 6: Dimension Reduction for Faces =================
# Project images to the eigen space using the top k eigenvectors
# If you are applying a machine learning algorithm
print('Dimension reduction for face dataset.')
@@ -158,19 +159,19 @@
Z = projectData(X_norm, U, K)

print('The projected data Z has a size of: ')
print('%d %d'% Z.shape)
print('%d %d' % Z.shape)

input('Program paused. Press Enter to continue...')

## ==== Part 7: Visualization of Faces after PCA Dimension Reduction ====
# ==== Part 7: Visualization of Faces after PCA Dimension Reduction ====
# Project images to the eigen space using the top K eigen vectors and
# visualize only using those K dimensions
# Compare to the original input, which is also displayed

print('Visualizing the projected (reduced dimension) faces.')

K = 100
X_rec = recoverData(Z, U, K)
X_rec = recoverData(Z, U, K)

# Display normalized data
plt.subplot(1, 2, 1)
@@ -187,7 +188,7 @@
input('Program paused. Press Enter to continue...')


## === Part 8(a): Optional (ungraded) Exercise: PCA for Visualization ===
# === Part 8(a): Optional (ungraded) Exercise: PCA for Visualization ===
# One useful application of PCA is to use it to visualize high-dimensional
# data. In the last K-Means exercise you ran K-Means on 3-dimensional
# pixel colors of an image. We first visualize this output in 3D, and then
@@ -208,13 +209,13 @@
initial_centroids = kMeansInitCentroids(X, K)
centroids, idx = runkMeans(X, initial_centroids, max_iters)

# Sample 1000 random indexes (since working with all the data is
# too expensive. If you have a fast computer, you may increase this.
# Sample 1000 random indexes (since working with all the data is
# too expensive. If you have a fast computer, you may increase this.
sel = np.floor(np.random.random(1000) * len(X)) + 1

# Setup Color Palette
# Setup Color Palette

# Visualize the data and centroid memberships in 3D
# Visualize the data and centroid memberships in 3D
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
Xs = np.array([X[s] for s in sel])
@@ -225,14 +226,15 @@
idxn = sel.astype('float') / max(sel.astype('float'))
colors = cmap(idxn)
# ax = Axes3D(fig)
ax.scatter3D(xs, ys, zs=zs, edgecolors=colors, marker='o', facecolors='none', lw=0.4, s=10)
ax.scatter3D(xs, ys, zs=zs, edgecolors=colors,
marker='o', facecolors='none', lw=0.4, s=10)

plt.title('Pixel dataset plotted in 3D. Color shows centroid memberships')
show()
input('Program paused. Press Enter to continue...')

## === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization ===
# Use PCA to project this cloud to 2D for visualization
# === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization ===
# Use PCA to project this cloud to 2D for visualization

# Subtract the mean to use PCA
X_norm, mu, sigma = featureNormalize(X)
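
For orientation, the three functions exercised by ex7_pca.py could look roughly like the sketch below. This is only an illustration of the projection and recovery math, not the repository's implementation; the driver's `S.dot(U.T)` usage suggests pca returns the singular values as a diagonal matrix, which is assumed here.

```python
import numpy as np

def pca(X_norm):
    # Principal directions via SVD of the covariance matrix of the normalized data.
    m = X_norm.shape[0]
    Sigma = X_norm.T.dot(X_norm) / m
    U, s, V = np.linalg.svd(Sigma)
    return U, np.diag(s), V               # diagonal S matches the S.dot(U.T) call above

def projectData(X_norm, U, K):
    # Coordinates of each example in the basis of the top K eigenvectors.
    return X_norm.dot(U[:, :K])

def recoverData(Z, U, K):
    # Approximate reconstruction back in the original (normalized) space.
    return Z.dot(U[:, :K].T)
```
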
5 changes: 2 additions & 3 deletions ex7/featureNormalize.py
@@ -2,8 +2,7 @@


def featureNormalize(X):
"""
returns a normalized version of X where
"""returns a normalized version of X where
the mean value of each feature is 0 and the standard deviation
is 1. This is often a good preprocessing step to do when
working with learning algorithms.
@@ -15,4 +14,4 @@ def featureNormalize(X):
sigma = np.std(X_norm, axis=0, ddof=1)
X_norm = X_norm / sigma

return X_norm, mu, sigma
return X_norm, mu, sigma
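
A small sanity check of the normalization contract in this file: zero mean and unit sample standard deviation per column, with ddof=1 as used above. The toy matrix is made up for illustration, and the import assumes the script is run from the ex7 directory.

```python
import numpy as np
from featureNormalize import featureNormalize

X = np.array([[1.0, 50.0],
              [2.0, 60.0],
              [3.0, 70.0]])
X_norm, mu, sigma = featureNormalize(X)
print(np.allclose(X_norm.mean(axis=0), 0.0))            # True: mean removed
print(np.allclose(X_norm.std(axis=0, ddof=1), 1.0))     # True: sample std scaled to 1
```
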
18 changes: 9 additions & 9 deletions ex7/findClosestCentroids.py
@@ -13,15 +13,15 @@ def findClosestCentroids(X, centroids):
# You need to return the following variables correctly.
idx = np.zeros(X.shape[0])

# ====================== YOUR CODE HERE ======================
# Instructions: Go over every example, find its closest centroid, and store
# the index inside idx at the appropriate location.
# Concretely, idx(i) should contain the index of the centroid
# closest to example i. Hence, it should be a value in the
# range 1..K
#
# Note: You can use a for-loop over the examples to compute this.
# =============================================================
# ====================== YOUR CODE HERE ======================
# Instructions: Go over every example, find its closest centroid, and store
# the index inside idx at the appropriate location.
# Concretely, idx(i) should contain the index of the centroid
# closest to example i. Hence, it should be a value in the
# range 1..K
#
# Note: You can use a for-loop over the examples to compute this.
# =============================================================

return val, idx
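
As with computeCentroids, a minimal fill-in for the instructions block above might look like the following. It is a sketch only: it returns 0-based indices (add 1 if the 1..K convention is required) and computes just idx, so it does not reproduce the file's `return val, idx` signature.

```python
import numpy as np

def findClosestCentroids(X, centroids):
    # Assign every example to the centroid with the smallest squared distance.
    idx = np.zeros(X.shape[0], dtype=int)
    for i in range(X.shape[0]):
        dists = np.sum((centroids - X[i]) ** 2, axis=1)   # squared distance to each centroid
        idx[i] = np.argmin(dists)
    return idx
```
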

