From cafcfbbdd279e789585c235d1ff9fb85fdf8c1e4 Mon Sep 17 00:00:00 2001 From: Joel Lowery Date: Fri, 21 Apr 2017 19:32:54 -0500 Subject: [PATCH] Update ex7 --- ex7/computeCentroids.py | 28 ++++++------- ex7/ex7.py | 25 ++++++----- ex7/ex7_pca.py | 82 +++++++++++++++++++------------------ ex7/featureNormalize.py | 5 +-- ex7/findClosestCentroids.py | 18 ++++---- ex7/kMeansInitCentroids.py | 11 ++--- ex7/pca.py | 3 +- ex7/plotDataPoints.py | 1 - ex7/submit.py | 4 +- 9 files changed, 88 insertions(+), 89 deletions(-) diff --git a/ex7/computeCentroids.py b/ex7/computeCentroids.py index 4d7b4ff..a749bf9 100644 --- a/ex7/computeCentroids.py +++ b/ex7/computeCentroids.py @@ -2,7 +2,8 @@ def computeCentroids(X, idx, K): - """returns the new centroids by + """ + returns the new centroids by computing the means of the data points assigned to each centroid. It is given a dataset X where each row is a single data point, a vector idx of centroid assignments (i.e. each entry in range [1..K]) for each @@ -11,23 +12,20 @@ def computeCentroids(X, idx, K): assigned to it. """ -# Useful variables + # Useful variables m, n = X.shape -# You need to return the following variables correctly. + # You need to return the following variables correctly. centroids = [] - -# ====================== YOUR CODE HERE ====================== -# Instructions: Go over every centroid and compute mean of all points that -# belong to it. Concretely, the row vector centroids(i, :) -# should contain the mean of the data points assigned to -# centroid i. -# -# Note: You can use a for-loop over the centroids to compute this. -# - - -# ============================================================= + # ====================== YOUR CODE HERE ====================== + # Instructions: Go over every centroid and compute mean of all points that + # belong to it. Concretely, the row vector centroids(i, :) + # should contain the mean of the data points assigned to + # centroid i. + # + # Note: You can use a for-loop over the centroids to compute this. + # + # ============================================================= return centroids diff --git a/ex7/ex7.py b/ex7/ex7.py index c0c07bf..ea2f423 100644 --- a/ex7/ex7.py +++ b/ex7/ex7.py @@ -1,4 +1,4 @@ -## Machine Learning Online Class +# Machine Learning Online Class # Exercise 7 | Principle Component Analysis and K-Means Clustering # # Instructions @@ -16,13 +16,12 @@ # # For this exercise, you will not need to change any code in this file, # or any other files other than those mentioned above. -# -## ================= Part 1: Find Closest Centroids ==================== +# ================= Part 1: Find Closest Centroids ==================== # To help you implement K-Means, we have divided the learning algorithm # into two functions -- findClosestCentroids and computeCentroids. In this # part, you shoudl complete the code in the findClosestCentroids function. -# + from matplotlib import use, cm use('TkAgg') import numpy as np @@ -36,6 +35,7 @@ from kMeansInitCentroids import kMeansInitCentroids from show import show + print('Finding closest centroids.') # Load an example dataset that we will be using @@ -56,13 +56,13 @@ input('Program paused. Press Enter to continue...') -## ===================== Part 2: Compute Means ========================= +# ===================== Part 2: Compute Means ========================= # After implementing the closest centroids function, you should now # complete the computeCentroids function. 
-# + print('Computing centroids means.') -# Compute means based on the closest centroids found in the previous part. +# Compute means based on the closest centroids found in the previous part. centroids = computeCentroids(X, idx, K) print('Centroids computed after initial finding of closest centroids:') @@ -77,12 +77,12 @@ input('Program paused. Press Enter to continue...') -## =================== Part 3: K-Means Clustering ====================== +# =================== Part 3: K-Means Clustering ====================== # After you have completed the two functions computeCentroids and # findClosestCentroids, you have all the necessary pieces to run the # kMeans algorithm. In this part, you will run the K-Means algorithm on # the example dataset we have provided. -# + print('Running K-Means clustering on example dataset.') # Load an example dataset @@ -106,17 +106,16 @@ input('Program paused. Press Enter to continue...') -## ============= Part 4: K-Means Clustering on Pixels =============== +# ============= Part 4: K-Means Clustering on Pixels =============== # In this exercise, you will use K-Means to compress an image. To do this, # you will first run K-Means on the colors of the pixels in the image and # then you will map each pixel on to it's closest centroid. # # You should now complete the code in kMeansInitCentroids.m -# print('Running K-Means clustering on pixels from an image.') -# Load an image of a bird +# Load an image of a bird A = scipy.misc.imread('bird_small.png') # If imread does not work for you, you can try instead @@ -148,7 +147,7 @@ input('Program paused. Press Enter to continue...') -## ================= Part 5: Image Compression ====================== +# ================= Part 5: Image Compression ====================== # In this part of the exercise, you will use the clusters of K-Means to # compress an image. To do this, we first find the closest clusters for # each example. After that, we diff --git a/ex7/ex7_pca.py b/ex7/ex7_pca.py index d336286..5eb85b1 100644 --- a/ex7/ex7_pca.py +++ b/ex7/ex7_pca.py @@ -1,4 +1,4 @@ -## Machine Learning Online Class +# Machine Learning Online Class # Exercise 7 | Principle Component Analysis and K-Means Clustering # # Instructions @@ -35,17 +35,17 @@ from ex3.displayData import displayData from show import show -## ================== Part 1: Load Example Dataset =================== +# ================== Part 1: Load Example Dataset =================== # We start this exercise by using a small dataset that is easily to # visualize print('Visualizing example dataset for PCA.') -# The following command loads the dataset. You should now have the -# variable X in your environment +# The following command loads the dataset. You should now have the +# variable X in your environment data = scipy.io.loadmat('ex7data1.mat') X = data['X'] -# Visualize the example dataset +# Visualize the example dataset plt.scatter(X[:, 0], X[:, 1], marker='o', color='b', facecolors='none', lw=1.0) plt.axis([0.5, 6.5, 2, 8]) plt.axis('equal') @@ -53,22 +53,22 @@ input('Program paused. Press Enter to continue...') -## =============== Part 2: Principal Component Analysis =============== +# =============== Part 2: Principal Component Analysis =============== # You should now implement PCA, a dimension reduction technique. 
You # should complete the code in pca.m -# + print('Running PCA on example dataset.') # Before running PCA, it is important to first normalize X X_norm, mu, sigma = featureNormalize(X) -# Run PCA +# Run PCA U, S, V = pca(X_norm) -# Compute mu, the mean of the each feature +# Compute mu, the mean of the each feature -# Draw the eigenvectors centered at mean of data. These lines show the -# directions of maximum variations in the dataset. +# Draw the eigenvectors centered at mean of data. These lines show the +# directions of maximum variations in the dataset. mu2 = mu + 1.5 * S.dot(U.T) plt.plot([mu[0], mu2[0, 0]], [mu[1], mu2[0, 1]], '-k', lw=2) plt.plot([mu[0], mu2[1, 0]], [mu[1], mu2[1, 1]], '-k', lw=2) @@ -80,65 +80,66 @@ input('Program paused. Press Enter to continue...') - -## =================== Part 3: Dimension Reduction =================== +# =================== Part 3: Dimension Reduction =================== # You should now implement the projection step to map the data onto the # first k eigenvectors. The code will then plot the data in this reduced # dimensional space. This will show you what the data looks like when # using only the corresponding eigenvectors to reconstruct it. # # You should complete the code in projectData.m -# + print('Dimension reduction on example dataset.') -# Plot the normalized dataset (returned from pca) +# Plot the normalized dataset (returned from pca) plt.figure() -plt.scatter(X_norm[:, 0], X_norm[:, 1], marker='o', color='b', facecolors='none', lw=1.0) -plt.axis([-4, 3, -4, 3]) #axis square +plt.scatter(X_norm[:, 0], X_norm[:, 1], marker='o', + color='b', facecolors='none', lw=1.0) +plt.axis([-4, 3, -4, 3]) # axis square plt.axis('equal') show() -# Project the data onto K = 1 dimension +# Project the data onto K = 1 dimension K = 1 Z = projectData(X_norm, U, K) print('Projection of the first example: %f', Z[0]) print('(this value should be about 1.481274)') -X_rec = recoverData(Z, U, K) +X_rec = recoverData(Z, U, K) print('Approximation of the first example: %f %f'% (X_rec[0, 0], X_rec[0, 1])) print('(this value should be about -1.047419 -1.047419)') # Draw lines connecting the projected points to the original points -plt.scatter(X_rec[:, 0], X_rec[:, 1], marker='o', color='r', facecolor='none', lw=1.0) +plt.scatter(X_rec[:, 0], X_rec[:, 1], marker='o', + color='r', facecolor='none', lw=1.0) for i in range(len(X_norm)): plt.plot([X_norm[i, 0], X_rec[i, 0]], [X_norm[i, 1], X_rec[i, 1]], '--k') show() input('Program paused. Press Enter to continue...') -## =============== Part 4: Loading and Visualizing Face Data ============= +# =============== Part 4: Loading and Visualizing Face Data ============= # We start the exercise by first loading and visualizing the dataset. # The following code will load the dataset into your environment -# + print('Loading face dataset.') -# Load Face dataset +# Load Face dataset data = scipy.io.loadmat('ex7faces.mat') X = data['X'] -# Display the first 100 faces in the dataset +# Display the first 100 faces in the dataset displayData(X[0:100, :]) input('Program paused. Press Enter to continue...') -## =========== Part 5: PCA on Face Data: Eigenfaces =================== +# =========== Part 5: PCA on Face Data: Eigenfaces =================== # Run PCA and visualize the eigenvectors which are in this case eigenfaces # We display the first 36 eigenfaces. 
-# + print('Running PCA on face dataset.\n(this might take a minute or two ...)\n\n') -# Before running PCA, it is important to first normalize X by subtracting -# the mean value from each feature +# Before running PCA, it is important to first normalize X by subtracting +# the mean value from each feature X_norm, mu, sigma = featureNormalize(X) # Run PCA @@ -149,7 +150,7 @@ input('Program paused. Press Enter to continue...') -## ============= Part 6: Dimension Reduction for Faces ================= +# ============= Part 6: Dimension Reduction for Faces ================= # Project images to the eigen space using the top k eigenvectors # If you are applying a machine learning algorithm print('Dimension reduction for face dataset.') @@ -158,11 +159,11 @@ Z = projectData(X_norm, U, K) print('The projected data Z has a size of: ') -print('%d %d'% Z.shape) +print('%d %d' % Z.shape) input('Program paused. Press Enter to continue...') -## ==== Part 7: Visualization of Faces after PCA Dimension Reduction ==== +# ==== Part 7: Visualization of Faces after PCA Dimension Reduction ==== # Project images to the eigen space using the top K eigen vectors and # visualize only using those K dimensions # Compare to the original input, which is also displayed @@ -170,7 +171,7 @@ print('Visualizing the projected (reduced dimension) faces.') K = 100 -X_rec = recoverData(Z, U, K) +X_rec = recoverData(Z, U, K) # Display normalized data plt.subplot(1, 2, 1) @@ -187,7 +188,7 @@ input('Program paused. Press Enter to continue...') -## === Part 8(a): Optional (ungraded) Exercise: PCA for Visualization === +# === Part 8(a): Optional (ungraded) Exercise: PCA for Visualization === # One useful application of PCA is to use it to visualize high-dimensional # data. In the last K-Means exercise you ran K-Means on 3-dimensional # pixel colors of an image. We first visualize this output in 3D, and then @@ -208,13 +209,13 @@ initial_centroids = kMeansInitCentroids(X, K) centroids, idx = runkMeans(X, initial_centroids, max_iters) -# Sample 1000 random indexes (since working with all the data is -# too expensive. If you have a fast computer, you may increase this. +# Sample 1000 random indexes (since working with all the data is +# too expensive. If you have a fast computer, you may increase this. sel = np.floor(np.random.random(1000) * len(X)) + 1 -# Setup Color Palette +# Setup Color Palette -# Visualize the data and centroid memberships in 3D +# Visualize the data and centroid memberships in 3D fig = plt.figure() ax = fig.add_subplot(111, projection='3d') Xs = np.array([X[s] for s in sel]) @@ -225,14 +226,15 @@ idxn = sel.astype('float') / max(sel.astype('float')) colors = cmap(idxn) # ax = Axes3D(fig) -ax.scatter3D(xs, ys, zs=zs, edgecolors=colors, marker='o', facecolors='none', lw=0.4, s=10) +ax.scatter3D(xs, ys, zs=zs, edgecolors=colors, + marker='o', facecolors='none', lw=0.4, s=10) plt.title('Pixel dataset plotted in 3D. Color shows centroid memberships') show() input('Program paused. 
Press Enter to continue...') -## === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization === -# Use PCA to project this cloud to 2D for visualization +# === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization === +# Use PCA to project this cloud to 2D for visualization # Subtract the mean to use PCA X_norm, mu, sigma = featureNormalize(X) diff --git a/ex7/featureNormalize.py b/ex7/featureNormalize.py index 8044c56..33c91dd 100644 --- a/ex7/featureNormalize.py +++ b/ex7/featureNormalize.py @@ -2,8 +2,7 @@ def featureNormalize(X): - """ - returns a normalized version of X where + """returns a normalized version of X where the mean value of each feature is 0 and the standard deviation is 1. This is often a good preprocessing step to do when working with learning algorithms. @@ -15,4 +14,4 @@ def featureNormalize(X): sigma = np.std(X_norm, axis=0, ddof=1) X_norm = X_norm / sigma - return X_norm, mu, sigma \ No newline at end of file + return X_norm, mu, sigma diff --git a/ex7/findClosestCentroids.py b/ex7/findClosestCentroids.py index e56bfb4..f4841e3 100644 --- a/ex7/findClosestCentroids.py +++ b/ex7/findClosestCentroids.py @@ -13,15 +13,15 @@ def findClosestCentroids(X, centroids): # You need to return the following variables correctly. idx = np.zeros(X.shape[0]) -# ====================== YOUR CODE HERE ====================== -# Instructions: Go over every example, find its closest centroid, and store -# the index inside idx at the appropriate location. -# Concretely, idx(i) should contain the index of the centroid -# closest to example i. Hence, it should be a value in the -# range 1..K -# -# Note: You can use a for-loop over the examples to compute this. -# ============================================================= + # ====================== YOUR CODE HERE ====================== + # Instructions: Go over every example, find its closest centroid, and store + # the index inside idx at the appropriate location. + # Concretely, idx(i) should contain the index of the centroid + # closest to example i. Hence, it should be a value in the + # range 1..K + # + # Note: You can use a for-loop over the examples to compute this. + # ============================================================= return val, idx diff --git a/ex7/kMeansInitCentroids.py b/ex7/kMeansInitCentroids.py index e04cf08..ab44c99 100644 --- a/ex7/kMeansInitCentroids.py +++ b/ex7/kMeansInitCentroids.py @@ -9,9 +9,10 @@ def kMeansInitCentroids(X, K): # You should return this values correctly centroids = np.zeros((K, X.shape[1])) -# ====================== YOUR CODE HERE ====================== -# Instructions: You should set centroids to randomly chosen examples from -# the dataset X -# -# ============================================================= + # ====================== YOUR CODE HERE ====================== + # Instructions: You should set centroids to randomly chosen examples from + # the dataset X + # + # ============================================================= + return centroids diff --git a/ex7/pca.py b/ex7/pca.py index 0fea9d4..1239e9d 100644 --- a/ex7/pca.py +++ b/ex7/pca.py @@ -20,4 +20,5 @@ def pca(X): # number of examples). 
# # ========================================================================= - return U, S, V \ No newline at end of file + + return U, S, V diff --git a/ex7/plotDataPoints.py b/ex7/plotDataPoints.py index d27d575..2866feb 100644 --- a/ex7/plotDataPoints.py +++ b/ex7/plotDataPoints.py @@ -3,7 +3,6 @@ def plotDataPoints(X, idx): - """plots data points in X, coloring them so that those with the same index assignments in idx have the same color """ diff --git a/ex7/submit.py b/ex7/submit.py index a1dc346..29495eb 100644 --- a/ex7/submit.py +++ b/ex7/submit.py @@ -29,8 +29,8 @@ def output(part_id): C = Z[:5, :] idx = (np.mod(np.arange(1, 16), 3)).T - fname = srcs[part_id-1].rsplit('.', 1)[0] - mod = __import__(fname, fromlist=[fname], level=1) + fname = srcs[part_id - 1].rsplit('.', 1)[0] + mod = __import__(fname, fromlist=[fname], level=0) func = getattr(mod, fname) if part_id == 1:
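
Below are minimal sketches of one way the "YOUR CODE HERE" stubs touched by this patch could be filled in. They are illustrative only, not the course's reference solutions, and they use 0-based indices throughout rather than the 1..K convention used in the MATLAB-derived comments.

A possible findClosestCentroids, returning both the squared distance to and the index of the nearest centroid so that it matches the stub's existing "return val, idx" line:

import numpy as np


def findClosestCentroids(X, centroids):
    """For each example in X, find the nearest centroid."""
    m = X.shape[0]
    idx = np.zeros(m, dtype=int)
    val = np.zeros(m)
    for i in range(m):
        # squared Euclidean distance from example i to every centroid
        dists = np.sum((centroids - X[i]) ** 2, axis=1)
        idx[i] = np.argmin(dists)   # 0-based here; the exercise text speaks of 1..K
        val[i] = dists[idx[i]]
    return val, idx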
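
A possible computeCentroids, following the stub's instruction to average the points assigned to each centroid (idx is assumed to hold the 0-based assignments produced above):

import numpy as np


def computeCentroids(X, idx, K):
    """Return a K x n array whose k-th row is the mean of the examples assigned to centroid k."""
    m, n = X.shape
    centroids = np.zeros((K, n))
    for k in range(K):
        points = X[idx == k]          # examples currently assigned to centroid k
        if points.size > 0:           # leave an empty cluster's centroid at zero
            centroids[k] = points.mean(axis=0)
    return centroids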
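
A possible kMeansInitCentroids, picking K distinct random examples from the dataset as the stub's instructions describe:

import numpy as np


def kMeansInitCentroids(X, K):
    """Initialize centroids as K randomly chosen, distinct examples from X."""
    rand_indices = np.random.permutation(X.shape[0])
    centroids = X[rand_indices[:K], :]
    return centroids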
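
And a sketch of pca. It assumes X has already been normalized by featureNormalize, and it returns S as a diagonal matrix so that the S.dot(U.T) expression in ex7_pca.py yields a 2 x 2 result:

import numpy as np


def pca(X):
    """Compute the eigenvectors U and singular values S of the covariance matrix of X."""
    m = X.shape[0]
    Sigma = X.T.dot(X) / m            # covariance matrix of the (already normalized) data
    U, s, V = np.linalg.svd(Sigma)
    return U, np.diag(s), V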