|
10 | 10 | from visualizeFit import visualizeFit
|
11 | 11 | from show import show
|
12 | 12 |
|
13 |
| -## Machine Learning Online Class |
| 13 | +# Machine Learning Online Class |
14 | 14 | # Exercise 8 | Anomaly Detection and Collaborative Filtering
|
15 | 15 | #
|
16 | 16 | # Instructions
|
|
25 | 25 | #
|
26 | 26 | # For this exercise, you will not need to change any code in this file,
|
27 | 27 | # or any other files other than those mentioned above.
|
28 |
| -# |
29 | 28 |
|
30 |
| -## ================== Part 1: Load Example Dataset =================== |
| 29 | + |
| 30 | +# ================== Part 1: Load Example Dataset =================== |
31 | 31 | # We start this exercise by using a small dataset that is easy to
|
32 | 32 | # visualize.
|
33 | 33 | #
|
34 | 34 | # Our example case consists of 2 network server statistics across
|
35 | 35 | # several machines: the latency and throughput of each machine.
|
36 | 36 | # This exercise will help us find possibly faulty (or very fast) machines.
|
37 |
| -# |
38 | 37 |
|
print('Visualizing example dataset for outlier detection.')

# Load the first example dataset; this populates the variables
# X, Xval, yval used by the rest of the script.
dataset = scipy.io.loadmat('ex8data1.mat')
X = dataset['X']
Xval = dataset['Xval']
yval = dataset['yval'].flatten()

# Scatter-plot the two server statistics (latency vs. throughput)
plt.plot(X[:, 0], X[:, 1], 'bx')
plt.axis([0, 30, 0, 30])
plt.xlabel('Latency (ms)')
|
|
54 | 53 | input('Program paused. Press Enter to continue...')
|
55 | 54 |
|
56 | 55 |
|
57 |
| -## ================== Part 2: Estimate the dataset statistics =================== |
| 56 | +# ================== Part 2: Estimate the dataset statistics =================== |
58 | 57 | # For this exercise, we assume a Gaussian distribution for the dataset.
|
59 | 58 | #
|
60 | 59 | # We first estimate the parameters of our assumed Gaussian distribution,
|
61 | 60 | # then compute the probabilities for each of the points and then visualize
|
62 | 61 | # both the overall distribution and where each of the points falls in
|
63 | 62 | # terms of that distribution.
|
64 |
| -# |
| 63 | + |
65 | 64 | print('Visualizing Gaussian fit.')
|
66 | 65 |
|
67 |
| -# Estimate my and sigma2 |
| 66 | +# Estimate mu and sigma2 |
68 | 67 | mu, sigma2 = estimateGaussian(X)
|
69 | 68 |
|
70 |
| -# Returns the density of the multivariate normal at each data point (row) |
71 |
| -# of X |
| 69 | +# Returns the density of the multivariate normal at each data point (row) |
| 70 | +# of X |
72 | 71 | p = multivariateGaussian(X, mu, sigma2)
|
73 | 72 |
|
74 |
| -# Visualize the fit |
| 73 | +# Visualize the fit |
75 | 74 | visualizeFit(X, mu, sigma2)
|
76 | 75 | plt.xlabel('Latency (ms)')
|
77 | 76 | plt.ylabel('Throughput (mb/s)')
|
78 | 77 | show()
|
79 | 78 |
|
80 | 79 | input('Program paused. Press Enter to continue...')
|
81 | 80 |
|
82 |
| -## ================== Part 3: Find Outliers =================== |
| 81 | +# ================== Part 3: Find Outliers =================== |
83 | 82 | # Now you will find a good epsilon threshold using a cross-validation set
|
84 | 83 | # probabilities given the estimated Gaussian distribution
|
85 |
| -# |
86 | 84 |
|
87 | 85 | pval = multivariateGaussian(Xval, mu, sigma2)
|
88 | 86 |
|
|
print('Best F1 on Cross Validation Set: %f' % F1)
print(' (you should see a value epsilon of about 8.99e-05)')

# Find the outliers in the training set and plot them.
# The comparison already yields a boolean mask, so the redundant
# np.where(cond, True, False) wrapper is unnecessary.
outliers = p < epsilon

# Draw a red circle around those outliers
plt.plot(X[outliers, 0], X[outliers, 1], 'ro', lw=2,
         markersize=10, fillstyle='none', markeredgewidth=1)
show()

input('Program paused. Press Enter to continue...')
|
102 | 101 |
|
103 |
| -## ================== Part 4: Multidimensional Outliers =================== |
| 102 | +# ================== Part 4: Multidimensional Outliers =================== |
104 | 103 | # We will now use the code from the previous part and apply it to a
|
105 | 104 | # harder problem in which more features describe each datapoint and only
|
106 | 105 | # some features indicate whether a point is an outlier.
|
107 |
| -# |
108 | 106 |
|
109 |
| -# Loads the second dataset. You should now have the |
110 |
| -# variables X, Xval, yval in your environment |
| 107 | +# Loads the second dataset. You should now have the |
| 108 | +# variables X, Xval, yval in your environment |
111 | 109 | data = scipy.io.loadmat('ex8data2.mat')
|
112 | 110 | X = data['X']
|
113 | 111 | Xval = data['Xval']
|
114 | 112 | yval = data['yval'].flatten()
|
115 | 113 |
|
116 |
| -# Apply the same steps to the larger dataset |
| 114 | +# Apply the same steps to the larger dataset |
117 | 115 | mu, sigma2 = estimateGaussian(X)
|
118 | 116 |
|
119 |
| -# Training set |
| 117 | +# Training set |
120 | 118 | p = multivariateGaussian(X, mu, sigma2)
|
121 | 119 |
|
122 |
| -# Cross-validation set |
| 120 | +# Cross-validation set |
123 | 121 | pval = multivariateGaussian(Xval, mu, sigma2)
|
124 | 122 |
|
125 |
| -# Find the best threshold |
| 123 | +# Find the best threshold |
126 | 124 | epsilon, F1 = selectThreshold(yval, pval)
|
127 | 125 |
|
128 | 126 | print('Best epsilon found using cross-validation: %e' % epsilon)
|
|
0 commit comments