Skip to content

Commit

Permalink
Update ex8
Browse files Browse the repository at this point in the history
  • Loading branch information
jtlowery committed Apr 23, 2017
1 parent 1466951 commit 1e17b41
Show file tree
Hide file tree
Showing 9 changed files with 101 additions and 106 deletions.
16 changes: 8 additions & 8 deletions ex8/checkCostFunction.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np

from ex4.computeNumericalGradient import computeNumericalGradient
from cofiCostFunc import cofiCostFunc

Expand All @@ -11,7 +12,7 @@ def checkCostFunction(Lambda=0):
computations should result in very similar values.
"""

## Create small problem
# Create small problem
X_t = np.random.rand(4, 3)
Theta_t = np.random.rand(5, 3)

Expand All @@ -21,14 +22,14 @@ def checkCostFunction(Lambda=0):
R = np.zeros(Y.shape)
R[np.where(Y != 0, True, False)] = 1

## Run Gradient Checking
# Run Gradient Checking
X = np.random.random_sample(X_t.shape)
Theta = np.random.random_sample(Theta_t.shape)
num_users = Y.shape[1]
num_movies = Y.shape[0]
num_features = Theta_t.shape[1]

# Unroll parameters
# Unroll parameters
params = np.hstack((X.T.flatten(), Theta.T.flatten()))

costFunc = lambda t: cofiCostFunc(t, Y, R, num_users, num_movies, num_features, Lambda)
Expand All @@ -41,15 +42,14 @@ def costFunc_w(t):

cost, grad = cofiCostFunc(params, Y, R, num_users, num_movies, num_features, Lambda)


print(np.column_stack((numgrad, grad)))

print('The above two columns you get should be very similar.\n' \
'(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n')
print('The above two columns you get should be very similar.\n'
'(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n')

diff = np.linalg.norm(numgrad-grad)/np.linalg.norm(numgrad+grad)

print('If your backpropagation implementation is correct, then\n ' \
'the relative difference will be small (less than 1e-9). \n' \
print('If your backpropagation implementation is correct, then\n '
'the relative difference will be small (less than 1e-9). \n'
'\nRelative Difference: %g\n' % diff)

5 changes: 2 additions & 3 deletions ex8/cofiCostFunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@


def cofiCostFunc(params, Y, R, num_users, num_movies, num_features, Lambda):
"""returns the cost and gradient for the
"""returns the cost and gradient for the collaborative filtering problem.
"""

# Unfold the X and Theta matrices from params
X = np.array(params[:num_movies*num_features]).reshape(num_features, num_movies).T.copy()
Theta = np.array(params[num_movies*num_features:]).reshape(num_features, num_users).T.copy()


# You need to return the following values correctly
J = 0
X_grad = np.zeros(X.shape)
Expand Down Expand Up @@ -38,6 +37,6 @@ def cofiCostFunc(params, Y, R, num_users, num_movies, num_features, Lambda):
# partial derivatives w.r.t. to each element of Theta
# =============================================================

grad = np.hstack((X_grad.T.flatten(),Theta_grad.T.flatten()))
grad = np.hstack((X_grad.T.flatten(), Theta_grad.T.flatten()))

return J, grad
1 change: 1 addition & 0 deletions ex8/estimateGaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ def estimateGaussian(X):
The output is an n-dimensional vector mu, the mean of the data set
and the variances sigma^2, an n x 1 vector
"""

m = len(X)

# ====================== YOUR CODE HERE ======================
Expand Down
50 changes: 24 additions & 26 deletions ex8/ex8.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from visualizeFit import visualizeFit
from show import show

## Machine Learning Online Class
# Machine Learning Online Class
# Exercise 8 | Anomaly Detection and Collaborative Filtering
#
# Instructions
Expand All @@ -25,27 +25,26 @@
#
# For this exercise, you will not need to change any code in this file,
# or any other files other than those mentioned above.
#

## ================== Part 1: Load Example Dataset ===================

# ================== Part 1: Load Example Dataset ===================
# We start this exercise by using a small dataset that is easy to
# visualize.
#
# Our example case consists of 2 network server statistics across
# several machines: the latency and throughput of each machine.
# This exercise will help us find possibly faulty (or very fast) machines.
#

print('Visualizing example dataset for outlier detection.')

# The following command loads the dataset. You should now have the
# variables X, Xval, yval in your environment
# The following command loads the dataset. You should now have the
# variables X, Xval, yval in your environment
data = scipy.io.loadmat('ex8data1.mat')
X = data['X']
Xval = data['Xval']
yval = data['yval'].flatten()

# Visualize the example dataset
# Visualize the example dataset
plt.plot(X[:, 0], X[:, 1], 'bx')
plt.axis([0, 30, 0, 30])
plt.xlabel('Latency (ms)')
Expand All @@ -54,35 +53,34 @@
input('Program paused. Press Enter to continue...')


## ================== Part 2: Estimate the dataset statistics ===================
# ================== Part 2: Estimate the dataset statistics ===================
# For this exercise, we assume a Gaussian distribution for the dataset.
#
# We first estimate the parameters of our assumed Gaussian distribution,
# then compute the probabilities for each of the points and then visualize
# both the overall distribution and where each of the points falls in
# terms of that distribution.
#

print('Visualizing Gaussian fit.')

# Estimate my and sigma2
# Estimate mu and sigma2
mu, sigma2 = estimateGaussian(X)

# Returns the density of the multivariate normal at each data point (row)
# of X
# Returns the density of the multivariate normal at each data point (row)
# of X
p = multivariateGaussian(X, mu, sigma2)

# Visualize the fit
# Visualize the fit
visualizeFit(X, mu, sigma2)
plt.xlabel('Latency (ms)')
plt.ylabel('Throughput (mb/s)')
show()

input('Program paused. Press Enter to continue...')

## ================== Part 3: Find Outliers ===================
# ================== Part 3: Find Outliers ===================
# Now you will find a good epsilon threshold using the cross-validation set
# probabilities computed under the estimated Gaussian distribution
#

pval = multivariateGaussian(Xval, mu, sigma2)

Expand All @@ -91,38 +89,38 @@
print('Best F1 on Cross Validation Set: %f' % F1)
print(' (you should see a value epsilon of about 8.99e-05)')

# Find the outliers in the training set and plot the
# Find the outliers in the training set and plot them
outliers = np.where(p < epsilon, True, False)

# Draw a red circle around those outliers
plt.plot(X[outliers, 0], X[outliers, 1], 'ro', lw=2, markersize=10, fillstyle='none', markeredgewidth=1)
# Draw a red circle around those outliers
plt.plot(X[outliers, 0], X[outliers, 1], 'ro', lw=2,
markersize=10, fillstyle='none', markeredgewidth=1)
show()

input('Program paused. Press Enter to continue...')

## ================== Part 4: Multidimensional Outliers ===================
# ================== Part 4: Multidimensional Outliers ===================
# We will now use the code from the previous part and apply it to a
# harder problem in which more features describe each datapoint and only
# some features indicate whether a point is an outlier.
#

# Loads the second dataset. You should now have the
# variables X, Xval, yval in your environment
# Loads the second dataset. You should now have the
# variables X, Xval, yval in your environment
data = scipy.io.loadmat('ex8data2.mat')
X = data['X']
Xval = data['Xval']
yval = data['yval'].flatten()

# Apply the same steps to the larger dataset
# Apply the same steps to the larger dataset
mu, sigma2 = estimateGaussian(X)

# Training set
# Training set
p = multivariateGaussian(X, mu, sigma2)

# Cross-validation set
# Cross-validation set
pval = multivariateGaussian(Xval, mu, sigma2)

# Find the best threshold
# Find the best threshold
epsilon, F1 = selectThreshold(yval, pval)

print('Best epsilon found using cross-validation: %e' % epsilon)
Expand Down
Loading

0 comments on commit 1e17b41

Please sign in to comment.