From f0bd57fad405c1ec225349d0eebef2ec19bd6e90 Mon Sep 17 00:00:00 2001
From: Joel Lowery
Date: Thu, 16 Mar 2017 01:11:39 -0500
Subject: [PATCH] Minor spacing and comment fixes

---
 ex4/checkNNGradients.py         |  20 ++++---
 ex4/computeNumericalGradient.py |  17 +++---
 ex4/ex4.py                      | 102 +++++++++++++++-----------
 ex4/nnCostFunction.py           |  80 ++++++++++++-------------
 ex4/randInitializeWeights.py    |   9 +--
 ex4/sigmoidGradient.py          |   8 +--
 6 files changed, 117 insertions(+), 119 deletions(-)

diff --git a/ex4/checkNNGradients.py b/ex4/checkNNGradients.py
index 2b01823..bc7a1e4 100644
--- a/ex4/checkNNGradients.py
+++ b/ex4/checkNNGradients.py
@@ -7,11 +7,12 @@
 
 
 def checkNNGradients(Lambda=0):
-    """Creates a small neural network to check the
-    backpropagation gradients, it will output the analytical gradients
-    produced by your backprop code and the numerical gradients (computed
-    using computeNumericalGradient). These two gradient computations should
-    result in very similar values.
+    """
+    Creates a small neural network to check the
+    backpropagation gradients. It will output the analytical gradients
+    produced by your backprop code and the numerical gradients (computed
+    using computeNumericalGradient). These two gradient computations should
+    result in very similar values.
     """
 
     input_layer_size = 3
@@ -24,15 +25,16 @@ def checkNNGradients(Lambda=0):
     Theta2 = debugInitializeWeights(num_labels, hidden_layer_size)
 
     # Reusing debugInitializeWeights to generate X
-    X  = debugInitializeWeights(m, input_layer_size - 1)
-    y  = np.mod(range(1, m+1), num_labels)
+    X = debugInitializeWeights(m, input_layer_size - 1)
+    y = np.mod(range(1, m + 1), num_labels)
 
     # Unroll parameters
     nn_params = np.hstack((Theta1.T.ravel(), Theta2.T.ravel()))
 
     # Short hand for cost function
-    costFunc = lambda p: nnCostFunction(p, input_layer_size, hidden_layer_size, num_labels, X, y, Lambda)
+    costFunc = lambda p: nnCostFunction(p, input_layer_size, hidden_layer_size,
+                                        num_labels, X, y, Lambda)
 
     numgrad = computeNumericalGradient(costFunc, nn_params)
     grad = costFunc(nn_params)[1]
 
@@ -47,7 +49,7 @@ def checkNNGradients(Lambda=0):
     # Evaluate the norm of the difference between two solutions.
     # If you have a correct implementation, and assuming you used EPSILON = 0.0001
     # in computeNumericalGradient.m, then diff below should be less than 1e-9
-    diff = np.linalg.norm(numgrad-grad)/np.linalg.norm(numgrad+grad)
+    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
 
     print('If your backpropagation implementation is correct, then\n '
           'the relative difference will be small (less than 1e-9). \n'
diff --git a/ex4/computeNumericalGradient.py b/ex4/computeNumericalGradient.py
index fd35446..f73ed00 100644
--- a/ex4/computeNumericalGradient.py
+++ b/ex4/computeNumericalGradient.py
@@ -2,15 +2,16 @@
 
 
 def computeNumericalGradient(J, theta):
-    """computes the numerical gradient of the function J around theta.
-    Calling y = J(theta) should return the function value at theta. """
-
-# Notes: The following code implements numerical gradient checking, and
-# returns the numerical gradient.It sets numgrad(i) to (a numerical
-# approximation of) the partial derivative of J with respect to the
-# i-th input argument, evaluated at theta. (i.e., numgrad(i) should
-# be the (approximately) the partial derivative of J with respect
-# to theta(i).)
+    """
+    Computes the numerical gradient of the function J around theta.
+    Calling y = J(theta) should return the function value at theta.
+    """
+    # Notes: The following code implements numerical gradient checking, and
+    # returns the numerical gradient. It sets numgrad(i) to (a numerical
+    # approximation of) the partial derivative of J with respect to the
+    # i-th input argument, evaluated at theta. (i.e., numgrad(i) should
+    # be (approximately) the partial derivative of J with respect
+    # to theta(i).)
 
     numgrad = np.zeros(theta.shape[0])
     perturb = np.zeros(theta.shape[0])
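
For reference, the central-difference scheme that computeNumericalGradient implements can be sketched as follows (a minimal standalone sketch, assuming the callable J returns a scalar cost; the name numerical_gradient is illustrative, not the repo's API):

    import numpy as np

    def numerical_gradient(J, theta, eps=1e-4):
        # numgrad[i] ~= (J(theta + eps*e_i) - J(theta - eps*e_i)) / (2*eps)
        numgrad = np.zeros_like(theta)
        perturb = np.zeros_like(theta)
        for i in range(theta.size):
            perturb[i] = eps
            numgrad[i] = (J(theta + perturb) - J(theta - perturb)) / (2.0 * eps)
            perturb[i] = 0.0
        return numgrad

    # For J(theta) = sum(theta**2) the exact gradient is 2*theta.
    theta = np.array([1.0, -2.0, 3.0])
    print(numerical_gradient(lambda t: np.sum(t ** 2), theta))  # ~ [ 2. -4.  6.]

With EPSILON = 0.0001, the relative difference computed in checkNNGradients should come out well below 1e-9 for a correct backprop implementation.
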
diff --git a/ex4/ex4.py b/ex4/ex4.py
index 86ea218..f7d19b3 100644
--- a/ex4/ex4.py
+++ b/ex4/ex4.py
@@ -1,10 +1,20 @@
-## Machine Learning Online Class - Exercise 4 Neural Network Learning
+import numpy as np
+import scipy.io
+from scipy.optimize import minimize
+
+from ex3.displayData import displayData
+from ex3.predict import predict
+from nnCostFunction import nnCostFunction
+from sigmoidGradient import sigmoidGradient
+from randInitializeWeights import randInitializeWeights
+from checkNNGradients import checkNNGradients
 
+# Machine Learning Online Class - Exercise 4 Neural Network Learning
 #  Instructions
 #  ------------
-#  
+#
 #  This file contains code that helps you get started on the
-#  linear exercise. You will need to complete the following functions 
+#  linear exercise. You will need to complete the following functions
 #  in this exericse:
 #
 #     sigmoidGradient.m
@@ -13,29 +23,18 @@
 #
 #  For this exercise, you will not need to change any code in this file,
 #  or any other files other than those mentioned above.
-#
-
-import numpy as np
-import scipy.io
-from scipy.optimize import minimize
-
-from ex3.displayData import displayData
-from ex3.predict import predict
-from nnCostFunction import nnCostFunction
-from sigmoidGradient import sigmoidGradient
-from randInitializeWeights import randInitializeWeights
-from checkNNGradients import checkNNGradients
-## Setup the parameters you will use for this exercise
-input_layer_size = 400   # 20x20 Input Images of Digits
-hidden_layer_size = 25   # 25 hidden units
-num_labels = 10          # 10 labels, from 1 to 10
-                         # (note that we have mapped "0" to label 10)
+# Setup the parameters you will use for this exercise
+# 20x20 Input Images of Digits
+input_layer_size = 400
+# 25 hidden units
+hidden_layer_size = 25
+# 10 labels, from 1 to 10 (note that we have mapped "0" to label 10)
+num_labels = 10
 
-## =========== Part 1: Loading and Visualizing Data =============
+# =========== Part 1: Loading and Visualizing Data =============
 #  We start the exercise by first loading and visualizing the dataset.
 #  You will be working with a dataset that contains handwritten digits.
-#
 
 # Load Training Data
 print('Loading and Visualizing Data ...')
 
@@ -53,8 +52,7 @@
 
 input('Program paused. Press Enter to continue...')
 
-
-## ================ Part 2: Loading Parameters ================
+# ================ Part 2: Loading Parameters ================
 # In this part of the exercise, we load some pre-initialized
 # neural network parameters.
 
@@ -69,7 +67,7 @@
 # Unroll parameters
 nn_params = np.hstack((Theta1.T.ravel(), Theta2.T.ravel()))
 
-## ================ Part 3: Compute Cost (Feedforward) ================
+# ================ Part 3: Compute Cost (Feedforward) ================
 #  To the neural network, you should first start by implementing the
 #  feedforward part of the neural network that returns the cost only. You
 #  should complete the code in nnCostFunction.m to return cost. After
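
The unrolling convention used above (Theta.T.ravel() when packing, np.reshape(..., order='F') when unpacking) round-trips exactly; a small sketch with the exercise's shapes (25 x 401 and 10 x 26), purely illustrative:

    import numpy as np

    Theta1 = np.random.rand(25, 401)   # hidden_layer_size x (input_layer_size + 1)
    Theta2 = np.random.rand(10, 26)    # num_labels x (hidden_layer_size + 1)

    # Theta.T.ravel() flattens column-wise, i.e. the same as ravel(order='F').
    nn_params = np.hstack((Theta1.T.ravel(), Theta2.T.ravel()))

    # Reshaping with order='F' recovers the original matrices exactly.
    T1 = np.reshape(nn_params[:25 * 401], (25, 401), order='F')
    T2 = np.reshape(nn_params[25 * 401:], (10, 26), order='F')
    assert np.array_equal(T1, Theta1) and np.array_equal(T2, Theta2)
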
@@ -89,32 +87,32 @@
 J, _ = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, Lambda)
 
-print('Cost at parameters (loaded from ex4weights): %f \n(this value should be about 0.287629)\n' % J)
+print('Cost at parameters (loaded from ex4weights): %f '
+      '\n(this value should be about 0.287629)\n' % J)
 
 input('Program paused. Press Enter to continue...')
 
-## =============== Part 4: Implement Regularization ===============
+# =============== Part 4: Implement Regularization ===============
 # Once your cost function implementation is correct, you should now
 # continue to implement the regularization with the cost.
-#
 
 print('Checking Cost Function (w/ Regularization) ...')
 
 # Weight regularization parameter (we set this to 1 here).
 Lambda = 1
 
-J, _ = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, Lambda)
+J, _ = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
+                      num_labels, X, y, Lambda)
 
-print('Cost at parameters (loaded from ex4weights): %f \n(this value should be about 0.383770)' % J)
+print('Cost at parameters (loaded from ex4weights): %f '
+      '\n(this value should be about 0.383770)' % J)
 
 input('Program paused. Press Enter to continue...')
 
-
-## ================ Part 5: Sigmoid Gradient ================
+# ================ Part 5: Sigmoid Gradient ================
 #  Before you start implementing the neural network, you will first
 #  implement the gradient for the sigmoid function. You should complete the
 #  code in the sigmoidGradient.m file.
-#
 
 print('Evaluating sigmoid gradient...')
 
@@ -124,9 +122,8 @@
 
 input('Program paused. Press Enter to continue...')
 
-
-## ================ Part 6: Initializing Pameters ================
-# In this part of the exercise, you will be starting to implment a two
+# ================ Part 6: Initializing Parameters ================
+# In this part of the exercise, you will be starting to implement a two
 # layer neural network that classifies digits. You will start by
 # implementing a function to initialize the weights of the neural network
 # (randInitializeWeights.m)
@@ -139,8 +136,7 @@
 # Unroll parameters
 initial_nn_params = np.hstack((initial_Theta1.T.ravel(), initial_Theta2.T.ravel()))
 
-
-## =============== Part 7: Implement Backpropagation ===============
+# =============== Part 7: Implement Backpropagation ===============
 # Once your cost matches up with ours, you should proceed to implement the
 # backpropagation algorithm for the neural network. You should add to the
 # code you've written in nnCostFunction.m to return the partial
 
@@ -153,8 +149,7 @@
 
 input('Program paused. Press Enter to continue...')
 
-
-## =============== Part 8: Implement Regularization ===============
+# =============== Part 8: Implement Regularization ===============
 # Once your backpropagation implementation is correct, you should now
 # continue to implement the regularization with the cost and gradient.
 #
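
The expected values quoted above (about 0.287629 unregularized and 0.383770 with lambda = 1) come from the regularized cross-entropy cost that nnCostFunction has to compute. One possible sketch of that cost, not the reference solution (the name feedforward_cost and the assumption that y is a flat integer array of labels 1..K are both illustrative):

    import numpy as np

    def feedforward_cost(Theta1, Theta2, X, y, num_labels, Lambda):
        sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
        m = X.shape[0]

        a1 = np.hstack((np.ones((m, 1)), X))           # add bias column
        a2 = np.hstack((np.ones((m, 1)), sigmoid(a1.dot(Theta1.T))))
        h = sigmoid(a2.dot(Theta2.T))                  # m x num_labels hypotheses

        Y = np.eye(num_labels)[y - 1]                  # one-hot encode labels 1..K
        J = (-Y * np.log(h) - (1 - Y) * np.log(1 - h)).sum() / m
        # Regularization skips the bias column of each weight matrix.
        J += Lambda / (2.0 * m) * ((Theta1[:, 1:] ** 2).sum() +
                                   (Theta2[:, 1:] ** 2).sum())
        return J
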
@@ -166,14 +161,15 @@
 checkNNGradients(Lambda)
 
 # Also output the costFunction debugging values
-debug_J, _ = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, Lambda)
+debug_J, _ = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
+                            num_labels, X, y, Lambda)
 
-print('Cost at (fixed) debugging parameters (w/ lambda = 10): %f (this value should be about 0.576051)\n\n' % debug_J)
+print('Cost at (fixed) debugging parameters (w/ lambda = 10): %f '
+      '(this value should be about 0.576051)\n' % debug_J)
 
 input('Program paused. Press Enter to continue...')
 
-
-## =================== Part 8: Training NN ===================
+# =================== Part 9: Training NN ===================
 # You have now implemented all the code necessary to train a neural
 # network. To train your neural network, we will now use "fmincg", which
 # is a function which works similarly to "fminunc". Recall that these
 
@@ -184,13 +180,15 @@
 
 #  After you have completed the assignment, change the MaxIter to a larger
 #  value to see how more training helps.
-#  options = optimset('MaxIter', 50)
+# options = optimset('MaxIter', 50)
 
 #  You should also try different values of lambda
 Lambda = 1
 
-costFunc = lambda p: nnCostFunction(p, input_layer_size, hidden_layer_size, num_labels, X, y, Lambda)[0]
-gradFunc = lambda p: nnCostFunction(p, input_layer_size, hidden_layer_size, num_labels, X, y, Lambda)[1]
+costFunc = lambda p: nnCostFunction(p, input_layer_size, hidden_layer_size,
+                                    num_labels, X, y, Lambda)[0]
+gradFunc = lambda p: nnCostFunction(p, input_layer_size, hidden_layer_size,
+                                    num_labels, X, y, Lambda)[1]
 
 result = minimize(costFunc, initial_nn_params, method='CG', jac=gradFunc,
                   options={'disp': True, 'maxiter': 50.0})
 
@@ -199,14 +197,13 @@
 
 # Obtain Theta1 and Theta2 back from nn_params
 Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)],
-                 (hidden_layer_size, input_layer_size + 1), order='F').copy()
+                    (hidden_layer_size, input_layer_size + 1), order='F').copy()
 Theta2 = np.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):],
-                 (num_labels, (hidden_layer_size + 1)), order='F').copy()
+                    (num_labels, (hidden_layer_size + 1)), order='F').copy()
 
 input('Program paused. Press Enter to continue...')
 
-
-## ================= Part 9: Visualize Weights =================
+# ================= Part 10: Visualize Weights =================
 #  You can now "visualize" what the neural network is learning by
 #  displaying the hidden units to see what features they are capturing in
 #  the data.
 
@@ -217,7 +214,7 @@
 
 input('Program paused. Press Enter to continue...')
 
-## ================= Part 10: Implement Predict =================
+# ================= Part 11: Implement Predict =================
 #  After training the neural network, we would like to use it to predict
 #  the labels. You will now implement the "predict" function to use the
 #  neural network to predict the labels of the training set. This lets
 
@@ -228,5 +225,4 @@
 accuracy = np.mean(np.double(pred == y)) * 100
 print('Training Set Accuracy: %f\n' % accuracy)
 
-
 input('Program paused. Press Enter to exit...')
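
The prediction step at the end of ex4.py reuses predict from ex3; the underlying computation is a forward pass followed by an argmax over the output units, roughly as below (the name predict_labels is illustrative and the actual ex3 implementation may differ in details):

    import numpy as np

    def predict_labels(Theta1, Theta2, X):
        sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
        m = X.shape[0]
        a1 = np.hstack((np.ones((m, 1)), X))
        a2 = np.hstack((np.ones((m, 1)), sigmoid(a1.dot(Theta1.T))))
        h = sigmoid(a2.dot(Theta2.T))
        return np.argmax(h, axis=1) + 1   # labels are 1-based ("0" is label 10)

    # accuracy = np.mean(predict_labels(Theta1, Theta2, X) == y) * 100
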
diff --git a/ex4/nnCostFunction.py b/ex4/nnCostFunction.py
index c72c995..abacef2 100644
--- a/ex4/nnCostFunction.py
+++ b/ex4/nnCostFunction.py
@@ -4,14 +4,15 @@
 from sigmoidGradient import sigmoidGradient
 
 
-def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, Lambda):
+def nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
+                   num_labels, X, y, Lambda):
 
-    """computes the cost and gradient of the neural network. The
-    parameters for the neural network are "unrolled" into the vector
-    nn_params and need to be converted back into the weight matrices.
+    """ Computes the cost and gradient of the neural network. The
+    parameters for the neural network are "unrolled" into the vector
+    nn_params and need to be converted back into the weight matrices.
 
-    The returned parameter grad should be a "unrolled" vector of the
-    partial derivatives of the neural network.
+    The returned parameter grad should be an "unrolled" vector of the
+    partial derivatives of the neural network.
     """
 
     # Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices
 
@@ -23,44 +24,41 @@ def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X
     Theta2 = np.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):],
                         (num_labels, (hidden_layer_size + 1)), order='F').copy()
 
-
-
     # Setup some useful variables
     m, _ = X.shape
 
-
-# ====================== YOUR CODE HERE ======================
-# Instructions: You should complete the code by working through the
-#               following parts.
-#
-# Part 1: Feedforward the neural network and return the cost in the
-#         variable J. After implementing Part 1, you can verify that your
-#         cost function computation is correct by verifying the cost
-#         computed in ex4.m
-#
-# Part 2: Implement the backpropagation algorithm to compute the gradients
-#         Theta1_grad and Theta2_grad. You should return the partial derivatives of
-#         the cost function with respect to Theta1 and Theta2 in Theta1_grad and
-#         Theta2_grad, respectively. After implementing Part 2, you can check
-#         that your implementation is correct by running checkNNGradients
-#
-# Note: The vector y passed into the function is a vector of labels
-#       containing values from 1..K. You need to map this vector into a
-#       binary vector of 1's and 0's to be used with the neural network
-#       cost function.
-#
-# Hint: We recommend implementing backpropagation using a for-loop
-#       over the training examples if you are implementing it for the
-#       first time.
-#
-# Part 3: Implement regularization with the cost function and gradients.
-#
-# Hint: You can implement this around the code for
-#       backpropagation. That is, you can compute the gradients for
-#       the regularization separately and then add them to Theta1_grad
-#       and Theta2_grad from Part 2.
-#
-# =========================================================================
+    # ====================== YOUR CODE HERE ======================
+    # Instructions: You should complete the code by working through the
+    #               following parts.
+    #
+    # Part 1: Feedforward the neural network and return the cost in the
+    #         variable J. After implementing Part 1, you can verify that your
+    #         cost function computation is correct by verifying the cost
+    #         computed in ex4.m
+    #
+    # Part 2: Implement the backpropagation algorithm to compute the gradients
+    #         Theta1_grad and Theta2_grad. You should return the partial derivatives of
+    #         the cost function with respect to Theta1 and Theta2 in Theta1_grad and
+    #         Theta2_grad, respectively. After implementing Part 2, you can check
+    #         that your implementation is correct by running checkNNGradients
+    #
+    # Note: The vector y passed into the function is a vector of labels
+    #       containing values from 1..K. You need to map this vector into a
+    #       binary vector of 1's and 0's to be used with the neural network
+    #       cost function.
+    #
+    # Hint: We recommend implementing backpropagation using a for-loop
+    #       over the training examples if you are implementing it for the
+    #       first time.
+    #
+    # Part 3: Implement regularization with the cost function and gradients.
+    #
+    # Hint: You can implement this around the code for
+    #       backpropagation. That is, you can compute the gradients for
+    #       the regularization separately and then add them to Theta1_grad
+    #       and Theta2_grad from Part 2.
+    #
+    # =========================================================================
 
     # Unroll gradient
     grad = np.hstack((Theta1_grad.T.ravel(), Theta2_grad.T.ravel()))
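
The instructions above suggest a per-example loop for backpropagation; one way that loop could be written (a sketch under the one-hot assumption described in the Note, not the reference implementation; Y is the m x K one-hot matrix built from y):

    import numpy as np

    def backprop_gradients(Theta1, Theta2, X, Y, Lambda):
        sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
        m = X.shape[0]
        Theta1_grad = np.zeros_like(Theta1)
        Theta2_grad = np.zeros_like(Theta2)

        for t in range(m):
            a1 = np.hstack((1.0, X[t]))                     # (input + 1,)
            z2 = Theta1.dot(a1)
            a2 = np.hstack((1.0, sigmoid(z2)))              # (hidden + 1,)
            a3 = sigmoid(Theta2.dot(a2))                    # (num_labels,)

            delta3 = a3 - Y[t]                              # output-layer error
            delta2 = Theta2[:, 1:].T.dot(delta3) * sigmoid(z2) * (1 - sigmoid(z2))

            Theta2_grad += np.outer(delta3, a2)
            Theta1_grad += np.outer(delta2, a1)

        Theta1_grad /= m
        Theta2_grad /= m
        # Regularize everything except the bias column.
        Theta1_grad[:, 1:] += (Lambda / m) * Theta1[:, 1:]
        Theta2_grad[:, 1:] += (Lambda / m) * Theta2[:, 1:]
        return Theta1_grad, Theta2_grad

checkNNGradients compares gradients like these against the numerical ones, so any indexing mistake shows up immediately.
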
diff --git a/ex4/randInitializeWeights.py b/ex4/randInitializeWeights.py
index 68d8d2c..4eb2066 100644
--- a/ex4/randInitializeWeights.py
+++ b/ex4/randInitializeWeights.py
@@ -2,11 +2,12 @@
 
 
 def randInitializeWeights(L_in, L_out):
-    """randomly initializes the weights of a layer with L_in incoming connections and L_out outgoing
-    connections.
+    """ Randomly initializes the weights of a layer with L_in
+    incoming connections and L_out outgoing
+    connections.
 
-    Note that W should be set to a matrix of size(L_out, 1 + L_in)
-    as the column row of W handles the "bias" terms
+    Note that W should be set to a matrix of size (L_out, 1 + L_in),
+    as the first column of W handles the "bias" terms.
     """
 
     # ====================== YOUR CODE HERE ======================
diff --git a/ex4/sigmoidGradient.py b/ex4/sigmoidGradient.py
index 18cfda3..b9b1f4c 100644
--- a/ex4/sigmoidGradient.py
+++ b/ex4/sigmoidGradient.py
@@ -7,9 +7,9 @@ def sigmoidGradient(z):
     vector. In particular, if z is a vector or matrix, you should return
     the gradient for each element."""
 
-# ====================== YOUR CODE HERE ======================
-# Instructions: Compute the gradient of the sigmoid function evaluated at
-#               each value of z (z can be a matrix, vector or scalar).
-# =============================================================
+    # ====================== YOUR CODE HERE ======================
+    # Instructions: Compute the gradient of the sigmoid function evaluated at
+    #               each value of z (z can be a matrix, vector or scalar).
+    # =============================================================
 
     return g
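
Both stubs touched above have short closed-form bodies; a possible sketch (the epsilon_init default of 0.12 is the value suggested in the exercise handout, and any similarly small range works):

    import numpy as np

    def sigmoid_gradient(z):
        # g'(z) = g(z) * (1 - g(z)); works elementwise for scalars, vectors, matrices.
        g = 1.0 / (1.0 + np.exp(-z))
        return g * (1 - g)

    def rand_initialize_weights(L_in, L_out, epsilon_init=0.12):
        # Uniform values in [-epsilon_init, epsilon_init] break the symmetry
        # between hidden units; shape is (L_out, 1 + L_in) to cover the bias column.
        return np.random.rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init

    print(sigmoid_gradient(np.array([-1.0, -0.5, 0.0, 0.5, 1.0])))  # peaks at 0.25 for z = 0
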