|
| 1 | +\begin{MintedVerbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] |
| 2 | +\PYG{k+kn}{import}\PYG{+w}{ }\PYG{n+nn}{numpy}\PYG{+w}{ }\PYG{k}{as}\PYG{+w}{ }\PYG{n+nn}{np} |
| 3 | +\PYG{c+c1}{\PYGZsh{} We use the Sigmoid function as activation function} |
| 4 | +\PYG{k}{def}\PYG{+w}{ }\PYG{n+nf}{sigmoid}\PYG{p}{(}\PYG{n}{z}\PYG{p}{)}\PYG{p}{:} |
| 5 | + \PYG{k}{return} \PYG{l+m+mf}{1.0}\PYG{o}{/}\PYG{p}{(}\PYG{l+m+mf}{1.0}\PYG{o}{+}\PYG{n}{np}\PYG{o}{.}\PYG{n}{exp}\PYG{p}{(}\PYG{o}{\PYGZhy{}}\PYG{n}{z}\PYG{p}{)}\PYG{p}{)} |
| 6 | + |
| 7 | +\PYG{k}{def}\PYG{+w}{ }\PYG{n+nf}{forwardpropagation}\PYG{p}{(}\PYG{n}{x}\PYG{p}{)}\PYG{p}{:} |
| 8 | + \PYG{c+c1}{\PYGZsh{} weighted sum of inputs to the hidden layer} |
| 9 | + \PYG{n}{z\PYGZus{}1} \PYG{o}{=} \PYG{n}{np}\PYG{o}{.}\PYG{n}{matmul}\PYG{p}{(}\PYG{n}{x}\PYG{p}{,} \PYG{n}{w\PYGZus{}1}\PYG{p}{)} \PYG{o}{+} \PYG{n}{b\PYGZus{}1} |
| 10 | + \PYG{c+c1}{\PYGZsh{} activation in the hidden layer} |
| 11 | + \PYG{n}{a\PYGZus{}1} \PYG{o}{=} \PYG{n}{sigmoid}\PYG{p}{(}\PYG{n}{z\PYGZus{}1}\PYG{p}{)} |
| 12 | + \PYG{c+c1}{\PYGZsh{} weighted sum of inputs to the output layer} |
| 13 | + \PYG{n}{z\PYGZus{}2} \PYG{o}{=} \PYG{n}{np}\PYG{o}{.}\PYG{n}{matmul}\PYG{p}{(}\PYG{n}{a\PYGZus{}1}\PYG{p}{,} \PYG{n}{w\PYGZus{}2}\PYG{p}{)} \PYG{o}{+} \PYG{n}{b\PYGZus{}2} |
| 14 | + \PYG{n}{a\PYGZus{}2} \PYG{o}{=} \PYG{n}{z\PYGZus{}2} |
| 15 | + \PYG{k}{return} \PYG{n}{a\PYGZus{}1}\PYG{p}{,} \PYG{n}{a\PYGZus{}2} |
| 16 | + |
| 17 | +\PYG{k}{def}\PYG{+w}{ }\PYG{n+nf}{backpropagation}\PYG{p}{(}\PYG{n}{x}\PYG{p}{,} \PYG{n}{y}\PYG{p}{)}\PYG{p}{:} |
| 18 | + \PYG{n}{a\PYGZus{}1}\PYG{p}{,} \PYG{n}{a\PYGZus{}2} \PYG{o}{=} \PYG{n}{forwardpropagation}\PYG{p}{(}\PYG{n}{x}\PYG{p}{)} |
| 19 | + \PYG{c+c1}{\PYGZsh{} parameter delta for the output layer, note that a\PYGZus{}2=z\PYGZus{}2 and its derivative wrt z\PYGZus{}2 is just 1} |
| 20 | + \PYG{n}{delta\PYGZus{}2} \PYG{o}{=} \PYG{n}{a\PYGZus{}2} \PYG{o}{\PYGZhy{}} \PYG{n}{y} |
| 21 | + \PYG{n+nb}{print}\PYG{p}{(}\PYG{l+m+mf}{0.5}\PYG{o}{*}\PYG{p}{(}\PYG{p}{(}\PYG{n}{a\PYGZus{}2}\PYG{o}{\PYGZhy{}}\PYG{n}{y}\PYG{p}{)}\PYG{o}{*}\PYG{o}{*}\PYG{l+m+mi}{2}\PYG{p}{)}\PYG{p}{)} |
| 22 | + \PYG{c+c1}{\PYGZsh{} delta for the hidden layer} |
| 23 | + \PYG{n}{delta\PYGZus{}1} \PYG{o}{=} \PYG{n}{np}\PYG{o}{.}\PYG{n}{matmul}\PYG{p}{(}\PYG{n}{delta\PYGZus{}2}\PYG{p}{,} \PYG{n}{w\PYGZus{}2}\PYG{o}{.}\PYG{n}{T}\PYG{p}{)} \PYG{o}{*} \PYG{n}{a\PYGZus{}1} \PYG{o}{*} \PYG{p}{(}\PYG{l+m+mi}{1} \PYG{o}{\PYGZhy{}} \PYG{n}{a\PYGZus{}1}\PYG{p}{)} |
| 24 | + \PYG{c+c1}{\PYGZsh{} gradients for the output layer} |
| 25 | + \PYG{n}{output\PYGZus{}weights\PYGZus{}gradient} \PYG{o}{=} \PYG{n}{np}\PYG{o}{.}\PYG{n}{matmul}\PYG{p}{(}\PYG{n}{a\PYGZus{}1}\PYG{o}{.}\PYG{n}{T}\PYG{p}{,} \PYG{n}{delta\PYGZus{}2}\PYG{p}{)} |
| 26 | + \PYG{n}{output\PYGZus{}bias\PYGZus{}gradient} \PYG{o}{=} \PYG{n}{np}\PYG{o}{.}\PYG{n}{sum}\PYG{p}{(}\PYG{n}{delta\PYGZus{}2}\PYG{p}{,} \PYG{n}{axis}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)} |
| 27 | + \PYG{c+c1}{\PYGZsh{} gradient for the hidden layer} |
| 28 | + \PYG{n}{hidden\PYGZus{}weights\PYGZus{}gradient} \PYG{o}{=} \PYG{n}{np}\PYG{o}{.}\PYG{n}{matmul}\PYG{p}{(}\PYG{n}{x}\PYG{o}{.}\PYG{n}{T}\PYG{p}{,} \PYG{n}{delta\PYGZus{}1}\PYG{p}{)} |
| 29 | + \PYG{n}{hidden\PYGZus{}bias\PYGZus{}gradient} \PYG{o}{=} \PYG{n}{np}\PYG{o}{.}\PYG{n}{sum}\PYG{p}{(}\PYG{n}{delta\PYGZus{}1}\PYG{p}{,} \PYG{n}{axis}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{)} |
| 30 | + \PYG{k}{return} \PYG{n}{output\PYGZus{}weights\PYGZus{}gradient}\PYG{p}{,} \PYG{n}{output\PYGZus{}bias\PYGZus{}gradient}\PYG{p}{,} \PYG{n}{hidden\PYGZus{}weights\PYGZus{}gradient}\PYG{p}{,} \PYG{n}{hidden\PYGZus{}bias\PYGZus{}gradient} |
| 31 | + |
| 32 | + |
| 33 | +\PYG{c+c1}{\PYGZsh{} ensure the same random numbers appear every time} |
| 34 | +\PYG{n}{np}\PYG{o}{.}\PYG{n}{random}\PYG{o}{.}\PYG{n}{seed}\PYG{p}{(}\PYG{l+m+mi}{0}\PYG{p}{)} |
| 35 | +\PYG{c+c1}{\PYGZsh{} Input variable} |
| 36 | +\PYG{n}{x} \PYG{o}{=} \PYG{n}{np}\PYG{o}{.}\PYG{n}{array}\PYG{p}{(}\PYG{p}{[}\PYG{l+m+mf}{4.0}\PYG{p}{]}\PYG{p}{,}\PYG{n}{dtype}\PYG{o}{=}\PYG{n}{np}\PYG{o}{.}\PYG{n}{float64}\PYG{p}{)} |
| 37 | +\PYG{c+c1}{\PYGZsh{} Target values} |
| 38 | +\PYG{n}{y} \PYG{o}{=} \PYG{l+m+mi}{2}\PYG{o}{*}\PYG{n}{x}\PYG{o}{+}\PYG{l+m+mf}{1.0} |
| 39 | + |
| 40 | +\PYG{c+c1}{\PYGZsh{} Defining the neural network, only scalars here} |
| 41 | +\PYG{n}{n\PYGZus{}inputs} \PYG{o}{=} \PYG{n}{x}\PYG{o}{.}\PYG{n}{shape} |
| 42 | +\PYG{n}{n\PYGZus{}features} \PYG{o}{=} \PYG{l+m+mi}{1} |
| 43 | +\PYG{n}{n\PYGZus{}hidden\PYGZus{}neurons} \PYG{o}{=} \PYG{l+m+mi}{1} |
| 44 | +\PYG{n}{n\PYGZus{}outputs} \PYG{o}{=} \PYG{l+m+mi}{1} |
| 45 | + |
| 46 | +\PYG{c+c1}{\PYGZsh{} Initialize the network} |
| 47 | +\PYG{c+c1}{\PYGZsh{} weights and bias in the hidden layer} |
| 48 | +\PYG{n}{w\PYGZus{}1} \PYG{o}{=} \PYG{n}{np}\PYG{o}{.}\PYG{n}{random}\PYG{o}{.}\PYG{n}{randn}\PYG{p}{(}\PYG{n}{n\PYGZus{}features}\PYG{p}{,} \PYG{n}{n\PYGZus{}hidden\PYGZus{}neurons}\PYG{p}{)} |
| 49 | +\PYG{n}{b\PYGZus{}1} \PYG{o}{=} \PYG{n}{np}\PYG{o}{.}\PYG{n}{zeros}\PYG{p}{(}\PYG{n}{n\PYGZus{}hidden\PYGZus{}neurons}\PYG{p}{)} \PYG{o}{+} \PYG{l+m+mf}{0.01} |
| 50 | + |
| 51 | +\PYG{c+c1}{\PYGZsh{} weights and bias in the output layer} |
| 52 | +\PYG{n}{w\PYGZus{}2} \PYG{o}{=} \PYG{n}{np}\PYG{o}{.}\PYG{n}{random}\PYG{o}{.}\PYG{n}{randn}\PYG{p}{(}\PYG{n}{n\PYGZus{}hidden\PYGZus{}neurons}\PYG{p}{,} \PYG{n}{n\PYGZus{}outputs}\PYG{p}{)} |
| 53 | +\PYG{n}{b\PYGZus{}2} \PYG{o}{=} \PYG{n}{np}\PYG{o}{.}\PYG{n}{zeros}\PYG{p}{(}\PYG{n}{n\PYGZus{}outputs}\PYG{p}{)} \PYG{o}{+} \PYG{l+m+mf}{0.01} |
| 54 | + |
| 55 | +\PYG{n}{eta} \PYG{o}{=} \PYG{l+m+mf}{0.1} |
| 56 | +\PYG{k}{for} \PYG{n}{i} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{l+m+mi}{50}\PYG{p}{)}\PYG{p}{:} |
| 57 | + \PYG{c+c1}{\PYGZsh{} calculate gradients} |
| 58 | + \PYG{n}{derivW2}\PYG{p}{,} \PYG{n}{derivB2}\PYG{p}{,} \PYG{n}{derivW1}\PYG{p}{,} \PYG{n}{derivB1} \PYG{o}{=} \PYG{n}{backpropagation}\PYG{p}{(}\PYG{n}{x}\PYG{p}{,} \PYG{n}{y}\PYG{p}{)} |
| 59 | + \PYG{c+c1}{\PYGZsh{} update weights and biases} |
| 60 | + \PYG{n}{w\PYGZus{}2} \PYG{o}{\PYGZhy{}}\PYG{o}{=} \PYG{n}{eta} \PYG{o}{*} \PYG{n}{derivW2} |
| 61 | + \PYG{n}{b\PYGZus{}2} \PYG{o}{\PYGZhy{}}\PYG{o}{=} \PYG{n}{eta} \PYG{o}{*} \PYG{n}{derivB2} |
| 62 | + \PYG{n}{w\PYGZus{}1} \PYG{o}{\PYGZhy{}}\PYG{o}{=} \PYG{n}{eta} \PYG{o}{*} \PYG{n}{derivW1} |
| 63 | + \PYG{n}{b\PYGZus{}1} \PYG{o}{\PYGZhy{}}\PYG{o}{=} \PYG{n}{eta} \PYG{o}{*} \PYG{n}{derivB1} |
| 64 | + |
| 65 | + |
| 66 | + |
| 67 | +\end{MintedVerbatim} |
0 commit comments