|
82 | 82 | 2, |
83 | 83 | None, |
84 | 84 | 'same-code-but-now-with-momentum-gradient-descent'), |
85 | | - ('Overview video on Stochastic Gradient Descent', |
| 85 | + ('Overview video on Stochastic Gradient Descent (SGD)', |
86 | 86 | 2, |
87 | 87 | None, |
88 | | - 'overview-video-on-stochastic-gradient-descent'), |
| 88 | + 'overview-video-on-stochastic-gradient-descent-sgd'), |
89 | 89 | ('Batches and mini-batches', 2, None, 'batches-and-mini-batches'), |
| 90 | + ('Pros and cons', 2, None, 'pros-and-cons'), |
| 91 | + ('Convergence rates', 2, None, 'convergence-rates'), |
| 92 | + ('Accuracy', 2, None, 'accuracy'), |
90 | 93 | ('Stochastic Gradient Descent (SGD)', |
91 | 94 | 2, |
92 | 95 | None, |
|
129 | 132 | 2, |
130 | 133 | None, |
131 | 134 | 'algorithms-and-codes-for-adagrad-rmsprop-and-adam'), |
| 135 | + ('AdaGrad algorithm, taken from "Goodfellow et ' |
| 136 | + 'al":"https://www.deeplearningbook.org/contents/optimization.html"', |
| 137 | + 2, |
| 138 | + None, |
| 139 | + 'adagrad-algorithm-taken-from-goodfellow-et-al-https-www-deeplearningbook-org-contents-optimization-html'), |
| 140 | + ('RMSProp algorithm, taken from "Goodfellow et ' |
| 141 | + 'al":"https://www.deeplearningbook.org/contents/optimization.html"', |
| 142 | + 2, |
| 143 | + None, |
| 144 | + 'rmsprop-algorithm-taken-from-goodfellow-et-al-https-www-deeplearningbook-org-contents-optimization-html'), |
| 145 | + ('ADAM algorithm, taken from "Goodfellow et ' |
| 146 | + 'al":"https://www.deeplearningbook.org/contents/optimization.html"', |
| 147 | + 2, |
| 148 | + None, |
| 149 | + 'adam-algorithm-taken-from-goodfellow-et-al-https-www-deeplearningbook-org-contents-optimization-html'), |
132 | 150 | ('Practical tips', 2, None, 'practical-tips'), |
133 | | - ('Sneaking in automatic differentiation using Autograd', |
| 151 | + ('Sneaking in automatic differentiation using Autograd',
134 | 152 | 2, |
135 | 153 | None, |
136 | | - 'sneaking-in-automatic-differentiation-using-autograd'), |
| 154 | + 'sneaking-in-automatic-differentiation-using-autograd'),
137 | 155 | ('Same code but now with momentum gradient descent', |
138 | 156 | 2, |
139 | 157 | None, |
140 | 158 | 'same-code-but-now-with-momentum-gradient-descent'), |
141 | | - ("But none of these can compete with Newton's method", |
142 | | - 2, |
143 | | - None, |
144 | | - 'but-none-of-these-can-compete-with-newton-s-method'), |
145 | 159 | ('Including Stochastic Gradient Descent with Autograd', |
146 | 160 | 2, |
147 | 161 | None, |
|
216 | 230 | <!-- navigation toc: --> <li><a href="._week37-bs012.html#program-example-for-gradient-descent-with-ridge-regression" style="font-size: 80%;">Program example for gradient descent with Ridge Regression</a></li> |
217 | 231 | <!-- navigation toc: --> <li><a href="._week37-bs013.html#using-gradient-descent-methods-limitations" style="font-size: 80%;">Using gradient descent methods, limitations</a></li> |
218 | 232 | <!-- navigation toc: --> <li><a href="._week37-bs014.html#improving-gradient-descent-with-momentum" style="font-size: 80%;">Improving gradient descent with momentum</a></li> |
219 | | - <!-- navigation toc: --> <li><a href="._week37-bs041.html#same-code-but-now-with-momentum-gradient-descent" style="font-size: 80%;">Same code but now with momentum gradient descent</a></li> |
220 | | - <!-- navigation toc: --> <li><a href="._week37-bs016.html#overview-video-on-stochastic-gradient-descent" style="font-size: 80%;">Overview video on Stochastic Gradient Descent</a></li> |
| 233 | + <!-- navigation toc: --> <li><a href="._week37-bs043.html#same-code-but-now-with-momentum-gradient-descent" style="font-size: 80%;">Same code but now with momentum gradient descent</a></li> |
| 234 | + <!-- navigation toc: --> <li><a href="._week37-bs016.html#overview-video-on-stochastic-gradient-descent-sgd" style="font-size: 80%;">Overview video on Stochastic Gradient Descent (SGD)</a></li> |
221 | 235 | <!-- navigation toc: --> <li><a href="._week37-bs017.html#batches-and-mini-batches" style="font-size: 80%;">Batches and mini-batches</a></li> |
222 | | - <!-- navigation toc: --> <li><a href="._week37-bs018.html#stochastic-gradient-descent-sgd" style="font-size: 80%;">Stochastic Gradient Descent (SGD)</a></li> |
223 | | - <!-- navigation toc: --> <li><a href="._week37-bs019.html#stochastic-gradient-descent" style="font-size: 80%;">Stochastic Gradient Descent</a></li> |
224 | | - <!-- navigation toc: --> <li><a href="._week37-bs020.html#computation-of-gradients" style="font-size: 80%;">Computation of gradients</a></li> |
225 | | - <!-- navigation toc: --> <li><a href="._week37-bs021.html#sgd-example" style="font-size: 80%;">SGD example</a></li> |
226 | | - <!-- navigation toc: --> <li><a href="._week37-bs022.html#the-gradient-step" style="font-size: 80%;">The gradient step</a></li> |
227 | | - <!-- navigation toc: --> <li><a href="._week37-bs023.html#simple-example-code" style="font-size: 80%;">Simple example code</a></li> |
228 | | - <!-- navigation toc: --> <li><a href="._week37-bs024.html#when-do-we-stop" style="font-size: 80%;">When do we stop?</a></li> |
229 | | - <!-- navigation toc: --> <li><a href="._week37-bs025.html#slightly-different-approach" style="font-size: 80%;">Slightly different approach</a></li> |
230 | | - <!-- navigation toc: --> <li><a href="._week37-bs026.html#time-decay-rate" style="font-size: 80%;">Time decay rate</a></li> |
231 | | - <!-- navigation toc: --> <li><a href="._week37-bs027.html#code-with-a-number-of-minibatches-which-varies" style="font-size: 80%;">Code with a Number of Minibatches which varies</a></li> |
232 | | - <!-- navigation toc: --> <li><a href="._week37-bs028.html#replace-or-not" style="font-size: 80%;">Replace or not</a></li> |
233 | | - <!-- navigation toc: --> <li><a href="._week37-bs029.html#momentum-based-gd" style="font-size: 80%;">Momentum based GD</a></li> |
234 | | - <!-- navigation toc: --> <li><a href="._week37-bs030.html#more-on-momentum-based-approaches" style="font-size: 80%;">More on momentum based approaches</a></li> |
235 | | - <!-- navigation toc: --> <li><a href="._week37-bs031.html#momentum-parameter" style="font-size: 80%;">Momentum parameter</a></li> |
236 | | - <!-- navigation toc: --> <li><a href="._week37-bs032.html#second-moment-of-the-gradient" style="font-size: 80%;">Second moment of the gradient</a></li> |
237 | | - <!-- navigation toc: --> <li><a href="._week37-bs033.html#rms-prop" style="font-size: 80%;">RMS prop</a></li> |
238 | | - <!-- navigation toc: --> <li><a href="._week37-bs034.html#adam-optimizer-https-arxiv-org-abs-1412-6980" style="font-size: 80%;">"ADAM optimizer":"https://arxiv.org/abs/1412.6980"</a></li> |
239 | | - <!-- navigation toc: --> <li><a href="._week37-bs035.html#algorithms-and-codes-for-adagrad-rmsprop-and-adam" style="font-size: 80%;">Algorithms and codes for Adagrad, RMSprop and Adam</a></li> |
240 | | - <!-- navigation toc: --> <li><a href="._week37-bs036.html#practical-tips" style="font-size: 80%;">Practical tips</a></li> |
241 | | - <!-- navigation toc: --> <li><a href="._week37-bs037.html#sneaking-in-automatic-differentiation-using-autograd" style="font-size: 80%;">Sneaking in automatic differentiation using Autograd</a></li> |
242 | | - <!-- navigation toc: --> <li><a href="._week37-bs041.html#same-code-but-now-with-momentum-gradient-descent" style="font-size: 80%;">Same code but now with momentum gradient descent</a></li> |
243 | | - <!-- navigation toc: --> <li><a href="._week37-bs039.html#but-none-of-these-can-compete-with-newton-s-method" style="font-size: 80%;">But none of these can compete with Newton's method</a></li> |
244 | | - <!-- navigation toc: --> <li><a href="._week37-bs040.html#including-stochastic-gradient-descent-with-autograd" style="font-size: 80%;">Including Stochastic Gradient Descent with Autograd</a></li> |
245 | | - <!-- navigation toc: --> <li><a href="._week37-bs041.html#same-code-but-now-with-momentum-gradient-descent" style="font-size: 80%;">Same code but now with momentum gradient descent</a></li> |
246 | | - <!-- navigation toc: --> <li><a href="._week37-bs042.html#similar-second-order-function-now-problem-but-now-with-adagrad" style="font-size: 80%;">Similar (second order function now) problem but now with AdaGrad</a></li> |
247 | | - <!-- navigation toc: --> <li><a href="._week37-bs043.html#rmsprop-for-adaptive-learning-rate-with-stochastic-gradient-descent" style="font-size: 80%;">RMSprop for adaptive learning rate with Stochastic Gradient Descent</a></li> |
248 | | - <!-- navigation toc: --> <li><a href="._week37-bs044.html#and-finally-adam-https-arxiv-org-pdf-1412-6980-pdf" style="font-size: 80%;">And finally "ADAM":"https://arxiv.org/pdf/1412.6980.pdf"</a></li> |
249 | | - <!-- navigation toc: --> <li><a href="._week37-bs045.html#material-for-the-lab-sessions" style="font-size: 80%;">Material for the lab sessions</a></li> |
| 236 | + <!-- navigation toc: --> <li><a href="._week37-bs018.html#pros-and-cons" style="font-size: 80%;">Pros and cons</a></li> |
| 237 | + <!-- navigation toc: --> <li><a href="._week37-bs019.html#convergence-rates" style="font-size: 80%;">Convergence rates</a></li> |
| 238 | + <!-- navigation toc: --> <li><a href="._week37-bs020.html#accuracy" style="font-size: 80%;">Accuracy</a></li> |
| 239 | + <!-- navigation toc: --> <li><a href="._week37-bs021.html#stochastic-gradient-descent-sgd" style="font-size: 80%;">Stochastic Gradient Descent (SGD)</a></li> |
| 240 | + <!-- navigation toc: --> <li><a href="._week37-bs022.html#stochastic-gradient-descent" style="font-size: 80%;">Stochastic Gradient Descent</a></li> |
| 241 | + <!-- navigation toc: --> <li><a href="._week37-bs023.html#computation-of-gradients" style="font-size: 80%;">Computation of gradients</a></li> |
| 242 | + <!-- navigation toc: --> <li><a href="._week37-bs024.html#sgd-example" style="font-size: 80%;">SGD example</a></li> |
| 243 | + <!-- navigation toc: --> <li><a href="._week37-bs025.html#the-gradient-step" style="font-size: 80%;">The gradient step</a></li> |
| 244 | + <!-- navigation toc: --> <li><a href="._week37-bs026.html#simple-example-code" style="font-size: 80%;">Simple example code</a></li> |
| 245 | + <!-- navigation toc: --> <li><a href="._week37-bs027.html#when-do-we-stop" style="font-size: 80%;">When do we stop?</a></li> |
| 246 | + <!-- navigation toc: --> <li><a href="._week37-bs028.html#slightly-different-approach" style="font-size: 80%;">Slightly different approach</a></li> |
| 247 | + <!-- navigation toc: --> <li><a href="._week37-bs029.html#time-decay-rate" style="font-size: 80%;">Time decay rate</a></li> |
| 248 | + <!-- navigation toc: --> <li><a href="._week37-bs030.html#code-with-a-number-of-minibatches-which-varies" style="font-size: 80%;">Code with a Number of Minibatches which varies</a></li> |
| 249 | + <!-- navigation toc: --> <li><a href="._week37-bs031.html#replace-or-not" style="font-size: 80%;">Replace or not</a></li> |
| 250 | + <!-- navigation toc: --> <li><a href="._week37-bs032.html#momentum-based-gd" style="font-size: 80%;">Momentum based GD</a></li> |
| 251 | + <!-- navigation toc: --> <li><a href="._week37-bs033.html#more-on-momentum-based-approaches" style="font-size: 80%;">More on momentum based approaches</a></li> |
| 252 | + <!-- navigation toc: --> <li><a href="._week37-bs034.html#momentum-parameter" style="font-size: 80%;">Momentum parameter</a></li> |
| 253 | + <!-- navigation toc: --> <li><a href="._week37-bs035.html#second-moment-of-the-gradient" style="font-size: 80%;">Second moment of the gradient</a></li> |
| 254 | + <!-- navigation toc: --> <li><a href="._week37-bs036.html#rms-prop" style="font-size: 80%;">RMS prop</a></li> |
| 255 | + <!-- navigation toc: --> <li><a href="._week37-bs037.html#adam-optimizer-https-arxiv-org-abs-1412-6980" style="font-size: 80%;">"ADAM optimizer":"https://arxiv.org/abs/1412.6980"</a></li> |
| 256 | + <!-- navigation toc: --> <li><a href="._week37-bs038.html#algorithms-and-codes-for-adagrad-rmsprop-and-adam" style="font-size: 80%;">Algorithms and codes for Adagrad, RMSprop and Adam</a></li> |
| 257 | + <!-- navigation toc: --> <li><a href="._week37-bs038.html#adagrad-algorithm-taken-from-goodfellow-et-al-https-www-deeplearningbook-org-contents-optimization-html" style="font-size: 80%;">AdaGrad algorithm, taken from "Goodfellow et al":"https://www.deeplearningbook.org/contents/optimization.html"</a></li> |
| 258 | + <!-- navigation toc: --> <li><a href="._week37-bs038.html#rmsprop-algorithm-taken-from-goodfellow-et-al-https-www-deeplearningbook-org-contents-optimization-html" style="font-size: 80%;">RMSProp algorithm, taken from "Goodfellow et al":"https://www.deeplearningbook.org/contents/optimization.html"</a></li> |
| 259 | + <!-- navigation toc: --> <li><a href="._week37-bs038.html#adam-algorithm-taken-from-goodfellow-et-al-https-www-deeplearningbook-org-contents-optimization-html" style="font-size: 80%;">ADAM algorithm, taken from "Goodfellow et al":"https://www.deeplearningbook.org/contents/optimization.html"</a></li> |
| 260 | + <!-- navigation toc: --> <li><a href="._week37-bs039.html#practical-tips" style="font-size: 80%;">Practical tips</a></li> |
| 261 | + <!-- navigation toc: --> <li><a href="._week37-bs040.html#sneaking-in-automatic-differentiation-using-autograd" style="font-size: 80%;">Sneaking in automatic differentiation using Autograd</a></li>
| 262 | + <!-- navigation toc: --> <li><a href="._week37-bs043.html#same-code-but-now-with-momentum-gradient-descent" style="font-size: 80%;">Same code but now with momentum gradient descent</a></li> |
| 263 | + <!-- navigation toc: --> <li><a href="._week37-bs042.html#including-stochastic-gradient-descent-with-autograd" style="font-size: 80%;">Including Stochastic Gradient Descent with Autograd</a></li> |
| 264 | + <!-- navigation toc: --> <li><a href="._week37-bs043.html#same-code-but-now-with-momentum-gradient-descent" style="font-size: 80%;">Same code but now with momentum gradient descent</a></li> |
| 265 | + <!-- navigation toc: --> <li><a href="._week37-bs044.html#similar-second-order-function-now-problem-but-now-with-adagrad" style="font-size: 80%;">Similar (second order function now) problem but now with AdaGrad</a></li> |
| 266 | + <!-- navigation toc: --> <li><a href="._week37-bs045.html#rmsprop-for-adaptive-learning-rate-with-stochastic-gradient-descent" style="font-size: 80%;">RMSprop for adaptive learning rate with Stochastic Gradient Descent</a></li> |
| 267 | + <!-- navigation toc: --> <li><a href="._week37-bs046.html#and-finally-adam-https-arxiv-org-pdf-1412-6980-pdf" style="font-size: 80%;">And finally "ADAM":"https://arxiv.org/pdf/1412.6980.pdf"</a></li> |
| 268 | + <!-- navigation toc: --> <li><a href="._week37-bs047.html#material-for-the-lab-sessions" style="font-size: 80%;">Material for the lab sessions</a></li> |
250 | 269 |
|
251 | 270 | </ul> |
252 | 271 | </li> |
@@ -300,7 +319,7 @@ <h4>September 8-12, 2025</h4> |
300 | 319 | <li><a href="._week37-bs008.html">9</a></li> |
301 | 320 | <li><a href="._week37-bs009.html">10</a></li> |
302 | 321 | <li><a href="">...</a></li> |
303 | | - <li><a href="._week37-bs045.html">46</a></li> |
| 322 | + <li><a href="._week37-bs047.html">48</a></li> |
304 | 323 | <li><a href="._week37-bs001.html">»</a></li> |
305 | 324 | </ul> |
306 | 325 | <!-- ------------------- end of main content --------------- --> |
|