|
154 | 154 | 2, |
155 | 155 | None, |
156 | 156 | 'adam-optimizer-https-arxiv-org-abs-1412-6980'), |
| 157 | + ('Why Combine Momentum and RMSProp?', |
| 158 | + 2, |
| 159 | + None, |
| 160 | + 'why-combine-momentum-and-rmsprop'), |
| 161 | + ('Adam: Exponential Moving Averages (Moments)', |
| 162 | + 2, |
| 163 | + None, |
| 164 | + 'adam-exponential-moving-averages-moments'), |
| 165 | + ('Adam: Bias Correction', 2, None, 'adam-bias-correction'), |
| 166 | + ('Adam: Update Rule Derivation', |
| 167 | + 2, |
| 168 | + None, |
| 169 | + 'adam-update-rule-derivation'), |
| 170 | + ('Adam vs. AdaGrad and RMSProp', |
| 171 | + 2, |
| 172 | + None, |
| 173 | + 'adam-vs-adagrad-and-rmsprop'), |
| 174 | + ('Adaptivity Across Dimensions', |
| 175 | + 2, |
| 176 | + None, |
| 177 | + 'adaptivity-across-dimensions'), |
157 | 178 | ('ADAM algorithm, taken from "Goodfellow et ' |
158 | 179 | 'al":"https://www.deeplearningbook.org/contents/optimization.html"', |
159 | 180 | 2, |
|
264 | 285 | <!-- navigation toc: --> <li><a href="._week37-bs013.html#using-gradient-descent-methods-limitations" style="font-size: 80%;">Using gradient descent methods, limitations</a></li> |
265 | 286 | <!-- navigation toc: --> <li><a href="._week37-bs014.html#momentum-based-gd" style="font-size: 80%;">Momentum based GD</a></li> |
266 | 287 | <!-- navigation toc: --> <li><a href="._week37-bs015.html#improving-gradient-descent-with-momentum" style="font-size: 80%;">Improving gradient descent with momentum</a></li> |
267 | | - <!-- navigation toc: --> <li><a href="._week37-bs047.html#same-code-but-now-with-momentum-gradient-descent" style="font-size: 80%;">Same code but now with momentum gradient descent</a></li> |
| 288 | + <!-- navigation toc: --> <li><a href="._week37-bs053.html#same-code-but-now-with-momentum-gradient-descent" style="font-size: 80%;">Same code but now with momentum gradient descent</a></li> |
268 | 289 | <!-- navigation toc: --> <li><a href="._week37-bs017.html#overview-video-on-stochastic-gradient-descent-sgd" style="font-size: 80%;">Overview video on Stochastic Gradient Descent (SGD)</a></li> |
269 | 290 | <!-- navigation toc: --> <li><a href="._week37-bs018.html#batches-and-mini-batches" style="font-size: 80%;">Batches and mini-batches</a></li> |
270 | 291 | <!-- navigation toc: --> <li><a href="._week37-bs019.html#pros-and-cons" style="font-size: 80%;">Pros and cons</a></li> |
|
292 | 313 | <!-- navigation toc: --> <li><a href="._week37-bs039.html#rmsprop-algorithm-taken-from-goodfellow-et-al-https-www-deeplearningbook-org-contents-optimization-html" style="font-size: 80%;">RMSProp algorithm, taken from "Goodfellow et al":"https://www.deeplearningbook.org/contents/optimization.html"</a></li> |
293 | 314 | <!-- navigation toc: --> <li><a href="._week37-bs040.html#adam-optimizer" style="font-size: 80%;">Adam Optimizer</a></li> |
294 | 315 | <!-- navigation toc: --> <li><a href="._week37-bs041.html#adam-optimizer-https-arxiv-org-abs-1412-6980" style="font-size: 80%;">"ADAM optimizer":"https://arxiv.org/abs/1412.6980"</a></li> |
295 | | - <!-- navigation toc: --> <li><a href="._week37-bs041.html#adam-algorithm-taken-from-goodfellow-et-al-https-www-deeplearningbook-org-contents-optimization-html" style="font-size: 80%;">ADAM algorithm, taken from "Goodfellow et al":"https://www.deeplearningbook.org/contents/optimization.html"</a></li> |
296 | | - <!-- navigation toc: --> <li><a href="._week37-bs042.html#algorithms-and-codes-for-adagrad-rmsprop-and-adam" style="font-size: 80%;">Algorithms and codes for Adagrad, RMSprop and Adam</a></li> |
297 | | - <!-- navigation toc: --> <li><a href="._week37-bs043.html#practical-tips" style="font-size: 80%;">Practical tips</a></li> |
298 | | - <!-- navigation toc: --> <li><a href="._week37-bs044.html#sneaking-in-automatic-differentiation-using-autograd" style="font-size: 80%;">Sneaking in automatic differentiation using Autograd</a></li> |
299 | | - <!-- navigation toc: --> <li><a href="._week37-bs047.html#same-code-but-now-with-momentum-gradient-descent" style="font-size: 80%;">Same code but now with momentum gradient descent</a></li> |
300 | | - <!-- navigation toc: --> <li><a href="._week37-bs046.html#including-stochastic-gradient-descent-with-autograd" style="font-size: 80%;">Including Stochastic Gradient Descent with Autograd</a></li> |
301 | | - <!-- navigation toc: --> <li><a href="._week37-bs047.html#same-code-but-now-with-momentum-gradient-descent" style="font-size: 80%;">Same code but now with momentum gradient descent</a></li> |
302 | | - <!-- navigation toc: --> <li><a href="._week37-bs048.html#but-none-of-these-can-compete-with-newton-s-method" style="font-size: 80%;">But none of these can compete with Newton's method</a></li> |
303 | | - <!-- navigation toc: --> <li><a href="._week37-bs049.html#similar-second-order-function-now-problem-but-now-with-adagrad" style="font-size: 80%;">Similar (second order function now) problem but now with AdaGrad</a></li> |
304 | | - <!-- navigation toc: --> <li><a href="._week37-bs050.html#rmsprop-for-adaptive-learning-rate-with-stochastic-gradient-descent" style="font-size: 80%;">RMSprop for adaptive learning rate with Stochastic Gradient Descent</a></li> |
305 | | - <!-- navigation toc: --> <li><a href="._week37-bs051.html#and-finally-adam-https-arxiv-org-pdf-1412-6980-pdf" style="font-size: 80%;">And finally "ADAM":"https://arxiv.org/pdf/1412.6980.pdf"</a></li> |
306 | | - <!-- navigation toc: --> <li><a href="._week37-bs052.html#material-for-the-lab-sessions" style="font-size: 80%;">Material for the lab sessions</a></li> |
307 | | - <!-- navigation toc: --> <li><a href="._week37-bs053.html#reminder-on-different-scaling-methods" style="font-size: 80%;">Reminder on different scaling methods</a></li> |
308 | | - <!-- navigation toc: --> <li><a href="._week37-bs054.html#functionality-in-scikit-learn" style="font-size: 80%;">Functionality in Scikit-Learn</a></li> |
309 | | - <!-- navigation toc: --> <li><a href="._week37-bs055.html#more-preprocessing" style="font-size: 80%;">More preprocessing</a></li> |
310 | | - <!-- navigation toc: --> <li><a href="._week37-bs056.html#frequently-used-scaling-functions" style="font-size: 80%;">Frequently used scaling functions</a></li> |
| 316 | + <!-- navigation toc: --> <li><a href="._week37-bs042.html#why-combine-momentum-and-rmsprop" style="font-size: 80%;">Why Combine Momentum and RMSProp?</a></li> |
| 317 | + <!-- navigation toc: --> <li><a href="._week37-bs043.html#adam-exponential-moving-averages-moments" style="font-size: 80%;">Adam: Exponential Moving Averages (Moments)</a></li> |
| 318 | + <!-- navigation toc: --> <li><a href="._week37-bs044.html#adam-bias-correction" style="font-size: 80%;">Adam: Bias Correction</a></li> |
| 319 | + <!-- navigation toc: --> <li><a href="._week37-bs045.html#adam-update-rule-derivation" style="font-size: 80%;">Adam: Update Rule Derivation</a></li> |
| 320 | + <!-- navigation toc: --> <li><a href="._week37-bs046.html#adam-vs-adagrad-and-rmsprop" style="font-size: 80%;">Adam vs. AdaGrad and RMSProp</a></li> |
| 321 | + <!-- navigation toc: --> <li><a href="._week37-bs047.html#adaptivity-across-dimensions" style="font-size: 80%;">Adaptivity Across Dimensions</a></li> |
| 322 | + <!-- navigation toc: --> <li><a href="._week37-bs047.html#adam-algorithm-taken-from-goodfellow-et-al-https-www-deeplearningbook-org-contents-optimization-html" style="font-size: 80%;">ADAM algorithm, taken from "Goodfellow et al":"https://www.deeplearningbook.org/contents/optimization.html"</a></li> |
| 323 | + <!-- navigation toc: --> <li><a href="._week37-bs048.html#algorithms-and-codes-for-adagrad-rmsprop-and-adam" style="font-size: 80%;">Algorithms and codes for Adagrad, RMSprop and Adam</a></li> |
| 324 | + <!-- navigation toc: --> <li><a href="._week37-bs049.html#practical-tips" style="font-size: 80%;">Practical tips</a></li> |
| 325 | + <!-- navigation toc: --> <li><a href="._week37-bs050.html#sneaking-in-automatic-differentiation-using-autograd" style="font-size: 80%;">Sneaking in automatic differentiation using Autograd</a></li> |
| 326 | + <!-- navigation toc: --> <li><a href="._week37-bs053.html#same-code-but-now-with-momentum-gradient-descent" style="font-size: 80%;">Same code but now with momentum gradient descent</a></li> |
| 327 | + <!-- navigation toc: --> <li><a href="._week37-bs052.html#including-stochastic-gradient-descent-with-autograd" style="font-size: 80%;">Including Stochastic Gradient Descent with Autograd</a></li> |
| 328 | + <!-- navigation toc: --> <li><a href="._week37-bs053.html#same-code-but-now-with-momentum-gradient-descent" style="font-size: 80%;">Same code but now with momentum gradient descent</a></li> |
| 329 | + <!-- navigation toc: --> <li><a href="._week37-bs054.html#but-none-of-these-can-compete-with-newton-s-method" style="font-size: 80%;">But none of these can compete with Newton's method</a></li> |
| 330 | + <!-- navigation toc: --> <li><a href="._week37-bs055.html#similar-second-order-function-now-problem-but-now-with-adagrad" style="font-size: 80%;">Similar (second order function now) problem but now with AdaGrad</a></li> |
| 331 | + <!-- navigation toc: --> <li><a href="._week37-bs056.html#rmsprop-for-adaptive-learning-rate-with-stochastic-gradient-descent" style="font-size: 80%;">RMSprop for adaptive learning rate with Stochastic Gradient Descent</a></li> |
| 332 | + <!-- navigation toc: --> <li><a href="._week37-bs057.html#and-finally-adam-https-arxiv-org-pdf-1412-6980-pdf" style="font-size: 80%;">And finally "ADAM":"https://arxiv.org/pdf/1412.6980.pdf"</a></li> |
| 333 | + <!-- navigation toc: --> <li><a href="._week37-bs058.html#material-for-the-lab-sessions" style="font-size: 80%;">Material for the lab sessions</a></li> |
| 334 | + <!-- navigation toc: --> <li><a href="._week37-bs059.html#reminder-on-different-scaling-methods" style="font-size: 80%;">Reminder on different scaling methods</a></li> |
| 335 | + <!-- navigation toc: --> <li><a href="._week37-bs060.html#functionality-in-scikit-learn" style="font-size: 80%;">Functionality in Scikit-Learn</a></li> |
| 336 | + <!-- navigation toc: --> <li><a href="._week37-bs061.html#more-preprocessing" style="font-size: 80%;">More preprocessing</a></li> |
| 337 | + <!-- navigation toc: --> <li><a href="._week37-bs062.html#frequently-used-scaling-functions" style="font-size: 80%;">Frequently used scaling functions</a></li> |
311 | 338 |
|
312 | 339 | </ul> |
313 | 340 | </li> |
@@ -361,7 +388,7 @@ <h4>September 8-12, 2025</h4> |
361 | 388 | <li><a href="._week37-bs008.html">9</a></li> |
362 | 389 | <li><a href="._week37-bs009.html">10</a></li> |
363 | 390 | <li><a href="">...</a></li> |
364 | | - <li><a href="._week37-bs056.html">57</a></li> |
| 391 | + <li><a href="._week37-bs062.html">63</a></li> |
365 | 392 | <li><a href="._week37-bs001.html">»</a></li> |
366 | 393 | </ul> |
367 | 394 | <!-- ------------------- end of main content --------------- --> |
|