|
2 | 2 | "cells": [ |
3 | 3 | { |
4 | 4 | "cell_type": "markdown", |
5 | | - "id": "1b941c35", |
| 5 | + "id": "8e6632a0", |
6 | 6 | "metadata": { |
7 | 7 | "editable": true |
8 | 8 | }, |
|
14 | 14 | }, |
15 | 15 | { |
16 | 16 | "cell_type": "markdown", |
17 | | - "id": "dc05b096", |
| 17 | + "id": "82705c4f", |
18 | 18 | "metadata": { |
19 | 19 | "editable": true |
20 | 20 | }, |
|
27 | 27 | }, |
28 | 28 | { |
29 | 29 | "cell_type": "markdown", |
30 | | - "id": "2cf07405", |
| 30 | + "id": "921bf331", |
31 | 31 | "metadata": { |
32 | 32 | "editable": true |
33 | 33 | }, |
|
46 | 46 | }, |
47 | 47 | { |
48 | 48 | "cell_type": "markdown", |
49 | | - "id": "3c139edb", |
| 49 | + "id": "adff65d5", |
50 | 50 | "metadata": { |
51 | 51 | "editable": true |
52 | 52 | }, |
|
58 | 58 | }, |
59 | 59 | { |
60 | 60 | "cell_type": "markdown", |
61 | | - "id": "aad4cfac", |
| 61 | + "id": "70418b3d", |
62 | 62 | "metadata": { |
63 | 63 | "editable": true |
64 | 64 | }, |
|
70 | 70 | }, |
71 | 71 | { |
72 | 72 | "cell_type": "markdown", |
73 | | - "id": "6682282f", |
| 73 | + "id": "11a3cf73", |
74 | 74 | "metadata": { |
75 | 75 | "editable": true |
76 | 76 | }, |
|
83 | 83 | }, |
84 | 84 | { |
85 | 85 | "cell_type": "markdown", |
86 | | - "id": "89e2f4c4", |
| 86 | + "id": "04a06b51", |
87 | 87 | "metadata": { |
88 | 88 | "editable": true |
89 | 89 | }, |
|
99 | 99 | }, |
100 | 100 | { |
101 | 101 | "cell_type": "markdown", |
102 | | - "id": "b06d4e53", |
| 102 | + "id": "408db3d9", |
103 | 103 | "metadata": { |
104 | 104 | "editable": true |
105 | 105 | }, |
|
120 | 120 | { |
121 | 121 | "cell_type": "code", |
122 | 122 | "execution_count": 1, |
123 | | - "id": "63796480", |
| 123 | + "id": "37fb732c", |
124 | 124 | "metadata": { |
125 | 125 | "collapsed": false, |
126 | 126 | "editable": true |
|
140 | 140 | }, |
141 | 141 | { |
142 | 142 | "cell_type": "markdown", |
143 | | - "id": "80748600", |
| 143 | + "id": "d861e1e3", |
144 | 144 | "metadata": { |
145 | 145 | "editable": true |
146 | 146 | }, |
147 | 147 | "source": [ |
148 | | - "Fill in the necessary details.\n", |
| 148 | + "Fill in the necessary details. Do we need to center the $y$-values? \n", |
149 | 149 | "\n", |
150 | 150 | "After this preprocessing, each column of $\\boldsymbol{X}_{\\mathrm{norm}}$ has mean zero and standard deviation $1$\n", |
151 | 151 | "and $\\boldsymbol{y}_{\\mathrm{centered}}$ has mean 0. This makes the optimization landscape\n", |
|
156 | 156 | }, |
157 | 157 | { |
158 | 158 | "cell_type": "markdown", |
159 | | - "id": "92751e5f", |
| 159 | + "id": "b3e774d0", |
160 | 160 | "metadata": { |
161 | 161 | "editable": true |
162 | 162 | }, |
|
168 | 168 | }, |
169 | 169 | { |
170 | 170 | "cell_type": "markdown", |
171 | | - "id": "aedfbd7a", |
| 171 | + "id": "d5dc7708", |
172 | 172 | "metadata": { |
173 | 173 | "editable": true |
174 | 174 | }, |
|
179 | 179 | { |
180 | 180 | "cell_type": "code", |
181 | 181 | "execution_count": 2, |
182 | | - "id": "5d1288fa", |
| 182 | + "id": "4c9c86ac", |
183 | 183 | "metadata": { |
184 | 184 | "collapsed": false, |
185 | 185 | "editable": true |
186 | 186 | }, |
187 | 187 | "outputs": [], |
188 | 188 | "source": [ |
189 | 189 | "# Set regularization parameter, either a single value or a vector of values\n", |
190 | | - "lambda = ?\n", |
| 190 | + "# Note: lambda is a reserved keyword in Python, so it cannot be used as a variable name; we use lam instead. The lambda keyword creates small, single-expression functions without a formal name, often called \"anonymous functions\" or \"lambda functions\".\n", |
| 191 | + "lam = ?\n", |
| 192 | + "\n", |
191 | 193 | "\n", |
192 | 194 | "# Analytical form for OLS and Ridge solution: theta_Ridge = (X^T X + lambda * I)^{-1} X^T y and theta_OLS = (X^T X)^{-1} X^T y\n", |
193 | 195 | "I = np.eye(n_features)\n", |
|
200 | 202 | }, |
201 | 203 | { |
202 | 204 | "cell_type": "markdown", |
203 | | - "id": "628f5e89", |
| 205 | + "id": "eeae00fd", |
204 | 206 | "metadata": { |
205 | 207 | "editable": true |
206 | 208 | }, |
|
214 | 216 | }, |
215 | 217 | { |
216 | 218 | "cell_type": "markdown", |
217 | | - "id": "f115ba4e", |
| 219 | + "id": "e1c215d5", |
218 | 220 | "metadata": { |
219 | 221 | "editable": true |
220 | 222 | }, |
|
226 | 228 | }, |
227 | 229 | { |
228 | 230 | "cell_type": "markdown", |
229 | | - "id": "a9b5189c", |
| 231 | + "id": "587dd3dc", |
230 | 232 | "metadata": { |
231 | 233 | "editable": true |
232 | 234 | }, |
|
238 | 240 | }, |
239 | 241 | { |
240 | 242 | "cell_type": "markdown", |
241 | | - "id": "a3969ff6", |
| 243 | + "id": "bfa34697", |
242 | 244 | "metadata": { |
243 | 245 | "editable": true |
244 | 246 | }, |
|
258 | 260 | { |
259 | 261 | "cell_type": "code", |
260 | 262 | "execution_count": 3, |
261 | | - "id": "34d87303", |
| 263 | + "id": "49245f55", |
262 | 264 | "metadata": { |
263 | 265 | "collapsed": false, |
264 | 266 | "editable": true |
|
273 | 275 | "# Initialize weights for gradient descent\n", |
274 | 276 | "theta = np.zeros(n_features)\n", |
275 | 277 | "\n", |
276 | | - "# Arrays to store history for plotting\n", |
277 | | - "cost_history = np.zeros(num_iters)\n", |
278 | | - "\n", |
279 | 278 | "# Gradient descent loop\n", |
280 | | - "m = n_samples # number of data points\n", |
281 | 279 | "for t in range(num_iters):\n", |
282 | | - " # Compute prediction error\n", |
283 | | - " error = X_norm.dot(theta) - y_centered \n", |
284 | | - " # Compute cost for OLS and Ridge (MSE + regularization for Ridge) for monitoring\n", |
285 | | - " cost_OLS = ?\n", |
286 | | - " cost_Ridge = ?\n", |
287 | | - " # You could add a history for both methods (optional)\n", |
288 | | - " cost_history[t] = ?\n", |
289 | 280 | " # Compute gradients for OLS and Ridge\n", |
290 | 281 | " grad_OLS = ?\n", |
291 | 282 | " grad_Ridge = ?\n", |
|
302 | 293 | }, |
303 | 294 | { |
304 | 295 | "cell_type": "markdown", |
305 | | - "id": "989f70bb", |
| 296 | + "id": "f3f43f2c", |
306 | 297 | "metadata": { |
307 | 298 | "editable": true |
308 | 299 | }, |
309 | 300 | "source": [ |
310 | 301 | "### 4a)\n", |
311 | 302 | "\n", |
312 | | - "Discuss the results as function of the learning rate parameters and the number of iterations." |
| 303 | + "First, write a gradient descent code for OLS only, using the above template.\n", |
| 304 | + "Discuss the results as a function of the learning rate and the number of iterations." |
313 | 305 | ] |
314 | 306 | }, |
315 | 307 | { |
316 | 308 | "cell_type": "markdown", |
317 | | - "id": "370b2dad", |
| 309 | + "id": "9ba303be", |
318 | 310 | "metadata": { |
319 | 311 | "editable": true |
320 | 312 | }, |
321 | 313 | "source": [ |
322 | 314 | "### 4b)\n", |
323 | 315 | "\n", |
| 316 | + "Then write similar code for Ridge regression using the above template.\n", |
324 | 317 | "Try to add a stopping parameter as a function of the number of iterations and the difference between the new and old $\\theta$ values. How would you define a stopping criterion?" |
325 | 318 | ] |
326 | 319 | }, |
327 | 320 | { |
328 | 321 | "cell_type": "markdown", |
329 | | - "id": "ef197cd7", |
| 322 | + "id": "78362c6c", |
330 | 323 | "metadata": { |
331 | 324 | "editable": true |
332 | 325 | }, |
|
352 | 345 | { |
353 | 346 | "cell_type": "code", |
354 | 347 | "execution_count": 4, |
355 | | - "id": "4ccc2f65", |
| 348 | + "id": "8be1cebe", |
356 | 349 | "metadata": { |
357 | 350 | "collapsed": false, |
358 | 351 | "editable": true |
|
381 | 374 | }, |
382 | 375 | { |
383 | 376 | "cell_type": "markdown", |
384 | | - "id": "00e279ef", |
| 377 | + "id": "e2693666", |
385 | 378 | "metadata": { |
386 | 379 | "editable": true |
387 | 380 | }, |
|
395 | 388 | }, |
396 | 389 | { |
397 | 390 | "cell_type": "markdown", |
398 | | - "id": "c910b3f4", |
| 391 | + "id": "bc954d12", |
399 | 392 | "metadata": { |
400 | 393 | "editable": true |
401 | 394 | }, |
|
407 | 400 | }, |
408 | 401 | { |
409 | 402 | "cell_type": "markdown", |
410 | | - "id": "89e6e040", |
| 403 | + "id": "6534b610", |
411 | 404 | "metadata": { |
412 | 405 | "editable": true |
413 | 406 | }, |
|
0 commit comments