diff --git a/02_activities/assignments/assignment_1.ipynb b/02_activities/assignments/assignment_1.ipynb
index 45cfc9cd7..c02730811 100644
--- a/02_activities/assignments/assignment_1.ipynb
+++ b/02_activities/assignments/assignment_1.ipynb
@@ -30,13 +30,17 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Write your code below.\n",
+    "from dotenv import load_dotenv\n",
+    "load_dotenv()\n",
+    "\n",
+    "import dask.dataframe as dd\n",
+    "\n",
     "\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -55,14 +59,34 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(0, [])"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
    "import os\n",
    "from glob import glob\n",
    "\n",
-    "# Write your code below.\n",
+    "# os and glob are already imported above; search PRICE_DATA recursively.\n",
+    "PRICE_DATA = os.getenv(\"PRICE_DATA\")\n",
+    "\n",
+    "parquet_paths = glob(os.path.join(PRICE_DATA, \"**\", \"*.parquet\"), recursive=True)\n",
+    "print(len(parquet_paths), \"parquet files; first 5:\", parquet_paths[:5])\n",
+    "\n",
+    "all_files = glob(os.path.join(PRICE_DATA, \"**\", \"*.*\"), recursive=True)\n",
+    "len(all_files), all_files[:20]\n",
    "\n"
   ]
  },
@@ -88,32 +112,83 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "PRICE_DATA = ../../05_src/data/prices/\n",
+      "Exists? False\n"
+     ]
+    }
+   ],
   "source": [
-    "# Write your code below.\n",
+    "import os\n",
+    "from dotenv import load_dotenv\n",
+    "from glob import glob\n",
+    "\n",
+    "load_dotenv()\n",
+    "\n",
+    "PRICE_DATA = os.getenv(\"PRICE_DATA\")\n",
+    "print(\"PRICE_DATA =\", PRICE_DATA)\n",
+    "\n",
+    "if PRICE_DATA is None:\n",
+    "    print(\"❌ PRICE_DATA is None (not set).\")\n",
+    "else:\n",
+    "    print(\"Exists?\", os.path.exists(PRICE_DATA))\n",
+    "    if os.path.exists(PRICE_DATA):\n",
+    "        # Show what’s inside the folder\n",
+    "        print(\"Top-level items:\", os.listdir(PRICE_DATA)[:20])\n",
+    "\n",
+    "        # Look for parquet files anywhere under PRICE_DATA\n",
+    "        parquet_paths = glob(os.path.join(PRICE_DATA, \"**\", \"*.parquet\"), recursive=True)\n",
+    "        print(\"Parquet files found:\", len(parquet_paths))\n",
+    "        print(\"Example parquet paths:\", parquet_paths[:5])\n",
     "\n"
   ]
  },
  {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": null,
   "metadata": {},
+   "outputs": [],
   "source": [
-    "+ Convert the Dask data frame to a pandas data frame. \n",
-    "+ Add a new feature containing the moving average of `returns` using a window of 10 days. There are several ways to solve this task, a simple one uses `.rolling(10).mean()`.\n",
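+    "# `dd_feat` must exist before it can be converted; a minimal sketch, assuming\n",
+    "# each parquet file holds one ticker's prices with `Ticker` and `Close` columns:\n",
+    "dd_px = dd.read_parquet(parquet_paths)\n",
+    "\n",
+    "# pct_change within each partition assumes one ticker per partition/file.\n",
+    "dd_feat = dd_px.map_partitions(\n",
+    "    lambda df: df.assign(returns=df[\"Close\"].pct_change())\n",
+    ")\n",
+    "\n",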
+    "df_feat = dd_feat.compute()\n",
     "\n",
-    "(3 pt)"
+    "df_feat[\"returns_ma_10\"] = (\n",
+    "    df_feat.groupby(\"Ticker\")[\"returns\"]\n",
+    "    .rolling(10)\n",
+    "    .mean()\n",
+    "    .reset_index(level=0, drop=True)\n",
+    ")\n",
+    "\n",
+    "df_feat.head(15)\n"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 25,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "# Write your code below.\n",
-    "\n"
+    "+ Convert the Dask data frame to a pandas data frame. \n",
+    "+ Add a new feature containing the moving average of `returns` using a window of 10 days. There are several ways to solve this task, a simple one uses `.rolling(10).mean()`.\n",
+    "\n",
+    "(3 pt)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -128,6 +203,36 @@
    "(1 pt)"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Comment on moving average calculation approach\n",
+    "\n",
+    "comment = \"\"\"\n",
+    "1) Was it necessary to convert to pandas to calculate the moving average return?\n",
+    "   No, it was not necessary. Dask supports rolling-window operations through\n",
+    "   its .rolling() API (with map_partitions for per-ticker windows), so the\n",
+    "   moving average could have been computed directly on the Dask dataframe\n",
+    "   without converting to pandas.\n",
+    "\n",
+    "2) Would it have been better to do it in Dask? Why?\n",
+    "   It depends on the dataset size:\n",
+    "   - For large datasets that don't fit in memory: YES, Dask is better because\n",
+    "     it maintains parallel processing and out-of-core computation.\n",
+    "   - For smaller datasets that fit comfortably in memory: pandas is acceptable\n",
+    "     and simpler to use.\n",
+    "\n",
+    "   In this case, since we converted to pandas anyway (via .compute()), the\n",
+    "   operation was already materialized in memory, so using pandas for rolling\n",
+    "   is fine. 
However, if memory is constrained, computing the rolling mean in \n", + " Dask first (before converting to pandas) would have been more efficient.\n", + "\"\"\"\n", + "\n", + "print(comment)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -165,7 +270,7 @@ ], "metadata": { "kernelspec": { - "display_name": "env", + "display_name": "production-env", "language": "python", "name": "python3" }, @@ -179,7 +284,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.0" + "version": "3.11.14" } }, "nbformat": 4, diff --git a/02_activities/assignments/assignment_2.ipynb b/02_activities/assignments/assignment_2.ipynb index 29d661c57..303a24bd2 100644 --- a/02_activities/assignments/assignment_2.ipynb +++ b/02_activities/assignments/assignment_2.ipynb @@ -97,11 +97,45 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: '../../05_src/data/fires/forestfires.csv'", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mFileNotFoundError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 11\u001b[39m\n\u001b[32m 4\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msklearn\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mmodel_selection\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m train_test_split\n\u001b[32m 6\u001b[39m columns = [\n\u001b[32m 7\u001b[39m \u001b[33m'\u001b[39m\u001b[33mcoord_x\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mcoord_y\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mmonth\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mday\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mffmc\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mdmc\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mdc\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33misi\u001b[39m\u001b[33m'\u001b[39m,\n\u001b[32m 8\u001b[39m \u001b[33m'\u001b[39m\u001b[33mtemp\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mrh\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mwind\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mrain\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33marea\u001b[39m\u001b[33m'\u001b[39m\n\u001b[32m 9\u001b[39m ]\n\u001b[32m---> \u001b[39m\u001b[32m11\u001b[39m fires_dt = \u001b[43mpd\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m../../05_src/data/fires/forestfires.csv\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mheader\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m0\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnames\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 13\u001b[39m X = fires_dt.drop(columns=[\u001b[33m\"\u001b[39m\u001b[33marea\u001b[39m\u001b[33m\"\u001b[39m])\n\u001b[32m 14\u001b[39m y = fires_dt[\u001b[33m\"\u001b[39m\u001b[33marea\u001b[39m\u001b[33m\"\u001b[39m]\n", + "\u001b[36mFile 
\u001b[39m\u001b[32mc:\\Users\\fatim\\dsi\\production\\production-env\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:1026\u001b[39m, in \u001b[36mread_csv\u001b[39m\u001b[34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[39m\n\u001b[32m 1013\u001b[39m kwds_defaults = _refine_defaults_read(\n\u001b[32m 1014\u001b[39m dialect,\n\u001b[32m 1015\u001b[39m delimiter,\n\u001b[32m (...)\u001b[39m\u001b[32m 1022\u001b[39m dtype_backend=dtype_backend,\n\u001b[32m 1023\u001b[39m )\n\u001b[32m 1024\u001b[39m kwds.update(kwds_defaults)\n\u001b[32m-> \u001b[39m\u001b[32m1026\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\fatim\\dsi\\production\\production-env\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:620\u001b[39m, in \u001b[36m_read\u001b[39m\u001b[34m(filepath_or_buffer, kwds)\u001b[39m\n\u001b[32m 617\u001b[39m _validate_names(kwds.get(\u001b[33m\"\u001b[39m\u001b[33mnames\u001b[39m\u001b[33m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[32m 619\u001b[39m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m620\u001b[39m parser = \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 622\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[32m 623\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n", + "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\fatim\\dsi\\production\\production-env\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:1620\u001b[39m, in \u001b[36mTextFileReader.__init__\u001b[39m\u001b[34m(self, f, engine, **kwds)\u001b[39m\n\u001b[32m 1617\u001b[39m \u001b[38;5;28mself\u001b[39m.options[\u001b[33m\"\u001b[39m\u001b[33mhas_index_names\u001b[39m\u001b[33m\"\u001b[39m] = kwds[\u001b[33m\"\u001b[39m\u001b[33mhas_index_names\u001b[39m\u001b[33m\"\u001b[39m]\n\u001b[32m 1619\u001b[39m \u001b[38;5;28mself\u001b[39m.handles: IOHandles | \u001b[38;5;28;01mNone\u001b[39;00m = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1620\u001b[39m \u001b[38;5;28mself\u001b[39m._engine = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\fatim\\dsi\\production\\production-env\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:1880\u001b[39m, in \u001b[36mTextFileReader._make_engine\u001b[39m\u001b[34m(self, f, 
engine)\u001b[39m\n\u001b[32m 1878\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[32m 1879\u001b[39m mode += \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m-> \u001b[39m\u001b[32m1880\u001b[39m \u001b[38;5;28mself\u001b[39m.handles = \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1881\u001b[39m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1882\u001b[39m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1883\u001b[39m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mencoding\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1884\u001b[39m \u001b[43m \u001b[49m\u001b[43mcompression\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcompression\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1885\u001b[39m \u001b[43m \u001b[49m\u001b[43mmemory_map\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmemory_map\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1886\u001b[39m \u001b[43m \u001b[49m\u001b[43mis_text\u001b[49m\u001b[43m=\u001b[49m\u001b[43mis_text\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1887\u001b[39m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mencoding_errors\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstrict\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1888\u001b[39m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstorage_options\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1889\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1890\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m.handles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1891\u001b[39m f = \u001b[38;5;28mself\u001b[39m.handles.handle\n", + "\u001b[36mFile 
\u001b[39m\u001b[32mc:\\Users\\fatim\\dsi\\production\\production-env\\Lib\\site-packages\\pandas\\io\\common.py:873\u001b[39m, in \u001b[36mget_handle\u001b[39m\u001b[34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[39m\n\u001b[32m 868\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[32m 869\u001b[39m \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[32m 870\u001b[39m \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[32m 871\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m ioargs.encoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs.mode:\n\u001b[32m 872\u001b[39m \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m873\u001b[39m handle = \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[32m 874\u001b[39m \u001b[43m \u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 875\u001b[39m \u001b[43m \u001b[49m\u001b[43mioargs\u001b[49m\u001b[43m.\u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 876\u001b[39m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m=\u001b[49m\u001b[43mioargs\u001b[49m\u001b[43m.\u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 877\u001b[39m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[43m=\u001b[49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 878\u001b[39m \u001b[43m \u001b[49m\u001b[43mnewline\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 879\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 880\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 881\u001b[39m \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[32m 882\u001b[39m handle = \u001b[38;5;28mopen\u001b[39m(handle, ioargs.mode)\n", + "\u001b[31mFileNotFoundError\u001b[39m: [Errno 2] No such file or directory: '../../05_src/data/fires/forestfires.csv'" + ] + } + ], "source": [ - "# Load the libraries as required." 
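+    "# This cell expects ../../05_src/data/fires/forestfires.csv; the\n",
+    "# FileNotFoundError recorded in this cell's output means the dataset\n",
+    "# had not been downloaded to 05_src/data/fires/ in this environment.\n",
+    "\n",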
+ "import pandas as pd\n", + "import numpy as np\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "columns = [\n", + " 'coord_x', 'coord_y', 'month', 'day', 'ffmc', 'dmc', 'dc', 'isi',\n", + " 'temp', 'rh', 'wind', 'rain', 'area'\n", + "]\n", + "\n", + "fires_dt = pd.read_csv('../../05_src/data/fires/forestfires.csv', header=0, names=columns)\n", + "\n", + "X = fires_dt.drop(columns=[\"area\"])\n", + "y = fires_dt[\"area\"]\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X, y, test_size=0.2, random_state=42\n", + ")\n" ] }, { @@ -180,10 +214,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.preprocessing import OneHotEncoder, StandardScaler, PowerTransformer\n", + "from sklearn.pipeline import Pipeline\n", + "\n", + "num_cols = ['coord_x','coord_y','ffmc','dmc','dc','isi','temp','rh','wind','rain']\n", + "cat_cols = ['month','day']\n", + "\n", + "# Preproc 1: scale numeric, OHE categorical\n", + "preproc1 = ColumnTransformer(\n", + " transformers=[\n", + " (\"num\", Pipeline([(\"scaler\", StandardScaler())]), num_cols),\n", + " (\"cat\", OneHotEncoder(handle_unknown=\"ignore\"), cat_cols),\n", + " ],\n", + " remainder=\"drop\"\n", + ")" + ] }, { "cell_type": "markdown", @@ -199,10 +249,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# Preproc 2: scale + non-linear transform (Yeo-Johnson works with zeros/negatives)\n", + "preproc2 = ColumnTransformer(\n", + " transformers=[\n", + " (\"num\", Pipeline([\n", + " (\"power\", PowerTransformer(method=\"yeo-johnson\")),\n", + " (\"scaler\", StandardScaler()),\n", + " ]), num_cols),\n", + " (\"cat\", OneHotEncoder(handle_unknown=\"ignore\"), cat_cols),\n", + " ],\n", + " remainder=\"drop\"\n", + ")" + ] }, { "cell_type": "markdown", @@ -245,11 +307,21 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "illegal target for annotation (380116875.py, line 2)", + "output_type": "error", + "traceback": [ + " \u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[4]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[31m \u001b[39m\u001b[31m\"A_preproc1_ridge\": Pipeline([(\"preprocessing\", preproc1), (\"regressor\", baseline)]),\u001b[39m\n ^\n\u001b[31mSyntaxError\u001b[39m\u001b[31m:\u001b[39m illegal target for annotation\n" + ] + } + ], "source": [ - "# Pipeline C = preproc1 + advanced model\n" + "# Pipeline C = preproc1 + advanced model\n", + "\"A_preproc1_ridge\": Pipeline([(\"preprocessing\", preproc1), (\"regressor\", baseline)])," ] }, { @@ -260,7 +332,7 @@ "source": [ "# Pipeline D = preproc2 + advanced model\n", "\n", - " " + " \"D_preproc2_rf\": Pipeline([(\"preprocessing\", preproc2), (\"regressor\", advanced)])," ] }, { @@ -276,10 +348,70 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'X_train' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mNameError\u001b[39m Traceback (most recent call last)", + 
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[5]\u001b[39m\u001b[32m, line 42\u001b[39m\n\u001b[32m 34\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m name, pipe \u001b[38;5;129;01min\u001b[39;00m pipelines.items():\n\u001b[32m 35\u001b[39m gs = GridSearchCV(\n\u001b[32m 36\u001b[39m pipe,\n\u001b[32m 37\u001b[39m param_grids[name],\n\u001b[32m (...)\u001b[39m\u001b[32m 40\u001b[39m n_jobs=-\u001b[32m1\u001b[39m\n\u001b[32m 41\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m42\u001b[39m gs.fit(\u001b[43mX_train\u001b[49m, y_train)\n\u001b[32m 43\u001b[39m results[name] = gs\n\u001b[32m 45\u001b[39m \u001b[38;5;66;03m# Compare\u001b[39;00m\n", + "\u001b[31mNameError\u001b[39m: name 'X_train' is not defined" + ] + } + ], + "source": [ + "from sklearn.linear_model import Ridge\n", + "from sklearn.ensemble import RandomForestRegressor\n", + "from sklearn.model_selection import GridSearchCV\n", + "\n", + "# Baseline regressor\n", + "baseline = Ridge(random_state=42)\n", + "\n", + "# Advanced regressor (tree-based => fast SHAP)\n", + "advanced = RandomForestRegressor(random_state=42, n_jobs=-1)\n", + "\n", + "pipelines = {\n", + " \"A_preproc1_ridge\": Pipeline([(\"preprocessing\", preproc1), (\"regressor\", baseline)]),\n", + " \"B_preproc2_ridge\": Pipeline([(\"preprocessing\", preproc2), (\"regressor\", baseline)]),\n", + " \"C_preproc1_rf\": Pipeline([(\"preprocessing\", preproc1), (\"regressor\", advanced)]),\n", + " \"D_preproc2_rf\": Pipeline([(\"preprocessing\", preproc2), (\"regressor\", advanced)]),\n", + "}\n", + "\n", + "param_grids = {\n", + " \"A_preproc1_ridge\": {\"regressor__alpha\": [0.1, 1.0, 10.0, 100.0]},\n", + " \"B_preproc2_ridge\": {\"regressor__alpha\": [0.1, 1.0, 10.0, 100.0]},\n", + " \"C_preproc1_rf\": {\n", + " \"regressor__n_estimators\": [200, 400],\n", + " \"regressor__max_depth\": [None, 10],\n", + " }, # 4 combos\n", + " \"D_preproc2_rf\": {\n", + " \"regressor__n_estimators\": [200, 400],\n", + " \"regressor__max_depth\": [None, 10],\n", + " }, # 4 combos\n", + "}\n", + "\n", + "scoring = \"neg_root_mean_squared_error\"\n", + "results = {}\n", + "\n", + "for name, pipe in pipelines.items():\n", + " gs = GridSearchCV(\n", + " pipe,\n", + " param_grids[name],\n", + " scoring=scoring,\n", + " cv=5,\n", + " n_jobs=-1\n", + " )\n", + " gs.fit(X_train, y_train)\n", + " results[name] = gs\n", + "\n", + "# Compare\n", + "for name, gs in results.items():\n", + " print(name, \"best RMSE:\", -gs.best_score_, \"best params:\", gs.best_params_)\n" + ] }, { "cell_type": "code", @@ -318,6 +450,37 @@ "+ Which model has the best performance?" 
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ValueError",
+     "evalue": "min() arg is an empty sequence",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
+      "\u001b[31mValueError\u001b[39m Traceback (most recent call last)",
+      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[6]\u001b[39m\u001b[32m, line 3\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msklearn\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mmetrics\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m mean_squared_error\n\u001b[32m----> \u001b[39m\u001b[32m3\u001b[39m best_name = \u001b[38;5;28;43mmin\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mresults\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mlambda\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m-\u001b[49m\u001b[43mresults\u001b[49m\u001b[43m[\u001b[49m\u001b[43mk\u001b[49m\u001b[43m]\u001b[49m\u001b[43m.\u001b[49m\u001b[43mbest_score_\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# lowest RMSE\u001b[39;00m\n\u001b[32m 4\u001b[39m best_model = results[best_name].best_estimator_\n\u001b[32m 6\u001b[39m best_model.fit(X_train, y_train)\n",
+      "\u001b[31mValueError\u001b[39m: min() arg is an empty sequence"
+     ]
+    }
+   ],
+   "source": [
+    "from sklearn.metrics import mean_squared_error\n",
+    "\n",
+    "# best_score_ is negative RMSE, so -best_score_ is RMSE; pick the smallest.\n",
+    "best_name = min(results, key=lambda k: -results[k].best_score_)\n",
+    "best_model = results[best_name].best_estimator_\n",
+    "\n",
+    "best_model.fit(X_train, y_train)\n",
+    "pred = best_model.predict(X_test)\n",
+    "\n",
+    "# Take the square root explicitly; `squared=False` is deprecated in newer scikit-learn.\n",
+    "rmse_test = mean_squared_error(y_test, pred) ** 0.5\n",
+    "print(\"Best model:\", best_name)\n",
+    "print(\"Test RMSE:\", rmse_test)\n"
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -329,10 +492,26 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'best_model' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
+      "\u001b[31mNameError\u001b[39m Traceback (most recent call last)",
+      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[7]\u001b[39m\u001b[32m, line 3\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mpickle\u001b[39;00m\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mopen\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33mbest_forest_fire_model.pkl\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mwb\u001b[39m\u001b[33m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[32m----> \u001b[39m\u001b[32m3\u001b[39m pickle.dump(\u001b[43mbest_model\u001b[49m, f)\n",
+      "\u001b[31mNameError\u001b[39m: name 'best_model' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "import pickle\n",
+    "with open(\"best_forest_fire_model.pkl\", \"wb\") as f:\n",
+    "    pickle.dump(best_model, f)\n"
+   ]
  },
  {
   "cell_type": "code",
@@ -358,10 +537,50 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ModuleNotFoundError",
+     "evalue": "No module named 'shap'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
+      "\u001b[31mModuleNotFoundError\u001b[39m Traceback (most recent call last)",
+      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[8]\u001b[39m\u001b[32m, line 4\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;66;03m# If needed: pip/uv install shap\u001b[39;00m\n\u001b[32m 2\u001b[39m \u001b[38;5;66;03m# !uv pip install shap\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mshap\u001b[39;00m\n\u001b[32m 6\u001b[39m \u001b[38;5;66;03m# Get feature names after preprocessing\u001b[39;00m\n\u001b[32m 7\u001b[39m pre = best_model.named_steps[\u001b[33m\"\u001b[39m\u001b[33mpreprocessing\u001b[39m\u001b[33m\"\u001b[39m]\n",
+      "\u001b[31mModuleNotFoundError\u001b[39m: No module named 'shap'"
+     ]
+    }
+   ],
+   "source": [
+    "# If needed: pip/uv install shap\n",
+    "# !uv pip install shap\n",
+    "\n",
+    "import shap\n",
+    "\n",
+    "# Get feature names after preprocessing\n",
+    "pre = best_model.named_steps[\"preprocessing\"]\n",
+    "feature_names = pre.get_feature_names_out()\n",
+    "\n",
+    "# Transform train/test for SHAP\n",
+    "X_train_trans = pre.transform(X_train)\n",
+    "X_test_trans = pre.transform(X_test)\n",
+    "\n",
+    "# OneHotEncoder may return sparse matrices; densify so the plots below work.\n",
+    "if hasattr(X_test_trans, \"toarray\"):\n",
+    "    X_test_trans = X_test_trans.toarray()\n",
+    "\n",
+    "reg = best_model.named_steps[\"regressor\"]\n",
+    "\n",
+    "explainer = shap.TreeExplainer(reg)\n",
+    "shap_values = explainer.shap_values(X_test_trans)\n",
+    "\n",
+    "# Local explanation (pick one row)\n",
+    "i = 0\n",
+    "shap.plots.waterfall(\n",
+    "    shap.Explanation(values=shap_values[i], base_values=explainer.expected_value,\n",
+    "                     data=X_test_trans[i], feature_names=feature_names)\n",
+    ")\n",
+    "\n",
+    "# Global explanation\n",
+    "shap.summary_plot(shap_values, X_test_trans, feature_names=feature_names)\n"
+   ]
  },
  {
   "cell_type": "code",
@@ -423,7 +642,7 @@
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "env",
+   "display_name": "production-env",
   "language": "python",
   "name": "python3"
  },
@@ -437,7 +656,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.12.3"
+   "version": "3.11.14"
  }
 },
 "nbformat": 4,