vignash46 · vignash46 · Dec 8, 2025 · Dec 20, 2025
diff --git a/02_activities/assignments/assignment_1.ipynb b/02_activities/assignments/assignment_1.ipynb
@@ -34,7 +34,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "id": "4a3485d6-ba58-4660-a983-5680821c5719",
    "metadata": {},
    "outputs": [],
@@ -59,7 +59,35 @@
    "execution_count": null,
    "id": "a431d282-f9ca-4d5d-8912-71ffc9d8ea19",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 178 entries, 0 to 177\n",
+      "Data columns (total 14 columns):\n",
+      " #   Column                        Non-Null Count  Dtype  \n",
+      "---  ------                        --------------  -----  \n",
+      " 0   alcohol                       178 non-null    float64\n",
+      " 1   malic_acid                    178 non-null    float64\n",
+      " 2   ash                           178 non-null    float64\n",
+      " 3   alcalinity_of_ash             178 non-null    float64\n",
+      " 4   magnesium                     178 non-null    float64\n",
+      " 5   total_phenols                 178 non-null    float64\n",
+      " 6   flavanoids                    178 non-null    float64\n",
+      " 7   nonflavanoid_phenols          178 non-null    float64\n",
+      " 8   proanthocyanins               178 non-null    float64\n",
+      " 9   color_intensity               178 non-null    float64\n",
+      " 10  hue                           178 non-null    float64\n",
+      " 11  od280/od315_of_diluted_wines  178 non-null    float64\n",
+      " 12  proline                       178 non-null    float64\n",
+      " 13  class                         178 non-null    int64  \n",
+      "dtypes: float64(13), int64(1)\n",
+      "memory usage: 19.6 KB\n"
+     ]
+    }
+   ],
    "source": [
     "from sklearn.datasets import load_wine\n",
     "\n",
@@ -91,12 +119,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "id": "56916892",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "178"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your answer here"
+    "len(wine_df)  # number of observations = number of rows"
    ]
   },
   {
@@ -109,12 +148,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "id": "df0ef103",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "14"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your answer here"
+    "len(wine_df.columns)  # number of variables = number of columns"
    ]
   },
   {
@@ -127,12 +177,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "id": "47989426",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "int64\n",
+      "[0 1 2]\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your answer here"
+    "print(wine_df.dtypes['class'])  # variable type of the response column\n",
+    "print(wine_df['class'].unique())  # distinct class labels found in the data"
    ]
   },
   {
@@ -146,12 +206,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "id": "bd7b0910",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "13"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your answer here"
+    "len(wine_df.columns) - 1  # every column except the 'class' response"
    ]
   },
   {
@@ -175,10 +246,37 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
    "id": "cc899b59",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "    alcohol  malic_acid       ash  alcalinity_of_ash  magnesium  \\\n",
+      "0  1.518613   -0.562250  0.232053          -1.169593   1.913905   \n",
+      "1  0.246290   -0.499413 -0.827996          -2.490847   0.018145   \n",
+      "2  0.196879    0.021231  1.109334          -0.268738   0.088358   \n",
+      "3  1.691550   -0.346811  0.487926          -0.809251   0.930918   \n",
+      "4  0.295700    0.227694  1.840403           0.451946   1.281985   \n",
+      "\n",
+      "   total_phenols  flavanoids  nonflavanoid_phenols  proanthocyanins  \\\n",
+      "0       0.808997    1.034819             -0.659563         1.224884   \n",
+      "1       0.568648    0.733629             -0.820719        -0.544721   \n",
+      "2       0.808997    1.215533             -0.498407         2.135968   \n",
+      "3       2.491446    1.466525             -0.981875         1.032155   \n",
+      "4       0.808997    0.663351              0.226796         0.401404   \n",
+      "\n",
+      "   color_intensity       hue  od280/od315_of_diluted_wines   proline  \n",
+      "0         0.251717  0.362177                      1.847920  1.013009  \n",
+      "1        -0.293321  0.406051                      1.113449  0.965242  \n",
+      "2         0.269020  0.318304                      0.788587  1.395148  \n",
+      "3         1.186068 -0.427544                      1.184071  2.334574  \n",
+      "4        -0.319276  0.362177                      0.449601 -0.037874  \n"
+     ]
+    }
+   ],
    "source": [
     "# Select predictors (excluding the last column)\n",
     "predictors = wine_df.iloc[:, :-1]\n",
@@ -204,7 +302,7 @@
    "id": "403ef0bb",
    "metadata": {},
    "source": [
-    "> Your answer here..."
+    "Since we are using Euclidean distance to measure how close observations are to each other, leaving the predictors unstandardized would let predictors measured on larger numeric scales dominate the distances and bias the results toward those predictors. Standardizing all predictors lets each one contribute equally, regardless of the scale on which it was originally measured."
    ]
   },
   {
@@ -220,7 +318,7 @@
    "id": "fdee5a15",
    "metadata": {},
    "source": [
-    "> Your answer here..."
+    "Class is the categorical response: its values (0, 1, 2) are labels that identify the wine class, not quantities. Standardizing them would distort the meaning of the dependent variable we are trying to predict."
    ]
   },
   {
@@ -236,7 +334,7 @@
    "id": "f0676c21",
    "metadata": {},
    "source": [
-    "> Your answer here..."
+    "Setting a random seed still allows randomness to be used in the analysis, while making the results reproducible if the notebook is run again later or by someone else. The particular number doesn't matter; any value will work, since it only fixes the starting point of the random number generator so that the same sequence of random draws is produced each time."
    ]
   },
   {
@@ -251,7 +349,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 16,
    "id": "72c101f2",
    "metadata": {},
    "outputs": [],
@@ -260,8 +358,13 @@
     "np.random.seed(123)\n",
     "\n",
     "# split the data into a training and testing set. hint: use train_test_split !\n",
-    "\n",
-    "# Your code here ..."
+    "X_train, X_test, y_train, y_test = train_test_split(\n",
+    "    predictors_standardized,\n",
+    "    wine_df['class'],\n",
+    "    train_size=0.75,  # the remaining 25% is held out for testing\n",
+    "    shuffle=True,\n",
+    "    stratify=wine_df['class'],  # preserve the class proportions in both splits\n",
+    ")"
    ]
   },
   {
@@ -289,7 +392,15 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here..."
+    "# Search k = 1..50 with 10-fold cross-validation, using the training set only.\n",
+    "param_grid = {'n_neighbors': range(1, 51)}\n",
+    "knn = KNeighborsClassifier()\n",
+    "\n",
+    "grid_search = GridSearchCV(estimator=knn, param_grid=param_grid, cv=10)\n",
+    "grid_search.fit(X_train, y_train)\n",
+    "\n",
+    "best_k = grid_search.best_params_['n_neighbors']\n",
+    "best_k"
    ]
   },
   {
@@ -310,7 +421,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here..."
+    "# Refit on the training set with the tuned k, then score on the held-out test set.\n",
+    "best_knn = KNeighborsClassifier(n_neighbors=best_k).fit(X_train, y_train)\n",
+    "\n",
+    "best_knn.score(X_test, y_test)"
    ]
   },
   {
@@ -365,7 +479,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3.10.4",
+   "display_name": "lcr-env (3.11.13)",
    "language": "python",
    "name": "python3"
   },
@@ -379,12 +493,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.19"
-  },
-  "vscode": {
-   "interpreter": {
-    "hash": "497a84dc8fec8cf8d24e7e87b6d954c9a18a327edc66feb9b9ea7e9e72cc5c7e"
-   }
+   "version": "3.11.13"
   }
  },
  "nbformat": 4,