diff --git a/02_activities/assignments/assignment_1.ipynb b/02_activities/assignments/assignment_1.ipynb index 828092657..123e0295d 100644 --- a/02_activities/assignments/assignment_1.ipynb +++ b/02_activities/assignments/assignment_1.ipynb @@ -34,7 +34,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 46, "id": "4a3485d6-ba58-4660-a983-5680821c5719", "metadata": {}, "outputs": [], @@ -56,10 +56,288 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 47, "id": "a431d282-f9ca-4d5d-8912-71ffc9d8ea19", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alcoholmalic_acidashalcalinity_of_ashmagnesiumtotal_phenolsflavanoidsnonflavanoid_phenolsproanthocyaninscolor_intensityhueod280/od315_of_diluted_winesprolineclass
014.231.712.4315.6127.02.803.060.282.295.641.043.921065.00
113.201.782.1411.2100.02.652.760.261.284.381.053.401050.00
213.162.362.6718.6101.02.803.240.302.815.681.033.171185.00
314.371.952.5016.8113.03.853.490.242.187.800.863.451480.00
413.242.592.8721.0118.02.802.690.391.824.321.042.93735.00
.............................................
17313.715.652.4520.595.01.680.610.521.067.700.641.74740.02
17413.403.912.4823.0102.01.800.750.431.417.300.701.56750.02
17513.274.282.2620.0120.01.590.690.431.3510.200.591.56835.02
17613.172.592.3720.0120.01.650.680.531.469.300.601.62840.02
17714.134.102.7424.596.02.050.760.561.359.200.611.60560.02
\n", + "

178 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium total_phenols \\\n", + "0 14.23 1.71 2.43 15.6 127.0 2.80 \n", + "1 13.20 1.78 2.14 11.2 100.0 2.65 \n", + "2 13.16 2.36 2.67 18.6 101.0 2.80 \n", + "3 14.37 1.95 2.50 16.8 113.0 3.85 \n", + "4 13.24 2.59 2.87 21.0 118.0 2.80 \n", + ".. ... ... ... ... ... ... \n", + "173 13.71 5.65 2.45 20.5 95.0 1.68 \n", + "174 13.40 3.91 2.48 23.0 102.0 1.80 \n", + "175 13.27 4.28 2.26 20.0 120.0 1.59 \n", + "176 13.17 2.59 2.37 20.0 120.0 1.65 \n", + "177 14.13 4.10 2.74 24.5 96.0 2.05 \n", + "\n", + " flavanoids nonflavanoid_phenols proanthocyanins color_intensity hue \\\n", + "0 3.06 0.28 2.29 5.64 1.04 \n", + "1 2.76 0.26 1.28 4.38 1.05 \n", + "2 3.24 0.30 2.81 5.68 1.03 \n", + "3 3.49 0.24 2.18 7.80 0.86 \n", + "4 2.69 0.39 1.82 4.32 1.04 \n", + ".. ... ... ... ... ... \n", + "173 0.61 0.52 1.06 7.70 0.64 \n", + "174 0.75 0.43 1.41 7.30 0.70 \n", + "175 0.69 0.43 1.35 10.20 0.59 \n", + "176 0.68 0.53 1.46 9.30 0.60 \n", + "177 0.76 0.56 1.35 9.20 0.61 \n", + "\n", + " od280/od315_of_diluted_wines proline class \n", + "0 3.92 1065.0 0 \n", + "1 3.40 1050.0 0 \n", + "2 3.17 1185.0 0 \n", + "3 3.45 1480.0 0 \n", + "4 2.93 735.0 0 \n", + ".. ... ... ... 
\n", + "173 1.74 740.0 2 \n", + "174 1.56 750.0 2 \n", + "175 1.56 835.0 2 \n", + "176 1.62 840.0 2 \n", + "177 1.60 560.0 2 \n", + "\n", + "[178 rows x 14 columns]" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from sklearn.datasets import load_wine\n", "\n", @@ -91,12 +369,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 52, "id": "56916892", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "178\n" + ] + } + ], "source": [ - "# Your answer here" + "rows = len(wine_df)\n", + "print(rows)" ] }, { @@ -109,12 +396,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 53, "id": "df0ef103", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "14" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your answer here" + "wine_df.shape[1]" ] }, { @@ -127,12 +425,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 55, "id": "47989426", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "class\n", + "0 59\n", + "1 71\n", + "2 48\n", + "dtype: int64" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your answer here" + "#response variable is of type int \n", + "wine_df.groupby('class').size()" ] }, { @@ -146,12 +460,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 61, "id": "bd7b0910", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "alcohol 178\n", + "malic_acid 178\n", + "ash 178\n", + "alcalinity_of_ash 178\n", + "magnesium 178\n", + "total_phenols 178\n", + "flavanoids 178\n", + "nonflavanoid_phenols 178\n", + "proanthocyanins 178\n", + "color_intensity 178\n", + "hue 178\n", + 
"od280/od315_of_diluted_wines 178\n", + "proline 178\n", + "dtype: int64\n" + ] + } + ], "source": [ - "# Your answer here" + "#13 predictor variables \n", + "predictor_var=wine_df.iloc[:, :-1].count()\n", + "print(predictor_var)" ] }, { @@ -175,10 +512,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 62, "id": "cc899b59", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium \\\n", + "0 1.518613 -0.562250 0.232053 -1.169593 1.913905 \n", + "1 0.246290 -0.499413 -0.827996 -2.490847 0.018145 \n", + "2 0.196879 0.021231 1.109334 -0.268738 0.088358 \n", + "3 1.691550 -0.346811 0.487926 -0.809251 0.930918 \n", + "4 0.295700 0.227694 1.840403 0.451946 1.281985 \n", + "\n", + " total_phenols flavanoids nonflavanoid_phenols proanthocyanins \\\n", + "0 0.808997 1.034819 -0.659563 1.224884 \n", + "1 0.568648 0.733629 -0.820719 -0.544721 \n", + "2 0.808997 1.215533 -0.498407 2.135968 \n", + "3 2.491446 1.466525 -0.981875 1.032155 \n", + "4 0.808997 0.663351 0.226796 0.401404 \n", + "\n", + " color_intensity hue od280/od315_of_diluted_wines proline \n", + "0 0.251717 0.362177 1.847920 1.013009 \n", + "1 -0.293321 0.406051 1.113449 0.965242 \n", + "2 0.269020 0.318304 0.788587 1.395148 \n", + "3 1.186068 -0.427544 1.184071 2.334574 \n", + "4 -0.319276 0.362177 0.449601 -0.037874 \n" + ] + } + ], "source": [ "# Select predictors (excluding the last column)\n", "predictors = wine_df.iloc[:, :-1]\n", @@ -186,7 +550,6 @@ "# Standardize the predictors\n", "scaler = StandardScaler()\n", "predictors_standardized = pd.DataFrame(scaler.fit_transform(predictors), columns=predictors.columns)\n", - "\n", "# Display the head of the standardized predictors\n", "print(predictors_standardized.head())" ] @@ -204,7 +567,7 @@ "id": "403ef0bb", "metadata": {}, "source": [ - "> Your answer here..." 
+ "When there are many predictor variables, differences in scale can affect the model's results: KNN is distance-based, so predictors with larger ranges would dominate the distance calculation. So, it is necessary to standardize." ] }, { @@ -220,7 +583,7 @@ "id": "fdee5a15", "metadata": {}, "source": [ - "> Your answer here..." + "We need to exclude the response variable `class` from standardization because it is not an input feature of the model but its output." ] }, { @@ -236,7 +599,7 @@ "id": "f0676c21", "metadata": {}, "source": [ - "> Your answer here..." + "Setting a seed value is important because it makes the random number generator produce the same sequence of values on every run, so the same results (for example, the same train/test split) can be reproduced each time the analysis is repeated or tested." ] }, { @@ -251,17 +614,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 63, "id": "72c101f2", "metadata": {}, "outputs": [], "source": [ "# set a seed for reproducibility\n", "np.random.seed(123)\n", - "\n", + "# bind the response variable (class) to the standardized predictors\n", + "predictors_standardized[\"class\"] = wine_df[\"class\"]\n", "# split the data into a training and testing set. hint: use train_test_split !\n", + "wine_train, wine_test = train_test_split(predictors_standardized,train_size=0.75,stratify=predictors_standardized['class'])\n", "\n", - "# Your code here ..." + "#wine_test[\"class\"].unique" ] }, { @@ -284,12 +649,45 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 65, "id": "08818c64", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'n_neighbors': 7}" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here..." 
+ "# Your code here...\n", + "#Step 1\n", + "knn = KNeighborsClassifier(n_neighbors=3)\n", + "#step 2\n", + "parameter_grid = {\n", + " \"n_neighbors\": range(1, 50, 1),\n", + "}\n", + "\n", + "#Step 3\n", + "X= wine_train.drop(columns=['class'])\n", + "y = wine_train[\"class\"]\n", + "\n", + "wine_tune_grid = GridSearchCV(\n", + " estimator=knn,\n", + " param_grid=parameter_grid,\n", + " cv=10\n", + ")\n", + "#Step 4\n", + "#wine_train.select_dtypes(include=[float]\n", + "wine_tune_grid.fit(X,\n", + " y)\n", + "\n", + "wine_tune_grid.best_params_" ] }, { @@ -305,12 +703,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 66, "id": "ffefa9f2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "1.0" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here..." + "# Your code here...using the best value of n_neighbors found by the grid search (7)\n", + "knn = KNeighborsClassifier(n_neighbors=wine_tune_grid.best_params_[\"n_neighbors\"])\n", + "knn.fit(X,\n", + " y)\n", + "\n", + "# keep the true labels in wine_test[\"class\"] untouched; re-run this cell to refresh the stored score\n", + "wine_test_predictions = knn.predict(wine_test.drop(columns=['class']))\n", + "\n", + "#evaluate performance using accuracy score (predictions vs. the true test labels)\n", + "knn.score(wine_test.drop(columns=['class']),wine_test[\"class\"])\n" ] }, { @@ -365,7 +783,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.10.4", + "display_name": "dsi_participant", "language": "python", "name": "python3" }, @@ -379,12 +797,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.19" - }, - "vscode": { - "interpreter": { - "hash": "497a84dc8fec8cf8d24e7e87b6d954c9a18a327edc66feb9b9ea7e9e72cc5c7e" - } + "version": "3.9.21" } }, "nbformat": 4,