diff --git a/02_activities/assignments/assignment_1.ipynb b/02_activities/assignments/assignment_1.ipynb index 28d4df017..91a8f3a3b 100644 --- a/02_activities/assignments/assignment_1.ipynb +++ b/02_activities/assignments/assignment_1.ipynb @@ -34,7 +34,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "4a3485d6-ba58-4660-a983-5680821c5719", "metadata": {}, "outputs": [], @@ -56,10 +56,288 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "a431d282-f9ca-4d5d-8912-71ffc9d8ea19", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alcoholmalic_acidashalcalinity_of_ashmagnesiumtotal_phenolsflavanoidsnonflavanoid_phenolsproanthocyaninscolor_intensityhueod280/od315_of_diluted_winesprolineclass
014.231.712.4315.6127.02.803.060.282.295.641.043.921065.00
113.201.782.1411.2100.02.652.760.261.284.381.053.401050.00
213.162.362.6718.6101.02.803.240.302.815.681.033.171185.00
314.371.952.5016.8113.03.853.490.242.187.800.863.451480.00
413.242.592.8721.0118.02.802.690.391.824.321.042.93735.00
.............................................
17313.715.652.4520.595.01.680.610.521.067.700.641.74740.02
17413.403.912.4823.0102.01.800.750.431.417.300.701.56750.02
17513.274.282.2620.0120.01.590.690.431.3510.200.591.56835.02
17613.172.592.3720.0120.01.650.680.531.469.300.601.62840.02
17714.134.102.7424.596.02.050.760.561.359.200.611.60560.02
\n", + "

178 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium total_phenols \\\n", + "0 14.23 1.71 2.43 15.6 127.0 2.80 \n", + "1 13.20 1.78 2.14 11.2 100.0 2.65 \n", + "2 13.16 2.36 2.67 18.6 101.0 2.80 \n", + "3 14.37 1.95 2.50 16.8 113.0 3.85 \n", + "4 13.24 2.59 2.87 21.0 118.0 2.80 \n", + ".. ... ... ... ... ... ... \n", + "173 13.71 5.65 2.45 20.5 95.0 1.68 \n", + "174 13.40 3.91 2.48 23.0 102.0 1.80 \n", + "175 13.27 4.28 2.26 20.0 120.0 1.59 \n", + "176 13.17 2.59 2.37 20.0 120.0 1.65 \n", + "177 14.13 4.10 2.74 24.5 96.0 2.05 \n", + "\n", + " flavanoids nonflavanoid_phenols proanthocyanins color_intensity hue \\\n", + "0 3.06 0.28 2.29 5.64 1.04 \n", + "1 2.76 0.26 1.28 4.38 1.05 \n", + "2 3.24 0.30 2.81 5.68 1.03 \n", + "3 3.49 0.24 2.18 7.80 0.86 \n", + "4 2.69 0.39 1.82 4.32 1.04 \n", + ".. ... ... ... ... ... \n", + "173 0.61 0.52 1.06 7.70 0.64 \n", + "174 0.75 0.43 1.41 7.30 0.70 \n", + "175 0.69 0.43 1.35 10.20 0.59 \n", + "176 0.68 0.53 1.46 9.30 0.60 \n", + "177 0.76 0.56 1.35 9.20 0.61 \n", + "\n", + " od280/od315_of_diluted_wines proline class \n", + "0 3.92 1065.0 0 \n", + "1 3.40 1050.0 0 \n", + "2 3.17 1185.0 0 \n", + "3 3.45 1480.0 0 \n", + "4 2.93 735.0 0 \n", + ".. ... ... ... \n", + "173 1.74 740.0 2 \n", + "174 1.56 750.0 2 \n", + "175 1.56 835.0 2 \n", + "176 1.62 840.0 2 \n", + "177 1.60 560.0 2 \n", + "\n", + "[178 rows x 14 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from sklearn.datasets import load_wine\n", "\n", @@ -91,12 +369,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "56916892", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "178" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your answer here" + "178" ] }, { @@ -109,12 +398,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "df0ef103", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "14" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your answer here" + "14\n" ] }, { @@ -127,12 +427,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "47989426", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 1, 2])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your answer here" + " wine_df['class'].unique()\n", + "\n" ] }, { @@ -146,12 +458,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "bd7b0910", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "13" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your answer here" + "13" ] }, { @@ -175,20 +498,306 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "cc899b59", + "execution_count": 7, + "id": "f304cef6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alcoholmalic_acidashalcalinity_of_ashmagnesiumtotal_phenolsflavanoidsnonflavanoid_phenolsproanthocyaninscolor_intensityhueod280/od315_of_diluted_winesprolineclass
01.518613-0.5622500.232053-1.1695931.9139050.8089971.034819-0.6595631.2248840.2517170.3621771.8479201.0130090
10.246290-0.499413-0.827996-2.4908470.0181450.5686480.733629-0.820719-0.544721-0.2933210.4060511.1134490.9652420
20.1968790.0212311.109334-0.2687380.0883580.8089971.215533-0.4984072.1359680.2690200.3183040.7885871.3951480
31.691550-0.3468110.487926-0.8092510.9309182.4914461.466525-0.9818751.0321551.186068-0.4275441.1840712.3345740
40.2957000.2276941.8404030.4519461.2819850.8089970.6633510.2267960.401404-0.3192760.3621770.449601-0.0378740
.............................................
1730.8762752.9745430.3051590.301803-0.332922-0.985614-1.4249001.274310-0.9301791.142811-1.392758-1.231206-0.0219522
1740.4933431.4126090.4148201.0525160.158572-0.793334-1.2843440.549108-0.3169500.969783-1.129518-1.4854450.0098932
1750.3327581.744744-0.3893550.1516611.422412-1.129824-1.3445820.549108-0.4220752.224236-1.612125-1.4854450.2805752
1760.2092320.2276940.0127320.1516611.422412-1.033684-1.3546221.354888-0.2293461.834923-1.568252-1.4006990.2964982
1771.3950861.5831651.3652081.502943-0.262708-0.392751-1.2743051.596623-0.4220751.791666-1.524378-1.428948-0.5951602
\n", + "

178 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium \\\n", + "0 1.518613 -0.562250 0.232053 -1.169593 1.913905 \n", + "1 0.246290 -0.499413 -0.827996 -2.490847 0.018145 \n", + "2 0.196879 0.021231 1.109334 -0.268738 0.088358 \n", + "3 1.691550 -0.346811 0.487926 -0.809251 0.930918 \n", + "4 0.295700 0.227694 1.840403 0.451946 1.281985 \n", + ".. ... ... ... ... ... \n", + "173 0.876275 2.974543 0.305159 0.301803 -0.332922 \n", + "174 0.493343 1.412609 0.414820 1.052516 0.158572 \n", + "175 0.332758 1.744744 -0.389355 0.151661 1.422412 \n", + "176 0.209232 0.227694 0.012732 0.151661 1.422412 \n", + "177 1.395086 1.583165 1.365208 1.502943 -0.262708 \n", + "\n", + " total_phenols flavanoids nonflavanoid_phenols proanthocyanins \\\n", + "0 0.808997 1.034819 -0.659563 1.224884 \n", + "1 0.568648 0.733629 -0.820719 -0.544721 \n", + "2 0.808997 1.215533 -0.498407 2.135968 \n", + "3 2.491446 1.466525 -0.981875 1.032155 \n", + "4 0.808997 0.663351 0.226796 0.401404 \n", + ".. ... ... ... ... \n", + "173 -0.985614 -1.424900 1.274310 -0.930179 \n", + "174 -0.793334 -1.284344 0.549108 -0.316950 \n", + "175 -1.129824 -1.344582 0.549108 -0.422075 \n", + "176 -1.033684 -1.354622 1.354888 -0.229346 \n", + "177 -0.392751 -1.274305 1.596623 -0.422075 \n", + "\n", + " color_intensity hue od280/od315_of_diluted_wines proline class \n", + "0 0.251717 0.362177 1.847920 1.013009 0 \n", + "1 -0.293321 0.406051 1.113449 0.965242 0 \n", + "2 0.269020 0.318304 0.788587 1.395148 0 \n", + "3 1.186068 -0.427544 1.184071 2.334574 0 \n", + "4 -0.319276 0.362177 0.449601 -0.037874 0 \n", + ".. ... ... ... ... ... \n", + "173 1.142811 -1.392758 -1.231206 -0.021952 2 \n", + "174 0.969783 -1.129518 -1.485445 0.009893 2 \n", + "175 2.224236 -1.612125 -1.485445 0.280575 2 \n", + "176 1.834923 -1.568252 -1.400699 0.296498 2 \n", + "177 1.791666 -1.524378 -1.428948 -0.595160 2 \n", + "\n", + "[178 rows x 14 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Select predictors (excluding the last column)\n", - "predictors = wine_df.iloc[:, :-1]\n", + "predictors_standardized = wine_df.copy()\n", + "columns_to_exclude = ['class']\n", "\n", - "# Standardize the predictors\n", + "# Select the columns that we want to scale by excluding the 'id' and 'diagnosis' columns\n", + "# This will return a list of the numeric columns we need to scale\n", + "columns_to_scale = predictors_standardized.columns.difference(columns_to_exclude)\n", + "\n", + "# Initialize the StandardScaler to standardize the selected numeric columns\n", "scaler = StandardScaler()\n", - "predictors_standardized = pd.DataFrame(scaler.fit_transform(predictors), columns=predictors.columns)\n", "\n", - "# Display the head of the standardized predictors\n", - "print(predictors_standardized.head())" + "# Apply the scaler to the selected columns. This transforms the data so that each feature\n", + "# has a mean of 0 and a standard deviation of 1, which is essential to prevent larger\n", + "# scale features from dominating the analysis, especially for distance-based algorithms like KNN.\n", + "predictors_standardized[columns_to_scale] = scaler.fit_transform(wine_df[columns_to_scale])\n", + "\n", + "# Output the standardized dataframe with the scaled numeric columns\n", + "predictors_standardized" ] }, { @@ -204,7 +813,7 @@ "id": "403ef0bb", "metadata": {}, "source": [ - "> Your answer here..." 
+ "Standardizing predictor variables allows models to treat each predictor fairly and making the coefficients easier to compare. Without standardization, predictors measured in large units can dominate the calculation and lead to unstable or less reliable model estimates. Standardizing also improves the numerical stability of the regression algorithm and is essential when using regularization methods like Ridge or Lasso, which penalize coefficients based on their size. " ] }, { @@ -220,7 +829,7 @@ "id": "fdee5a15", "metadata": {}, "source": [ - "> Your answer here..." + "The 'class' variable in this case is a categorical variable, despite being a string. Standardization only makes sense for continous veriables where the differences and sitances have meaningful interpreations. In the case of categorical data, it represents groupings of the data opposed to specific numerical values. " ] }, { @@ -236,7 +845,7 @@ "id": "f0676c21", "metadata": {}, "source": [ - "> Your answer here..." + "The specific seed value is not important, but rather setting the seed function, as it ensures that the data is randomly split between the training and testing data and ensures reproducibility in the data. " ] }, { @@ -251,7 +860,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "72c101f2", "metadata": {}, "outputs": [], @@ -261,7 +870,10 @@ "\n", "# split the data into a training and testing set. hint: use train_test_split !\n", "\n", - "# Your code here ..." + "wine_train, wine_test = train_test_split(\n", + " predictors_standardized, train_size=0.75, stratify=predictors_standardized[\"class\"]\n", + ")\n", + "\n" ] }, { @@ -284,12 +896,1496 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "08818c64", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mean_fit_timestd_fit_timemean_score_timestd_score_timeparam_n_neighborsparamssplit0_test_scoresplit1_test_scoresplit2_test_scoresplit3_test_scoresplit4_test_scoresplit5_test_scoresplit6_test_scoresplit7_test_scoresplit8_test_scoresplit9_test_scoremean_test_scorestd_test_scorerank_test_score
00.0052240.0118570.0044190.0087881{'n_neighbors': 1}1.0000000.9285711.00.6923081.0000001.01.0000001.0000001.0000000.9230770.9543960.09213939
10.0009350.0000280.0012490.0000682{'n_neighbors': 2}1.0000000.9285711.00.6923081.0000001.01.0000000.9230770.9230770.9230770.9390110.08962850
20.0010570.0002470.0012770.0000613{'n_neighbors': 3}1.0000000.9285711.00.8461541.0000001.01.0000001.0000000.9230770.9230770.9620880.05121728
30.0010610.0001870.0045890.0096984{'n_neighbors': 4}0.9285710.9285711.00.8461540.9230771.01.0000001.0000000.9230771.0000000.9549450.05040635
40.0010420.0001060.0652210.1914695{'n_neighbors': 5}1.0000000.9285711.00.9230770.9230771.01.0000001.0000000.9230771.0000000.9697800.0370427
50.0013890.0008540.0016170.0004226{'n_neighbors': 6}0.9285711.0000001.00.9230770.9230771.01.0000001.0000000.9230771.0000000.9697800.0370427
60.0009900.0000610.0012860.0000587{'n_neighbors': 7}1.0000000.9285711.00.9230770.9230771.01.0000001.0000001.0000001.0000000.9774730.0344411
70.0009890.0000450.0013570.0001008{'n_neighbors': 8}0.9285711.0000001.00.8461540.9230771.01.0000000.9230770.9230771.0000000.9543960.05071939
80.0009800.0000530.0013400.0001339{'n_neighbors': 9}1.0000000.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9697800.0370429
90.0009550.0000620.0012590.00005510{'n_neighbors': 10}1.0000000.9285711.00.8461540.9230771.00.9230770.9230771.0000001.0000000.9543960.05071939
100.0009460.0000520.0012510.00003811{'n_neighbors': 11}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9626370.03741117
110.0009550.0000260.0012980.00010012{'n_neighbors': 12}1.0000000.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9697800.0370429
120.0009680.0000650.0012860.00005513{'n_neighbors': 13}1.0000000.9285711.01.0000000.9230771.01.0000000.9230771.0000001.0000000.9774730.0344411
130.0009250.0000130.0012420.00003614{'n_neighbors': 14}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9626370.03741117
140.0009280.0000160.0012480.00003315{'n_neighbors': 15}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9626370.03741117
150.0009610.0000480.0012850.00004816{'n_neighbors': 16}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9626370.03741117
160.0009580.0000570.0012730.00003417{'n_neighbors': 17}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9626370.03741117
170.0009520.0000390.0013010.00004718{'n_neighbors': 18}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9626370.03741117
180.0009490.0000350.0012890.00007219{'n_neighbors': 19}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9626370.03741117
190.0009280.0000110.0012710.00003520{'n_neighbors': 20}0.8571430.9285711.00.9230770.9230771.01.0000001.0000001.0000001.0000000.9631870.04877711
200.0009530.0000360.0013070.00008821{'n_neighbors': 21}0.9285710.9285711.00.9230770.9230771.01.0000001.0000001.0000001.0000000.9703300.0363803
210.0009490.0000300.0012720.00002422{'n_neighbors': 22}0.9285710.9285711.00.9230770.9230771.01.0000001.0000001.0000001.0000000.9703300.0363803
220.0009860.0000820.0012890.00004723{'n_neighbors': 23}0.9285710.9285711.00.9230770.9230771.01.0000001.0000001.0000000.9230770.9626370.03741113
230.0009390.0000120.0012850.00003224{'n_neighbors': 24}0.9285710.9285711.00.9230770.9230771.01.0000001.0000001.0000001.0000000.9703300.0363803
240.0009490.0000340.0012900.00004125{'n_neighbors': 25}0.9285710.9285711.00.8461540.9230771.01.0000001.0000001.0000000.9230770.9549450.05040633
250.0009330.0000190.0012810.00002926{'n_neighbors': 26}0.9285710.9285711.00.8461540.9230771.01.0000001.0000001.0000001.0000000.9626370.05082313
260.0009480.0000320.0013220.00007527{'n_neighbors': 27}0.9285710.9285711.00.8461540.9230771.01.0000001.0000001.0000000.9230770.9549450.05040633
270.0009280.0000120.0012780.00004328{'n_neighbors': 28}0.9285710.9285711.00.9230770.9230771.01.0000001.0000001.0000000.9230770.9626370.03741113
280.0009390.0000170.0012900.00004629{'n_neighbors': 29}0.9285710.9285711.00.9230770.9230771.01.0000001.0000001.0000000.9230770.9626370.03741113
290.0009570.0000360.0013200.00006330{'n_neighbors': 30}0.8571430.9285711.00.9230770.9230771.01.0000000.9230771.0000000.9230770.9478020.04683242
300.0009650.0000740.0013100.00004931{'n_neighbors': 31}0.8571430.9285711.00.9230770.9230771.01.0000001.0000001.0000000.9230770.9554950.04843029
310.0009270.0000070.0012630.00001332{'n_neighbors': 32}0.8571430.9285711.00.9230770.9230771.01.0000001.0000001.0000000.9230770.9554950.04843029
320.0009240.0000180.0012830.00003033{'n_neighbors': 33}0.8571430.9285711.00.9230770.9230771.01.0000001.0000001.0000000.9230770.9554950.04843029
330.0009470.0000430.0013610.00012434{'n_neighbors': 34}0.8571430.9285711.00.9230770.9230771.01.0000001.0000001.0000001.0000000.9631870.04877711
340.0009290.0000050.0012640.00001035{'n_neighbors': 35}0.8571430.9285711.00.9230770.9230771.00.9230771.0000001.0000001.0000000.9554950.04843029
350.0009610.0000310.0013390.00005236{'n_neighbors': 36}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9626370.03741117
360.0009800.0001400.0012910.00002737{'n_neighbors': 37}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9626370.03741117
370.0009630.0000640.0013070.00004438{'n_neighbors': 38}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9626370.03741117
380.0009530.0000530.0013130.00003939{'n_neighbors': 39}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000001.0000000.9626370.03741117
390.0009330.0000110.0013120.00004740{'n_neighbors': 40}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000000.9230770.9549450.03684235
400.0009510.0000340.0013590.00014541{'n_neighbors': 41}0.9285710.9285711.01.0000000.9230771.01.0000000.9230771.0000001.0000000.9703300.0363803
410.0009320.0000190.0012970.00002942{'n_neighbors': 42}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000000.9230770.9549450.03684235
420.0009510.0000480.0013150.00003843{'n_neighbors': 43}0.9285710.9285711.00.9230770.9230771.01.0000000.9230771.0000000.9230770.9549450.03684235
430.0009380.0000120.0013170.00005144{'n_neighbors': 44}0.9285710.9285711.00.8461540.9230771.01.0000000.9230771.0000000.9230770.9472530.04878744
440.0009670.0000520.0013440.00005045{'n_neighbors': 45}0.9285710.9285711.00.8461540.9230771.01.0000000.9230771.0000000.9230770.9472530.04878744
450.0009490.0000490.0012950.00001346{'n_neighbors': 46}0.9285710.9285711.00.8461540.9230771.01.0000000.9230771.0000000.9230770.9472530.04878744
460.0009620.0000500.0013300.00004247{'n_neighbors': 47}0.8571430.9285711.00.9230770.9230771.01.0000000.9230771.0000000.9230770.9478020.04683242
470.0009360.0000120.0012970.00001848{'n_neighbors': 48}0.9285710.9285711.00.9230770.9230771.01.0000000.9230770.9230770.9230770.9472530.03459444
480.0009460.0000100.0013270.00004549{'n_neighbors': 49}0.9285710.9285711.00.9230770.9230771.01.0000000.9230770.9230770.9230770.9472530.03459444
490.0009320.0000110.0012930.00001550{'n_neighbors': 50}0.9285710.9285711.00.9230770.9230771.01.0000000.9230770.9230770.9230770.9472530.03459444
\n", + "
" + ], + "text/plain": [ + " mean_fit_time std_fit_time mean_score_time std_score_time \\\n", + "0 0.005224 0.011857 0.004419 0.008788 \n", + "1 0.000935 0.000028 0.001249 0.000068 \n", + "2 0.001057 0.000247 0.001277 0.000061 \n", + "3 0.001061 0.000187 0.004589 0.009698 \n", + "4 0.001042 0.000106 0.065221 0.191469 \n", + "5 0.001389 0.000854 0.001617 0.000422 \n", + "6 0.000990 0.000061 0.001286 0.000058 \n", + "7 0.000989 0.000045 0.001357 0.000100 \n", + "8 0.000980 0.000053 0.001340 0.000133 \n", + "9 0.000955 0.000062 0.001259 0.000055 \n", + "10 0.000946 0.000052 0.001251 0.000038 \n", + "11 0.000955 0.000026 0.001298 0.000100 \n", + "12 0.000968 0.000065 0.001286 0.000055 \n", + "13 0.000925 0.000013 0.001242 0.000036 \n", + "14 0.000928 0.000016 0.001248 0.000033 \n", + "15 0.000961 0.000048 0.001285 0.000048 \n", + "16 0.000958 0.000057 0.001273 0.000034 \n", + "17 0.000952 0.000039 0.001301 0.000047 \n", + "18 0.000949 0.000035 0.001289 0.000072 \n", + "19 0.000928 0.000011 0.001271 0.000035 \n", + "20 0.000953 0.000036 0.001307 0.000088 \n", + "21 0.000949 0.000030 0.001272 0.000024 \n", + "22 0.000986 0.000082 0.001289 0.000047 \n", + "23 0.000939 0.000012 0.001285 0.000032 \n", + "24 0.000949 0.000034 0.001290 0.000041 \n", + "25 0.000933 0.000019 0.001281 0.000029 \n", + "26 0.000948 0.000032 0.001322 0.000075 \n", + "27 0.000928 0.000012 0.001278 0.000043 \n", + "28 0.000939 0.000017 0.001290 0.000046 \n", + "29 0.000957 0.000036 0.001320 0.000063 \n", + "30 0.000965 0.000074 0.001310 0.000049 \n", + "31 0.000927 0.000007 0.001263 0.000013 \n", + "32 0.000924 0.000018 0.001283 0.000030 \n", + "33 0.000947 0.000043 0.001361 0.000124 \n", + "34 0.000929 0.000005 0.001264 0.000010 \n", + "35 0.000961 0.000031 0.001339 0.000052 \n", + "36 0.000980 0.000140 0.001291 0.000027 \n", + "37 0.000963 0.000064 0.001307 0.000044 \n", + "38 0.000953 0.000053 0.001313 0.000039 \n", + "39 0.000933 0.000011 0.001312 0.000047 \n", + "40 0.000951 0.000034 0.001359 0.000145 \n", + "41 0.000932 0.000019 0.001297 0.000029 \n", + "42 0.000951 0.000048 0.001315 0.000038 \n", + "43 0.000938 0.000012 0.001317 0.000051 \n", + "44 0.000967 0.000052 0.001344 0.000050 \n", + "45 0.000949 0.000049 0.001295 0.000013 \n", + "46 0.000962 0.000050 0.001330 0.000042 \n", + "47 0.000936 0.000012 0.001297 0.000018 \n", + "48 0.000946 0.000010 0.001327 0.000045 \n", + "49 0.000932 0.000011 0.001293 0.000015 \n", + "\n", + " param_n_neighbors params split0_test_score \\\n", + "0 1 {'n_neighbors': 1} 1.000000 \n", + "1 2 {'n_neighbors': 2} 1.000000 \n", + "2 3 {'n_neighbors': 3} 1.000000 \n", + "3 4 {'n_neighbors': 4} 0.928571 \n", + "4 5 {'n_neighbors': 5} 1.000000 \n", + "5 6 {'n_neighbors': 6} 0.928571 \n", + "6 7 {'n_neighbors': 7} 1.000000 \n", + "7 8 {'n_neighbors': 8} 0.928571 \n", + "8 9 {'n_neighbors': 9} 1.000000 \n", + "9 10 {'n_neighbors': 10} 1.000000 \n", + "10 11 {'n_neighbors': 11} 0.928571 \n", + "11 12 {'n_neighbors': 12} 1.000000 \n", + "12 13 {'n_neighbors': 13} 1.000000 \n", + "13 14 {'n_neighbors': 14} 0.928571 \n", + "14 15 {'n_neighbors': 15} 0.928571 \n", + "15 16 {'n_neighbors': 16} 0.928571 \n", + "16 17 {'n_neighbors': 17} 0.928571 \n", + "17 18 {'n_neighbors': 18} 0.928571 \n", + "18 19 {'n_neighbors': 19} 0.928571 \n", + "19 20 {'n_neighbors': 20} 0.857143 \n", + "20 21 {'n_neighbors': 21} 0.928571 \n", + "21 22 {'n_neighbors': 22} 0.928571 \n", + "22 23 {'n_neighbors': 23} 0.928571 \n", + "23 24 {'n_neighbors': 24} 0.928571 \n", + "24 25 {'n_neighbors': 25} 0.928571 \n", + "25 26 
{'n_neighbors': 26} 0.928571 \n", + "26 27 {'n_neighbors': 27} 0.928571 \n", + "27 28 {'n_neighbors': 28} 0.928571 \n", + "28 29 {'n_neighbors': 29} 0.928571 \n", + "29 30 {'n_neighbors': 30} 0.857143 \n", + "30 31 {'n_neighbors': 31} 0.857143 \n", + "31 32 {'n_neighbors': 32} 0.857143 \n", + "32 33 {'n_neighbors': 33} 0.857143 \n", + "33 34 {'n_neighbors': 34} 0.857143 \n", + "34 35 {'n_neighbors': 35} 0.857143 \n", + "35 36 {'n_neighbors': 36} 0.928571 \n", + "36 37 {'n_neighbors': 37} 0.928571 \n", + "37 38 {'n_neighbors': 38} 0.928571 \n", + "38 39 {'n_neighbors': 39} 0.928571 \n", + "39 40 {'n_neighbors': 40} 0.928571 \n", + "40 41 {'n_neighbors': 41} 0.928571 \n", + "41 42 {'n_neighbors': 42} 0.928571 \n", + "42 43 {'n_neighbors': 43} 0.928571 \n", + "43 44 {'n_neighbors': 44} 0.928571 \n", + "44 45 {'n_neighbors': 45} 0.928571 \n", + "45 46 {'n_neighbors': 46} 0.928571 \n", + "46 47 {'n_neighbors': 47} 0.857143 \n", + "47 48 {'n_neighbors': 48} 0.928571 \n", + "48 49 {'n_neighbors': 49} 0.928571 \n", + "49 50 {'n_neighbors': 50} 0.928571 \n", + "\n", + " split1_test_score split2_test_score split3_test_score \\\n", + "0 0.928571 1.0 0.692308 \n", + "1 0.928571 1.0 0.692308 \n", + "2 0.928571 1.0 0.846154 \n", + "3 0.928571 1.0 0.846154 \n", + "4 0.928571 1.0 0.923077 \n", + "5 1.000000 1.0 0.923077 \n", + "6 0.928571 1.0 0.923077 \n", + "7 1.000000 1.0 0.846154 \n", + "8 0.928571 1.0 0.923077 \n", + "9 0.928571 1.0 0.846154 \n", + "10 0.928571 1.0 0.923077 \n", + "11 0.928571 1.0 0.923077 \n", + "12 0.928571 1.0 1.000000 \n", + "13 0.928571 1.0 0.923077 \n", + "14 0.928571 1.0 0.923077 \n", + "15 0.928571 1.0 0.923077 \n", + "16 0.928571 1.0 0.923077 \n", + "17 0.928571 1.0 0.923077 \n", + "18 0.928571 1.0 0.923077 \n", + "19 0.928571 1.0 0.923077 \n", + "20 0.928571 1.0 0.923077 \n", + "21 0.928571 1.0 0.923077 \n", + "22 0.928571 1.0 0.923077 \n", + "23 0.928571 1.0 0.923077 \n", + "24 0.928571 1.0 0.846154 \n", + "25 0.928571 1.0 0.846154 \n", + "26 0.928571 1.0 0.846154 \n", + "27 0.928571 1.0 0.923077 \n", + "28 0.928571 1.0 0.923077 \n", + "29 0.928571 1.0 0.923077 \n", + "30 0.928571 1.0 0.923077 \n", + "31 0.928571 1.0 0.923077 \n", + "32 0.928571 1.0 0.923077 \n", + "33 0.928571 1.0 0.923077 \n", + "34 0.928571 1.0 0.923077 \n", + "35 0.928571 1.0 0.923077 \n", + "36 0.928571 1.0 0.923077 \n", + "37 0.928571 1.0 0.923077 \n", + "38 0.928571 1.0 0.923077 \n", + "39 0.928571 1.0 0.923077 \n", + "40 0.928571 1.0 1.000000 \n", + "41 0.928571 1.0 0.923077 \n", + "42 0.928571 1.0 0.923077 \n", + "43 0.928571 1.0 0.846154 \n", + "44 0.928571 1.0 0.846154 \n", + "45 0.928571 1.0 0.846154 \n", + "46 0.928571 1.0 0.923077 \n", + "47 0.928571 1.0 0.923077 \n", + "48 0.928571 1.0 0.923077 \n", + "49 0.928571 1.0 0.923077 \n", + "\n", + " split4_test_score split5_test_score split6_test_score \\\n", + "0 1.000000 1.0 1.000000 \n", + "1 1.000000 1.0 1.000000 \n", + "2 1.000000 1.0 1.000000 \n", + "3 0.923077 1.0 1.000000 \n", + "4 0.923077 1.0 1.000000 \n", + "5 0.923077 1.0 1.000000 \n", + "6 0.923077 1.0 1.000000 \n", + "7 0.923077 1.0 1.000000 \n", + "8 0.923077 1.0 1.000000 \n", + "9 0.923077 1.0 0.923077 \n", + "10 0.923077 1.0 1.000000 \n", + "11 0.923077 1.0 1.000000 \n", + "12 0.923077 1.0 1.000000 \n", + "13 0.923077 1.0 1.000000 \n", + "14 0.923077 1.0 1.000000 \n", + "15 0.923077 1.0 1.000000 \n", + "16 0.923077 1.0 1.000000 \n", + "17 0.923077 1.0 1.000000 \n", + "18 0.923077 1.0 1.000000 \n", + "19 0.923077 1.0 1.000000 \n", + "20 0.923077 1.0 1.000000 \n", + "21 0.923077 1.0 
1.000000 \n", + "22 0.923077 1.0 1.000000 \n", + "23 0.923077 1.0 1.000000 \n", + "24 0.923077 1.0 1.000000 \n", + "25 0.923077 1.0 1.000000 \n", + "26 0.923077 1.0 1.000000 \n", + "27 0.923077 1.0 1.000000 \n", + "28 0.923077 1.0 1.000000 \n", + "29 0.923077 1.0 1.000000 \n", + "30 0.923077 1.0 1.000000 \n", + "31 0.923077 1.0 1.000000 \n", + "32 0.923077 1.0 1.000000 \n", + "33 0.923077 1.0 1.000000 \n", + "34 0.923077 1.0 0.923077 \n", + "35 0.923077 1.0 1.000000 \n", + "36 0.923077 1.0 1.000000 \n", + "37 0.923077 1.0 1.000000 \n", + "38 0.923077 1.0 1.000000 \n", + "39 0.923077 1.0 1.000000 \n", + "40 0.923077 1.0 1.000000 \n", + "41 0.923077 1.0 1.000000 \n", + "42 0.923077 1.0 1.000000 \n", + "43 0.923077 1.0 1.000000 \n", + "44 0.923077 1.0 1.000000 \n", + "45 0.923077 1.0 1.000000 \n", + "46 0.923077 1.0 1.000000 \n", + "47 0.923077 1.0 1.000000 \n", + "48 0.923077 1.0 1.000000 \n", + "49 0.923077 1.0 1.000000 \n", + "\n", + " split7_test_score split8_test_score split9_test_score mean_test_score \\\n", + "0 1.000000 1.000000 0.923077 0.954396 \n", + "1 0.923077 0.923077 0.923077 0.939011 \n", + "2 1.000000 0.923077 0.923077 0.962088 \n", + "3 1.000000 0.923077 1.000000 0.954945 \n", + "4 1.000000 0.923077 1.000000 0.969780 \n", + "5 1.000000 0.923077 1.000000 0.969780 \n", + "6 1.000000 1.000000 1.000000 0.977473 \n", + "7 0.923077 0.923077 1.000000 0.954396 \n", + "8 0.923077 1.000000 1.000000 0.969780 \n", + "9 0.923077 1.000000 1.000000 0.954396 \n", + "10 0.923077 1.000000 1.000000 0.962637 \n", + "11 0.923077 1.000000 1.000000 0.969780 \n", + "12 0.923077 1.000000 1.000000 0.977473 \n", + "13 0.923077 1.000000 1.000000 0.962637 \n", + "14 0.923077 1.000000 1.000000 0.962637 \n", + "15 0.923077 1.000000 1.000000 0.962637 \n", + "16 0.923077 1.000000 1.000000 0.962637 \n", + "17 0.923077 1.000000 1.000000 0.962637 \n", + "18 0.923077 1.000000 1.000000 0.962637 \n", + "19 1.000000 1.000000 1.000000 0.963187 \n", + "20 1.000000 1.000000 1.000000 0.970330 \n", + "21 1.000000 1.000000 1.000000 0.970330 \n", + "22 1.000000 1.000000 0.923077 0.962637 \n", + "23 1.000000 1.000000 1.000000 0.970330 \n", + "24 1.000000 1.000000 0.923077 0.954945 \n", + "25 1.000000 1.000000 1.000000 0.962637 \n", + "26 1.000000 1.000000 0.923077 0.954945 \n", + "27 1.000000 1.000000 0.923077 0.962637 \n", + "28 1.000000 1.000000 0.923077 0.962637 \n", + "29 0.923077 1.000000 0.923077 0.947802 \n", + "30 1.000000 1.000000 0.923077 0.955495 \n", + "31 1.000000 1.000000 0.923077 0.955495 \n", + "32 1.000000 1.000000 0.923077 0.955495 \n", + "33 1.000000 1.000000 1.000000 0.963187 \n", + "34 1.000000 1.000000 1.000000 0.955495 \n", + "35 0.923077 1.000000 1.000000 0.962637 \n", + "36 0.923077 1.000000 1.000000 0.962637 \n", + "37 0.923077 1.000000 1.000000 0.962637 \n", + "38 0.923077 1.000000 1.000000 0.962637 \n", + "39 0.923077 1.000000 0.923077 0.954945 \n", + "40 0.923077 1.000000 1.000000 0.970330 \n", + "41 0.923077 1.000000 0.923077 0.954945 \n", + "42 0.923077 1.000000 0.923077 0.954945 \n", + "43 0.923077 1.000000 0.923077 0.947253 \n", + "44 0.923077 1.000000 0.923077 0.947253 \n", + "45 0.923077 1.000000 0.923077 0.947253 \n", + "46 0.923077 1.000000 0.923077 0.947802 \n", + "47 0.923077 0.923077 0.923077 0.947253 \n", + "48 0.923077 0.923077 0.923077 0.947253 \n", + "49 0.923077 0.923077 0.923077 0.947253 \n", + "\n", + " std_test_score rank_test_score \n", + "0 0.092139 39 \n", + "1 0.089628 50 \n", + "2 0.051217 28 \n", + "3 0.050406 35 \n", + "4 0.037042 7 \n", + "5 0.037042 7 \n", + "6 
0.034441 1 \n", + "7 0.050719 39 \n", + "8 0.037042 9 \n", + "9 0.050719 39 \n", + "10 0.037411 17 \n", + "11 0.037042 9 \n", + "12 0.034441 1 \n", + "13 0.037411 17 \n", + "14 0.037411 17 \n", + "15 0.037411 17 \n", + "16 0.037411 17 \n", + "17 0.037411 17 \n", + "18 0.037411 17 \n", + "19 0.048777 11 \n", + "20 0.036380 3 \n", + "21 0.036380 3 \n", + "22 0.037411 13 \n", + "23 0.036380 3 \n", + "24 0.050406 33 \n", + "25 0.050823 13 \n", + "26 0.050406 33 \n", + "27 0.037411 13 \n", + "28 0.037411 13 \n", + "29 0.046832 42 \n", + "30 0.048430 29 \n", + "31 0.048430 29 \n", + "32 0.048430 29 \n", + "33 0.048777 11 \n", + "34 0.048430 29 \n", + "35 0.037411 17 \n", + "36 0.037411 17 \n", + "37 0.037411 17 \n", + "38 0.037411 17 \n", + "39 0.036842 35 \n", + "40 0.036380 3 \n", + "41 0.036842 35 \n", + "42 0.036842 35 \n", + "43 0.048787 44 \n", + "44 0.048787 44 \n", + "45 0.048787 44 \n", + "46 0.046832 42 \n", + "47 0.034594 44 \n", + "48 0.034594 44 \n", + "49 0.034594 44 " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here..." + "knn = KNeighborsClassifier()\n", + "\n", + "parameter_grid = {\n", + " \"n_neighbors\": range(1,51)\n", + "}\n", + "\n", + "wine_tune_grid = GridSearchCV(\n", + " estimator=knn,\n", + " param_grid=parameter_grid,\n", + " cv=10\n", + ")\n", + "\n", + "X_train = wine_train.iloc[:, :-1] # all columns except last\n", + "y_train = wine_train['class'] \n", + "\n", + "wine_tune_grid.fit(X_train, y_train)\n", + "\n", + "accuracy_grid = pd.DataFrame(wine_tune_grid.cv_results_)\n", + "accuracy_grid\n" ] }, { @@ -305,12 +2401,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "ffefa9f2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.9411764705882353" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here..." + "kvalue = wine_tune_grid.best_params_[\"n_neighbors\"]\n", + "\n", + "wine_subtrain, wine_validation = train_test_split(\n", + " wine_train,\n", + " train_size=0.75,\n", + " stratify=wine_train['class']\n", + ")\n", + "\n", + "X_sub = wine_subtrain.iloc[:, :-1]\n", + "y_sub = wine_subtrain['class']\n", + "\n", + "X_val = wine_validation.iloc[:, :-1]\n", + "y_val = wine_validation['class']\n", + "\n", + "# 3. Fit final KNN using best k\n", + "final_knn = KNeighborsClassifier(n_neighbors=kvalue)\n", + "final_knn.fit(X_sub, y_sub)\n", + "\n", + "# 4. Predict on validation set\n", + "val_predictions = final_knn.predict(X_val)\n", + "\n", + "# 5. 
Accuracy on validation set\n", + "val_accuracy = accuracy_score(y_val, val_predictions)\n", + "val_accuracy\n", + "\n" ] }, { @@ -365,7 +2496,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.10.4", + "display_name": "lcr-env", "language": "python", "name": "python3" }, @@ -379,12 +2510,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.19" - }, - "vscode": { - "interpreter": { - "hash": "497a84dc8fec8cf8d24e7e87b6d954c9a18a327edc66feb9b9ea7e9e72cc5c7e" - } + "version": "3.11.1" } }, "nbformat": 4, diff --git a/02_activities/assignments/assignment_2.ipynb b/02_activities/assignments/assignment_2.ipynb index a05da5cd3..6d35e44f5 100644 --- a/02_activities/assignments/assignment_2.ipynb +++ b/02_activities/assignments/assignment_2.ipynb @@ -34,10 +34,21 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": null, "id": "4a3485d6-ba58-4660-a983-5680821c5719", "metadata": {}, "outputs": [], "source": [ "# Import standard libraries\n", "import pandas as pd\n", @@ -87,7 +98,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Your answer here..." + "mpg_data.shape\n", + "# returns (rows, columns)" ] }, { @@ -105,7 +117,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Your answer here..." + "mpg_data['mpg'].info()" ] }, { "cell_type": "markdown", "id": "6d759089", "metadata": {}, "source": [ - "Your explanation... \n" + "A float is a data type used to represent numbers that have a decimal point.\n" ] }, { @@ -131,7 +143,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Your answer here... " + "mpg_data['horsepower'].nlargest(5)" ] }, { @@ -149,7 +161,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Your answer here..." + "len(mpg_data.columns) - 1" ] }, { @@ -225,7 +237,12 @@ "id": "f67e57ab", "metadata": {}, "source": [ - "> Your answer here..." + "Positive association between miles per gallon and acceleration.\n", + "Negative association between miles per gallon and cylinders.\n", + "Negative association between miles per gallon and displacement.\n", + "Negative association between miles per gallon and horsepower.\n", + "Positive association between miles per gallon and model year.\n", + "Negative association between miles per gallon and weight. " ] }, { @@ -241,7 +258,7 @@ "id": "843f9eef", "metadata": {}, "source": [ - "> Your answer here..." + "This line is mathematically determined to be the best possible linear summary of the relationship between the independent (predictor) variable and the dependent (response) variable in the dataset. " ] }, { @@ -257,7 +274,7 @@ "id": "2ea782fc", "metadata": {}, "source": [ - "> Your answer here..." + "Data points do not usually fall perfectly on the regression line because the line only reflects the average relationship between the two variables. Each point is also influenced by many other factors, which causes deviations (noise) around the line. " ] }, { @@ -279,7 +296,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Your answer here..." 
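For the two regression answers above, the precise sense of "mathematically determined" is the least-squares criterion: the fitted line minimizes the sum of squared vertical distances between the observed points and the line,

```latex
\min_{\beta_0,\,\beta_1} \; \sum_{i=1}^{n} \bigl( y_i - (\beta_0 + \beta_1 x_i) \bigr)^2
```

and the residuals $e_i = y_i - \hat{y}_i$ are exactly the deviations the second answer describes: the part of each observation that the line does not explain.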
+ "mpg_train, mpg_test = train_test_split(\n", + " mpg_data, train_size=0.75, random_state=42\n", + ")" ] }, { @@ -299,12 +318,18 @@ "source": [ "# Your code here ...\n", "\n", - "numeric_predictors = 🤷‍♂️\n", + "numeric_predictors = ['cylinders', 'displacement', 'horsepower', 'weight', 'acceleration']\n", + "\n", + "lm = LinearRegression()\n", + "lm.fit(\n", + " mpg_train[numeric_predictors], # A single-column data frame (square footage)\n", + " mpg_train[\"mpg\"] # A series (house prices)\n", + ")\n", "\n", "\n", "# Create a DataFrame containing the slope (coefficients) and intercept\n", "coefficients_df = pd.DataFrame({\n", - " \"predictor\": numeric_predictors.columns,\n", + " \"predictor\": numeric_predictors,\n", " \"slope\": lm.coef_,\n", " \"intercept\": [lm.intercept_] * len(lm.coef_)\n", "})\n", @@ -335,7 +360,15 @@ "metadata": {}, "outputs": [], "source": [ - "# Your code here ..." + "mpg_test[\"predicted\"] = lm.predict(mpg_test[numeric_predictors])\n", + "\n", + "# Calculate RMSPE\n", + "lm_rmspe = mean_squared_error(\n", + " y_true=mpg_test[\"mpg\"], # actual sale prices\n", + " y_pred=mpg_test[\"predicted\"] # the value predicted by the model\n", + ")**1/2\n", + "\n", + "lm_rmspe" ] }, { @@ -386,7 +419,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "lcr-env", "language": "python", "name": "python3" }, @@ -400,12 +433,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.3" - }, - "vscode": { - "interpreter": { - "hash": "497a84dc8fec8cf8d24e7e87b6d954c9a18a327edc66feb9b9ea7e9e72cc5c7e" - } + "version": "3.11.1" } }, "nbformat": 4, diff --git a/02_activities/assignments/assignment_3.ipynb b/02_activities/assignments/assignment_3.ipynb index 889b10f21..f0c048d0b 100644 --- a/02_activities/assignments/assignment_3.ipynb +++ b/02_activities/assignments/assignment_3.ipynb @@ -76,7 +76,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "a431d282-f9ca-4d5d-8912-71ffc9d8ea19", "metadata": {}, "outputs": [ @@ -138,7 +138,9 @@ "# Display the DataFrame\n", "print(wine_df)\n", "\n", - "# Your code here..." + "#code here\n", + "row, col = wine_df.shape\n", + "print('rows:', row, \",\", \"columns:\", col)" ] }, { @@ -232,7 +234,7 @@ "id": "35308e2c", "metadata": {}, "source": [ - "> Your answer..." + "Malic acids and flavanoids seems to distinctively form separate clusters. " ] }, { @@ -301,10 +303,10 @@ }, { "cell_type": "markdown", - "id": "53d77d5c", + "id": "69f7bb7f", "metadata": {}, "source": [ - "> Your answer here ... " + "Depending on the range of values, when you compare larger ranges to smaller ranges, the differences in changes will be weighted differently when assessing their impact on the clusters. By using standardized data, it assesses the relatively range of the values for each values opposed to the absolute values. 
" ] }, { @@ -325,13 +327,20 @@ "metadata": {}, "outputs": [], "source": [ - "# Your answer...\n", "\n", - "clustered_wine_data = 🤷‍♂️\n", + "clustered_wine_data = KMeans(n_clusters=3, random_state=12)\n", + "\n", + "# fit cluster to our data \n", + "clusters = clustered_wine_data.fit(scaled_wine_df)\n", + "\n", + "scaled_wine_clusters = scaled_wine_df.copy()\n", + "\n", + "\n", + "scaled_wine_clusters['Cluster'] = clusters.labels_\n", "\n", "\n", "# Use the helper function to plot scatter plots, colored by cluster labels\n", - "plot_feature_pairs(clustered_wine_data, feature_names, color_labels=clustered_wine_data['Cluster'], title_prefix='Clustered Data: ')" + "plot_feature_pairs(scaled_wine_clusters, feature_names, color_labels=scaled_wine_clusters['Cluster'], title_prefix='Clustered Data: ')" ] }, { @@ -349,7 +358,7 @@ "id": "83349688", "metadata": {}, "source": [ - "> Your answer here..." + "Answer: Running a forloop using a range of K values to determine where the elbow point is - i.e. where increasing the number of clusters no longer leads to meaningful reduction in WSSD." ] }, { @@ -371,13 +380,28 @@ "outputs": [], "source": [ "# Your answer here...\n", + "bootstrap_samples = []\n", "\n", - "mean_color_intensity = 🤷‍♂️\n", + "for i in range(10_000):\n", + " sample = wine_df.sample(n=len(wine_df), replace=True) # Sample with replacement\n", + " sample = sample.assign(replicate=i) # Add replicate number\n", + " bootstrap_samples.append(sample) # Store the sample\n", + "\n", + "# Combine all bootstrap samples into one DataFrame\n", + "boot20000 = pd.concat(bootstrap_samples)\n", + "\n", + "\n", + "\n", + "# generating the table with the means of each bootstrap\n", + "summary_mean_color_intensity = boot20000.groupby('replicate')['color_intensity'].mean().reset_index(name='mean_color_intesity')\n", + "#generating the final mean\n", + "mean_color_intensity = summary_mean_color_intensity[\"mean_color_intesity\"].mean()\n", "\n", "np.random.seed(123)\n", "\n", - "lower_bound = 🤷‍♂️\n", - "upper_bound = 🤷‍♂️\n", + "lower_bound = summary_mean_color_intensity[\"mean_color_intesity\"].quantile(0.05)\n", + "upper_bound = summary_mean_color_intensity[\"mean_color_intesity\"].quantile(0.95)\n", + "\n", "\n", "# Display the result\n", "print(f\"Mean of Color Intensity: {mean_color_intensity}\")\n", @@ -398,7 +422,7 @@ "id": "16a6e104", "metadata": {}, "source": [ - "> Your answer..." + "In this case, we can sample a small sub-set of the total number of wines, and make an estimate about the colour intensity of all the wines in a collection " ] }, { @@ -415,7 +439,7 @@ "id": "e5be82ec", "metadata": {}, "source": [ - "> Your answer..." + "It tells us that our estimate is 90% likely to be true. A smaller range indicates a higher confidence in the mean value, whereas a wider range indicates a smaller confidence in the true mean value. " ] }, { @@ -433,7 +457,7 @@ "id": "0a7e6778", "metadata": {}, "source": [ - "> Your answer here..." + "Our confidence intervals are very narrow, and close to the estimated mean, suggesting that our bootstrapping and the color intensity values tend to be very stable. " ] }, {