From 61ba300b95119ebdec81fd840e4dc2286cf0a564 Mon Sep 17 00:00:00 2001 From: Shesh Narayan Gupta <91396937+SheshNGupta@users.noreply.github.com> Date: Mon, 6 Jun 2022 18:46:17 -0400 Subject: [PATCH 1/8] Adding imputation best practices notebook This notebook demonstrated how to use the imputation techniques on missing data --- notebooks/Imputation_best_practices.ipynb | 4557 +++++++++++++++++++++ notebooks/random_numbers_1000.csv | 1001 +++++ 2 files changed, 5558 insertions(+) create mode 100644 notebooks/Imputation_best_practices.ipynb create mode 100644 notebooks/random_numbers_1000.csv diff --git a/notebooks/Imputation_best_practices.ipynb b/notebooks/Imputation_best_practices.ipynb new file mode 100644 index 0000000..87d582d --- /dev/null +++ b/notebooks/Imputation_best_practices.ipynb @@ -0,0 +1,4557 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e2ceaeb0-e282-4c63-97e2-f1dd03810aa2", + "metadata": {}, + "source": [ + "# What to try in this notebook?\n", + "\n", + "#### 1. Get a random number generated dataset from kaggle, use one column and create missing (1%, 5%, 10%), scale values, apply KNN, MEAN imputation. Compare the results and compute mean() and var() for the list of differences between org. and Imputed value \n", + "\n", + "Dataset - https://www.kaggle.com/timoboz/random-numbers\n", + "\n", + "#### 2. Use a housing dataset from UCI, use one column and create missing (1%, 5%, 10%), scale values, apply KNN, MEAN imputation. Compare the results and compute mean() and var() for the list of differences between org. and Imputed value \n", + "\n", + "Dataset - https://github.com/nikbearbrown/AI_Research_Group/blob/main/Awesome-UCI-Datasets/Classification/House_Price_predication/train.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "id": "d8fe4103-6e71-4b97-810c-b599a0482944", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "from sklearn.impute import KNNImputer\n", + "from sklearn.preprocessing import MinMaxScaler" + ] + }, + { + "cell_type": "markdown", + "id": "f95427ef-d6bc-47b8-a516-45a05b238180", + "metadata": {}, + "source": [ + "# 1.1 Random Numbers dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "id": "03fc0415-cdd2-415b-a273-08037b06afcf", + "metadata": {}, + "outputs": [], + "source": [ + "random_dataset = pd.read_csv('random_numbers_1000.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "id": "5ea97930-03cd-48ff-97b9-97e9cd9dde55", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | Unnamed: 0 | \n", + "number | \n", + "
---|---|---|
782 | \n", + "782 | \n", + "0.955151 | \n", + "
378 | \n", + "378 | \n", + "0.310217 | \n", + "
542 | \n", + "542 | \n", + "0.607177 | \n", + "
80 | \n", + "80 | \n", + "0.861696 | \n", + "
282 | \n", + "282 | \n", + "0.204316 | \n", + "
976 | \n", + "976 | \n", + "0.059688 | \n", + "
924 | \n", + "924 | \n", + "0.372837 | \n", + "
329 | \n", + "329 | \n", + "0.406915 | \n", + "
131 | \n", + "131 | \n", + "0.402420 | \n", + "
607 | \n", + "607 | \n", + "0.078909 | \n", + "
\n", + " | number | \n", + "number_copy_1_percent | \n", + "number_copy_5_percent | \n", + "number_copy_10_percent | \n", + "
---|---|---|---|---|
0 | \n", + "0.144616 | \n", + "0.144616 | \n", + "0.144616 | \n", + "0.144616 | \n", + "
1 | \n", + "0.077515 | \n", + "0.077515 | \n", + "0.077515 | \n", + "0.077515 | \n", + "
2 | \n", + "0.155933 | \n", + "0.155933 | \n", + "0.155933 | \n", + "0.155933 | \n", + "
3 | \n", + "0.097209 | \n", + "0.097209 | \n", + "0.097209 | \n", + "0.097209 | \n", + "
4 | \n", + "0.323750 | \n", + "0.323750 | \n", + "0.323750 | \n", + "0.323750 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
995 | \n", + "0.182107 | \n", + "0.182107 | \n", + "0.182107 | \n", + "0.182107 | \n", + "
996 | \n", + "0.787988 | \n", + "0.787988 | \n", + "0.787988 | \n", + "0.787988 | \n", + "
997 | \n", + "0.148707 | \n", + "0.148707 | \n", + "0.148707 | \n", + "0.148707 | \n", + "
998 | \n", + "0.153121 | \n", + "0.153121 | \n", + "0.153121 | \n", + "0.153121 | \n", + "
999 | \n", + "0.474737 | \n", + "0.474737 | \n", + "0.474737 | \n", + "0.474737 | \n", + "
1000 rows × 4 columns
\n", + "\n", + " | number | \n", + "number_copy_1_percent | \n", + "number_copy_5_percent | \n", + "number_copy_10_percent | \n", + "
---|---|---|---|---|
347 | \n", + "0.372389 | \n", + "0.372389 | \n", + "0.372389 | \n", + "0.372389 | \n", + "
934 | \n", + "0.327766 | \n", + "0.327766 | \n", + "0.327766 | \n", + "0.327766 | \n", + "
927 | \n", + "0.753892 | \n", + "0.753892 | \n", + "0.753892 | \n", + "0.753892 | \n", + "
997 | \n", + "0.148707 | \n", + "0.148707 | \n", + "0.148707 | \n", + "0.148707 | \n", + "
167 | \n", + "0.730901 | \n", + "0.730901 | \n", + "0.730901 | \n", + "0.730901 | \n", + "
914 | \n", + "0.841330 | \n", + "0.841330 | \n", + "0.841330 | \n", + "0.841330 | \n", + "
432 | \n", + "0.897466 | \n", + "0.897466 | \n", + "0.897466 | \n", + "0.897466 | \n", + "
587 | \n", + "0.411685 | \n", + "0.411685 | \n", + "0.411685 | \n", + "0.411685 | \n", + "
884 | \n", + "0.378794 | \n", + "0.378794 | \n", + "0.378794 | \n", + "0.378794 | \n", + "
379 | \n", + "0.265429 | \n", + "0.265429 | \n", + "0.265429 | \n", + "0.264843 | \n", + "
\n", + " | number | \n", + "number_copy_1_percent | \n", + "number_copy_5_percent | \n", + "number_copy_10_percent | \n", + "
---|---|---|---|---|
366 | \n", + "0.425525 | \n", + "0.425525 | \n", + "0.425525 | \n", + "0.425525 | \n", + "
145 | \n", + "0.246589 | \n", + "0.246589 | \n", + "0.246589 | \n", + "0.246589 | \n", + "
538 | \n", + "0.503701 | \n", + "0.503701 | \n", + "0.503701 | \n", + "0.503701 | \n", + "
256 | \n", + "0.118901 | \n", + "0.118901 | \n", + "0.491932 | \n", + "0.118901 | \n", + "
156 | \n", + "0.773215 | \n", + "0.773215 | \n", + "0.773215 | \n", + "0.773215 | \n", + "
500 | \n", + "0.441087 | \n", + "0.441087 | \n", + "0.441087 | \n", + "0.441087 | \n", + "
325 | \n", + "0.095068 | \n", + "0.095068 | \n", + "0.095068 | \n", + "0.095068 | \n", + "
97 | \n", + "0.209842 | \n", + "0.209842 | \n", + "0.209842 | \n", + "0.487348 | \n", + "
905 | \n", + "0.117657 | \n", + "0.491084 | \n", + "0.117657 | \n", + "0.117657 | \n", + "
251 | \n", + "0.961305 | \n", + "0.961305 | \n", + "0.961305 | \n", + "0.961305 | \n", + "
' + table._repr_html_() + ' | ' for table in table_list]) +\n", + " '
\n",
+ "\n",
+ "
| \n",
+ "\n",
+ "
|
\n",
+ "\n",
+ "
| \n",
+ "\n",
+ "
|
\n",
+ "\n",
+ "
| \n",
+ "\n",
+ "
|
\n", + " | diff. list Mean(KNN) | \n", + "diff. list Var.(KNN) | \n", + "diff. list Mean(MI) | \n", + "diff. list Var.(MI) | \n", + "
---|---|---|---|---|
1%_number | \n", + "0.000790 | \n", + "4.568702e-07 | \n", + "NaN | \n", + "NaN | \n", + "
5%_number | \n", + "0.000676 | \n", + "3.072444e-07 | \n", + "NaN | \n", + "NaN | \n", + "
10%_number | \n", + "0.000648 | \n", + "2.480609e-07 | \n", + "NaN | \n", + "NaN | \n", + "
1%_number | \n", + "NaN | \n", + "NaN | \n", + "0.269369 | \n", + "0.018130 | \n", + "
5%_number | \n", + "NaN | \n", + "NaN | \n", + "0.184841 | \n", + "0.014921 | \n", + "
10%_number | \n", + "NaN | \n", + "NaN | \n", + "0.231501 | \n", + "0.020024 | \n", + "
\n", + " | Id | \n", + "MSSubClass | \n", + "MSZoning | \n", + "LotFrontage | \n", + "LotArea | \n", + "Street | \n", + "Alley | \n", + "LotShape | \n", + "LandContour | \n", + "Utilities | \n", + "... | \n", + "PoolArea | \n", + "PoolQC | \n", + "Fence | \n", + "MiscFeature | \n", + "MiscVal | \n", + "MoSold | \n", + "YrSold | \n", + "SaleType | \n", + "SaleCondition | \n", + "SalePrice | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
820 | \n", + "821 | \n", + "60 | \n", + "RL | \n", + "72.0 | \n", + "7226 | \n", + "Pave | \n", + "NaN | \n", + "IR1 | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "6 | \n", + "2008 | \n", + "WD | \n", + "Normal | \n", + "183000 | \n", + "
1390 | \n", + "1391 | \n", + "20 | \n", + "RL | \n", + "70.0 | \n", + "9100 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "9 | \n", + "2006 | \n", + "WD | \n", + "Normal | \n", + "235000 | \n", + "
535 | \n", + "536 | \n", + "190 | \n", + "RL | \n", + "70.0 | \n", + "7000 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "1 | \n", + "2008 | \n", + "WD | \n", + "Normal | \n", + "107500 | \n", + "
1236 | \n", + "1237 | \n", + "160 | \n", + "RL | \n", + "36.0 | \n", + "2628 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "6 | \n", + "2010 | \n", + "WD | \n", + "Normal | \n", + "175500 | \n", + "
1337 | \n", + "1338 | \n", + "30 | \n", + "RM | \n", + "153.0 | \n", + "4118 | \n", + "Pave | \n", + "Grvl | \n", + "IR1 | \n", + "Bnk | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "3 | \n", + "2006 | \n", + "WD | \n", + "Normal | \n", + "52500 | \n", + "
674 | \n", + "675 | \n", + "20 | \n", + "RL | \n", + "80.0 | \n", + "9200 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "7 | \n", + "2008 | \n", + "WD | \n", + "Normal | \n", + "140000 | \n", + "
604 | \n", + "605 | \n", + "20 | \n", + "RL | \n", + "88.0 | \n", + "12803 | \n", + "Pave | \n", + "NaN | \n", + "IR1 | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "9 | \n", + "2008 | \n", + "WD | \n", + "Normal | \n", + "221000 | \n", + "
605 | \n", + "606 | \n", + "60 | \n", + "RL | \n", + "85.0 | \n", + "13600 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "10 | \n", + "2009 | \n", + "WD | \n", + "Normal | \n", + "205000 | \n", + "
1218 | \n", + "1219 | \n", + "50 | \n", + "RM | \n", + "52.0 | \n", + "6240 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "7 | \n", + "2006 | \n", + "WD | \n", + "Normal | \n", + "80500 | \n", + "
882 | \n", + "883 | \n", + "60 | \n", + "RL | \n", + "NaN | \n", + "9636 | \n", + "Pave | \n", + "NaN | \n", + "IR1 | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "MnPrv | \n", + "NaN | \n", + "0 | \n", + "12 | \n", + "2009 | \n", + "WD | \n", + "Normal | \n", + "178000 | \n", + "
10 rows × 81 columns
\n", + "\n", + " | SalePrice | \n", + "sp_copy_1_percent | \n", + "sp_copy_5_percent | \n", + "sp_copy_10_percent | \n", + "
---|---|---|---|---|
0 | \n", + "208500 | \n", + "208500 | \n", + "208500 | \n", + "208500 | \n", + "
1 | \n", + "181500 | \n", + "181500 | \n", + "181500 | \n", + "181500 | \n", + "
2 | \n", + "223500 | \n", + "223500 | \n", + "223500 | \n", + "223500 | \n", + "
3 | \n", + "140000 | \n", + "140000 | \n", + "140000 | \n", + "140000 | \n", + "
4 | \n", + "250000 | \n", + "250000 | \n", + "250000 | \n", + "250000 | \n", + "
\n", + " | SalePrice | \n", + "sp_copy_1_percent | \n", + "sp_copy_5_percent | \n", + "sp_copy_10_percent | \n", + "
---|---|---|---|---|
0 | \n", + "0.241078 | \n", + "0.241078 | \n", + "0.241078 | \n", + "0.241078 | \n", + "
1 | \n", + "0.203583 | \n", + "0.203583 | \n", + "0.203583 | \n", + "0.203583 | \n", + "
2 | \n", + "0.261908 | \n", + "0.261908 | \n", + "0.261908 | \n", + "0.261908 | \n", + "
3 | \n", + "0.145952 | \n", + "0.145952 | \n", + "0.145952 | \n", + "0.145952 | \n", + "
4 | \n", + "0.298709 | \n", + "0.298709 | \n", + "0.298709 | \n", + "0.298709 | \n", + "
\n", + " | SalePrice | \n", + "sp_copy_1_percent | \n", + "sp_copy_5_percent | \n", + "sp_copy_10_percent | \n", + "
---|---|---|---|---|
0 | \n", + "208500.0 | \n", + "208500.0 | \n", + "208500.0 | \n", + "208500.0 | \n", + "
1 | \n", + "181500.0 | \n", + "181500.0 | \n", + "181500.0 | \n", + "181500.0 | \n", + "
2 | \n", + "223500.0 | \n", + "223500.0 | \n", + "223500.0 | \n", + "223500.0 | \n", + "
3 | \n", + "140000.0 | \n", + "140000.0 | \n", + "140000.0 | \n", + "140000.0 | \n", + "
4 | \n", + "250000.0 | \n", + "250000.0 | \n", + "250000.0 | \n", + "250000.0 | \n", + "
\n", + " | SalePrice | \n", + "sp_copy_1_percent | \n", + "sp_copy_5_percent | \n", + "sp_copy_10_percent | \n", + "
---|---|---|---|---|
0 | \n", + "0.241078 | \n", + "0.241078 | \n", + "0.240855 | \n", + "0.241078 | \n", + "
1 | \n", + "0.203583 | \n", + "0.203583 | \n", + "0.203583 | \n", + "0.203583 | \n", + "
2 | \n", + "0.261908 | \n", + "0.261908 | \n", + "0.261908 | \n", + "0.261908 | \n", + "
3 | \n", + "0.145952 | \n", + "0.145952 | \n", + "0.145952 | \n", + "0.145952 | \n", + "
4 | \n", + "0.298709 | \n", + "0.298709 | \n", + "0.298709 | \n", + "0.298709 | \n", + "
\n", + " | diff. list Mean(KNN) | \n", + "diff. list Var.(KNN) | \n", + "
---|---|---|
1%_saleprice | \n", + "170.000 | \n", + "4.240000e+04 | \n", + "
5%_saleprice | \n", + "444.944 | \n", + "2.554554e+06 | \n", + "
10%_saleprice | \n", + "564.784 | \n", + "6.304767e+06 | \n", + "
\n", + " | diff. list Mean(KNN) scaled | \n", + "diff. list Var.(KNN) scaled | \n", + "
---|---|---|
1%_saleprice | \n", + "0.000000 | \n", + "0.000000e+00 | \n", + "
5%_saleprice | \n", + "0.000026 | \n", + "2.134350e-08 | \n", + "
10%_saleprice | \n", + "0.000032 | \n", + "1.417383e-08 | \n", + "
\n", + " | SalePrice | \n", + "sp_copy_1_percent | \n", + "sp_copy_5_percent | \n", + "sp_copy_10_percent | \n", + "
---|---|---|---|---|
571 | \n", + "120000 | \n", + "120000.0 | \n", + "120000.000000 | \n", + "182343.817778 | \n", + "
2 | \n", + "223500 | \n", + "223500.0 | \n", + "223500.000000 | \n", + "223500.000000 | \n", + "
313 | \n", + "375000 | \n", + "375000.0 | \n", + "375000.000000 | \n", + "375000.000000 | \n", + "
377 | \n", + "340000 | \n", + "340000.0 | \n", + "182457.342105 | \n", + "182343.817778 | \n", + "
987 | \n", + "395192 | \n", + "395192.0 | \n", + "395192.000000 | \n", + "395192.000000 | \n", + "
\n", + " | SalePrice | \n", + "sp_copy_1_percent | \n", + "sp_copy_5_percent | \n", + "sp_copy_10_percent | \n", + "
---|---|---|---|---|
216 | \n", + "0.243161 | \n", + "0.243161 | \n", + "0.243161 | \n", + "0.243161 | \n", + "
1 | \n", + "0.203583 | \n", + "0.203583 | \n", + "0.203583 | \n", + "0.203583 | \n", + "
575 | \n", + "0.116095 | \n", + "0.116095 | \n", + "0.116095 | \n", + "0.116095 | \n", + "
397 | \n", + "0.186918 | \n", + "0.186918 | \n", + "0.186918 | \n", + "0.205253 | \n", + "
703 | \n", + "0.145952 | \n", + "0.145952 | \n", + "0.145952 | \n", + "0.145952 | \n", + "
\n", + " | diff. list Mean(MI) | \n", + "diff. list Var.(MI) | \n", + "
---|---|---|
1%_saleprice | \n", + "55971.636768 | \n", + "1.103367e+09 | \n", + "
5%_saleprice | \n", + "58478.242105 | \n", + "3.139731e+09 | \n", + "
10%_saleprice | \n", + "61028.709911 | \n", + "3.846675e+09 | \n", + "
\n", + " | diff. list Mean(MI) scaled | \n", + "diff. list Var.(MI) scaled | \n", + "
---|---|---|
1%_saleprice_scaled | \n", + "0.000000 | \n", + "0.000000 | \n", + "
5%_saleprice_scaled | \n", + "0.008936 | \n", + "0.001404 | \n", + "
10%_saleprice_scaled | \n", + "0.007492 | \n", + "0.000443 | \n", + "
\n", + " | diff. list Mean(KNN) | \n", + "diff. list Var.(KNN) | \n", + "diff. list Mean(KNN) scaled | \n", + "diff. list Var.(KNN) scaled | \n", + "diff. list Mean(MI) | \n", + "diff. list Var.(MI) | \n", + "diff. list Mean(MI) scaled | \n", + "diff. list Var.(MI) scaled | \n", + "
---|---|---|---|---|---|---|---|---|
1%_saleprice | \n", + "170.000 | \n", + "4.240000e+04 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
5%_saleprice | \n", + "444.944 | \n", + "2.554554e+06 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
10%_saleprice | \n", + "564.784 | \n", + "6.304767e+06 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
1%_saleprice | \n", + "NaN | \n", + "NaN | \n", + "0.000000 | \n", + "0.000000e+00 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
5%_saleprice | \n", + "NaN | \n", + "NaN | \n", + "0.000026 | \n", + "2.134350e-08 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
10%_saleprice | \n", + "NaN | \n", + "NaN | \n", + "0.000032 | \n", + "1.417383e-08 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
1%_saleprice | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "55971.636768 | \n", + "1.103367e+09 | \n", + "NaN | \n", + "NaN | \n", + "
5%_saleprice | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "58478.242105 | \n", + "3.139731e+09 | \n", + "NaN | \n", + "NaN | \n", + "
10%_saleprice | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "61028.709911 | \n", + "3.846675e+09 | \n", + "NaN | \n", + "NaN | \n", + "
1%_saleprice_scaled | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0.000000 | \n", + "0.000000 | \n", + "
5%_saleprice_scaled | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0.008936 | \n", + "0.001404 | \n", + "
10%_saleprice_scaled | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0.007492 | \n", + "0.000443 | \n", + "
\n", - " | Unnamed: 0 | \n", - "number | \n", - "
---|---|---|
782 | \n", - "782 | \n", - "0.955151 | \n", - "
378 | \n", - "378 | \n", - "0.310217 | \n", - "
542 | \n", - "542 | \n", - "0.607177 | \n", - "
80 | \n", - "80 | \n", - "0.861696 | \n", - "
282 | \n", - "282 | \n", - "0.204316 | \n", - "
976 | \n", - "976 | \n", - "0.059688 | \n", - "
924 | \n", - "924 | \n", - "0.372837 | \n", - "
329 | \n", - "329 | \n", - "0.406915 | \n", - "
131 | \n", - "131 | \n", - "0.402420 | \n", - "
607 | \n", - "607 | \n", - "0.078909 | \n", - "
\n", - " | number | \n", - "number_copy_1_percent | \n", - "number_copy_5_percent | \n", - "number_copy_10_percent | \n", - "
---|---|---|---|---|
0 | \n", - "0.144616 | \n", - "0.144616 | \n", - "0.144616 | \n", - "0.144616 | \n", - "
1 | \n", - "0.077515 | \n", - "0.077515 | \n", - "0.077515 | \n", - "0.077515 | \n", - "
2 | \n", - "0.155933 | \n", - "0.155933 | \n", - "0.155933 | \n", - "0.155933 | \n", - "
3 | \n", - "0.097209 | \n", - "0.097209 | \n", - "0.097209 | \n", - "0.097209 | \n", - "
4 | \n", - "0.323750 | \n", - "0.323750 | \n", - "0.323750 | \n", - "0.323750 | \n", - "
... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "
995 | \n", - "0.182107 | \n", - "0.182107 | \n", - "0.182107 | \n", - "0.182107 | \n", - "
996 | \n", - "0.787988 | \n", - "0.787988 | \n", - "0.787988 | \n", - "0.787988 | \n", - "
997 | \n", - "0.148707 | \n", - "0.148707 | \n", - "0.148707 | \n", - "0.148707 | \n", - "
998 | \n", - "0.153121 | \n", - "0.153121 | \n", - "0.153121 | \n", - "0.153121 | \n", - "
999 | \n", - "0.474737 | \n", - "0.474737 | \n", - "0.474737 | \n", - "0.474737 | \n", - "
1000 rows × 4 columns
\n", - "\n", - " | number | \n", - "number_copy_1_percent | \n", - "number_copy_5_percent | \n", - "number_copy_10_percent | \n", - "
---|---|---|---|---|
347 | \n", - "0.372389 | \n", - "0.372389 | \n", - "0.372389 | \n", - "0.372389 | \n", - "
934 | \n", - "0.327766 | \n", - "0.327766 | \n", - "0.327766 | \n", - "0.327766 | \n", - "
927 | \n", - "0.753892 | \n", - "0.753892 | \n", - "0.753892 | \n", - "0.753892 | \n", - "
997 | \n", - "0.148707 | \n", - "0.148707 | \n", - "0.148707 | \n", - "0.148707 | \n", - "
167 | \n", - "0.730901 | \n", - "0.730901 | \n", - "0.730901 | \n", - "0.730901 | \n", - "
914 | \n", - "0.841330 | \n", - "0.841330 | \n", - "0.841330 | \n", - "0.841330 | \n", - "
432 | \n", - "0.897466 | \n", - "0.897466 | \n", - "0.897466 | \n", - "0.897466 | \n", - "
587 | \n", - "0.411685 | \n", - "0.411685 | \n", - "0.411685 | \n", - "0.411685 | \n", - "
884 | \n", - "0.378794 | \n", - "0.378794 | \n", - "0.378794 | \n", - "0.378794 | \n", - "
379 | \n", - "0.265429 | \n", - "0.265429 | \n", - "0.265429 | \n", - "0.264843 | \n", - "
\n", - " | number | \n", - "number_copy_1_percent | \n", - "number_copy_5_percent | \n", - "number_copy_10_percent | \n", - "
---|---|---|---|---|
366 | \n", - "0.425525 | \n", - "0.425525 | \n", - "0.425525 | \n", - "0.425525 | \n", - "
145 | \n", - "0.246589 | \n", - "0.246589 | \n", - "0.246589 | \n", - "0.246589 | \n", - "
538 | \n", - "0.503701 | \n", - "0.503701 | \n", - "0.503701 | \n", - "0.503701 | \n", - "
256 | \n", - "0.118901 | \n", - "0.118901 | \n", - "0.491932 | \n", - "0.118901 | \n", - "
156 | \n", - "0.773215 | \n", - "0.773215 | \n", - "0.773215 | \n", - "0.773215 | \n", - "
500 | \n", - "0.441087 | \n", - "0.441087 | \n", - "0.441087 | \n", - "0.441087 | \n", - "
325 | \n", - "0.095068 | \n", - "0.095068 | \n", - "0.095068 | \n", - "0.095068 | \n", - "
97 | \n", - "0.209842 | \n", - "0.209842 | \n", - "0.209842 | \n", - "0.487348 | \n", - "
905 | \n", - "0.117657 | \n", - "0.491084 | \n", - "0.117657 | \n", - "0.117657 | \n", - "
251 | \n", - "0.961305 | \n", - "0.961305 | \n", - "0.961305 | \n", - "0.961305 | \n", - "
' + table._repr_html_() + ' | ' for table in table_list]) +\n", - " '
\n",
- "\n",
- "
| \n",
- "\n",
- "
|
\n",
- "\n",
- "
| \n",
- "\n",
- "
|
\n",
- "\n",
- "
| \n",
- "\n",
- "
|
\n", - " | diff. list Mean(KNN) | \n", - "diff. list Var.(KNN) | \n", - "diff. list Mean(MI) | \n", - "diff. list Var.(MI) | \n", - "
---|---|---|---|---|
1%_number | \n", - "0.000790 | \n", - "4.568702e-07 | \n", - "NaN | \n", - "NaN | \n", - "
5%_number | \n", - "0.000676 | \n", - "3.072444e-07 | \n", - "NaN | \n", - "NaN | \n", - "
10%_number | \n", - "0.000648 | \n", - "2.480609e-07 | \n", - "NaN | \n", - "NaN | \n", - "
1%_number | \n", - "NaN | \n", - "NaN | \n", - "0.269369 | \n", - "0.018130 | \n", - "
5%_number | \n", - "NaN | \n", - "NaN | \n", - "0.184841 | \n", - "0.014921 | \n", - "
10%_number | \n", - "NaN | \n", - "NaN | \n", - "0.231501 | \n", - "0.020024 | \n", - "
\n", - " | Id | \n", - "MSSubClass | \n", - "MSZoning | \n", - "LotFrontage | \n", - "LotArea | \n", - "Street | \n", - "Alley | \n", - "LotShape | \n", - "LandContour | \n", - "Utilities | \n", - "... | \n", - "PoolArea | \n", - "PoolQC | \n", - "Fence | \n", - "MiscFeature | \n", - "MiscVal | \n", - "MoSold | \n", - "YrSold | \n", - "SaleType | \n", - "SaleCondition | \n", - "SalePrice | \n", - "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
820 | \n", - "821 | \n", - "60 | \n", - "RL | \n", - "72.0 | \n", - "7226 | \n", - "Pave | \n", - "NaN | \n", - "IR1 | \n", - "Lvl | \n", - "AllPub | \n", - "... | \n", - "0 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "6 | \n", - "2008 | \n", - "WD | \n", - "Normal | \n", - "183000 | \n", - "
1390 | \n", - "1391 | \n", - "20 | \n", - "RL | \n", - "70.0 | \n", - "9100 | \n", - "Pave | \n", - "NaN | \n", - "Reg | \n", - "Lvl | \n", - "AllPub | \n", - "... | \n", - "0 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "9 | \n", - "2006 | \n", - "WD | \n", - "Normal | \n", - "235000 | \n", - "
535 | \n", - "536 | \n", - "190 | \n", - "RL | \n", - "70.0 | \n", - "7000 | \n", - "Pave | \n", - "NaN | \n", - "Reg | \n", - "Lvl | \n", - "AllPub | \n", - "... | \n", - "0 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "1 | \n", - "2008 | \n", - "WD | \n", - "Normal | \n", - "107500 | \n", - "
1236 | \n", - "1237 | \n", - "160 | \n", - "RL | \n", - "36.0 | \n", - "2628 | \n", - "Pave | \n", - "NaN | \n", - "Reg | \n", - "Lvl | \n", - "AllPub | \n", - "... | \n", - "0 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "6 | \n", - "2010 | \n", - "WD | \n", - "Normal | \n", - "175500 | \n", - "
1337 | \n", - "1338 | \n", - "30 | \n", - "RM | \n", - "153.0 | \n", - "4118 | \n", - "Pave | \n", - "Grvl | \n", - "IR1 | \n", - "Bnk | \n", - "AllPub | \n", - "... | \n", - "0 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "3 | \n", - "2006 | \n", - "WD | \n", - "Normal | \n", - "52500 | \n", - "
674 | \n", - "675 | \n", - "20 | \n", - "RL | \n", - "80.0 | \n", - "9200 | \n", - "Pave | \n", - "NaN | \n", - "Reg | \n", - "Lvl | \n", - "AllPub | \n", - "... | \n", - "0 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "7 | \n", - "2008 | \n", - "WD | \n", - "Normal | \n", - "140000 | \n", - "
604 | \n", - "605 | \n", - "20 | \n", - "RL | \n", - "88.0 | \n", - "12803 | \n", - "Pave | \n", - "NaN | \n", - "IR1 | \n", - "Lvl | \n", - "AllPub | \n", - "... | \n", - "0 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "9 | \n", - "2008 | \n", - "WD | \n", - "Normal | \n", - "221000 | \n", - "
605 | \n", - "606 | \n", - "60 | \n", - "RL | \n", - "85.0 | \n", - "13600 | \n", - "Pave | \n", - "NaN | \n", - "Reg | \n", - "Lvl | \n", - "AllPub | \n", - "... | \n", - "0 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "10 | \n", - "2009 | \n", - "WD | \n", - "Normal | \n", - "205000 | \n", - "
1218 | \n", - "1219 | \n", - "50 | \n", - "RM | \n", - "52.0 | \n", - "6240 | \n", - "Pave | \n", - "NaN | \n", - "Reg | \n", - "Lvl | \n", - "AllPub | \n", - "... | \n", - "0 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "7 | \n", - "2006 | \n", - "WD | \n", - "Normal | \n", - "80500 | \n", - "
882 | \n", - "883 | \n", - "60 | \n", - "RL | \n", - "NaN | \n", - "9636 | \n", - "Pave | \n", - "NaN | \n", - "IR1 | \n", - "Lvl | \n", - "AllPub | \n", - "... | \n", - "0 | \n", - "NaN | \n", - "MnPrv | \n", - "NaN | \n", - "0 | \n", - "12 | \n", - "2009 | \n", - "WD | \n", - "Normal | \n", - "178000 | \n", - "
10 rows × 81 columns
\n", - "\n", - " | SalePrice | \n", - "sp_copy_1_percent | \n", - "sp_copy_5_percent | \n", - "sp_copy_10_percent | \n", - "
---|---|---|---|---|
0 | \n", - "208500 | \n", - "208500 | \n", - "208500 | \n", - "208500 | \n", - "
1 | \n", - "181500 | \n", - "181500 | \n", - "181500 | \n", - "181500 | \n", - "
2 | \n", - "223500 | \n", - "223500 | \n", - "223500 | \n", - "223500 | \n", - "
3 | \n", - "140000 | \n", - "140000 | \n", - "140000 | \n", - "140000 | \n", - "
4 | \n", - "250000 | \n", - "250000 | \n", - "250000 | \n", - "250000 | \n", - "
\n", - " | SalePrice | \n", - "sp_copy_1_percent | \n", - "sp_copy_5_percent | \n", - "sp_copy_10_percent | \n", - "
---|---|---|---|---|
0 | \n", - "0.241078 | \n", - "0.241078 | \n", - "0.241078 | \n", - "0.241078 | \n", - "
1 | \n", - "0.203583 | \n", - "0.203583 | \n", - "0.203583 | \n", - "0.203583 | \n", - "
2 | \n", - "0.261908 | \n", - "0.261908 | \n", - "0.261908 | \n", - "0.261908 | \n", - "
3 | \n", - "0.145952 | \n", - "0.145952 | \n", - "0.145952 | \n", - "0.145952 | \n", - "
4 | \n", - "0.298709 | \n", - "0.298709 | \n", - "0.298709 | \n", - "0.298709 | \n", - "
\n", - " | SalePrice | \n", - "sp_copy_1_percent | \n", - "sp_copy_5_percent | \n", - "sp_copy_10_percent | \n", - "
---|---|---|---|---|
0 | \n", - "208500.0 | \n", - "208500.0 | \n", - "208500.0 | \n", - "208500.0 | \n", - "
1 | \n", - "181500.0 | \n", - "181500.0 | \n", - "181500.0 | \n", - "181500.0 | \n", - "
2 | \n", - "223500.0 | \n", - "223500.0 | \n", - "223500.0 | \n", - "223500.0 | \n", - "
3 | \n", - "140000.0 | \n", - "140000.0 | \n", - "140000.0 | \n", - "140000.0 | \n", - "
4 | \n", - "250000.0 | \n", - "250000.0 | \n", - "250000.0 | \n", - "250000.0 | \n", - "
\n", - " | SalePrice | \n", - "sp_copy_1_percent | \n", - "sp_copy_5_percent | \n", - "sp_copy_10_percent | \n", - "
---|---|---|---|---|
0 | \n", - "0.241078 | \n", - "0.241078 | \n", - "0.240855 | \n", - "0.241078 | \n", - "
1 | \n", - "0.203583 | \n", - "0.203583 | \n", - "0.203583 | \n", - "0.203583 | \n", - "
2 | \n", - "0.261908 | \n", - "0.261908 | \n", - "0.261908 | \n", - "0.261908 | \n", - "
3 | \n", - "0.145952 | \n", - "0.145952 | \n", - "0.145952 | \n", - "0.145952 | \n", - "
4 | \n", - "0.298709 | \n", - "0.298709 | \n", - "0.298709 | \n", - "0.298709 | \n", - "
\n", - " | diff. list Mean(KNN) | \n", - "diff. list Var.(KNN) | \n", - "
---|---|---|
1%_saleprice | \n", - "170.000 | \n", - "4.240000e+04 | \n", - "
5%_saleprice | \n", - "444.944 | \n", - "2.554554e+06 | \n", - "
10%_saleprice | \n", - "564.784 | \n", - "6.304767e+06 | \n", - "
\n", - " | diff. list Mean(KNN) scaled | \n", - "diff. list Var.(KNN) scaled | \n", - "
---|---|---|
1%_saleprice | \n", - "0.000000 | \n", - "0.000000e+00 | \n", - "
5%_saleprice | \n", - "0.000026 | \n", - "2.134350e-08 | \n", - "
10%_saleprice | \n", - "0.000032 | \n", - "1.417383e-08 | \n", - "
\n", - " | SalePrice | \n", - "sp_copy_1_percent | \n", - "sp_copy_5_percent | \n", - "sp_copy_10_percent | \n", - "
---|---|---|---|---|
571 | \n", - "120000 | \n", - "120000.0 | \n", - "120000.000000 | \n", - "182343.817778 | \n", - "
2 | \n", - "223500 | \n", - "223500.0 | \n", - "223500.000000 | \n", - "223500.000000 | \n", - "
313 | \n", - "375000 | \n", - "375000.0 | \n", - "375000.000000 | \n", - "375000.000000 | \n", - "
377 | \n", - "340000 | \n", - "340000.0 | \n", - "182457.342105 | \n", - "182343.817778 | \n", - "
987 | \n", - "395192 | \n", - "395192.0 | \n", - "395192.000000 | \n", - "395192.000000 | \n", - "
\n", - " | SalePrice | \n", - "sp_copy_1_percent | \n", - "sp_copy_5_percent | \n", - "sp_copy_10_percent | \n", - "
---|---|---|---|---|
216 | \n", - "0.243161 | \n", - "0.243161 | \n", - "0.243161 | \n", - "0.243161 | \n", - "
1 | \n", - "0.203583 | \n", - "0.203583 | \n", - "0.203583 | \n", - "0.203583 | \n", - "
575 | \n", - "0.116095 | \n", - "0.116095 | \n", - "0.116095 | \n", - "0.116095 | \n", - "
397 | \n", - "0.186918 | \n", - "0.186918 | \n", - "0.186918 | \n", - "0.205253 | \n", - "
703 | \n", - "0.145952 | \n", - "0.145952 | \n", - "0.145952 | \n", - "0.145952 | \n", - "
\n", - " | diff. list Mean(MI) | \n", - "diff. list Var.(MI) | \n", - "
---|---|---|
1%_saleprice | \n", - "55971.636768 | \n", - "1.103367e+09 | \n", - "
5%_saleprice | \n", - "58478.242105 | \n", - "3.139731e+09 | \n", - "
10%_saleprice | \n", - "61028.709911 | \n", - "3.846675e+09 | \n", - "
\n", - " | diff. list Mean(MI) scaled | \n", - "diff. list Var.(MI) scaled | \n", - "
---|---|---|
1%_saleprice_scaled | \n", - "0.000000 | \n", - "0.000000 | \n", - "
5%_saleprice_scaled | \n", - "0.008936 | \n", - "0.001404 | \n", - "
10%_saleprice_scaled | \n", - "0.007492 | \n", - "0.000443 | \n", - "
\n", - " | diff. list Mean(KNN) | \n", - "diff. list Var.(KNN) | \n", - "diff. list Mean(KNN) scaled | \n", - "diff. list Var.(KNN) scaled | \n", - "diff. list Mean(MI) | \n", - "diff. list Var.(MI) | \n", - "diff. list Mean(MI) scaled | \n", - "diff. list Var.(MI) scaled | \n", - "
---|---|---|---|---|---|---|---|---|
1%_saleprice | \n", - "170.000 | \n", - "4.240000e+04 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "
5%_saleprice | \n", - "444.944 | \n", - "2.554554e+06 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "
10%_saleprice | \n", - "564.784 | \n", - "6.304767e+06 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "
1%_saleprice | \n", - "NaN | \n", - "NaN | \n", - "0.000000 | \n", - "0.000000e+00 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "
5%_saleprice | \n", - "NaN | \n", - "NaN | \n", - "0.000026 | \n", - "2.134350e-08 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "
10%_saleprice | \n", - "NaN | \n", - "NaN | \n", - "0.000032 | \n", - "1.417383e-08 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "
1%_saleprice | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "55971.636768 | \n", - "1.103367e+09 | \n", - "NaN | \n", - "NaN | \n", - "
5%_saleprice | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "58478.242105 | \n", - "3.139731e+09 | \n", - "NaN | \n", - "NaN | \n", - "
10%_saleprice | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "61028.709911 | \n", - "3.846675e+09 | \n", - "NaN | \n", - "NaN | \n", - "
1%_saleprice_scaled | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0.000000 | \n", - "0.000000 | \n", - "
5%_saleprice_scaled | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0.008936 | \n", - "0.001404 | \n", - "
10%_saleprice_scaled | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0.007492 | \n", - "0.000443 | \n", - "
\n", + " | Unnamed: 0 | \n", + "number | \n", + "
---|---|---|
782 | \n", + "782 | \n", + "0.955151 | \n", + "
378 | \n", + "378 | \n", + "0.310217 | \n", + "
542 | \n", + "542 | \n", + "0.607177 | \n", + "
80 | \n", + "80 | \n", + "0.861696 | \n", + "
282 | \n", + "282 | \n", + "0.204316 | \n", + "
976 | \n", + "976 | \n", + "0.059688 | \n", + "
924 | \n", + "924 | \n", + "0.372837 | \n", + "
329 | \n", + "329 | \n", + "0.406915 | \n", + "
131 | \n", + "131 | \n", + "0.402420 | \n", + "
607 | \n", + "607 | \n", + "0.078909 | \n", + "
\n", + " | number | \n", + "number_copy_1_percent | \n", + "number_copy_5_percent | \n", + "number_copy_10_percent | \n", + "
---|---|---|---|---|
0 | \n", + "0.144616 | \n", + "0.144616 | \n", + "0.144616 | \n", + "0.144616 | \n", + "
1 | \n", + "0.077515 | \n", + "0.077515 | \n", + "0.077515 | \n", + "0.077515 | \n", + "
2 | \n", + "0.155933 | \n", + "0.155933 | \n", + "0.155933 | \n", + "0.155933 | \n", + "
3 | \n", + "0.097209 | \n", + "0.097209 | \n", + "0.097209 | \n", + "0.097209 | \n", + "
4 | \n", + "0.323750 | \n", + "0.323750 | \n", + "0.323750 | \n", + "0.323750 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
995 | \n", + "0.182107 | \n", + "0.182107 | \n", + "0.182107 | \n", + "0.182107 | \n", + "
996 | \n", + "0.787988 | \n", + "0.787988 | \n", + "0.787988 | \n", + "0.787988 | \n", + "
997 | \n", + "0.148707 | \n", + "0.148707 | \n", + "0.148707 | \n", + "0.148707 | \n", + "
998 | \n", + "0.153121 | \n", + "0.153121 | \n", + "0.153121 | \n", + "0.153121 | \n", + "
999 | \n", + "0.474737 | \n", + "0.474737 | \n", + "0.474737 | \n", + "0.474737 | \n", + "
1000 rows × 4 columns
\n", + "\n", + " | number | \n", + "number_copy_1_percent | \n", + "number_copy_5_percent | \n", + "number_copy_10_percent | \n", + "
---|---|---|---|---|
347 | \n", + "0.372389 | \n", + "0.372389 | \n", + "0.372389 | \n", + "0.372389 | \n", + "
934 | \n", + "0.327766 | \n", + "0.327766 | \n", + "0.327766 | \n", + "0.327766 | \n", + "
927 | \n", + "0.753892 | \n", + "0.753892 | \n", + "0.753892 | \n", + "0.753892 | \n", + "
997 | \n", + "0.148707 | \n", + "0.148707 | \n", + "0.148707 | \n", + "0.148707 | \n", + "
167 | \n", + "0.730901 | \n", + "0.730901 | \n", + "0.730901 | \n", + "0.730901 | \n", + "
914 | \n", + "0.841330 | \n", + "0.841330 | \n", + "0.841330 | \n", + "0.841330 | \n", + "
432 | \n", + "0.897466 | \n", + "0.897466 | \n", + "0.897466 | \n", + "0.897466 | \n", + "
587 | \n", + "0.411685 | \n", + "0.411685 | \n", + "0.411685 | \n", + "0.411685 | \n", + "
884 | \n", + "0.378794 | \n", + "0.378794 | \n", + "0.378794 | \n", + "0.378794 | \n", + "
379 | \n", + "0.265429 | \n", + "0.265429 | \n", + "0.265429 | \n", + "0.264843 | \n", + "
\n", + " | number | \n", + "number_copy_1_percent | \n", + "number_copy_5_percent | \n", + "number_copy_10_percent | \n", + "
---|---|---|---|---|
366 | \n", + "0.425525 | \n", + "0.425525 | \n", + "0.425525 | \n", + "0.425525 | \n", + "
145 | \n", + "0.246589 | \n", + "0.246589 | \n", + "0.246589 | \n", + "0.246589 | \n", + "
538 | \n", + "0.503701 | \n", + "0.503701 | \n", + "0.503701 | \n", + "0.503701 | \n", + "
256 | \n", + "0.118901 | \n", + "0.118901 | \n", + "0.491932 | \n", + "0.118901 | \n", + "
156 | \n", + "0.773215 | \n", + "0.773215 | \n", + "0.773215 | \n", + "0.773215 | \n", + "
500 | \n", + "0.441087 | \n", + "0.441087 | \n", + "0.441087 | \n", + "0.441087 | \n", + "
325 | \n", + "0.095068 | \n", + "0.095068 | \n", + "0.095068 | \n", + "0.095068 | \n", + "
97 | \n", + "0.209842 | \n", + "0.209842 | \n", + "0.209842 | \n", + "0.487348 | \n", + "
905 | \n", + "0.117657 | \n", + "0.491084 | \n", + "0.117657 | \n", + "0.117657 | \n", + "
251 | \n", + "0.961305 | \n", + "0.961305 | \n", + "0.961305 | \n", + "0.961305 | \n", + "
' + table._repr_html_() + ' | ' for table in table_list]) +\n", + " '
\n",
+ "\n",
+ "
| \n",
+ "\n",
+ "
|
\n",
+ "\n",
+ "
| \n",
+ "\n",
+ "
|
\n",
+ "\n",
+ "
| \n",
+ "\n",
+ "
|
\n", + " | diff. list Mean(KNN) | \n", + "diff. list Var.(KNN) | \n", + "diff. list Mean(MI) | \n", + "diff. list Var.(MI) | \n", + "
---|---|---|---|---|
1%_number | \n", + "0.000790 | \n", + "4.568702e-07 | \n", + "NaN | \n", + "NaN | \n", + "
5%_number | \n", + "0.000676 | \n", + "3.072444e-07 | \n", + "NaN | \n", + "NaN | \n", + "
10%_number | \n", + "0.000648 | \n", + "2.480609e-07 | \n", + "NaN | \n", + "NaN | \n", + "
1%_number | \n", + "NaN | \n", + "NaN | \n", + "0.269369 | \n", + "0.018130 | \n", + "
5%_number | \n", + "NaN | \n", + "NaN | \n", + "0.184841 | \n", + "0.014921 | \n", + "
10%_number | \n", + "NaN | \n", + "NaN | \n", + "0.231501 | \n", + "0.020024 | \n", + "
\n", + " | Id | \n", + "MSSubClass | \n", + "MSZoning | \n", + "LotFrontage | \n", + "LotArea | \n", + "Street | \n", + "Alley | \n", + "LotShape | \n", + "LandContour | \n", + "Utilities | \n", + "... | \n", + "PoolArea | \n", + "PoolQC | \n", + "Fence | \n", + "MiscFeature | \n", + "MiscVal | \n", + "MoSold | \n", + "YrSold | \n", + "SaleType | \n", + "SaleCondition | \n", + "SalePrice | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
820 | \n", + "821 | \n", + "60 | \n", + "RL | \n", + "72.0 | \n", + "7226 | \n", + "Pave | \n", + "NaN | \n", + "IR1 | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "6 | \n", + "2008 | \n", + "WD | \n", + "Normal | \n", + "183000 | \n", + "
1390 | \n", + "1391 | \n", + "20 | \n", + "RL | \n", + "70.0 | \n", + "9100 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "9 | \n", + "2006 | \n", + "WD | \n", + "Normal | \n", + "235000 | \n", + "
535 | \n", + "536 | \n", + "190 | \n", + "RL | \n", + "70.0 | \n", + "7000 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "1 | \n", + "2008 | \n", + "WD | \n", + "Normal | \n", + "107500 | \n", + "
1236 | \n", + "1237 | \n", + "160 | \n", + "RL | \n", + "36.0 | \n", + "2628 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "6 | \n", + "2010 | \n", + "WD | \n", + "Normal | \n", + "175500 | \n", + "
1337 | \n", + "1338 | \n", + "30 | \n", + "RM | \n", + "153.0 | \n", + "4118 | \n", + "Pave | \n", + "Grvl | \n", + "IR1 | \n", + "Bnk | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "3 | \n", + "2006 | \n", + "WD | \n", + "Normal | \n", + "52500 | \n", + "
674 | \n", + "675 | \n", + "20 | \n", + "RL | \n", + "80.0 | \n", + "9200 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "7 | \n", + "2008 | \n", + "WD | \n", + "Normal | \n", + "140000 | \n", + "
604 | \n", + "605 | \n", + "20 | \n", + "RL | \n", + "88.0 | \n", + "12803 | \n", + "Pave | \n", + "NaN | \n", + "IR1 | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "9 | \n", + "2008 | \n", + "WD | \n", + "Normal | \n", + "221000 | \n", + "
605 | \n", + "606 | \n", + "60 | \n", + "RL | \n", + "85.0 | \n", + "13600 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "10 | \n", + "2009 | \n", + "WD | \n", + "Normal | \n", + "205000 | \n", + "
1218 | \n", + "1219 | \n", + "50 | \n", + "RM | \n", + "52.0 | \n", + "6240 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "7 | \n", + "2006 | \n", + "WD | \n", + "Normal | \n", + "80500 | \n", + "
882 | \n", + "883 | \n", + "60 | \n", + "RL | \n", + "NaN | \n", + "9636 | \n", + "Pave | \n", + "NaN | \n", + "IR1 | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "MnPrv | \n", + "NaN | \n", + "0 | \n", + "12 | \n", + "2009 | \n", + "WD | \n", + "Normal | \n", + "178000 | \n", + "
10 rows × 81 columns
\n", + "\n", + " | SalePrice | \n", + "sp_copy_1_percent | \n", + "sp_copy_5_percent | \n", + "sp_copy_10_percent | \n", + "
---|---|---|---|---|
0 | \n", + "208500 | \n", + "208500 | \n", + "208500 | \n", + "208500 | \n", + "
1 | \n", + "181500 | \n", + "181500 | \n", + "181500 | \n", + "181500 | \n", + "
2 | \n", + "223500 | \n", + "223500 | \n", + "223500 | \n", + "223500 | \n", + "
3 | \n", + "140000 | \n", + "140000 | \n", + "140000 | \n", + "140000 | \n", + "
4 | \n", + "250000 | \n", + "250000 | \n", + "250000 | \n", + "250000 | \n", + "
\n", + " | SalePrice | \n", + "sp_copy_1_percent | \n", + "sp_copy_5_percent | \n", + "sp_copy_10_percent | \n", + "
---|---|---|---|---|
0 | \n", + "0.241078 | \n", + "0.241078 | \n", + "0.241078 | \n", + "0.241078 | \n", + "
1 | \n", + "0.203583 | \n", + "0.203583 | \n", + "0.203583 | \n", + "0.203583 | \n", + "
2 | \n", + "0.261908 | \n", + "0.261908 | \n", + "0.261908 | \n", + "0.261908 | \n", + "
3 | \n", + "0.145952 | \n", + "0.145952 | \n", + "0.145952 | \n", + "0.145952 | \n", + "
4 | \n", + "0.298709 | \n", + "0.298709 | \n", + "0.298709 | \n", + "0.298709 | \n", + "
\n", + " | SalePrice | \n", + "sp_copy_1_percent | \n", + "sp_copy_5_percent | \n", + "sp_copy_10_percent | \n", + "
---|---|---|---|---|
0 | \n", + "208500.0 | \n", + "208500.0 | \n", + "208500.0 | \n", + "208500.0 | \n", + "
1 | \n", + "181500.0 | \n", + "181500.0 | \n", + "181500.0 | \n", + "181500.0 | \n", + "
2 | \n", + "223500.0 | \n", + "223500.0 | \n", + "223500.0 | \n", + "223500.0 | \n", + "
3 | \n", + "140000.0 | \n", + "140000.0 | \n", + "140000.0 | \n", + "140000.0 | \n", + "
4 | \n", + "250000.0 | \n", + "250000.0 | \n", + "250000.0 | \n", + "250000.0 | \n", + "
\n", + " | SalePrice | \n", + "sp_copy_1_percent | \n", + "sp_copy_5_percent | \n", + "sp_copy_10_percent | \n", + "
---|---|---|---|---|
0 | \n", + "0.241078 | \n", + "0.241078 | \n", + "0.240855 | \n", + "0.241078 | \n", + "
1 | \n", + "0.203583 | \n", + "0.203583 | \n", + "0.203583 | \n", + "0.203583 | \n", + "
2 | \n", + "0.261908 | \n", + "0.261908 | \n", + "0.261908 | \n", + "0.261908 | \n", + "
3 | \n", + "0.145952 | \n", + "0.145952 | \n", + "0.145952 | \n", + "0.145952 | \n", + "
4 | \n", + "0.298709 | \n", + "0.298709 | \n", + "0.298709 | \n", + "0.298709 | \n", + "
\n", + " | diff. list Mean(KNN) | \n", + "diff. list Var.(KNN) | \n", + "
---|---|---|
1%_saleprice | \n", + "170.000 | \n", + "4.240000e+04 | \n", + "
5%_saleprice | \n", + "444.944 | \n", + "2.554554e+06 | \n", + "
10%_saleprice | \n", + "564.784 | \n", + "6.304767e+06 | \n", + "
\n", + " | diff. list Mean(KNN) scaled | \n", + "diff. list Var.(KNN) scaled | \n", + "
---|---|---|
1%_saleprice | \n", + "0.000000 | \n", + "0.000000e+00 | \n", + "
5%_saleprice | \n", + "0.000026 | \n", + "2.134350e-08 | \n", + "
10%_saleprice | \n", + "0.000032 | \n", + "1.417383e-08 | \n", + "
\n", + " | SalePrice | \n", + "sp_copy_1_percent | \n", + "sp_copy_5_percent | \n", + "sp_copy_10_percent | \n", + "
---|---|---|---|---|
571 | \n", + "120000 | \n", + "120000.0 | \n", + "120000.000000 | \n", + "182343.817778 | \n", + "
2 | \n", + "223500 | \n", + "223500.0 | \n", + "223500.000000 | \n", + "223500.000000 | \n", + "
313 | \n", + "375000 | \n", + "375000.0 | \n", + "375000.000000 | \n", + "375000.000000 | \n", + "
377 | \n", + "340000 | \n", + "340000.0 | \n", + "182457.342105 | \n", + "182343.817778 | \n", + "
987 | \n", + "395192 | \n", + "395192.0 | \n", + "395192.000000 | \n", + "395192.000000 | \n", + "
\n", + " | SalePrice | \n", + "sp_copy_1_percent | \n", + "sp_copy_5_percent | \n", + "sp_copy_10_percent | \n", + "
---|---|---|---|---|
216 | \n", + "0.243161 | \n", + "0.243161 | \n", + "0.243161 | \n", + "0.243161 | \n", + "
1 | \n", + "0.203583 | \n", + "0.203583 | \n", + "0.203583 | \n", + "0.203583 | \n", + "
575 | \n", + "0.116095 | \n", + "0.116095 | \n", + "0.116095 | \n", + "0.116095 | \n", + "
397 | \n", + "0.186918 | \n", + "0.186918 | \n", + "0.186918 | \n", + "0.205253 | \n", + "
703 | \n", + "0.145952 | \n", + "0.145952 | \n", + "0.145952 | \n", + "0.145952 | \n", + "
\n", + " | diff. list Mean(MI) | \n", + "diff. list Var.(MI) | \n", + "
---|---|---|
1%_saleprice | \n", + "55971.636768 | \n", + "1.103367e+09 | \n", + "
5%_saleprice | \n", + "58478.242105 | \n", + "3.139731e+09 | \n", + "
10%_saleprice | \n", + "61028.709911 | \n", + "3.846675e+09 | \n", + "
\n", + " | diff. list Mean(MI) scaled | \n", + "diff. list Var.(MI) scaled | \n", + "
---|---|---|
1%_saleprice_scaled | \n", + "0.000000 | \n", + "0.000000 | \n", + "
5%_saleprice_scaled | \n", + "0.008936 | \n", + "0.001404 | \n", + "
10%_saleprice_scaled | \n", + "0.007492 | \n", + "0.000443 | \n", + "
\n", + " | diff. list Mean(KNN) | \n", + "diff. list Var.(KNN) | \n", + "diff. list Mean(KNN) scaled | \n", + "diff. list Var.(KNN) scaled | \n", + "diff. list Mean(MI) | \n", + "diff. list Var.(MI) | \n", + "diff. list Mean(MI) scaled | \n", + "diff. list Var.(MI) scaled | \n", + "
---|---|---|---|---|---|---|---|---|
1%_saleprice | \n", + "170.000 | \n", + "4.240000e+04 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
5%_saleprice | \n", + "444.944 | \n", + "2.554554e+06 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
10%_saleprice | \n", + "564.784 | \n", + "6.304767e+06 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
1%_saleprice | \n", + "NaN | \n", + "NaN | \n", + "0.000000 | \n", + "0.000000e+00 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
5%_saleprice | \n", + "NaN | \n", + "NaN | \n", + "0.000026 | \n", + "2.134350e-08 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
10%_saleprice | \n", + "NaN | \n", + "NaN | \n", + "0.000032 | \n", + "1.417383e-08 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
1%_saleprice | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "55971.636768 | \n", + "1.103367e+09 | \n", + "NaN | \n", + "NaN | \n", + "
5%_saleprice | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "58478.242105 | \n", + "3.139731e+09 | \n", + "NaN | \n", + "NaN | \n", + "
10%_saleprice | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "61028.709911 | \n", + "3.846675e+09 | \n", + "NaN | \n", + "NaN | \n", + "
1%_saleprice_scaled | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0.000000 | \n", + "0.000000 | \n", + "
5%_saleprice_scaled | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0.008936 | \n", + "0.001404 | \n", + "
10%_saleprice_scaled | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0.007492 | \n", + "0.000443 | \n", + "
\n", - " | Unnamed: 0 | \n", - "number | \n", - "
---|---|---|
782 | \n", - "782 | \n", - "0.955151 | \n", - "
378 | \n", - "378 | \n", - "0.310217 | \n", - "
542 | \n", - "542 | \n", - "0.607177 | \n", - "
80 | \n", - "80 | \n", - "0.861696 | \n", - "
282 | \n", - "282 | \n", - "0.204316 | \n", - "
976 | \n", - "976 | \n", - "0.059688 | \n", - "
924 | \n", - "924 | \n", - "0.372837 | \n", - "
329 | \n", - "329 | \n", - "0.406915 | \n", - "
131 | \n", - "131 | \n", - "0.402420 | \n", - "
607 | \n", - "607 | \n", - "0.078909 | \n", - "
\n", - " | number | \n", - "number_copy_1_percent | \n", - "number_copy_5_percent | \n", - "number_copy_10_percent | \n", - "
---|---|---|---|---|
0 | \n", - "0.144616 | \n", - "0.144616 | \n", - "0.144616 | \n", - "0.144616 | \n", - "
1 | \n", - "0.077515 | \n", - "0.077515 | \n", - "0.077515 | \n", - "0.077515 | \n", - "
2 | \n", - "0.155933 | \n", - "0.155933 | \n", - "0.155933 | \n", - "0.155933 | \n", - "
3 | \n", - "0.097209 | \n", - "0.097209 | \n", - "0.097209 | \n", - "0.097209 | \n", - "
4 | \n", - "0.323750 | \n", - "0.323750 | \n", - "0.323750 | \n", - "0.323750 | \n", - "
... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "
995 | \n", - "0.182107 | \n", - "0.182107 | \n", - "0.182107 | \n", - "0.182107 | \n", - "
996 | \n", - "0.787988 | \n", - "0.787988 | \n", - "0.787988 | \n", - "0.787988 | \n", - "
997 | \n", - "0.148707 | \n", - "0.148707 | \n", - "0.148707 | \n", - "0.148707 | \n", - "
998 | \n", - "0.153121 | \n", - "0.153121 | \n", - "0.153121 | \n", - "0.153121 | \n", - "
999 | \n", - "0.474737 | \n", - "0.474737 | \n", - "0.474737 | \n", - "0.474737 | \n", - "
1000 rows × 4 columns
\n", - "\n", - " | number | \n", - "number_copy_1_percent | \n", - "number_copy_5_percent | \n", - "number_copy_10_percent | \n", - "
---|---|---|---|---|
347 | \n", - "0.372389 | \n", - "0.372389 | \n", - "0.372389 | \n", - "0.372389 | \n", - "
934 | \n", - "0.327766 | \n", - "0.327766 | \n", - "0.327766 | \n", - "0.327766 | \n", - "
927 | \n", - "0.753892 | \n", - "0.753892 | \n", - "0.753892 | \n", - "0.753892 | \n", - "
997 | \n", - "0.148707 | \n", - "0.148707 | \n", - "0.148707 | \n", - "0.148707 | \n", - "
167 | \n", - "0.730901 | \n", - "0.730901 | \n", - "0.730901 | \n", - "0.730901 | \n", - "
914 | \n", - "0.841330 | \n", - "0.841330 | \n", - "0.841330 | \n", - "0.841330 | \n", - "
432 | \n", - "0.897466 | \n", - "0.897466 | \n", - "0.897466 | \n", - "0.897466 | \n", - "
587 | \n", - "0.411685 | \n", - "0.411685 | \n", - "0.411685 | \n", - "0.411685 | \n", - "
884 | \n", - "0.378794 | \n", - "0.378794 | \n", - "0.378794 | \n", - "0.378794 | \n", - "
379 | \n", - "0.265429 | \n", - "0.265429 | \n", - "0.265429 | \n", - "0.264843 | \n", - "
\n", - " | number | \n", - "number_copy_1_percent | \n", - "number_copy_5_percent | \n", - "number_copy_10_percent | \n", - "
---|---|---|---|---|
366 | \n", - "0.425525 | \n", - "0.425525 | \n", - "0.425525 | \n", - "0.425525 | \n", - "
145 | \n", - "0.246589 | \n", - "0.246589 | \n", - "0.246589 | \n", - "0.246589 | \n", - "
538 | \n", - "0.503701 | \n", - "0.503701 | \n", - "0.503701 | \n", - "0.503701 | \n", - "
256 | \n", - "0.118901 | \n", - "0.118901 | \n", - "0.491932 | \n", - "0.118901 | \n", - "
156 | \n", - "0.773215 | \n", - "0.773215 | \n", - "0.773215 | \n", - "0.773215 | \n", - "
500 | \n", - "0.441087 | \n", - "0.441087 | \n", - "0.441087 | \n", - "0.441087 | \n", - "
325 | \n", - "0.095068 | \n", - "0.095068 | \n", - "0.095068 | \n", - "0.095068 | \n", - "
97 | \n", - "0.209842 | \n", - "0.209842 | \n", - "0.209842 | \n", - "0.487348 | \n", - "
905 | \n", - "0.117657 | \n", - "0.491084 | \n", - "0.117657 | \n", - "0.117657 | \n", - "
251 | \n", - "0.961305 | \n", - "0.961305 | \n", - "0.961305 | \n", - "0.961305 | \n", - "
' + table._repr_html_() + ' | ' for table in table_list]) +\n", - " '
\n",
- "\n",
- "
| \n",
- "\n",
- "
|
\n",
- "\n",
- "
| \n",
- "\n",
- "
|
\n",
- "\n",
- "
| \n",
- "\n",
- "
|
\n", - " | diff. list Mean(KNN) | \n", - "diff. list Var.(KNN) | \n", - "diff. list Mean(MI) | \n", - "diff. list Var.(MI) | \n", - "
---|---|---|---|---|
1%_number | \n", - "0.000790 | \n", - "4.568702e-07 | \n", - "NaN | \n", - "NaN | \n", - "
5%_number | \n", - "0.000676 | \n", - "3.072444e-07 | \n", - "NaN | \n", - "NaN | \n", - "
10%_number | \n", - "0.000648 | \n", - "2.480609e-07 | \n", - "NaN | \n", - "NaN | \n", - "
1%_number | \n", - "NaN | \n", - "NaN | \n", - "0.269369 | \n", - "0.018130 | \n", - "
5%_number | \n", - "NaN | \n", - "NaN | \n", - "0.184841 | \n", - "0.014921 | \n", - "
10%_number | \n", - "NaN | \n", - "NaN | \n", - "0.231501 | \n", - "0.020024 | \n", - "
\n", - " | Id | \n", - "MSSubClass | \n", - "MSZoning | \n", - "LotFrontage | \n", - "LotArea | \n", - "Street | \n", - "Alley | \n", - "LotShape | \n", - "LandContour | \n", - "Utilities | \n", - "... | \n", - "PoolArea | \n", - "PoolQC | \n", - "Fence | \n", - "MiscFeature | \n", - "MiscVal | \n", - "MoSold | \n", - "YrSold | \n", - "SaleType | \n", - "SaleCondition | \n", - "SalePrice | \n", - "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
820 | \n", - "821 | \n", - "60 | \n", - "RL | \n", - "72.0 | \n", - "7226 | \n", - "Pave | \n", - "NaN | \n", - "IR1 | \n", - "Lvl | \n", - "AllPub | \n", - "... | \n", - "0 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "6 | \n", - "2008 | \n", - "WD | \n", - "Normal | \n", - "183000 | \n", - "
1390 | \n", - "1391 | \n", - "20 | \n", - "RL | \n", - "70.0 | \n", - "9100 | \n", - "Pave | \n", - "NaN | \n", - "Reg | \n", - "Lvl | \n", - "AllPub | \n", - "... | \n", - "0 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "9 | \n", - "2006 | \n", - "WD | \n", - "Normal | \n", - "235000 | \n", - "
535 | \n", - "536 | \n", - "190 | \n", - "RL | \n", - "70.0 | \n", - "7000 | \n", - "Pave | \n", - "NaN | \n", - "Reg | \n", - "Lvl | \n", - "AllPub | \n", - "... | \n", - "0 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "1 | \n", - "2008 | \n", - "WD | \n", - "Normal | \n", - "107500 | \n", - "
1236 | \n", - "1237 | \n", - "160 | \n", - "RL | \n", - "36.0 | \n", - "2628 | \n", - "Pave | \n", - "NaN | \n", - "Reg | \n", - "Lvl | \n", - "AllPub | \n", - "... | \n", - "0 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "6 | \n", - "2010 | \n", - "WD | \n", - "Normal | \n", - "175500 | \n", - "
1337 | \n", - "1338 | \n", - "30 | \n", - "RM | \n", - "153.0 | \n", - "4118 | \n", - "Pave | \n", - "Grvl | \n", - "IR1 | \n", - "Bnk | \n", - "AllPub | \n", - "... | \n", - "0 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "3 | \n", - "2006 | \n", - "WD | \n", - "Normal | \n", - "52500 | \n", - "
674 | \n", - "675 | \n", - "20 | \n", - "RL | \n", - "80.0 | \n", - "9200 | \n", - "Pave | \n", - "NaN | \n", - "Reg | \n", - "Lvl | \n", - "AllPub | \n", - "... | \n", - "0 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "7 | \n", - "2008 | \n", - "WD | \n", - "Normal | \n", - "140000 | \n", - "
604 | \n", - "605 | \n", - "20 | \n", - "RL | \n", - "88.0 | \n", - "12803 | \n", - "Pave | \n", - "NaN | \n", - "IR1 | \n", - "Lvl | \n", - "AllPub | \n", - "... | \n", - "0 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "9 | \n", - "2008 | \n", - "WD | \n", - "Normal | \n", - "221000 | \n", - "
605 | \n", - "606 | \n", - "60 | \n", - "RL | \n", - "85.0 | \n", - "13600 | \n", - "Pave | \n", - "NaN | \n", - "Reg | \n", - "Lvl | \n", - "AllPub | \n", - "... | \n", - "0 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "10 | \n", - "2009 | \n", - "WD | \n", - "Normal | \n", - "205000 | \n", - "
1218 | \n", - "1219 | \n", - "50 | \n", - "RM | \n", - "52.0 | \n", - "6240 | \n", - "Pave | \n", - "NaN | \n", - "Reg | \n", - "Lvl | \n", - "AllPub | \n", - "... | \n", - "0 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "7 | \n", - "2006 | \n", - "WD | \n", - "Normal | \n", - "80500 | \n", - "
882 | \n", - "883 | \n", - "60 | \n", - "RL | \n", - "NaN | \n", - "9636 | \n", - "Pave | \n", - "NaN | \n", - "IR1 | \n", - "Lvl | \n", - "AllPub | \n", - "... | \n", - "0 | \n", - "NaN | \n", - "MnPrv | \n", - "NaN | \n", - "0 | \n", - "12 | \n", - "2009 | \n", - "WD | \n", - "Normal | \n", - "178000 | \n", - "
10 rows × 81 columns
\n", - "\n", - " | SalePrice | \n", - "sp_copy_1_percent | \n", - "sp_copy_5_percent | \n", - "sp_copy_10_percent | \n", - "
---|---|---|---|---|
0 | \n", - "208500 | \n", - "208500 | \n", - "208500 | \n", - "208500 | \n", - "
1 | \n", - "181500 | \n", - "181500 | \n", - "181500 | \n", - "181500 | \n", - "
2 | \n", - "223500 | \n", - "223500 | \n", - "223500 | \n", - "223500 | \n", - "
3 | \n", - "140000 | \n", - "140000 | \n", - "140000 | \n", - "140000 | \n", - "
4 | \n", - "250000 | \n", - "250000 | \n", - "250000 | \n", - "250000 | \n", - "
\n", - " | SalePrice | \n", - "sp_copy_1_percent | \n", - "sp_copy_5_percent | \n", - "sp_copy_10_percent | \n", - "
---|---|---|---|---|
0 | \n", - "0.241078 | \n", - "0.241078 | \n", - "0.241078 | \n", - "0.241078 | \n", - "
1 | \n", - "0.203583 | \n", - "0.203583 | \n", - "0.203583 | \n", - "0.203583 | \n", - "
2 | \n", - "0.261908 | \n", - "0.261908 | \n", - "0.261908 | \n", - "0.261908 | \n", - "
3 | \n", - "0.145952 | \n", - "0.145952 | \n", - "0.145952 | \n", - "0.145952 | \n", - "
4 | \n", - "0.298709 | \n", - "0.298709 | \n", - "0.298709 | \n", - "0.298709 | \n", - "
\n", - " | SalePrice | \n", - "sp_copy_1_percent | \n", - "sp_copy_5_percent | \n", - "sp_copy_10_percent | \n", - "
---|---|---|---|---|
0 | \n", - "208500.0 | \n", - "208500.0 | \n", - "208500.0 | \n", - "208500.0 | \n", - "
1 | \n", - "181500.0 | \n", - "181500.0 | \n", - "181500.0 | \n", - "181500.0 | \n", - "
2 | \n", - "223500.0 | \n", - "223500.0 | \n", - "223500.0 | \n", - "223500.0 | \n", - "
3 | \n", - "140000.0 | \n", - "140000.0 | \n", - "140000.0 | \n", - "140000.0 | \n", - "
4 | \n", - "250000.0 | \n", - "250000.0 | \n", - "250000.0 | \n", - "250000.0 | \n", - "
\n", - " | SalePrice | \n", - "sp_copy_1_percent | \n", - "sp_copy_5_percent | \n", - "sp_copy_10_percent | \n", - "
---|---|---|---|---|
0 | \n", - "0.241078 | \n", - "0.241078 | \n", - "0.240855 | \n", - "0.241078 | \n", - "
1 | \n", - "0.203583 | \n", - "0.203583 | \n", - "0.203583 | \n", - "0.203583 | \n", - "
2 | \n", - "0.261908 | \n", - "0.261908 | \n", - "0.261908 | \n", - "0.261908 | \n", - "
3 | \n", - "0.145952 | \n", - "0.145952 | \n", - "0.145952 | \n", - "0.145952 | \n", - "
4 | \n", - "0.298709 | \n", - "0.298709 | \n", - "0.298709 | \n", - "0.298709 | \n", - "
\n", - " | diff. list Mean(KNN) | \n", - "diff. list Var.(KNN) | \n", - "
---|---|---|
1%_saleprice | \n", - "170.000 | \n", - "4.240000e+04 | \n", - "
5%_saleprice | \n", - "444.944 | \n", - "2.554554e+06 | \n", - "
10%_saleprice | \n", - "564.784 | \n", - "6.304767e+06 | \n", - "
\n", - " | diff. list Mean(KNN) scaled | \n", - "diff. list Var.(KNN) scaled | \n", - "
---|---|---|
1%_saleprice | \n", - "0.000000 | \n", - "0.000000e+00 | \n", - "
5%_saleprice | \n", - "0.000026 | \n", - "2.134350e-08 | \n", - "
10%_saleprice | \n", - "0.000032 | \n", - "1.417383e-08 | \n", - "
\n", - " | SalePrice | \n", - "sp_copy_1_percent | \n", - "sp_copy_5_percent | \n", - "sp_copy_10_percent | \n", - "
---|---|---|---|---|
571 | \n", - "120000 | \n", - "120000.0 | \n", - "120000.000000 | \n", - "182343.817778 | \n", - "
2 | \n", - "223500 | \n", - "223500.0 | \n", - "223500.000000 | \n", - "223500.000000 | \n", - "
313 | \n", - "375000 | \n", - "375000.0 | \n", - "375000.000000 | \n", - "375000.000000 | \n", - "
377 | \n", - "340000 | \n", - "340000.0 | \n", - "182457.342105 | \n", - "182343.817778 | \n", - "
987 | \n", - "395192 | \n", - "395192.0 | \n", - "395192.000000 | \n", - "395192.000000 | \n", - "
\n", - " | SalePrice | \n", - "sp_copy_1_percent | \n", - "sp_copy_5_percent | \n", - "sp_copy_10_percent | \n", - "
---|---|---|---|---|
216 | \n", - "0.243161 | \n", - "0.243161 | \n", - "0.243161 | \n", - "0.243161 | \n", - "
1 | \n", - "0.203583 | \n", - "0.203583 | \n", - "0.203583 | \n", - "0.203583 | \n", - "
575 | \n", - "0.116095 | \n", - "0.116095 | \n", - "0.116095 | \n", - "0.116095 | \n", - "
397 | \n", - "0.186918 | \n", - "0.186918 | \n", - "0.186918 | \n", - "0.205253 | \n", - "
703 | \n", - "0.145952 | \n", - "0.145952 | \n", - "0.145952 | \n", - "0.145952 | \n", - "
\n", - " | diff. list Mean(MI) | \n", - "diff. list Var.(MI) | \n", - "
---|---|---|
1%_saleprice | \n", - "55971.636768 | \n", - "1.103367e+09 | \n", - "
5%_saleprice | \n", - "58478.242105 | \n", - "3.139731e+09 | \n", - "
10%_saleprice | \n", - "61028.709911 | \n", - "3.846675e+09 | \n", - "
\n", - " | diff. list Mean(MI) scaled | \n", - "diff. list Var.(MI) scaled | \n", - "
---|---|---|
1%_saleprice_scaled | \n", - "0.000000 | \n", - "0.000000 | \n", - "
5%_saleprice_scaled | \n", - "0.008936 | \n", - "0.001404 | \n", - "
10%_saleprice_scaled | \n", - "0.007492 | \n", - "0.000443 | \n", - "
\n", - " | diff. list Mean(KNN) | \n", - "diff. list Var.(KNN) | \n", - "diff. list Mean(KNN) scaled | \n", - "diff. list Var.(KNN) scaled | \n", - "diff. list Mean(MI) | \n", - "diff. list Var.(MI) | \n", - "diff. list Mean(MI) scaled | \n", - "diff. list Var.(MI) scaled | \n", - "
---|---|---|---|---|---|---|---|---|
1%_saleprice | \n", - "170.000 | \n", - "4.240000e+04 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "
5%_saleprice | \n", - "444.944 | \n", - "2.554554e+06 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "
10%_saleprice | \n", - "564.784 | \n", - "6.304767e+06 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "
1%_saleprice | \n", - "NaN | \n", - "NaN | \n", - "0.000000 | \n", - "0.000000e+00 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "
5%_saleprice | \n", - "NaN | \n", - "NaN | \n", - "0.000026 | \n", - "2.134350e-08 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "
10%_saleprice | \n", - "NaN | \n", - "NaN | \n", - "0.000032 | \n", - "1.417383e-08 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "
1%_saleprice | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "55971.636768 | \n", - "1.103367e+09 | \n", - "NaN | \n", - "NaN | \n", - "
5%_saleprice | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "58478.242105 | \n", - "3.139731e+09 | \n", - "NaN | \n", - "NaN | \n", - "
10%_saleprice | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "61028.709911 | \n", - "3.846675e+09 | \n", - "NaN | \n", - "NaN | \n", - "
1%_saleprice_scaled | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0.000000 | \n", - "0.000000 | \n", - "
5%_saleprice_scaled | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0.008936 | \n", - "0.001404 | \n", - "
10%_saleprice_scaled | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0.007492 | \n", - "0.000443 | \n", - "
\n", + " | number | \n", + "
---|---|
823 | \n", + "0.925249 | \n", + "
266 | \n", + "0.077479 | \n", + "
959 | \n", + "0.897447 | \n", + "
493 | \n", + "0.259423 | \n", + "
768 | \n", + "0.193178 | \n", + "
105 | \n", + "0.174632 | \n", + "
610 | \n", + "0.456349 | \n", + "
824 | \n", + "0.688290 | \n", + "
968 | \n", + "0.493667 | \n", + "
849 | \n", + "0.368834 | \n", + "
\n", + " | number | \n", + "number_copy_1_percent | \n", + "number_copy_5_percent | \n", + "number_copy_10_percent | \n", + "
---|---|---|---|---|
0 | \n", + "0.438564 | \n", + "0.438564 | \n", + "0.438564 | \n", + "0.438564 | \n", + "
1 | \n", + "0.836801 | \n", + "0.836801 | \n", + "0.836801 | \n", + "0.836801 | \n", + "
2 | \n", + "0.798077 | \n", + "0.798077 | \n", + "0.798077 | \n", + "0.798077 | \n", + "
3 | \n", + "0.269161 | \n", + "0.269161 | \n", + "0.269161 | \n", + "0.269161 | \n", + "
4 | \n", + "0.830948 | \n", + "0.830948 | \n", + "0.830948 | \n", + "0.830948 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
995 | \n", + "0.920130 | \n", + "0.920130 | \n", + "0.920130 | \n", + "0.920130 | \n", + "
996 | \n", + "0.007397 | \n", + "0.007397 | \n", + "0.007397 | \n", + "0.007397 | \n", + "
997 | \n", + "0.163360 | \n", + "0.163360 | \n", + "0.163360 | \n", + "0.163360 | \n", + "
998 | \n", + "0.553700 | \n", + "0.553700 | \n", + "0.553700 | \n", + "0.553700 | \n", + "
999 | \n", + "0.771442 | \n", + "0.771442 | \n", + "0.771442 | \n", + "0.771442 | \n", + "
1000 rows × 4 columns
\n", + "\n", + " | number | \n", + "number_copy_1_percent | \n", + "number_copy_5_percent | \n", + "number_copy_10_percent | \n", + "
---|---|---|---|---|
701 | \n", + "0.244629 | \n", + "0.244629 | \n", + "0.244629 | \n", + "0.244629 | \n", + "
39 | \n", + "0.517202 | \n", + "0.517202 | \n", + "0.517202 | \n", + "0.517202 | \n", + "
335 | \n", + "0.100813 | \n", + "0.100813 | \n", + "0.100813 | \n", + "0.100813 | \n", + "
204 | \n", + "0.277534 | \n", + "0.277534 | \n", + "0.277534 | \n", + "0.277534 | \n", + "
391 | \n", + "0.859032 | \n", + "0.859032 | \n", + "0.857231 | \n", + "0.859032 | \n", + "
203 | \n", + "0.252622 | \n", + "0.252622 | \n", + "0.252622 | \n", + "0.252622 | \n", + "
144 | \n", + "0.844587 | \n", + "0.844587 | \n", + "0.844587 | \n", + "0.844587 | \n", + "
201 | \n", + "0.431603 | \n", + "0.431603 | \n", + "0.431603 | \n", + "0.431603 | \n", + "
749 | \n", + "0.848537 | \n", + "0.848537 | \n", + "0.848537 | \n", + "0.848240 | \n", + "
497 | \n", + "0.464531 | \n", + "0.464531 | \n", + "0.464531 | \n", + "0.464531 | \n", + "
\n", + " | number | \n", + "number_copy_1_percent | \n", + "number_copy_5_percent | \n", + "number_copy_10_percent | \n", + "
---|---|---|---|---|
293 | \n", + "0.583231 | \n", + "0.583231 | \n", + "0.583231 | \n", + "0.583231 | \n", + "
461 | \n", + "0.867035 | \n", + "0.867035 | \n", + "0.867035 | \n", + "0.867035 | \n", + "
875 | \n", + "0.676228 | \n", + "0.676228 | \n", + "0.676228 | \n", + "0.676228 | \n", + "
999 | \n", + "0.771442 | \n", + "0.771442 | \n", + "0.771442 | \n", + "0.771442 | \n", + "
75 | \n", + "0.909050 | \n", + "0.909050 | \n", + "0.909050 | \n", + "0.909050 | \n", + "
98 | \n", + "0.629583 | \n", + "0.629583 | \n", + "0.629583 | \n", + "0.629583 | \n", + "
381 | \n", + "0.181614 | \n", + "0.181614 | \n", + "0.181614 | \n", + "0.181614 | \n", + "
592 | \n", + "0.523109 | \n", + "0.523109 | \n", + "0.523109 | \n", + "0.523109 | \n", + "
155 | \n", + "0.038074 | \n", + "0.038074 | \n", + "0.038074 | \n", + "0.038074 | \n", + "
630 | \n", + "0.869200 | \n", + "0.869200 | \n", + "0.869200 | \n", + "0.869200 | \n", + "
' + table._repr_html_() + ' | ' for table in table_list]) +\n", + " '
\n",
+ "\n",
+ "
| \n",
+ "\n",
+ "
|
\n",
+ "\n",
+ "
| \n",
+ "\n",
+ "
|
\n",
+ "\n",
+ "
| \n",
+ "\n",
+ "
|
\n",
+ "\n",
+ "
| \n",
+ "\n",
+ "
|
\n", + " | Id | \n", + "MSSubClass | \n", + "MSZoning | \n", + "LotFrontage | \n", + "LotArea | \n", + "Street | \n", + "Alley | \n", + "LotShape | \n", + "LandContour | \n", + "Utilities | \n", + "... | \n", + "PoolArea | \n", + "PoolQC | \n", + "Fence | \n", + "MiscFeature | \n", + "MiscVal | \n", + "MoSold | \n", + "YrSold | \n", + "SaleType | \n", + "SaleCondition | \n", + "SalePrice | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
740 | \n", + "741 | \n", + "70 | \n", + "RM | \n", + "60.0 | \n", + "9600 | \n", + "Pave | \n", + "Grvl | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "GdPrv | \n", + "NaN | \n", + "0 | \n", + "5 | \n", + "2007 | \n", + "WD | \n", + "Abnorml | \n", + "132000 | \n", + "
1209 | \n", + "1210 | \n", + "20 | \n", + "RL | \n", + "85.0 | \n", + "10182 | \n", + "Pave | \n", + "NaN | \n", + "IR1 | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "5 | \n", + "2006 | \n", + "New | \n", + "Partial | \n", + "290000 | \n", + "
64 | \n", + "65 | \n", + "60 | \n", + "RL | \n", + "NaN | \n", + "9375 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "GdPrv | \n", + "NaN | \n", + "0 | \n", + "2 | \n", + "2009 | \n", + "WD | \n", + "Normal | \n", + "219500 | \n", + "
208 | \n", + "209 | \n", + "60 | \n", + "RL | \n", + "NaN | \n", + "14364 | \n", + "Pave | \n", + "NaN | \n", + "IR1 | \n", + "Low | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "4 | \n", + "2007 | \n", + "WD | \n", + "Normal | \n", + "277000 | \n", + "
436 | \n", + "437 | \n", + "50 | \n", + "RM | \n", + "40.0 | \n", + "4400 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "10 | \n", + "2006 | \n", + "WD | \n", + "Normal | \n", + "116000 | \n", + "
19 | \n", + "20 | \n", + "20 | \n", + "RL | \n", + "70.0 | \n", + "7560 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "MnPrv | \n", + "NaN | \n", + "0 | \n", + "5 | \n", + "2009 | \n", + "COD | \n", + "Abnorml | \n", + "139000 | \n", + "
1449 | \n", + "1450 | \n", + "180 | \n", + "RM | \n", + "21.0 | \n", + "1533 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "8 | \n", + "2006 | \n", + "WD | \n", + "Abnorml | \n", + "92000 | \n", + "
449 | \n", + "450 | \n", + "50 | \n", + "RM | \n", + "50.0 | \n", + "6000 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "6 | \n", + "2007 | \n", + "WD | \n", + "Normal | \n", + "120000 | \n", + "
1185 | \n", + "1186 | \n", + "50 | \n", + "RL | \n", + "60.0 | \n", + "9738 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "3 | \n", + "2006 | \n", + "WD | \n", + "Normal | \n", + "104900 | \n", + "
1023 | \n", + "1024 | \n", + "120 | \n", + "RL | \n", + "43.0 | \n", + "3182 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "5 | \n", + "2008 | \n", + "WD | \n", + "Normal | \n", + "191000 | \n", + "
10 rows × 81 columns
\n", + "\n", + " | SalePrice | \n", + "sp_copy_1_percent | \n", + "sp_copy_5_percent | \n", + "sp_copy_10_percent | \n", + "
---|---|---|---|---|
0 | \n", + "208500 | \n", + "208500 | \n", + "208500 | \n", + "208500 | \n", + "
1 | \n", + "181500 | \n", + "181500 | \n", + "181500 | \n", + "181500 | \n", + "
2 | \n", + "223500 | \n", + "223500 | \n", + "223500 | \n", + "223500 | \n", + "
3 | \n", + "140000 | \n", + "140000 | \n", + "140000 | \n", + "140000 | \n", + "
4 | \n", + "250000 | \n", + "250000 | \n", + "250000 | \n", + "250000 | \n", + "
\n", + " | SalePrice | \n", + "sp_copy_1_percent | \n", + "sp_copy_5_percent | \n", + "sp_copy_10_percent | \n", + "
---|---|---|---|---|
0 | \n", + "0.241078 | \n", + "0.241078 | \n", + "0.241078 | \n", + "0.241078 | \n", + "
1 | \n", + "0.203583 | \n", + "0.203583 | \n", + "0.203583 | \n", + "0.203583 | \n", + "
2 | \n", + "0.261908 | \n", + "0.261908 | \n", + "0.261908 | \n", + "0.261908 | \n", + "
3 | \n", + "0.145952 | \n", + "0.145952 | \n", + "0.145952 | \n", + "0.145952 | \n", + "
4 | \n", + "0.298709 | \n", + "0.298709 | \n", + "0.298709 | \n", + "0.298709 | \n", + "
\n", + " | SalePrice | \n", + "sp_copy_1_percent | \n", + "sp_copy_5_percent | \n", + "sp_copy_10_percent | \n", + "
---|---|---|---|---|
0 | \n", + "208500.0 | \n", + "208500.0 | \n", + "208500.0 | \n", + "208500.0 | \n", + "
1 | \n", + "181500.0 | \n", + "181500.0 | \n", + "181500.0 | \n", + "181500.0 | \n", + "
2 | \n", + "223500.0 | \n", + "223500.0 | \n", + "223500.0 | \n", + "223500.0 | \n", + "
3 | \n", + "140000.0 | \n", + "140000.0 | \n", + "140000.0 | \n", + "140000.0 | \n", + "
4 | \n", + "250000.0 | \n", + "250000.0 | \n", + "250000.0 | \n", + "250000.0 | \n", + "
\n", + " | SalePrice | \n", + "sp_copy_1_percent | \n", + "sp_copy_5_percent | \n", + "sp_copy_10_percent | \n", + "
---|---|---|---|---|
0 | \n", + "0.241078 | \n", + "0.241078 | \n", + "0.241078 | \n", + "0.241078 | \n", + "
1 | \n", + "0.203583 | \n", + "0.203583 | \n", + "0.203583 | \n", + "0.203583 | \n", + "
2 | \n", + "0.261908 | \n", + "0.261908 | \n", + "0.261908 | \n", + "0.261908 | \n", + "
3 | \n", + "0.145952 | \n", + "0.145952 | \n", + "0.145952 | \n", + "0.145952 | \n", + "
4 | \n", + "0.298709 | \n", + "0.298709 | \n", + "0.298709 | \n", + "0.298709 | \n", + "
\n", + " | diff. list Mean(KNN) | \n", + "diff. list Var.(KNN) | \n", + "
---|---|---|
1%_saleprice | \n", + "105.000 | \n", + "5.210500e+04 | \n", + "
5%_saleprice | \n", + "163.012 | \n", + "4.601896e+04 | \n", + "
10%_saleprice | \n", + "470.800 | \n", + "3.667553e+06 | \n", + "
\n", + " | diff. list Mean(KNN) scaled | \n", + "diff. list Var.(KNN) scaled | \n", + "
---|---|---|
1%_saleprice | \n", + "0.000000 | \n", + "0.000000e+00 | \n", + "
5%_saleprice | \n", + "0.000012 | \n", + "7.654124e-09 | \n", + "
10%_saleprice | \n", + "0.000265 | \n", + "2.973842e-06 | \n", + "
\n", + " | SalePrice | \n", + "sp_copy_1_percent | \n", + "sp_copy_5_percent | \n", + "sp_copy_10_percent | \n", + "
---|---|---|---|---|
436 | \n", + "116000 | \n", + "116000.0 | \n", + "116000.0 | \n", + "116000.000000 | \n", + "
21 | \n", + "139400 | \n", + "139400.0 | \n", + "139400.0 | \n", + "139400.000000 | \n", + "
618 | \n", + "314813 | \n", + "314813.0 | \n", + "314813.0 | \n", + "314813.000000 | \n", + "
207 | \n", + "141000 | \n", + "141000.0 | \n", + "141000.0 | \n", + "182369.783333 | \n", + "
366 | \n", + "159000 | \n", + "159000.0 | \n", + "159000.0 | \n", + "159000.000000 | \n", + "
\n", + " | SalePrice | \n", + "sp_copy_1_percent | \n", + "sp_copy_5_percent | \n", + "sp_copy_10_percent | \n", + "
---|---|---|---|---|
457 | \n", + "0.307041 | \n", + "0.307041 | \n", + "0.307041 | \n", + "0.201890 | \n", + "
876 | \n", + "0.135190 | \n", + "0.135190 | \n", + "0.135190 | \n", + "0.135190 | \n", + "
361 | \n", + "0.152895 | \n", + "0.152895 | \n", + "0.152895 | \n", + "0.152895 | \n", + "
682 | \n", + "0.191779 | \n", + "0.191779 | \n", + "0.191779 | \n", + "0.201890 | \n", + "
523 | \n", + "0.208096 | \n", + "0.208096 | \n", + "0.208096 | \n", + "0.208096 | \n", + "
\n", + " | diff. list Mean(MI) | \n", + "diff. list Var.(MI) | \n", + "
---|---|---|
1%_saleprice | \n", + "47198.616970 | \n", + "6.345466e+08 | \n", + "
5%_saleprice | \n", + "54438.206863 | \n", + "1.768876e+09 | \n", + "
10%_saleprice | \n", + "58045.636667 | \n", + "2.875291e+09 | \n", + "
\n", + " | diff. list Mean(MI) scaled | \n", + "diff. list Var.(MI) scaled | \n", + "
---|---|---|
1%_saleprice_scaled | \n", + "0.000000 | \n", + "0.000000 | \n", + "
5%_saleprice_scaled | \n", + "0.001618 | \n", + "0.000056 | \n", + "
10%_saleprice_scaled | \n", + "0.018922 | \n", + "0.004251 | \n", + "
\n",
+ "\n",
+ "
| \n",
+ "\n",
+ "
| \n",
+ "\n",
+ "
| \n",
+ "\n",
+ "
|