From 7c26890c610fe8d90d8053fc1ff333373d81da93 Mon Sep 17 00:00:00 2001 From: "girish.kumar" Date: Tue, 14 Oct 2025 16:50:16 +0200 Subject: [PATCH] Completed pandas lab --- your-code/pandas_1.ipynb | 1143 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 1090 insertions(+), 53 deletions(-) diff --git a/your-code/pandas_1.ipynb b/your-code/pandas_1.ipynb index a6c64554..edfceeea 100644 --- a/your-code/pandas_1.ipynb +++ b/your-code/pandas_1.ipynb @@ -46,9 +46,191 @@ "cell_type": "code", "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['ArrowDtype',\n", + " 'BooleanDtype',\n", + " 'Categorical',\n", + " 'CategoricalDtype',\n", + " 'CategoricalIndex',\n", + " 'DataFrame',\n", + " 'DateOffset',\n", + " 'DatetimeIndex',\n", + " 'DatetimeTZDtype',\n", + " 'ExcelFile',\n", + " 'ExcelWriter',\n", + " 'Flags',\n", + " 'Float32Dtype',\n", + " 'Float64Dtype',\n", + " 'Grouper',\n", + " 'HDFStore',\n", + " 'Index',\n", + " 'IndexSlice',\n", + " 'Int16Dtype',\n", + " 'Int32Dtype',\n", + " 'Int64Dtype',\n", + " 'Int8Dtype',\n", + " 'Interval',\n", + " 'IntervalDtype',\n", + " 'IntervalIndex',\n", + " 'MultiIndex',\n", + " 'NA',\n", + " 'NaT',\n", + " 'NamedAgg',\n", + " 'Period',\n", + " 'PeriodDtype',\n", + " 'PeriodIndex',\n", + " 'RangeIndex',\n", + " 'Series',\n", + " 'SparseDtype',\n", + " 'StringDtype',\n", + " 'Timedelta',\n", + " 'TimedeltaIndex',\n", + " 'Timestamp',\n", + " 'UInt16Dtype',\n", + " 'UInt32Dtype',\n", + " 'UInt64Dtype',\n", + " 'UInt8Dtype',\n", + " '__all__',\n", + " '__builtins__',\n", + " '__cached__',\n", + " '__doc__',\n", + " '__docformat__',\n", + " '__file__',\n", + " '__git_version__',\n", + " '__loader__',\n", + " '__name__',\n", + " '__package__',\n", + " '__path__',\n", + " '__spec__',\n", + " '__version__',\n", + " '_built_with_meson',\n", + " '_config',\n", + " '_is_numpy_dev',\n", + " '_libs',\n", + " '_pandas_datetime_CAPI',\n", + " '_pandas_parser_CAPI',\n", + " '_testing',\n", + " '_typing',\n", + " '_version_meson',\n", + " 'annotations',\n", + " 'api',\n", + " 'array',\n", + " 'arrays',\n", + " 'bdate_range',\n", + " 'compat',\n", + " 'concat',\n", + " 'core',\n", + " 'crosstab',\n", + " 'cut',\n", + " 'date_range',\n", + " 'describe_option',\n", + " 'errors',\n", + " 'eval',\n", + " 'factorize',\n", + " 'from_dummies',\n", + " 'get_dummies',\n", + " 'get_option',\n", + " 'infer_freq',\n", + " 'interval_range',\n", + " 'io',\n", + " 'isna',\n", + " 'isnull',\n", + " 'json_normalize',\n", + " 'lreshape',\n", + " 'melt',\n", + " 'merge',\n", + " 'merge_asof',\n", + " 'merge_ordered',\n", + " 'notna',\n", + " 'notnull',\n", + " 'offsets',\n", + " 'option_context',\n", + " 'options',\n", + " 'pandas',\n", + " 'period_range',\n", + " 'pivot',\n", + " 'pivot_table',\n", + " 'plotting',\n", + " 'qcut',\n", + " 'read_clipboard',\n", + " 'read_csv',\n", + " 'read_excel',\n", + " 'read_feather',\n", + " 'read_fwf',\n", + " 'read_gbq',\n", + " 'read_hdf',\n", + " 'read_html',\n", + " 'read_json',\n", + " 'read_orc',\n", + " 'read_parquet',\n", + " 'read_pickle',\n", + " 'read_sas',\n", + " 'read_spss',\n", + " 'read_sql',\n", + " 'read_sql_query',\n", + " 'read_sql_table',\n", + " 'read_stata',\n", + " 'read_table',\n", + " 'read_xml',\n", + " 'reset_option',\n", + " 'set_eng_float_format',\n", + " 'set_option',\n", + " 'show_versions',\n", + " 'test',\n", + " 'testing',\n", + " 'timedelta_range',\n", + " 'to_datetime',\n", + " 'to_numeric',\n", + " 'to_pickle',\n", + " 'to_timedelta',\n", + " 'tseries',\n", + " 'unique',\n", + " 'util',\n", + " 'value_counts',\n", + " 'wide_to_long']" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "dir(pd)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 5.7\n", + "1 75.2\n", + "2 74.4\n", + "3 84.0\n", + "4 66.5\n", + "5 66.3\n", + "6 55.8\n", + "7 75.7\n", + "8 29.1\n", + "9 43.7\n", + "dtype: float64" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.Series(lst)\n", + "df" ] }, { @@ -62,11 +244,23 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "np.float64(74.4)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "df_third = df[2]\n", + "df_third" ] }, { @@ -78,7 +272,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -96,11 +290,144 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01234
053.195.067.535.078.4
161.340.830.837.887.6
220.673.244.214.691.8
357.40.196.14.269.5
483.620.585.422.835.9
549.069.00.131.889.1
623.340.795.083.826.9
727.626.453.888.868.5
896.696.453.472.450.1
973.739.043.281.634.7
\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4\n", + "0 53.1 95.0 67.5 35.0 78.4\n", + "1 61.3 40.8 30.8 37.8 87.6\n", + "2 20.6 73.2 44.2 14.6 91.8\n", + "3 57.4 0.1 96.1 4.2 69.5\n", + "4 83.6 20.5 85.4 22.8 35.9\n", + "5 49.0 69.0 0.1 31.8 89.1\n", + "6 23.3 40.7 95.0 83.8 26.9\n", + "7 27.6 26.4 53.8 88.8 68.5\n", + "8 96.6 96.4 53.4 72.4 50.1\n", + "9 73.7 39.0 43.2 81.6 34.7" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "dfs = pd.DataFrame(b)\n", + "dfs" ] }, { @@ -112,7 +439,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -130,11 +457,147 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
score_1score_2score_3score_4score_5
053.195.067.535.078.4
161.340.830.837.887.6
220.673.244.214.691.8
357.40.196.14.269.5
483.620.585.422.835.9
549.069.00.131.889.1
623.340.795.083.826.9
727.626.453.888.868.5
896.696.453.472.450.1
973.739.043.281.634.7
\n", + "
" + ], + "text/plain": [ + " score_1 score_2 score_3 score_4 score_5\n", + "0 53.1 95.0 67.5 35.0 78.4\n", + "1 61.3 40.8 30.8 37.8 87.6\n", + "2 20.6 73.2 44.2 14.6 91.8\n", + "3 57.4 0.1 96.1 4.2 69.5\n", + "4 83.6 20.5 85.4 22.8 35.9\n", + "5 49.0 69.0 0.1 31.8 89.1\n", + "6 23.3 40.7 95.0 83.8 26.9\n", + "7 27.6 26.4 53.8 88.8 68.5\n", + "8 96.6 96.4 53.4 72.4 50.1\n", + "9 73.7 39.0 43.2 81.6 34.7" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "df_b = pd.DataFrame(b)\n", + "b_names = [\"score_1\", \"score_2\", \"score_3\", \"score_4\", \"score_5\"]\n", + "df_rename = df_b.copy()\n", + "df_rename.columns = b_names\n", + "df_rename\n" ] }, { @@ -146,11 +609,122 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
score_1score_3score_5
053.167.578.4
161.330.887.6
220.644.291.8
357.496.169.5
483.685.435.9
549.00.189.1
623.395.026.9
727.653.868.5
896.653.450.1
973.743.234.7
\n", + "
" + ], + "text/plain": [ + " score_1 score_3 score_5\n", + "0 53.1 67.5 78.4\n", + "1 61.3 30.8 87.6\n", + "2 20.6 44.2 91.8\n", + "3 57.4 96.1 69.5\n", + "4 83.6 85.4 35.9\n", + "5 49.0 0.1 89.1\n", + "6 23.3 95.0 26.9\n", + "7 27.6 53.8 68.5\n", + "8 96.6 53.4 50.1\n", + "9 73.7 43.2 34.7" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "df_subset = df_rename[[\"score_1\", \"score_3\", \"score_5\"]]\n", + "df_subset" ] }, { @@ -162,11 +736,23 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "np.float64(56.95000000000001)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "df_avg_s3 = np.mean(df_rename[[\"score_3\"]])\n", + "df_avg_s3" ] }, { @@ -178,11 +764,23 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "88.8" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "df_max_s4 = np.max(df_rename[[\"score_4\"]])\n", + "df_max_s4" ] }, { @@ -194,11 +792,23 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "np.float64(40.75)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "df_med_s2 = np.median(df_rename[[\"score_2\"]])\n", + "df_med_s2" ] }, { @@ -210,7 +820,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -231,11 +841,133 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DescriptionQuantityUnitPriceRevenue
0LUNCH BAG APPLE DESIGN11.651.65
1SET OF 60 VINTAGE LEAF CAKE CASES240.5513.20
2RIBBON REEL STRIPES DESIGN11.651.65
3WORLD WAR 2 GLIDERS ASSTD DESIGNS28800.18518.40
4PLAYING CARDS JUBILEE UNION JACK21.252.50
5POPCORN HOLDER70.855.95
6BOX OF VINTAGE ALPHABET BLOCKS111.9511.95
7PARTY BUNTING44.9519.80
8JAZZ HEARTS ADDRESS BOOK100.191.90
9SET OF 4 SANTA PLACE SETTINGS481.2560.00
\n", + "
" + ], + "text/plain": [ + " Description Quantity UnitPrice Revenue\n", + "0 LUNCH BAG APPLE DESIGN 1 1.65 1.65\n", + "1 SET OF 60 VINTAGE LEAF CAKE CASES 24 0.55 13.20\n", + "2 RIBBON REEL STRIPES DESIGN 1 1.65 1.65\n", + "3 WORLD WAR 2 GLIDERS ASSTD DESIGNS 2880 0.18 518.40\n", + "4 PLAYING CARDS JUBILEE UNION JACK 2 1.25 2.50\n", + "5 POPCORN HOLDER 7 0.85 5.95\n", + "6 BOX OF VINTAGE ALPHABET BLOCKS 1 11.95 11.95\n", + "7 PARTY BUNTING 4 4.95 19.80\n", + "8 JAZZ HEARTS ADDRESS BOOK 10 0.19 1.90\n", + "9 SET OF 4 SANTA PLACE SETTINGS 48 1.25 60.00" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "df_prod_orders = pd.DataFrame(orders)\n", + "df_prod_orders" ] }, { @@ -247,11 +979,49 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 10 entries, 0 to 9\n", + "Data columns (total 4 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Description 10 non-null object \n", + " 1 Quantity 10 non-null int64 \n", + " 2 UnitPrice 10 non-null float64\n", + " 3 Revenue 10 non-null float64\n", + "dtypes: float64(2), int64(1), object(1)\n", + "memory usage: 452.0+ bytes\n" + ] + } + ], + "source": [ + "df_prod_orders.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total quantity is: 2978 and Revenue Generated with this order is: 637.0\n" + ] + } + ], "source": [ - "# your code here" + "tot_quantity = df_prod_orders[\"Quantity\"].sum()\n", + "revenue_gen = df_prod_orders[\"Revenue\"].sum()\n", + "\n", + "print(f\"Total quantity is: {tot_quantity} and Revenue Generated with this order is: {revenue_gen}\")\n" ] }, { @@ -263,11 +1033,26 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 18, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Expensive item cost is: 11.95 and less expensive is: 0.18\n", + "Their difference is: 11.77\n" + ] + } + ], "source": [ - "# your code here" + "exp_item = df_prod_orders[\"UnitPrice\"].max()\n", + "least_exp = df_prod_orders[\"UnitPrice\"].min()\n", + "\n", + "print(f\"Expensive item cost is: {exp_item} and less expensive is: {least_exp}\")\n", + "\n", + "diff_in_price = exp_item - least_exp\n", + "print(f\"Their difference is: {diff_in_price}\")" ] }, { @@ -279,7 +1064,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -296,11 +1081,129 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 20, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Serial No.GRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of Admit
0133711844.54.59.6510.92
1231610433.03.58.0010.72
2332211033.52.58.6710.80
3431410322.03.08.2100.65
4533011554.53.09.3410.90
\n", + "
" + ], + "text/plain": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "0 1 337 118 4 4.5 4.5 9.65 \n", + "1 2 316 104 3 3.0 3.5 8.00 \n", + "2 3 322 110 3 3.5 2.5 8.67 \n", + "3 4 314 103 2 2.0 3.0 8.21 \n", + "4 5 330 115 5 4.5 3.0 9.34 \n", + "\n", + " Research Chance of Admit \n", + "0 1 0.92 \n", + "1 1 0.72 \n", + "2 1 0.80 \n", + "3 0 0.65 \n", + "4 1 0.90 " + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "admissions.head()" ] }, { @@ -312,11 +1215,32 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Serial No. 0\n", + "GRE Score 0\n", + "TOEFL Score 0\n", + "University Rating 0\n", + "SOP 0\n", + "LOR 0\n", + "CGPA 0\n", + "Research 0\n", + "Chance of Admit 0\n", + "dtype: int64" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "missing_value_check = admissions.isnull().sum()\n", + "missing_value_check" ] }, { @@ -328,11 +1252,48 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 28, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR \\\n", + "Serial No. \n", + "1 1 337 118 4 4.5 4.5 \n", + "2 2 316 104 3 3.0 3.5 \n", + "3 3 322 110 3 3.5 2.5 \n", + "4 4 314 103 2 2.0 3.0 \n", + "5 5 330 115 5 4.5 3.0 \n", + "... ... ... ... ... ... ... \n", + "381 381 324 110 3 3.5 3.5 \n", + "382 382 325 107 3 3.0 3.5 \n", + "383 383 330 116 4 5.0 4.5 \n", + "384 384 312 103 3 3.5 4.0 \n", + "385 385 333 117 4 5.0 4.0 \n", + "\n", + " CGPA Research Chance of Admit \n", + "Serial No. \n", + "1 9.65 1 0.92 \n", + "2 8.00 1 0.72 \n", + "3 8.67 1 0.80 \n", + "4 8.21 0 0.65 \n", + "5 9.34 1 0.90 \n", + "... ... ... ... \n", + "381 9.04 1 0.82 \n", + "382 9.11 1 0.84 \n", + "383 9.45 1 0.91 \n", + "384 8.78 0 0.67 \n", + "385 9.66 1 0.95 \n", + "\n", + "[385 rows x 9 columns]\n" + ] + } + ], "source": [ - "# your code here" + "admissions.set_index(\"Serial No.\", drop=False, inplace=True)\n", + "print(admissions)" ] }, { @@ -342,6 +1303,55 @@ "Turns out that `GRE Score` and `CGPA` also uniquely identify the data. Show this in the cell below." ] }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shape of admissions dataset (385, 9)\n", + "\n", + "\n", + "Total number of unique GRE scores: 49\n", + "Total number of dupllicate GRE scores: 336\n", + "\n", + "\n", + "Total number of unique CGPA scores: 168\n", + "Total number of duplicate CGPA scores: 217\n", + "\n", + "\n", + "GRE Score and CGPA are not unique\n" + ] + } + ], + "source": [ + "admissions_shape = admissions.shape\n", + "print(\"Shape of admissions dataset\", admissions_shape)\n", + "print(\"\\n\")\n", + "\n", + "unique_GRE_count = admissions[\"GRE Score\"].nunique()\n", + "print(f\"Total number of unique GRE scores: {unique_GRE_count}\")\n", + "\n", + "\n", + "duplicates_GRE = admissions[\"GRE Score\"].duplicated().sum()\n", + "print(f\"Total number of dupllicate GRE scores: {duplicates_GRE}\")\n", + "\n", + "\n", + "print(\"\\n\")\n", + "\n", + "unique_CGPA_count = admissions[\"CGPA\"].nunique()\n", + "print(f\"Total number of unique CGPA scores: {unique_CGPA_count}\")\n", + "\n", + "duplicates_CGPA = admissions[\"CGPA\"].duplicated().sum()\n", + "print(f\"Total number of duplicate CGPA scores: {duplicates_CGPA}\")\n", + "\n", + "print(\"\\n\")\n", + "print(\"GRE Score and CGPA are not unique\")\n" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -351,13 +1361,23 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 33, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "101\n" + ] + } + ], "source": [ - "# your code here" + "condition = (admissions[\"CGPA\"] > 9) & (admissions[\"Research\"] == 1)\n", + "greater_than_9 = condition.sum()\n", + "print(greater_than_9)\n" ] }, { @@ -369,17 +1389,34 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 36, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.8019999999999999\n" + ] + } + ], "source": [ - "# your code here" + "condition_1 = (admissions[\"CGPA\"] > 9) & (admissions[\"SOP\"] < 3.5)\n", + "mean_of_admit = admissions.loc[condition_1, \"Chance of Admit \"].mean()\n", + "print(mean_of_admit)\n" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "base", "language": "python", "name": "python3" }, @@ -393,7 +1430,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.13.5" }, "toc": { "base_numbering": "",