diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 7687137..098795a 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -34,11 +34,37 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "admissions = pd.read_csv('data/Admission_Predict.csv')" + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "0 1 337 118 4 4.5 4.5 9.65 \n", + "1 2 316 104 3 3.0 3.5 8.00 \n", + "2 3 322 110 3 3.5 2.5 8.67 \n", + "3 4 314 103 2 2.0 3.0 8.21 \n", + "4 5 330 115 5 4.5 3.0 9.34 \n", + "\n", + " Research Chance of Admit \n", + "0 1 0.92 \n", + "1 1 0.72 \n", + "2 1 0.80 \n", + "3 0 0.65 \n", + "4 1 0.90 \n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "# Load the CSV file into a DataFrame\n", + "admission_predict = pd.read_csv(\"../data/Admission_Predict.csv\")\n", + "\n", + "# Display the first few rows to confirm it loaded correctly\n", + "print(admission_predict.head())\n" ] }, { @@ -50,15 +76,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ - "admissions.columns = [col.strip() for col in admissions.columns]\n", - "admissions.columns\n", - "\n", - "# or:\n", - "# admissions.columns = admissions.columns.str.rstrip()" + "admission_predict.columns = admission_predict.columns.str.rstrip()\n" ] }, { @@ -70,11 +92,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "0 1 337 118 4 4.5 4.5 9.65 \n", + "1 2 316 104 3 3.0 3.5 8.00 \n", + "2 3 322 110 3 3.5 2.5 8.67 \n", + "3 4 314 103 2 2.0 3.0 8.21 \n", + "4 5 330 115 5 4.5 3.0 9.34 \n", + "\n", + " Research Chance of Admit \n", + "0 1 0.92 \n", + "1 1 0.72 \n", + "2 1 0.80 \n", + "3 0 0.65 \n", + "4 1 0.90 \n" + ] + } + ], + "source": [ + "# Your code here:\n", + "print(admission_predict.head())\n" ] }, { @@ -86,11 +129,32 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:" + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Serial No. 0\n", + "GRE Score 0\n", + "TOEFL Score 0\n", + "University Rating 0\n", + "SOP 0\n", + "LOR 0\n", + "CGPA 0\n", + "Research 0\n", + "Chance of Admit 0\n", + "dtype: int64\n", + "False\n" + ] + } + ], + "source": [ + "# Your code here:\n", + "print(admission_predict.isnull().sum())\n", + "\n", + "print(admission_predict.isnull().values.any())\n" ] }, { @@ -102,11 +166,12 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ - "# Your code here:" + "# Your code here:\n", + "admission_predict = admission_predict.set_index('Serial No.', drop=False)\n" ] }, { @@ -118,13 +183,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": { "scrolled": true }, - "outputs": [], - "source": [ - "# Your code here:" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total rows in dataset: 385\n", + "Unique (GRE Score, CGPA) pairs: 385\n", + "The combination of 'GRE Score' and 'CGPA' uniquely identifies each row.\n" + ] + } + ], + "source": [ + "# Your code here:\n", + "# Check if combined 'GRE Score' and 'CGPA' uniquely identify each row\n", + "unique_pairs = admission_predict[['GRE Score', 'CGPA']].drop_duplicates()\n", + "total_rows = len(admission_predict)\n", + "unique_pairs_count = len(unique_pairs)\n", + "\n", + "print(f\"Total rows in dataset: {total_rows}\")\n", + "print(f\"Unique (GRE Score, CGPA) pairs: {unique_pairs_count}\")\n", + "\n", + "if total_rows == unique_pairs_count:\n", + " print(\"The combination of 'GRE Score' and 'CGPA' uniquely identifies each row.\")\n", + "else:\n", + " print(\"The combination of 'GRE Score' and 'CGPA' does NOT uniquely identify each row.\")\n" ] }, { @@ -136,11 +223,12 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ - "# Your code here:" + "# Your code here:\n", + "admission_predict.set_index(['GRE Score', 'CGPA'], inplace=True, drop=True)\n" ] }, { @@ -152,11 +240,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ - "# Your code here:" + "# Your code here:\n", + "admission_predict.reset_index(inplace=True)\n" ] }, { @@ -170,11 +259,26 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:" + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of students with CGPA > 9 and research conducted: 101\n" + ] + } + ], + "source": [ + "# Your code here:\n", + "# Filter rows where CGPA > 9 and Research == 1 (or True)\n", + "filtered_rows = admission_predict[(admission_predict['CGPA'] > 9) & (admission_predict['Research'] == 1)]\n", + "\n", + "# Get the count\n", + "count = len(filtered_rows)\n", + "\n", + "print(f\"Number of students with CGPA > 9 and research conducted: {count}\")\n" ] }, { @@ -186,11 +290,26 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:" + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean Chance of Admit for applicants with CGPA > 9 and SOP < 3.5: 0.8019999999999999\n" + ] + } + ], + "source": [ + "# Your code here:\n", + "# Filter rows with CGPA > 9 and SOP < 3.5\n", + "filtered = admission_predict[(admission_predict['CGPA'] > 9) & (admission_predict['SOP'] < 3.5)]\n", + "\n", + "# Calculate the mean Chance of Admit\n", + "mean_chance = filtered['Chance of Admit'].mean()\n", + "\n", + "print(f\"Mean Chance of Admit for applicants with CGPA > 9 and SOP < 3.5: {mean_chance}\")\n" ] }, { @@ -208,18 +327,22 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ - "def standardize(col):\n", - " \"\"\"\n", - " This function takes a column from a dataframe \n", - " and returns a standardized column by subtracting the column's mean\n", - " and dividing by the column's standard deviation.\n", - " \"\"\"\n", - " \n", - " # Your code here:" + "import numpy as np\n", + "\n", + "def standardize_column(col):\n", + " mean = np.mean(col)\n", + " std = np.std(col)\n", + " standardized = (col - mean) / std\n", + " return standardized\n", + "\n", + "admission_predict['standardized_CGPA'] = standardize_column(admission_predict['CGPA'])\n", + "\n", + "\n", + " " ] }, { @@ -231,11 +354,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ - "# Your code here:" + "# Your code here:\n", + "# Standardization function\n", + "import numpy as np\n", + "\n", + "def standardize_column(col):\n", + " mean = np.mean(col)\n", + " std = np.std(col)\n", + " return (col - mean) / std\n", + "\n", + "# Create standardized columns\n", + "admission_predict['CGPA_std'] = standardize_column(admission_predict['CGPA'])\n", + "admission_predict['GRE_std'] = standardize_column(admission_predict['GRE Score'])\n", + "admission_predict['LOR_std'] = standardize_column(admission_predict['LOR'])\n", + "\n" ] }, { @@ -247,25 +383,43 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "# Libraries\n", - "from random import choices" + "from random import choices\n", + "\n", + "# Define these possible deciding factors\n", + "factors = ['CGPA_std', 'GRE_std', 'LOR_std']\n", + "\n", + "# Randomly choose one factor for each student\n", + "admission_predict['deciding_factor'] = [choices(factors)[0] for _ in range(len(admission_predict))]\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'admission' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[36], line 5\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# Run this code:\u001b[39;00m\n\u001b[0;32m 3\u001b[0m std_columns \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCGPA_std\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mGRE_std\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mLOR_std\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m----> 5\u001b[0m decision_choice \u001b[38;5;241m=\u001b[39m choices(std_columns, k\u001b[38;5;241m=\u001b[39madmission\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m])\n\u001b[0;32m 6\u001b[0m decision_choice\n", + "\u001b[1;31mNameError\u001b[0m: name 'admission' is not defined" + ] + } + ], "source": [ "# Run this code:\n", "\n", "std_columns = ['CGPA_std', 'GRE_std', 'LOR_std']\n", "\n", - "decision_choice = choices(std_columns, k=admissions.shape[0])\n", + "decision_choice = choices(std_columns, k=admission.shape[0])\n", "decision_choice" ] }, @@ -278,11 +432,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "metadata": {}, "outputs": [], "source": [ - "# Your code here:" + "# Your code here:\n", + "\n", + "# Create list of standardized column names\n", + "std_columns = ['CGPA_std', 'GRE_std', 'LOR_std']\n", + "\n", + "# Generate random decision choices\n", + "decision_choice = choices(std_columns, k=admission_predict.shape[0])\n", + "decision_choice\n", + "\n", + "# Assign to DataFrame\n", + "admission_predict['decision_choice'] = decision_choice\n", + "\n", + "row_indices = np.arange(len(admission_predict))\n", + "col_indices = admission_predict['decision_choice'].values\n", + "\n", + "# Create the deciding column\n", + "admission_predict['deciding_column'] = admission_predict.to_numpy()[row_indices, \n", + " [admission_predict.columns.get_loc(col) for col in col_indices]]\n", + "\n" ] }, { @@ -294,11 +466,14 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ - "# Your code here:" + "# Your code here:\n", + "# Assign 1 if deciding_column > 0.8, else 0\n", + "admission_predict['decision'] = (admission_predict['deciding_column'] > 0.8).astype(int)\n", + "\n" ] }, { @@ -310,11 +485,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 41, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of applicants accepted to the program: 87\n" + ] + } + ], "source": [ - "# Your code here:" + "# Your code here:\n", + "accepted_count = admission_predict['decision'].sum()\n", + "print(f\"Number of applicants accepted to the program: {accepted_count}\")\n" ] }, { @@ -330,11 +515,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "metadata": {}, "outputs": [], "source": [ - "# Your code here:" + "# Your code here:\n", + "admission_predict.columns = admission_predict.columns.str.replace(r'[^a-zA-Z\\s]', '', regex=True).str.replace(' ', '_').str.lower()\n" ] }, { @@ -346,17 +532,41 @@ }, { "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:" + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " gre_score university_rating adjusted_gre adjusted_gre_bin\n", + "0 337 4 347 (335.0, 350.0]\n", + "1 316 3 316 (305.0, 320.0]\n", + "2 322 3 322 (320.0, 335.0]\n", + "3 314 2 314 (305.0, 320.0]\n", + "4 330 5 340 (335.0, 350.0]\n" + ] + } + ], + "source": [ + "# Your code here:\n", + "import pandas as pd\n", + "\n", + "# Step 1: Boost GRE scores for university_rating >= 4\n", + "admission_predict['adjusted_gre'] = admission_predict['gre_score']\n", + "admission_predict.loc[admission_predict['university_rating'] >= 4, 'adjusted_gre'] += 10\n", + "\n", + "# Step 2: Bin the adjusted GRE scores into 4 categories\n", + "admission_predict['adjusted_gre_bin'] = pd.cut(admission_predict['adjusted_gre'], bins=4)\n", + "\n", + "# Show the first few rows to verify\n", + "print(admission_predict[['gre_score', 'university_rating', 'adjusted_gre', 'adjusted_gre_bin']].head())\n" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "base", "language": "python", "name": "python3" }, @@ -370,7 +580,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.12.7" } }, "nbformat": 4,