diff --git a/ASSIGNMENT-2-Akash Singh.ipynb b/ASSIGNMENT-2-Akash Singh.ipynb new file mode 100644 index 0000000..b693285 --- /dev/null +++ b/ASSIGNMENT-2-Akash Singh.ipynb @@ -0,0 +1,2190 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "Lo19gePaMPB6" + }, + "outputs": [], + "source": [ + "import pyforest\n", + "import statsmodels.api as sm\n", + "import statsmodels.stats.api as sms\n", + "import scipy.stats as st" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1c55Km_3bhVg" + }, + "source": [ + "## Q1.Use GQ test to check for the presence of heteroskedasticity." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sk255gHAXpBU" + }, + "source": [ + "## LOAD DATASET\n", + "### The following data on consumption-expenditure and income of 20 families (in ‘000 rupees):" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 676 + }, + "id": "eGUJnCujMx0l", + "outputId": "965d6d2d-8767-44e6-fb30-33bb4516a63d" + }, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " if (window._pyforest_update_imports_cell) { window._pyforest_update_imports_cell('import pandas as pd\\nimport matplotlib.pyplot as plt'); }\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IncomeCon-exp
022.319.9
132.331.2
236.631.8
312.112.1
442.340.7
56.26.1
644.738.6
726.125.5
810.310.3
940.238.8
108.18.0
1134.533.1
1238.033.5
1314.113.1
1416.414.8
1524.121.6
1630.129.3
1728.325.0
1818.217.9
1920.119.8
\n", + "
" + ], + "text/plain": [ + " Income Con-exp\n", + "0 22.3 19.9\n", + "1 32.3 31.2\n", + "2 36.6 31.8\n", + "3 12.1 12.1\n", + "4 42.3 40.7\n", + "5 6.2 6.1\n", + "6 44.7 38.6\n", + "7 26.1 25.5\n", + "8 10.3 10.3\n", + "9 40.2 38.8\n", + "10 8.1 8.0\n", + "11 34.5 33.1\n", + "12 38.0 33.5\n", + "13 14.1 13.1\n", + "14 16.4 14.8\n", + "15 24.1 21.6\n", + "16 30.1 29.3\n", + "17 28.3 25.0\n", + "18 18.2 17.9\n", + "19 20.1 19.8" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d= pd.read_csv(\"C:/Users/Akash/Desktop/Mayukh sir practical/assignment2/a2d1.csv\")\n", + "d" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "p8Ut756kX5jA" + }, + "source": [ + "### SORT THE DATA-SET BASED ON INCOME " + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 676 + }, + "id": "IG9YdO8_Odtl", + "outputId": "10d5585e-136e-43c4-f250-01285cbca234" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IncomeCon-exp
56.26.1
108.18.0
810.310.3
312.112.1
1314.113.1
1416.414.8
1818.217.9
1920.119.8
022.319.9
1524.121.6
726.125.5
1728.325.0
1630.129.3
132.331.2
1134.533.1
236.631.8
1238.033.5
940.238.8
442.340.7
644.738.6
\n", + "
" + ], + "text/plain": [ + " Income Con-exp\n", + "5 6.2 6.1\n", + "10 8.1 8.0\n", + "8 10.3 10.3\n", + "3 12.1 12.1\n", + "13 14.1 13.1\n", + "14 16.4 14.8\n", + "18 18.2 17.9\n", + "19 20.1 19.8\n", + "0 22.3 19.9\n", + "15 24.1 21.6\n", + "7 26.1 25.5\n", + "17 28.3 25.0\n", + "16 30.1 29.3\n", + "1 32.3 31.2\n", + "11 34.5 33.1\n", + "2 36.6 31.8\n", + "12 38.0 33.5\n", + "9 40.2 38.8\n", + "4 42.3 40.7\n", + "6 44.7 38.6" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Sort the loaded frame `d` by Income; later cells read the sorted frame as `data1`.\n", + "data1 = d.sort_values(\"Income\")\n", + "data1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GNlwAVIaYCvW" + }, + "source": [ + "### HERE, INDEPENDENT COVARIATE IS INCOME(x) AND DEPENDENT VARIABLE IS CONSUMPTION-EXPENDITURE(y)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2YLPJf3mPBBi", + "outputId": "ba813255-f388-4ed0-fd55-a49c5d71dd03" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5 6.2\n", + "10 8.1\n", + "8 10.3\n", + "3 12.1\n", + "13 14.1\n", + "14 16.4\n", + "18 18.2\n", + "19 20.1\n", + "0 22.3\n", + "15 24.1\n", + "7 26.1\n", + "17 28.3\n", + "16 30.1\n", + "1 32.3\n", + "11 34.5\n", + "2 36.6\n", + "12 38.0\n", + "9 40.2\n", + "4 42.3\n", + "6 44.7\n", + "Name: Income, dtype: float64\n", + "5 6.1\n", + "10 8.0\n", + "8 10.3\n", + "3 12.1\n", + "13 13.1\n", + "14 14.8\n", + "18 17.9\n", + "19 19.8\n", + "0 19.9\n", + "15 21.6\n", + "7 25.5\n", + "17 25.0\n", + "16 29.3\n", + "1 31.2\n", + "11 33.1\n", + "2 31.8\n", + "12 33.5\n", + "9 38.8\n", + "4 40.7\n", + "6 38.6\n", + "Name: Con-exp, dtype: float64\n" + ] + } + ], + "source": [ + "x=data1[\"Income\"]\n", + "print(x)\n", + "y=data1[\"Con-exp\"]\n", + "print(y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QFCrI9dlYsc5" + }, + "source": [ + "### PERFORMING SIMPLE LINEAR REGRESSION EQUATION USING 
STATS MODEL\n" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xWGDp8aLQtVO", + "outputId": "bce543d3-f72d-45e8-c6a8-2e32c8437971" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " OLS Regression Results \n", + "==============================================================================\n", + "Dep. Variable: Con-exp R-squared: 0.986\n", + "Model: OLS Adj. R-squared: 0.985\n", + "Method: Least Squares F-statistic: 1263.\n", + "Date: Mon, 30 May 2022 Prob (F-statistic): 4.00e-18\n", + "Time: 23:58:56 Log-Likelihood: -32.785\n", + "No. Observations: 20 AIC: 69.57\n", + "Df Residuals: 18 BIC: 71.56\n", + "Df Model: 1 \n", + "Covariance Type: nonrobust \n", + "==============================================================================\n", + " coef std err t P>|t| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "const 0.8471 0.703 1.204 0.244 -0.631 2.325\n", + "Income 0.8993 0.025 35.534 0.000 0.846 0.952\n", + "==============================================================================\n", + "Omnibus: 1.874 Durbin-Watson: 2.060\n", + "Prob(Omnibus): 0.392 Jarque-Bera (JB): 1.113\n", + "Skew: -0.236 Prob(JB): 0.573\n", + "Kurtosis: 1.945 Cond. No. 
66.6\n", + "==============================================================================\n", + "\n", + "Notes:\n", + "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" + ] + } + ], + "source": [ + "x_const=sm.add_constant(x)\n", + "model=sm.OLS(y,x_const)\n", + "result=model.fit()\n", + "print(result.summary())" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "j8rdKO0uRMo0", + "outputId": "53590a60-49dd-4046-acbf-777b6f2de7ce" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "5 6.422865\n", + "10 8.131582\n", + "8 10.110096\n", + "3 11.728880\n", + "13 13.527530\n", + "14 15.595977\n", + "18 17.214761\n", + "19 18.923478\n", + "0 20.901992\n", + "15 22.520777\n", + "7 24.319426\n", + "17 26.297940\n", + "16 27.916725\n", + "1 29.895239\n", + "11 31.873753\n", + "2 33.762335\n", + "12 35.021390\n", + "9 36.999904\n", + "4 38.888486\n", + "6 41.046865\n", + "dtype: float64" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred= result.predict(x_const)\n", + "y_pred" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-zjzutkjRfzs", + "outputId": "f18becb9-ee94-4eb3-88bd-30565ef1d620" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5 -0.322865\n", + "10 -0.131582\n", + "8 0.189904\n", + "3 0.371120\n", + "13 -0.427530\n", + "14 -0.795977\n", + "18 0.685239\n", + "19 0.876522\n", + "0 -1.001992\n", + "15 -0.920777\n", + "7 1.180574\n", + "17 -1.297940\n", + "16 1.383275\n", + "1 1.304761\n", + "11 1.226247\n", + "2 -1.962335\n", + "12 -1.521390\n", + "9 1.800096\n", + "4 1.811514\n", + "6 -2.446865\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "res= y-y_pred\n", + "print(res)" + ] + }, + { + "cell_type": "markdown", + 
"metadata": { + "id": "rDXv_NGTZ1q_" + }, + "source": [ + "### RESIDUAL PLOT ON INDEPENDENT VARIABLE VS RESIUAL" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 286 + }, + "id": "q4MqQ-dVRqsF", + "outputId": "53ba81ce-df79-4f72-8e75-98e8c9ea9e4c" + }, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " if (window._pyforest_update_imports_cell) { window._pyforest_update_imports_cell('import pandas as pd\\nimport matplotlib.pyplot as plt'); }\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXIAAAD7CAYAAAB37B+tAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAOqUlEQVR4nO3db4hc133G8eepIujiuGyKNom0sqqQNoJgBysMhiJoQ2oqJzWNIkiJQ4NLAsqLBBwKaqz4RZ0XwSZqQ160tFUbU7d1EwKW1yX/FAenhECaduVVIruK2lCcViMRbTBLYrpQW/71xd61VquZ2RnNuXPPufP9gPDuXfvOzwfr8dXvnj+OCAEAyvULTRcAABgPQQ4AhSPIAaBwBDkAFI4gB4DCEeQAULixg9z2Lba/Zfuc7eds35eiMADAcDzuPHLbOyXtjIhnbN8s6bSkQxHx7ykKBAAM9ppxbxARlyRdqr7+ue1zkuYl9Q3yHTt2xN69e8f9aACYKqdPn/5pRMxtvj52kG9ke6+k/ZK+1+NnRyQdkaQ9e/ZocXEx5UcDQOvZ/nGv68ledtp+raTHJX08In62+ecRcSIiOhHRmZu77n8oAIAblCTIbW/XWog/FhEnU9wTADCcFLNWLOnzks5FxGfHLwkAMIoUT+QHJH1Q0jttn6l+vTvBfQEAQ0gxa+U7kpygFgDADUg6awUA2mRhqavjp87r4sqqds3O6OjBfTq0f77psq5DkANopXFDeGGpq2Mnz2r1pSuSpO7Kqo6dPCtJ2YU5e60AaJ31EO6urCp0NYQXlrpD3+P4qfOvhvi61Zeu6Pip84mrHR9BDqB1UoTwxZXVka43iSAH0DopQnjX7MxI15tEkANonRQhfPTgPs1s33bNtZnt23T04L6xaqsDQQ6gdVKE8KH983ro8G2an52RJc3Pzuihw7dl96JTYtYKgBZaD9txpw4e2j+fZXBvRpADaKVSQjgFWisAUDiCHAAKR5ADQOEIcgAoHEEOAIUjyAGgcAQ5ABSOIAeAwhHkAFA4ghwACkeQA0Dh2GsFyFQp50WieQQ5kKGSzotE82itABkq6bxINI8gBzJU0nmRaB6tFSBDu2Zn1O0R2jdyXiS99vbjiRzIUKrzItd77d2VVYWu9toXlroJq0XTCHIgQ6nOi6TXPh1orQC
ZSnFUWapeO+2ZvPFEDrRYv576KL122jP5I8iBFkvRa6c9kz9aK0AfbWgnrNc7zr8HUyHzR5ADPbRpZeW4vfaUUyFRD1orQA+0E65KNRUS9eGJHOiBdsJVKdozqBdBDvRAO+FaKaZCoj60VoAeaCegJDyRAz3QTkBJCHKgD9oJKEWS1ortR2xftv1sivsBAIaXqkf+t5LuSnQvAMAIkgR5RHxb0gsp7gUAGM3EZq3YPmJ70fbi8vLypD4WAFpvYkEeESciohMRnbm5uUl9LAC0HvPIAaBwTD9ErdqwgyCQu1TTD78g6buS9tm+YPvDKe6LsnEgATAZSZ7II+KeFPdBuwzaQZCnciAdeuSoDTsIApNBkKM2Kc6LBLA1ghy1YQdBYDKYtYLasIMgMBkEOWrFDoJA/WitAEDheCLHlljUA+SNIMdA64t61ueDry/qkUSYA5mgtYKBBi3qAZAHghwDsagHyB9BjoFY1APkjyDHQCzqAfLHy04MxKIeIH8EObbEoh4gb7RWAKBwBDkAFI4gB4DCEeQAUDiCHAAKR5ADQOGYfjgB7B4IoE4Eec3YPRBA3Wit1IzdAwHUjSCvGbsHAqgbQV4zdg8EUDeCvGbsHgigbrzsrBm7BwKoG0E+AeweCKBOtFYAoHA8kY+AhT0AckSQD4mFPQByRWtlSCzsAZArgnxILOwBkCuCfEgs7AGQK4J8SCzsAZArXnYOiYU9AHJFkI+AhT0AcpSktWL7Ltvnbf/I9v0p7gkAGM7YQW57m6Q/l/QuSW+VdI/tt457XwDAcFI8kd8h6UcR8V8R8X+SvijpPQnuCwAYQoogn5f0Pxu+v1Bdu4btI7YXbS8uLy8n+FgAgJQmyN3jWlx3IeJERHQiojM3N5fgYwEAUpogvyDplg3f75Z0McF9AQBDSDH98N8k/ZrtN0nqSnq/pA8kuG8y7FoIoM3GDvKIeNn2xySdkrRN0iMR8dzYlSXCroUA2i7JPPKI+GpEvCUi3hwRn05xz1TYtRBA27V+rxV2LQTQdq0PcnYtBNB2rQ9ydi0E0Hat3zSLXQsBtF3rg1xi10IA7db61goAtF0xT+Qs6gGA3ooIchb1AEB/RbRWWNQDAP0VEeQs6gGA/ooIchb1AEB/RQT5tC/qWVjq6sDDT+tN939FBx5+WgtL3aZLApCRIl52TvOiHl70AthKEUEuTe+inkEveqdxPABcr4jWyjTjRS+ArRDkmeNFL4CtEOSZm/YXvQC2VkyPfFpN84teAMMhyAswrS96AQyH1goAFI4gB4DCEeQAUDiCHAAKR5ADQOEIcgAoHEEOAIUjyAGgcAQ5ABSOlZ1TZGGpy1J/oIUI8inBARVAe9FamRKDDqgAUDaCfEpwQAXQXgT5lOCACqC9CPIpwQEVQHvxsnNKTOsBFczUwTQgyKfItB1QwUwdTAtaK2gtZupgWowV5LbfZ/s526/Y7qQqCkiBmTqYFuO2Vp6VdFjSXyWoBejpRvvcu2Zn1O0R2szUQduM9UQeEecigj+nojbrfe7uyqpCV/vcC0vdLf9ZZupgWtAjR9bG6XMf2j+vhw7fpvnZGVnS/OyMHjp8Gy860TpbtlZsf1PSG3v86IGIeHLYD7J9RNIRSdqzZ8/QBWK6jdvnnraZOphOWwZ5RNyZ4oMi4oSkE5LU6XQixT3RfvS5ga3RWkHW6HMDWxt3+uF7bV+Q9OuSvmL7VJqygDX0uYGtOWLyXY5OpxOLi4sT/1wAKJnt0xFx3ZodWisAUDiCHAAKR5ADQOEIcgAoHEEOAIUjyAGgcBwsAdSMU4pQN4IcqBGnFGESaK0ANeKUIkwCQQ7UiFOKMAkEOVCjfrs0snsjUiLIgRqxeyMmgZedQI3WX2gyawV1IsiBmnFKEepGawUACkeQA0DhaK0AyA6rYUdDkAPICqthR0drBUBWWA07OoIcQFZYDTs6ghxAVlgNOzqCHEBWWA07Ol52AsgKq2FHR5ADyA6rYUdDawUACke
QA0DhCHIAKBxBDgCFI8gBoHAEOQAUjiAHgMIR5ABQOIIcAApHkANA4QhyACgcQQ4AhWPTLAC14wzOehHkAGrFGZz1G6u1Yvu47R/a/oHtJ2zPJqoLQEtwBmf9xu2RPyXp1oh4m6T/kHRs/JIAtAlncNZvrCCPiG9ExMvVt/8iaff4JQFoE87grF/KWSsfkvS1fj+0fcT2ou3F5eXlhB8LIGecwVm/LV922v6mpDf2+NEDEfFk9fc8IOllSY/1u09EnJB0QpI6nU7cULUAisMZnPXbMsgj4s5BP7d9r6S7Jf1WRBDQAK7DGZz1Gmv6oe27JH1C0m9GxP+mKQkAMIpxe+R/JulmSU/ZPmP7LxPUBAAYwVhP5BHxq6kKAQDcGPZaAYDCEeQAUDiCHAAKR5ADQOEIcgAoHEEOAIUjyAGgcAQ5ABSOIAeAwhHkAFA4ghwACkeQA0Dhxto0CwAwnIWlbm2HaxDkAFCzhaWujp08q9WXrkiSuiurOnbyrCQlCXNaKwBQs+Onzr8a4utWX7qi46fOJ7k/QQ4ANbu4sjrS9VER5ABQs12zMyNdHxVBDgA1O3pwn2a2b7vm2sz2bTp6cF+S+/OyEwBqtv5Ck1krAFCwQ/vnkwX3ZrRWAKBwBDkAFI4gB4DCEeQAUDiCHAAK54iY/Ifay5J+3ONHOyT9dMLlDCvn2qS868u5Ninv+nKuTcq7vpxrk26svl+JiLnNFxsJ8n5sL0ZEp+k6esm5Ninv+nKuTcq7vpxrk/KuL+fapLT10VoBgMIR5ABQuNyC/ETTBQyQc21S3vXlXJuUd3051yblXV/OtUkJ68uqRw4AGF1uT+QAgBER5ABQuCyC3Pbzts/aPmN7MYN6HrF92fazG679su2nbP9n9dfXZVTbg7a71fidsf3uJmqrarnF9rdsn7P9nO37quuNj9+A2rIYP9u/aPtfbX+/qu9T1fUcxq5fbVmMXVXLNttLtr9cfd/4uG1RX7Kxy6JHbvt5SZ2IyGLyvu3fkPSipL+LiFura5+R9EJEPGz7fkmvi4hPZFLbg5JejIg/mXQ9m9neKWlnRDxj+2ZJpyUdkvQHanj8BtT2e8pg/Gxb0k0R8aLt7ZK+I+k+SYfV/Nj1q+0uZTB2kmT7DyV1JP1SRNydy+/ZAfU9qERjl8UTeW4i4tuSXth0+T2SHq2+flRrATBxfWrLRkRciohnqq9/LumcpHllMH4DastCrHmx+nZ79SuUx9j1qy0LtndL+h1Jf7PhcuPjtq5PfcnkEuQh6Ru2T9s+0nQxfbwhIi5Ja4Eg6fUN17PZx2z/oGq9NPpHyHW290raL+l7ymz8NtUmZTJ+1R+/z0i6LOmpiMhm7PrUJuUxdp+T9EeSXtlwLYtxq3xO19cnJRq7XIL8QES8XdK7JH20ah9geH8h6c2Sbpd0SdKfNlqNJNuvlfS4pI9HxM+armejHrVlM34RcSUibpe0W9Idtm9tqpbN+tTW+NjZvlvS5Yg4PenPHsaA+pKNXRZBHhEXq79elvSEpDuarainn1Q91vVe6+WG63lVRPyk+k32iqS/VsPjV/VQH5f0WEScrC5nMX69astt/KqaViT9s9Z60FmM3bqNtWUydgck/W71ru2Lkt5p+x+Uz7j1rC/l2DUe5LZvql48yfZNkn5b0rOD/6lG/JOke6uv75X0ZIO1XGP9P9bKe9Xg+FUvxT4v6VxEfHbDjxofv3615TJ+tudsz1Zfz0i6U9IPlcfY9awth7GLiGMRsTsi9kp6v6SnI+L3lcG4Daov5djlcPjyGyQ9sfZ7TK+R9I8R8fUmC7L9BUnvkLTD9gVJfyzpYUlfsv1hSf8t6X0Z1fYO27dr7V3D85I+0kRtlQOSPijpbNVPlaRPKo/x61fbPZmM305Jj9reprWHrC9FxJdtf1fNj12/2v4+k7HrJYf/5gb5TKqxy2L6IQDgxjXeWgEAjIcgB4DCEeQAUDiCHAAKR5ADQOEIcgAoHEEOAIX7fxm
4H/HsggsqAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(x,res)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nCmOxQiwaFWW" + }, + "source": [ + "#### OBSERVED A FUNNEL-SHAPED RESIDUAL PLOT, i.e. HETEROSKEDASTICITY APPEARS TO BE PRESENT. NEXT, WE PERFORM THE GQ TEST TO CONFIRM THE PRESENCE OF HETEROSKEDASTICITY" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NLvIx4ooSDWX", + "outputId": "6649b1b4-336d-4668-e165-4402deda9d21" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5 6.2\n", + "10 8.1\n", + "8 10.3\n", + "3 12.1\n", + "13 14.1\n", + "14 16.4\n", + "18 18.2\n", + "19 20.1\n", + "Name: Income, dtype: float64\n" + ] + } + ], + "source": [ + "c=4\n", + "smallest = data1[:8]\n", + "#print(smallest)\n", + "print(smallest[\"Income\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "r34yogUlSfxO", + "outputId": "0081d742-8384-4141-8e5d-f8988b7f0a8b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " OLS Regression Results \n", + "==============================================================================\n", + "Dep. Variable: Con-exp R-squared: 0.988\n", + "Model: OLS Adj. R-squared: 0.986\n", + "Method: Least Squares F-statistic: 481.2\n", + "Date: Mon, 30 May 2022 Prob (F-statistic): 5.86e-07\n", + "Time: 23:59:01 Log-Likelihood: -5.5874\n", + "No. 
Observations: 8 AIC: 15.17\n", + "Df Residuals: 6 BIC: 15.33\n", + "Df Model: 1 \n", + "Covariance Type: nonrobust \n", + "==============================================================================\n", + " coef std err t P>|t| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "const 0.2350 0.605 0.389 0.711 -1.244 1.714\n", + "Income 0.9500 0.043 21.937 0.000 0.844 1.056\n", + "==============================================================================\n", + "Omnibus: 3.523 Durbin-Watson: 1.610\n", + "Prob(Omnibus): 0.172 Jarque-Bera (JB): 1.472\n", + "Skew: -1.043 Prob(JB): 0.479\n", + "Kurtosis: 2.746 Cond. No. 42.7\n", + "==============================================================================\n", + "\n", + "Notes:\n", + "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Akash\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\scipy\\stats\\_stats_py.py:1477: UserWarning: kurtosistest only valid for n>=20 ... continuing anyway, n=8\n", + " warnings.warn(\"kurtosistest only valid for n>=20 ... 
continuing \"\n" + ] + } + ], + "source": [ + "x1_const=sm.add_constant(smallest[\"Income\"])\n", + "model1= sm.OLS(smallest[\"Con-exp\"],x1_const)\n", + "result1=model1.fit()\n", + "print(result1.summary())" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "id": "EMK2E8EnTmKf" + }, + "outputs": [], + "source": [ + "y1_pred= result1.predict(x1_const)\n", + "y1=smallest[\"Con-exp\"]\n", + "res1=y1-y1_pred" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Cs0EzYqZT3dv", + "outputId": "0187ea17-ec90-4b8a-ed3b-77dba85b345e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.8934714887358706\n" + ] + } + ], + "source": [ + "Res1 = res1**2\n", + "ssr1 = Res1.sum()\n", + "print(ssr1)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 300 + }, + "id": "luiI5QPFUVfE", + "outputId": "cf46d9e0-a212-4900-af50-e4d05908a00a" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IncomeCon-exp
1630.129.3
132.331.2
1134.533.1
236.631.8
1238.033.5
940.238.8
442.340.7
644.738.6
\n", + "
" + ], + "text/plain": [ + " Income Con-exp\n", + "16 30.1 29.3\n", + "1 32.3 31.2\n", + "11 34.5 33.1\n", + "2 36.6 31.8\n", + "12 38.0 33.5\n", + "9 40.2 38.8\n", + "4 42.3 40.7\n", + "6 44.7 38.6" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "largest = data1[-8:]\n", + "largest\n" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "YkRMFGRFU_vx", + "outputId": "3854c82a-55c4-4afc-9240-06a95dae4d28" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " OLS Regression Results \n", + "==============================================================================\n", + "Dep. Variable: Con-exp R-squared: 0.833\n", + "Model: OLS Adj. R-squared: 0.806\n", + "Method: Least Squares F-statistic: 30.00\n", + "Date: Mon, 30 May 2022 Prob (F-statistic): 0.00155\n", + "Time: 23:59:05 Log-Likelihood: -15.076\n", + "No. Observations: 8 AIC: 34.15\n", + "Df Residuals: 6 BIC: 34.31\n", + "Df Model: 1 \n", + "Covariance Type: nonrobust \n", + "==============================================================================\n", + " coef std err t P>|t| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "const 6.0938 5.250 1.161 0.290 -6.751 18.939\n", + "Income 0.7641 0.140 5.477 0.002 0.423 1.106\n", + "==============================================================================\n", + "Omnibus: 1.258 Durbin-Watson: 1.852\n", + "Prob(Omnibus): 0.533 Jarque-Bera (JB): 0.631\n", + "Skew: -0.009 Prob(JB): 0.729\n", + "Kurtosis: 1.624 Cond. No. 
304.\n", + "==============================================================================\n", + "\n", + "Notes:\n", + "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Akash\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\scipy\\stats\\_stats_py.py:1477: UserWarning: kurtosistest only valid for n>=20 ... continuing anyway, n=8\n", + " warnings.warn(\"kurtosistest only valid for n>=20 ... continuing \"\n" + ] + } + ], + "source": [ + "x2_const = sm.add_constant(largest[\"Income\"])\n", + "mod = sm.OLS(largest[\"Con-exp\"],x2_const)\n", + "result2 = mod.fit()\n", + "print(result2.summary())" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2hyrdOW6VMKn", + "outputId": "e988a102-1f42-4f80-c1a1-6c2dadb6b22c" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "20.299525511488298" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred2 = result2.predict(x2_const)\n", + "res2 = (largest[\"Con-exp\"]-y_pred2)\n", + "Res2 = res2**2\n", + "ssr2 = Res2.sum()\n", + "ssr2" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jb7Vv9L3VdyT", + "outputId": "bb1d618a-392c-44b7-da5c-d1a430cb063a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10.720798085552783\n" + ] + } + ], + "source": [ + "F_obs=ssr2/ssr1\n", + "print(F_obs)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "b1chi6R6Vjms", + "outputId": "ccd55100-cf48-4bcf-83ff-606a3ee1d193" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "4.283865713822639\n" + ] 
+ } + ], + "source": [ + "F_critical=st.f.ppf(0.95,6,6)\n", + "print(F_critical)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "M8Lv2NDaVqpP", + "outputId": "7057598e-30d1-4c16-be5e-79527b4199d7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "we reject H0 i.e heteroskedasticity is present\n" + ] + } + ], + "source": [ + "if(F_obs>F_critical):\n", + " print(\"we reject H0 i.e heteroskedasticity is present\")\n", + "else:\n", + " print(\"we accept H0 i.e homoskedasticity is present\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rUvwtrqLbpSY" + }, + "source": [ + "## Q2.If heteroskedasticity is present, use any simple model for the error-variance to obtain better estimates of the parameters." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "glCcZB4ybw9M" + }, + "source": [ + "### PERFORMING GLS TO OBTAIN BETTER ESTIMATOR OF THE PARAMETERS" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vSD3TmjLcqeU", + "outputId": "2f1e3312-3dac-49af-852a-28a72975ee9d" + }, + "outputs": [], + "source": [ + "# Assumed error-variance model: Var(e_i) = sigma^2 * Income_i^2 (residual spread grows with income).\n", + "# Without `sigma`, sm.GLS is numerically identical to OLS, so the variance model is passed explicitly.\n", + "x_const = sm.add_constant(x)\n", + "model3 = sm.GLS(y, x_const, sigma=x**2)\n", + "result3 = model3.fit()\n", + "print(result3.summary())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rNDOfnUZdy78" + }, + "source": [ + "## B)Q1. Use Glejser test to check for the presence of heteroskedasticity." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sdzvhb-3d8cE" + }, + "source": [ + "### LOAD DATA-SET\n", + "### The following data are on speed (sp) of a car and distance (dis) it covers to come to a standstill."
+ ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "N4jcjS16eAiU", + "outputId": "b4862105-844d-433e-8b51-bcf5e4d5e5aa" + }, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " if (window._pyforest_update_imports_cell) { window._pyforest_update_imports_cell('import pandas as pd\\nimport matplotlib.pyplot as plt'); }\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dissp
044
125
245
385
485
\n", + "
" + ], + "text/plain": [ + " dis sp\n", + "0 4 4\n", + "1 2 5\n", + "2 4 5\n", + "3 8 5\n", + "4 8 5" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data2= pd.read_csv(\"C:/Users/Akash/Desktop/Mayukh sir practical/assignment2/a2d2.csv\")\n", + "data2.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RpBfBSo-xKdV" + }, + "source": [ + "### HERE SPEED OF CAR IS INDEPENDENT VARIABLE(x) and DISTANCE IS DEPENDENT VARIABLE(y)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "id": "p6Qzq6HTeKnQ" + }, + "outputs": [], + "source": [ + "x = data2[\"sp\"]\n", + "y = data2[\"dis\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mrjSkHfVx6T7" + }, + "source": [ + "### PERFORMING OLS" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "j7ZgesvvhOVB", + "outputId": "55c62cd4-aeeb-4cd9-ba52-e1f35bb6e175" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " OLS Regression Results \n", + "==============================================================================\n", + "Dep. Variable: dis R-squared: 0.876\n", + "Model: OLS Adj. R-squared: 0.874\n", + "Method: Least Squares F-statistic: 408.6\n", + "Date: Tue, 31 May 2022 Prob (F-statistic): 6.13e-28\n", + "Time: 00:02:36 Log-Likelihood: -232.96\n", + "No. 
# %%
# Baseline model: ordinary least squares of y on x plus an intercept column.
x_const = sm.add_constant(x)
mod = sm.OLS(endog=y, exog=x_const)
result = mod.fit()
print(result.summary())

# %%
# Look the fitted coefficients up by their labels in the params Series.
parameters = result.params
intercept = parameters["const"]
slope = parameters["sp"]
print("Intercepts: ", intercept)
print("Slope: ", slope)

# %%
# In-sample fitted values from the baseline model.
y_pred = result.predict(exog=x_const)
y_pred.head()

# %%
# Raw residuals: observed response minus fitted value.
res2 = y - y_pred
res2.head()
# %%
# Residual diagnostics for the baseline OLS fit. A funnel shape in either
# panel suggests the error variance changes with the fitted value or with x.
fig, (ax_fitted, ax_x) = plt.subplots(1, 2, figsize=(20, 6))

# Left panel: residuals against the FITTED values (y_pred), so it is labelled
# as such; the original axis label and title called this quantity "y".
ax_fitted.scatter(y_pred, res2)
ax_fitted.set_xlabel("Fitted y")
ax_fitted.set_ylabel("Residual")
ax_fitted.set_title("Fitted Y vs Residual plot")

# Right panel: residuals against the regressor.
ax_x.scatter(x, res2)
ax_x.set_xlabel("x")
ax_x.set_ylabel("Residual")
ax_x.set_title("X vs Residual plot")

plt.show()  # render the figure without echoing the Text(...) repr

# %%
# Glejser test: regress the absolute residuals on a transform of x.
RES = np.absolute(res2)
# Candidate functional forms, in the order they are fitted below:
#   model11: |ei| = b0 + b1 * sqrt(1/xi) + Vi
#   model22: |ei| = b0 + b1 * (1/xi)     + Vi
#   model33: |ei| = b0 + b1 * sqrt(xi)   + Vi
# %%
# Glejser auxiliary regression, form 1: |e_i| = b0 + b1 * sqrt(1/x_i) + v_i
x11_const = sm.add_constant(np.sqrt(1 / x))
model11 = sm.OLS(RES, x11_const)
# Fit the auxiliary model itself. The original called `mod.fit()`, which
# refitted the baseline y ~ x regression and so reported the baseline summary
# (and baseline slope) under the name `result11`.
result11 = model11.fit()
print(result11.summary())
# %%
# Glejser auxiliary regression, form 2: |e_i| = b0 + b1 * (1/x_i) + v_i
x22_const = sm.add_constant(1 / x)
model22 = sm.OLS(endog=RES, exog=x22_const)
result22 = model22.fit()
print(result22.summary())
# %%
# Glejser auxiliary regression, form 3: |e_i| = b0 + b1 * sqrt(x_i) + v_i
x33_const = sm.add_constant(np.sqrt(x))
model33 = sm.OLS(RES, x33_const)
result33 = model33.fit()
print(result33.summary())

# %%
# Slope of the first auxiliary regression (result11).
slope11 = result11.params.sp
print(slope11)

# %%
# Observed t statistic for H0: b1 = 0. The standard error is read from the
# same fit as the slope; the original divided by a hard-coded 0.156, which is
# the standard error printed for the baseline regression.
T_obs = slope11 / result11.bse.sp
print(T_obs)

# %%
# Two-sided critical value at the 5% level, using the residual degrees of
# freedom of the fitted auxiliary model instead of a hard-coded 58.
ALPHA = 0.05
T_critical = st.t.ppf(1 - ALPHA / 2, result11.df_resid)
T_critical

# %%
# The sign of b1 depends on the transform of x (it is negative for the
# 1/x-type forms when the spread grows with x), so compare |t| rather than
# only the upper tail.
if abs(T_obs) > T_critical:
    print("WE REJECT H0 i.e HETEROSKEDASTICITY IS PRESENT")
else:
    print("WE ACCEPT H0 i.e HETEROSKEDASTICITY IS NOT PRESENT ")

# %% [markdown]
# ## REMEDY

# %% [markdown]
# ### TRANSFORMING Y --> LOG Y

# %%
# Log-transform the response to compress the larger values.
y_log = np.log(y)

# %% [markdown]
# ### PERFORMING GLS
"https://localhost:8080/" + }, + "id": "gonkeiXX4OfT", + "outputId": "b038ce0f-ad65-41d4-b2cf-6efec659e7c0" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " GLS Regression Results \n", + "==============================================================================\n", + "Dep. Variable: dis R-squared: 0.865\n", + "Model: GLS Adj. R-squared: 0.862\n", + "Method: Least Squares F-statistic: 370.5\n", + "Date: Tue, 31 May 2022 Prob (F-statistic): 7.27e-27\n", + "Time: 00:03:48 Log-Likelihood: -24.427\n", + "No. Observations: 60 AIC: 52.85\n", + "Df Residuals: 58 BIC: 57.04\n", + "Df Model: 1 \n", + "Covariance Type: nonrobust \n", + "==============================================================================\n", + " coef std err t P>|t| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "const 1.4977 0.103 14.471 0.000 1.291 1.705\n", + "sp 0.0929 0.005 19.249 0.000 0.083 0.103\n", + "==============================================================================\n", + "Omnibus: 8.532 Durbin-Watson: 1.323\n", + "Prob(Omnibus): 0.014 Jarque-Bera (JB): 8.095\n", + "Skew: -0.724 Prob(JB): 0.0175\n", + "Kurtosis: 4.067 Cond. No. 
# %%
# Refit on the log-transformed response.
# NOTE(review): no `sigma` is passed to GLS here, so this presumably reduces
# to an unweighted least-squares fit of log(y) on x - confirm against the
# statsmodels GLS documentation.
x44_const = sm.add_constant(x)
model5 = sm.GLS(endog=y_log, exog=x44_const)
result5 = model5.fit()
print(result5.summary())

# %%
# Fitted values and residuals on the log scale.
y_pred_gls = result5.predict(exog=x44_const)

res_gls = y_log - y_pred_gls
res_gls.head()

# %% [markdown]
# ### Confirming that our new gls model's error terms are homoskedastic using BP Test

# %%
# Labels for the four values returned by het_breuschpagan, in order.
names = ['Lagrange multiplier statistic', 'p-value',
         'f-value', 'f p-value']
lmstat, pvalue, fvalue, fpvalue = sms.het_breuschpagan(res_gls, x44_const)
print("The p value of our test is {:.2f}".format(pvalue))

# Decide at the 5% level using the LM-test p-value.
if pvalue <= 0.05:
    print("Our data is heteroscedastic")
else:
    print("We accept the null hypothesis\nOur errors are Homoscedastic")
Homoscedastic\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "ASSIGNMENT-2(GQ)_ANUZA _PAUL.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.2" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/a2d1.csv b/a2d1.csv new file mode 100644 index 0000000..7926b39 --- /dev/null +++ b/a2d1.csv @@ -0,0 +1,21 @@ +Income,Con-exp +22.3,19.9 +32.3,31.2 +36.6,31.8 +12.1,12.1 +42.3,40.7 +6.2,6.1 +44.7,38.6 +26.1,25.5 +10.3,10.3 +40.2,38.8 +8.1,8 +34.5,33.1 +38,33.5 +14.1,13.1 +16.4,14.8 +24.1,21.6 +30.1,29.3 +28.3,25 +18.2,17.9 +20.1,19.8 diff --git a/a2d2.csv b/a2d2.csv new file mode 100644 index 0000000..782a2ae --- /dev/null +++ b/a2d2.csv @@ -0,0 +1,61 @@ +dis,sp +4,4 +2,5 +4,5 +8,5 +8,5 +7,7 +8,8 +9,8 +11,8 +13,8 +13,9 +8,10 +14,10 +17,10 +11,12 +19,12 +21,12 +15,13 +18,13 +27,13 +16,15 +14,16 +19,16 +34,16 +22,17 +29,17 +29,18 +34,18 +47,18 +30,19 +42,21 +55,21 +33,25 +48,25 +56,25 +59,25 +39,26 +41,26 +57,27 +78,27 +54,29 +68,29 +60,30 +67,30 +101,30 +77,31 +85,35 +107,35 +79,36 +138,39 +5,9 +14,14 +5,9 +16,14 +48,20 +39,21 +64,28 +84,28 +110,40 +134,40