From 12b5b75c9dcd0a0b0f9ee7ef9569ca737241716f Mon Sep 17 00:00:00 2001 From: Shristi Rawat Date: Mon, 7 Oct 2024 19:26:02 +0530 Subject: [PATCH] Matched feature names in model prediction --- .../Stock_Price_Prediction-checkpoint.ipynb | 4681 ++++++++++++ Stock_Price_Prediction.ipynb | 6398 ++++++++++++----- 2 files changed, 9307 insertions(+), 1772 deletions(-) create mode 100644 .ipynb_checkpoints/Stock_Price_Prediction-checkpoint.ipynb diff --git a/.ipynb_checkpoints/Stock_Price_Prediction-checkpoint.ipynb b/.ipynb_checkpoints/Stock_Price_Prediction-checkpoint.ipynb new file mode 100644 index 0000000..afca730 --- /dev/null +++ b/.ipynb_checkpoints/Stock_Price_Prediction-checkpoint.ipynb @@ -0,0 +1,4681 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "qCDSjVhXLr_Z" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.svm import SVR\n", + "from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor, DecisionTreeRegressor\n", + "from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error\n", + "from sklearn.neighbors import KNeighborsRegressor\n", + "from tensorflow.keras.models import Sequential\n", + "from tensorflow.keras.layers import Dense,LSTM" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')\n", + "df = pd.read_csv('drive/My Drive/Colab Notebooks/Stock Price Prediction RNN/SBIN.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/", + "height": 206 + }, + "id": "Sc4id6VxL8BS", + "outputId": "568d039c-faf4-4636-bfc1-70b9ef83367b" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DateOpenHighLowCloseAdj CloseVolume
001-01-199618.69114718.97892218.54018418.82324012.40993143733533.0
102-01-199618.89400518.96476717.73819218.22410612.01493156167280.0
203-01-199618.32789218.56848917.64383917.73819211.69457768296318.0
304-01-199617.50231217.83254217.22397217.67686311.65414286073880.0
405-01-199617.73819217.78536617.45985217.57779311.58882776613039.0
\n", + "
" + ], + "text/plain": [ + " Date Open High Low Close Adj Close \\\n", + "0 01-01-1996 18.691147 18.978922 18.540184 18.823240 12.409931 \n", + "1 02-01-1996 18.894005 18.964767 17.738192 18.224106 12.014931 \n", + "2 03-01-1996 18.327892 18.568489 17.643839 17.738192 11.694577 \n", + "3 04-01-1996 17.502312 17.832542 17.223972 17.676863 11.654142 \n", + "4 05-01-1996 17.738192 17.785366 17.459852 17.577793 11.588827 \n", + "\n", + " Volume \n", + "0 43733533.0 \n", + "1 56167280.0 \n", + "2 68296318.0 \n", + "3 86073880.0 \n", + "4 76613039.0 " + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load the dataset\n", + "#df = pd.read_csv('/content/SBIN.NS.csv')\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "id": "7LaYGXsfN-8y" + }, + "outputs": [], + "source": [ + "# Drop the 'Date' and 'Adj Close' columns\n", + "df.drop(['Date', 'Adj Close'], axis=1, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "pqbTBdnBOKJc", + "outputId": "21da8a7f-4f3e-4f4f-e32b-3b90c230ce55" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
OpenHighLowCloseVolume
018.69114718.97892218.54018418.82324043733533.0
118.89400518.96476717.73819218.22410656167280.0
218.32789218.56848917.64383917.73819268296318.0
317.50231217.83254217.22397217.67686386073880.0
417.73819217.78536617.45985217.57779376613039.0
\n", + "
" + ], + "text/plain": [ + " Open High Low Close Volume\n", + "0 18.691147 18.978922 18.540184 18.823240 43733533.0\n", + "1 18.894005 18.964767 17.738192 18.224106 56167280.0\n", + "2 18.327892 18.568489 17.643839 17.738192 68296318.0\n", + "3 17.502312 17.832542 17.223972 17.676863 86073880.0\n", + "4 17.738192 17.785366 17.459852 17.577793 76613039.0" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "id": "dydEPoNeM6eN" + }, + "outputs": [], + "source": [ + "# Handle missing values\n", + "imputer = SimpleImputer(strategy='mean')\n", + "df = pd.DataFrame(imputer.fit_transform(df), columns=df.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "id": "OQ3cGqgTMBwt" + }, + "outputs": [], + "source": [ + "# Select features and target variable\n", + "X = df[['Open', 'High', 'Low', 'Volume']]\n", + "y = df['Close']" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "id": "9Oz-bwJOMEWD" + }, + "outputs": [], + "source": [ + "# Split the data into training and testing sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "id": "ugapDyXODtn3" + }, + "outputs": [], + "source": [ + "# Scale the features using Min-Max scaling\n", + "scaler = MinMaxScaler()\n", + "X_train_scaled = scaler.fit_transform(X_train)\n", + "X_test_scaled = scaler.transform(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "997ZEgibCZIO", + "outputId": "2a45a8e3-71b0-47f3-bd66-91bcdc028c76" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(5659, 4)" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + 
"source": [ + "X_train.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bmtt76RuCeyG", + "outputId": "658075af-e75d-45b1-f6cf-756e349a32d1" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(1415, 4)" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "CeJkUJ92Ciqd", + "outputId": "93dec527-ea2e-42e6-c70b-a9491c71d917" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(5659,)" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_train.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7HGC7VuTCjWc", + "outputId": "64dc2569-b4b4-4c2e-d416-1cf77c41ac75" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(1415,)" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "# Function to evaluate and print RMSE, MAE, and MAPE\n", + "def evaluate_model(model, X_test, y_test):\n", + " predictions = model.predict(X_test)\n", + " rmse = np.sqrt(mean_squared_error(y_test, predictions))\n", + " mae = mean_absolute_error(y_test, predictions)\n", + " mape = mean_absolute_percentage_error(y_test, predictions)\n", + "\n", + " print(f\"RMSE: {rmse}\")\n", + " print(f\"MAE: {mae}\")\n", + " print(f\"MAPE: {mape}\\n\")\n", + " \n", + " return rmse, mae, mape\n" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "metrics = {\n", + " \"Model\": [],\n", + " 
\"RMSE\": [],\n", + " \"MAE\": [],\n", + " \"MAPE\": []\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "c6Ek8jRlO2_I" + }, + "source": [ + "## 1. LINEAR REGRESSION" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": { + "id": "RdZ1SpzdMHAJ" + }, + "outputs": [], + "source": [ + "# Create a linear regression model\n", + "model1 = LinearRegression()" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mPM035IzMY04", + "outputId": "07379dba-cfe8-4814-b972-d08b12f224ac" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "5286 257.350006\n", + "3408 129.464996\n", + "5477 279.350006\n", + "6906 588.500000\n", + "530 21.644367\n", + "Name: Close, dtype: float64" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_train.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 142, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 74 + }, + "id": "qBhQ9HbYMI3d", + "outputId": "52e0655f-1d23-47b7-decc-7a7ca35c0470" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 142, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Train the model\n", + "model1.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": { + "id": "X269co2kMS4z" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RMSE: 1.6881364651923558\n", + "MAE: 0.9433353486266928\n", + "MAPE: 0.006085435968276741\n", + "\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model1, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"Linear Regressor\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GxtMzlg-gR2P" + }, + "source": [ + "## 2. SVR" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": { + "id": "0xQewd7QWTtq" + }, + "outputs": [], + "source": [ + "# Create an SVR model\n", + "model2 = SVR()" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 74 + }, + "id": "22SaCsQmfhgP", + "outputId": "2121e992-399d-4b78-e42c-fc20b9d52189" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
SVR()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "SVR()" + ] + }, + "execution_count": 144, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Train the model\n", + "model2.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": { + "id": "OQ1nL4oYfkAC" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RMSE: 17.574809673127547\n", + "MAE: 6.278157692070486\n", + "MAPE: 0.09040265035344064\n", + "\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model2, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"SVR\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hcIfVMWdgcKt" + }, + "source": [ + "## 3. Random Forest" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": { + "id": "f7raXT_hf2ij" + }, + "outputs": [], + "source": [ + "model3 = RandomForestRegressor()" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 74 + }, + "id": "fF002Yepgk55", + "outputId": "d148c589-4879-4e2d-8b0f-5b5ca01a2a53" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
RandomForestRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "RandomForestRegressor()" + ] + }, + "execution_count": 146, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Train the model\n", + "model3.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": { + "id": "8nRU_pzEgnCt" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RMSE: 2.2053909891328036\n", + "MAE: 1.2608162799481166\n", + "MAPE: 0.008015308194076972\n", + "\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model3, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"Random Forest\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mZsLwLivhLGH" + }, + "source": [ + "## 4. Gradient Boosting Models (GBM)" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": { + "id": "TI8idoxOg6jF" + }, + "outputs": [], + "source": [ + "model4 = GradientBoostingRegressor()" + ] + }, + { + "cell_type": "code", + "execution_count": 148, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 248 + }, + "id": "2gpbDxshhexj", + "outputId": "b2b1a681-7ede-4d66-be5d-1a8606d0f470" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
GradientBoostingRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "GradientBoostingRegressor()" + ] + }, + "execution_count": 148, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Train the model\n", + "model4.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": { + "id": "Jj9DXdUPhh9V" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RMSE: 2.6985863368468084\n", + "MAE: 1.692542658558929\n", + "MAPE: 0.011883244132236716\n", + "\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model4, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"GBM\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d8nSGoyuh9dx" + }, + "source": [ + "## 5. Extreme Gradient Boosting (XGBoost)" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": { + "id": "DyhhdlZAhx94" + }, + "outputs": [], + "source": [ + "import xgboost as xgb\n", + "# Create an XGBoost model\n", + "model5 = xgb.XGBRegressor()" + ] + }, + { + "cell_type": "code", + "execution_count": 150, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 248 + }, + "id": "RAIwxIp5iH9Z", + "outputId": "d2b4aa97-7e07-4015-c308-76a292b0929f" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
+       "             colsample_bylevel=None, colsample_bynode=None,\n",
+       "             colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
+       "             enable_categorical=False, eval_metric=None, feature_types=None,\n",
+       "             gamma=None, grow_policy=None, importance_type=None,\n",
+       "             interaction_constraints=None, learning_rate=None, max_bin=None,\n",
+       "             max_cat_threshold=None, max_cat_to_onehot=None,\n",
+       "             max_delta_step=None, max_depth=None, max_leaves=None,\n",
+       "             min_child_weight=None, missing=nan, monotone_constraints=None,\n",
+       "             multi_strategy=None, n_estimators=None, n_jobs=None,\n",
+       "             num_parallel_tree=None, random_state=None, ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "XGBRegressor(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=None, device=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None, feature_types=None,\n", + " gamma=None, grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=None, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=None, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " multi_strategy=None, n_estimators=None, n_jobs=None,\n", + " num_parallel_tree=None, random_state=None, ...)" + ] + }, + "execution_count": 150, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Train the model\n", + "model5.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RMSE: 2.733930065274145\n", + "MAE: 1.502457380471909\n", + "MAPE: 0.010026410639661481\n", + "\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model5, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"XGBoost\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A_J776rtiovq" + }, + "source": [ + "## 6. AdaBoostRegressor" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": { + "id": "HNq66cXRiYPJ" + }, + "outputs": [], + "source": [ + "model6 = AdaBoostRegressor()" + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 74 + }, + "id": "P0oB5wjQivBr", + "outputId": "8726c583-6782-4504-b0ac-d2ef4ccbca4c" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
AdaBoostRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "AdaBoostRegressor()" + ] + }, + "execution_count": 152, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Train the model\n", + "model6.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": { + "id": "Bf1m5ukOi2VM" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RMSE: 9.175482477551942\n", + "MAE: 7.527617905792734\n", + "MAPE: 0.1858930099598583\n", + "\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model6, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"AdaBoost Regressor\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q9DzOt3CkWFX" + }, + "source": [ + "## 7. Decision Tree" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": { + "id": "23DZ2biSjF9a" + }, + "outputs": [], + "source": [ + "model7 = DecisionTreeRegressor()" + ] + }, + { + "cell_type": "code", + "execution_count": 154, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 74 + }, + "id": "6mQEQf-ykc9F", + "outputId": "f1a62020-4125-4aea-e7e4-11acffdc5169" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
DecisionTreeRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "DecisionTreeRegressor()" + ] + }, + "execution_count": 154, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Train the model\n", + "model7.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": { + "id": "BFJ9q_tvkgRC" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RMSE: 3.12966540689625\n", + "MAE: 1.6497286032971983\n", + "MAPE: 0.010286427942970355\n", + "\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model7, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"Decision Tree\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LH-B-Xd6k5UD" + }, + "source": [ + "## 8. KNeighborsRegressor(KNN)" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": { + "id": "JVDSed7yktFY" + }, + "outputs": [], + "source": [ + "# Create a KNN model\n", + "model8 = KNeighborsRegressor()" + ] + }, + { + "cell_type": "code", + "execution_count": 156, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 74 + }, + "id": "9fn64o-ZlBka", + "outputId": "dc5e6af2-de37-46ee-cde7-e0a3baa31a1f" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
KNeighborsRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "KNeighborsRegressor()" + ] + }, + "execution_count": 156, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Train the model\n", + "model8.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": { + "id": "hbfbbjcSlDn7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RMSE: 3.0274590148039873\n", + "MAE: 1.7525904376439672\n", + "MAPE: 0.013668115353592272\n", + "\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model8, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"KNN\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "X5XtlzMXljps" + }, + "source": [ + "## 9. Artificial Neural Networks (ANN)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create an ANN model\n", + "model9 = Sequential()\n", + "model9.add(Dense(32, activation='relu', input_shape=(X_train.shape[1],)))\n", + "model9.add(Dense(16, activation='relu'))\n", + "model9.add(Dense(1, activation='linear'))" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": { + "id": "ZIf94WLMlv04" + }, + "outputs": [], + "source": [ + "# Compile the model\n", + "model9.compile(loss='mean_squared_error', optimizer='adam')" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "FX5DTKqslxWf", + "outputId": "9253b26c-1a79-4390-975e-d14c28a5e2a8" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 126, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Train the model\n", + "model9.fit(X_train_scaled, y_train, epochs=100, batch_size=32, verbose=0)" + ] + }, + { + "cell_type": "code", + 
"execution_count": 128, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OVW2qpNsmGVq", + "outputId": "34343782-f560-4dee-c307-ff0d0c52ab5a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1m45/45\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 3ms/step\n", + "RMSE: 2.801001091255311\n", + "MAE: 1.7605365826618848\n", + "MAPE: 0.0126215060590655\n", + "\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model9, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"ANN\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vjSMQNcOnFPJ" + }, + "source": [ + "## 10. LSTM(Long Short term Memory)" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "metadata": { + "id": "uACvajfImrbB" + }, + "outputs": [], + "source": [ + "# Reshape the input data for LSTM\n", + "n_features = X_train_scaled.shape[1]\n", + "n_steps = 10\n", + "n_samples_train = X_train_scaled.shape[0] - n_steps + 1\n", + "n_samples_test = X_test_scaled.shape[0] - n_steps + 1\n", + "\n", + "# Reshape the input data\n", + "X_train_reshaped = np.array([X_train_scaled[i:i+n_steps, :] for i in range(n_samples_train)])\n", + "X_test_reshaped = np.array([X_test_scaled[i:i+n_steps, :] for i in range(n_samples_test)])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create an LSTM model\n", + "model = Sequential()\n", + "model.add(LSTM(64, activation='relu', input_shape=(n_steps, n_features)))\n", + "model.add(Dense(1))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "metadata": { + "id": "YpSfHu6gov35" + }, + "outputs": [], + "source": [ + "# Compile the model\n", + "model.compile(loss='mean_squared_error', optimizer='adam')\n" + ] + }, + { + 
"cell_type": "code", + "execution_count": 137, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0vHjcluaoxzP", + "outputId": "1eaafd31-9f91-4655-f437-e9199c0f7933" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 137, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Train the model\n", + "model.fit(X_train_reshaped, y_train[n_steps-1:], epochs=100, batch_size=32, verbose=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rmse, mae, mape = evaluate_model(model10, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"LSTM\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a DataFrame for metrics\n", + "metrics_df = pd.DataFrame(metrics)\n", + "\n", + "# Plot RMSE, MAE, and MAPE for each model\n", + "plt.figure(figsize=(15, 5))\n", + "\n", + "# RMSE Plot\n", + "plt.subplot(1, 3, 1)\n", + "plt.bar(metrics_df['Model'], metrics_df['RMSE'], color='lightblue')\n", + "plt.xlabel('RMSE')\n", + "plt.title('RMSE for Different Models')\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# MAE Plot\n", + "plt.subplot(1, 3, 2)\n", + "plt.bar(metrics_df['Model'], metrics_df['MAE'], color='lightgreen')\n", + "plt.xlabel('MAE')\n", + "plt.title('MAE for Different Models')\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# MAPE Plot\n", + "plt.subplot(1, 3, 3)\n", + "plt.bar(metrics_df['Model'], metrics_df['MAPE'], color='salmon')\n", + "plt.xlabel('MAPE')\n", + "plt.title('MAPE for Different Models')\n", + 
"plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Using of LightGBM and CatBoost For Optimizing the model accuracy and time complexity" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import necessary libraries\n", + "import lightgbm as lgb\n", + "from catboost import CatBoostRegressor\n", + "from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, accuracy_score, precision_score, confusion_matrix, recall_score, f1_score\n", + "\n", + "# Function to train and evaluate a model\n", + "def train_and_evaluate_model(model, X_train, X_test, y_train, y_test):\n", + " model.fit(X_train, y_train)\n", + " pred = model.predict(X_test)\n", + " rmse = np.sqrt(mean_squared_error(y_test, pred))\n", + " mae = mean_absolute_error(y_test, pred)\n", + " mape = mean_absolute_percentage_error(y_test, pred)\n", + " accuracy = accuracy_score(y_test > pred, y_test > pred.round())\n", + " precision = precision_score(y_test > pred, y_test > pred.round())\n", + " confusion = confusion_matrix(y_test > pred, y_test > pred.round())\n", + " recall = recall_score(y_test > pred, y_test > pred.round())\n", + " f1 = f1_score(y_test > pred, y_test > pred.round())\n", + " return rmse, mae, mape, accuracy, precision, confusion, recall, f1\n", + "\n", + "# Train and evaluate LightGBM model for from this directly print accuracy \n", + "model_lightgbm = lgb.LGBMRegressor()\n", + "metrics_lightgbm = train_and_evaluate_model(model_lightgbm, X_train, X_test, y_train, y_test)\n", + "print(\"LightGBM Metrics:\", metrics_lightgbm)\n", + "\n", + "# Train and evaluate CatBoost model\n", + "model_catboost = CatBoostRegressor(verbose=0)\n", + "metrics_catboost = train_and_evaluate_model(model_catboost, X_train, X_test, y_train, y_test)\n", + "print(\"CatBoost Metrics:\", metrics_catboost)" + ] + } + ], + "metadata": { + "colab": { + 
"provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Stock_Price_Prediction.ipynb b/Stock_Price_Prediction.ipynb index b6205f9..01ad953 100644 --- a/Stock_Price_Prediction.ipynb +++ b/Stock_Price_Prediction.ipynb @@ -1,1848 +1,4702 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "qCDSjVhXLr_Z" - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.impute import SimpleImputer\n", - "from sklearn.preprocessing import MinMaxScaler\n", - "from sklearn.linear_model import LinearRegression\n", - "from sklearn.svm import SVR\n", - "from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor, DecisionTreeRegressor\n", - "from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error\n", - "from sklearn.neighbors import KNeighborsRegressor\n", - "from tensorflow.keras.models import Sequential\n", - "from tensorflow.keras.layers import Dense,LSTM" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "SOQbXSiB-g5G", - "outputId": "6ae02a27-02b0-4bd9-a1ae-a7029056f32e" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" - ] - } - ], - "source": [ - "from google.colab import 
drive\n", - "drive.mount('/content/drive')\n", - "df = pd.read_csv('drive/My Drive/Colab Notebooks/Stock Price Prediction RNN/SBIN.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 206 - }, - "id": "Sc4id6VxL8BS", - "outputId": "568d039c-faf4-4636-bfc1-70b9ef83367b" - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
DateOpenHighLowCloseAdj CloseVolume
001-01-199618.69114718.97892218.54018418.82324012.40993143733533.0
102-01-199618.89400518.96476717.73819218.22410612.01493156167280.0
203-01-199618.32789218.56848917.64383917.73819211.69457768296318.0
304-01-199617.50231217.83254217.22397217.67686311.65414286073880.0
405-01-199617.73819217.78536617.45985217.57779311.58882776613039.0
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ], - "text/plain": [ - " Date Open High Low Close Adj Close \\\n", - "0 01-01-1996 18.691147 18.978922 18.540184 18.823240 12.409931 \n", - "1 02-01-1996 18.894005 18.964767 17.738192 18.224106 12.014931 \n", - "2 03-01-1996 18.327892 18.568489 17.643839 17.738192 11.694577 \n", - "3 04-01-1996 17.502312 17.832542 17.223972 17.676863 11.654142 \n", - "4 05-01-1996 17.738192 17.785366 17.459852 17.577793 11.588827 \n", - "\n", - " Volume \n", - "0 43733533.0 \n", - "1 56167280.0 \n", - "2 68296318.0 \n", - "3 86073880.0 \n", - "4 76613039.0 " - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load the dataset\n", - "#df = pd.read_csv('/content/SBIN.NS.csv')\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": { - "id": "7LaYGXsfN-8y" - }, - "outputs": [], - "source": [ - "# Drop the 'Date' and 'Adj Close' columns\n", - "df.drop(['Date', 'Adj Close'], axis=1, inplace=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 206 - }, - "id": "pqbTBdnBOKJc", - "outputId": "21da8a7f-4f3e-4f4f-e32b-3b90c230ce55" - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
OpenHighLowCloseVolume
018.69114718.97892218.54018418.82324043733533.0
118.89400518.96476717.73819218.22410656167280.0
218.32789218.56848917.64383917.73819268296318.0
317.50231217.83254217.22397217.67686386073880.0
417.73819217.78536617.45985217.57779376613039.0
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ], - "text/plain": [ - " Open High Low Close Volume\n", - "0 18.691147 18.978922 18.540184 18.823240 43733533.0\n", - "1 18.894005 18.964767 17.738192 18.224106 56167280.0\n", - "2 18.327892 18.568489 17.643839 17.738192 68296318.0\n", - "3 17.502312 17.832542 17.223972 17.676863 86073880.0\n", - "4 17.738192 17.785366 17.459852 17.577793 76613039.0" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": { - "id": "dydEPoNeM6eN" - }, - "outputs": [], - "source": [ - "# Handle missing values\n", - "imputer = SimpleImputer(strategy='mean')\n", - "df = pd.DataFrame(imputer.fit_transform(df), columns=df.columns)" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": { - "id": "OQ3cGqgTMBwt" - }, - "outputs": [], - "source": [ - "# Select features and target variable\n", - "X = df[['Open', 'High', 'Low', 'Volume']]\n", - "y = df['Close']" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": { - "id": "9Oz-bwJOMEWD" - }, - "outputs": [], - "source": [ - "# Split the data into training and testing sets\n", - "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "id": "ugapDyXODtn3" - }, - "outputs": [], - "source": [ - "# Scale the features using Min-Max scaling\n", - "scaler = MinMaxScaler()\n", - "X_train_scaled = scaler.fit_transform(X_train)\n", - "X_test_scaled = scaler.transform(X_test)" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "997ZEgibCZIO", - "outputId": "2a45a8e3-71b0-47f3-bd66-91bcdc028c76" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "(5659, 4)" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - 
"source": [ - "X_train.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "bmtt76RuCeyG", - "outputId": "658075af-e75d-45b1-f6cf-756e349a32d1" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "(1415, 4)" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "X_test.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "CeJkUJ92Ciqd", - "outputId": "93dec527-ea2e-42e6-c70b-a9491c71d917" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "(5659,)" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "y_train.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "7HGC7VuTCjWc", - "outputId": "64dc2569-b4b4-4c2e-d416-1cf77c41ac75" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "(1415,)" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "y_test.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Function to evaluate and print RMSE, MAE, and MAPE\n", - "def evaluate_model(model, X_test, y_test):\n", - " predictions = model.predict(X_test)\n", - " rmse = np.sqrt(mean_squared_error(y_test, predictions))\n", - " mae = mean_absolute_error(y_test, predictions)\n", - " mape = mean_absolute_percentage_error(y_test, predictions)\n", - "\n", - " print(f\"RMSE: {rmse}\")\n", - " print(f\"MAE: {mae}\")\n", - " print(f\"MAPE: {mape}\\n\")\n", - " \n", - " return rmse, mae, mape\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "metrics = {\n", - " \"Model\": [],\n", - 
" \"RMSE\": [],\n", - " \"MAE\": [],\n", - " \"MAPE\": []\n", - "}" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "c6Ek8jRlO2_I" - }, - "source": [ - "## 1. LINEAR REGRESSION" - ] - }, + "cells": [ + { + "cell_type": "code", + "execution_count": 161, + "metadata": { + "id": "qCDSjVhXLr_Z" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.svm import SVR\n", + "from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor\n", + "from sklearn.tree import DecisionTreeRegressor\n", + "from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error\n", + "from sklearn.neighbors import KNeighborsRegressor\n", + "from tensorflow.keras.models import Sequential\n", + "from tensorflow.keras.layers import Dense,LSTM" + ] + }, + { + "cell_type": "code", + "execution_count": 179, + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": 33, - "metadata": { - "id": "RdZ1SpzdMHAJ" - }, - "outputs": [], - "source": [ - "# Create a linear regression model\n", - "model1 = LinearRegression()" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Running in local system\n" + ] + } + ], + "source": [ + "try:\n", + " import google.colab\n", + " In_colab=True\n", + "except:\n", + " In_colab=False\n", + "\n", + "if(In_colab):\n", + " print(\"Running in google colab\")\n", + " from google.colab import drive\n", + " drive.mount('/content/drive')\n", + " df = pd.read_csv('drive/My Drive/Colab Notebooks/Stock Price Prediction RNN/SBIN.csv')\n", + "else:\n", + " print(\"Running in local system\")\n", + " 
path=r'C:\\Users\\SHRISTI\\OneDrive\\Desktop\\GitHub\\Stock-Price-Prediction\\Data\\SBIN.csv'\n", + " df=pd.read_csv(path)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 }, + "id": "Sc4id6VxL8BS", + "outputId": "568d039c-faf4-4636-bfc1-70b9ef83367b" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 34, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "mPM035IzMY04", - "outputId": "07379dba-cfe8-4814-b972-d08b12f224ac" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "5286 257.350006\n", - "3408 129.464996\n", - "5477 279.350006\n", - "6906 588.500000\n", - "530 21.644367\n", - "Name: Close, dtype: float64" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DateOpenHighLowCloseAdj CloseVolume
001-01-199618.69114718.97892218.54018418.82324012.40993143733533.0
102-01-199618.89400518.96476717.73819218.22410612.01493156167280.0
203-01-199618.32789218.56848917.64383917.73819211.69457768296318.0
304-01-199617.50231217.83254217.22397217.67686311.65414286073880.0
405-01-199617.73819217.78536617.45985217.57779311.58882776613039.0
\n", + "
" ], - "source": [ - "y_train.head()" + "text/plain": [ + " Date Open High Low Close Adj Close \\\n", + "0 01-01-1996 18.691147 18.978922 18.540184 18.823240 12.409931 \n", + "1 02-01-1996 18.894005 18.964767 17.738192 18.224106 12.014931 \n", + "2 03-01-1996 18.327892 18.568489 17.643839 17.738192 11.694577 \n", + "3 04-01-1996 17.502312 17.832542 17.223972 17.676863 11.654142 \n", + "4 05-01-1996 17.738192 17.785366 17.459852 17.577793 11.588827 \n", + "\n", + " Volume \n", + "0 43733533.0 \n", + "1 56167280.0 \n", + "2 68296318.0 \n", + "3 86073880.0 \n", + "4 76613039.0 " ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load the dataset\n", + "#df = pd.read_csv('/content/SBIN.NS.csv')\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "id": "7LaYGXsfN-8y" + }, + "outputs": [], + "source": [ + "# Drop the 'Date' and 'Adj Close' columns\n", + "df.drop(['Date', 'Adj Close'], axis=1, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 }, + "id": "pqbTBdnBOKJc", + "outputId": "21da8a7f-4f3e-4f4f-e32b-3b90c230ce55" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 35, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 74 - }, - "id": "qBhQ9HbYMI3d", - "outputId": "52e0655f-1d23-47b7-decc-7a7ca35c0470" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" - ], - "text/plain": [ - "LinearRegression()" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
OpenHighLowCloseVolume
018.69114718.97892218.54018418.82324043733533.0
118.89400518.96476717.73819218.22410656167280.0
218.32789218.56848917.64383917.73819268296318.0
317.50231217.83254217.22397217.67686386073880.0
417.73819217.78536617.45985217.57779376613039.0
\n", + "
" ], - "source": [ - "# Train the model\n", - "model1.fit(X_train, y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": { - "id": "X269co2kMS4z" - }, - "outputs": [], - "source": [ - "rmse, mae, mape = evaluate_model(model1, X_test_scaled, y_test)\n", - "metrics[\"Model\"].append(\"Linear Regressor\")\n", - "metrics[\"RMSE\"].append(rmse)\n", - "metrics[\"MAE\"].append(mae)\n", - "metrics[\"MAPE\"].append(mape)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GxtMzlg-gR2P" - }, - "source": [ - "## 2. SVR" + "text/plain": [ + " Open High Low Close Volume\n", + "0 18.691147 18.978922 18.540184 18.823240 43733533.0\n", + "1 18.894005 18.964767 17.738192 18.224106 56167280.0\n", + "2 18.327892 18.568489 17.643839 17.738192 68296318.0\n", + "3 17.502312 17.832542 17.223972 17.676863 86073880.0\n", + "4 17.738192 17.785366 17.459852 17.577793 76613039.0" ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "id": "dydEPoNeM6eN" + }, + "outputs": [], + "source": [ + "# Handle missing values\n", + "imputer = SimpleImputer(strategy='mean')\n", + "df = pd.DataFrame(imputer.fit_transform(df), columns=df.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "id": "OQ3cGqgTMBwt" + }, + "outputs": [], + "source": [ + "# Select features and target variable\n", + "X = df[['Open', 'High', 'Low', 'Volume']]\n", + "y = df['Close']" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "id": "9Oz-bwJOMEWD" + }, + "outputs": [], + "source": [ + "# Split the data into training and testing sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "id": "ugapDyXODtn3" + }, + "outputs": [], + "source": [ + "# Scale 
the features using Min-Max scaling\n", + "scaler = MinMaxScaler()\n", + "X_train_scaled = scaler.fit_transform(X_train)\n", + "X_test_scaled = scaler.transform(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "997ZEgibCZIO", + "outputId": "2a45a8e3-71b0-47f3-bd66-91bcdc028c76" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 40, - "metadata": { - "id": "0xQewd7QWTtq" - }, - "outputs": [], - "source": [ - "# Create an SVR model\n", - "model2 = SVR()" + "data": { + "text/plain": [ + "(5659, 4)" ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "bmtt76RuCeyG", + "outputId": "658075af-e75d-45b1-f6cf-756e349a32d1" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 42, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 74 - }, - "id": "22SaCsQmfhgP", - "outputId": "2121e992-399d-4b78-e42c-fc20b9d52189" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
SVR()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" - ], - "text/plain": [ - "SVR()" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Train the model\n", - "model2.fit(X_train, y_train)" + "data": { + "text/plain": [ + "(1415, 4)" ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "CeJkUJ92Ciqd", + "outputId": "93dec527-ea2e-42e6-c70b-a9491c71d917" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 43, - "metadata": { - "id": "OQ1nL4oYfkAC" - }, - "outputs": [], - "source": [ - "rmse, mae, mape = evaluate_model(model2, X_test_scaled, y_test)\n", - "metrics[\"Model\"].append(\"SVR\")\n", - "metrics[\"RMSE\"].append(rmse)\n", - "metrics[\"MAE\"].append(mae)\n", - "metrics[\"MAPE\"].append(mape)" + "data": { + "text/plain": [ + "(5659,)" ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_train.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "7HGC7VuTCjWc", + "outputId": "64dc2569-b4b4-4c2e-d416-1cf77c41ac75" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "hcIfVMWdgcKt" - }, - "source": [ - "## 3. 
Random Forest" + "data": { + "text/plain": [ + "(1415,)" ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "# Function to evaluate and print RMSE, MAE, and MAPE\n", + "def evaluate_model(model, X_test, y_test):\n", + " predictions = model.predict(X_test)\n", + " rmse = np.sqrt(mean_squared_error(y_test, predictions))\n", + " mae = mean_absolute_error(y_test, predictions)\n", + " mape = mean_absolute_percentage_error(y_test, predictions)\n", + "\n", + " print(f\"RMSE: {rmse}\")\n", + " print(f\"MAE: {mae}\")\n", + " print(f\"MAPE: {mape}\\n\")\n", + " \n", + " return rmse, mae, mape\n" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "metrics = {\n", + " \"Model\": [],\n", + " \"RMSE\": [],\n", + " \"MAE\": [],\n", + " \"MAPE\": []\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "c6Ek8jRlO2_I" + }, + "source": [ + "## 1. 
LINEAR REGRESSION" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": { + "id": "RdZ1SpzdMHAJ" + }, + "outputs": [], + "source": [ + "# Create a linear regression model\n", + "model1 = LinearRegression()" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "mPM035IzMY04", + "outputId": "07379dba-cfe8-4814-b972-d08b12f224ac" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 46, - "metadata": { - "id": "f7raXT_hf2ij" - }, - "outputs": [], - "source": [ - "model3 = RandomForestRegressor()" + "data": { + "text/plain": [ + "5286 257.350006\n", + "3408 129.464996\n", + "5477 279.350006\n", + "6906 588.500000\n", + "530 21.644367\n", + "Name: Close, dtype: float64" ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_train.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 163, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 74 }, + "id": "qBhQ9HbYMI3d", + "outputId": "52e0655f-1d23-47b7-decc-7a7ca35c0470" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 48, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 74 - }, - "id": "fF002Yepgk55", - "outputId": "d148c589-4879-4e2d-8b0f-5b5ca01a2a53" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
RandomForestRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" - ], - "text/plain": [ - "RandomForestRegressor()" - ] - }, - "execution_count": 48, - "metadata": {}, - "output_type": "execute_result" - } + "data": { + "text/html": [ + "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], - "source": [ - "# Train the model\n", - "model3.fit(X_train, y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": { - "id": "8nRU_pzEgnCt" - }, - "outputs": [], - "source": [ - "rmse, mae, mape = evaluate_model(model3, X_test_scaled, y_test)\n", - "metrics[\"Model\"].append(\"Random Forest\")\n", - "metrics[\"RMSE\"].append(rmse)\n", - "metrics[\"MAE\"].append(mae)\n", - "metrics[\"MAPE\"].append(mape)" + "text/plain": [ + "LinearRegression()" ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mZsLwLivhLGH" - }, - "source": [ - "## 4. Gradient Boosting Models (GBM)" - ] - }, + }, + "execution_count": 163, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Train the model\n", + "model1.fit(X_train_scaled, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": { + "id": "X269co2kMS4z" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 52, - "metadata": { - "id": "TI8idoxOg6jF" - }, - "outputs": [], - "source": [ - "model4 = GradientBoostingRegressor()" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "RMSE: 1.6881364651923558\n", + "MAE: 0.9433353486266928\n", + "MAPE: 0.006085435968276741\n", + "\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model1, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"Linear Regressor\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GxtMzlg-gR2P" + }, + "source": [ + "## 2. 
SVR" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": { + "id": "0xQewd7QWTtq" + }, + "outputs": [], + "source": [ + "# Create an SVR model\n", + "model2 = SVR()" + ] + }, + { + "cell_type": "code", + "execution_count": 165, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 74 }, + "id": "22SaCsQmfhgP", + "outputId": "2121e992-399d-4b78-e42c-fc20b9d52189" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 54, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 248 - }, - "id": "2gpbDxshhexj", - "outputId": "b2b1a681-7ede-4d66-be5d-1a8606d0f470" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
-              "             colsample_bylevel=None, colsample_bynode=None,\n",
-              "             colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
-              "             enable_categorical=False, eval_metric=None, feature_types=None,\n",
-              "             gamma=None, grow_policy=None, importance_type=None,\n",
-              "             interaction_constraints=None, learning_rate=None, max_bin=None,\n",
-              "             max_cat_threshold=None, max_cat_to_onehot=None,\n",
-              "             max_delta_step=None, max_depth=None, max_leaves=None,\n",
-              "             min_child_weight=None, missing=nan, monotone_constraints=None,\n",
-              "             multi_strategy=None, n_estimators=None, n_jobs=None,\n",
-              "             num_parallel_tree=None, random_state=None, ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" - ], - "text/plain": [ - "XGBRegressor(base_score=None, booster=None, callbacks=None,\n", - " colsample_bylevel=None, colsample_bynode=None,\n", - " colsample_bytree=None, device=None, early_stopping_rounds=None,\n", - " enable_categorical=False, eval_metric=None, feature_types=None,\n", - " gamma=None, grow_policy=None, importance_type=None,\n", - " interaction_constraints=None, learning_rate=None, max_bin=None,\n", - " max_cat_threshold=None, max_cat_to_onehot=None,\n", - " max_delta_step=None, max_depth=None, max_leaves=None,\n", - " min_child_weight=None, missing=nan, monotone_constraints=None,\n", - " multi_strategy=None, n_estimators=None, n_jobs=None,\n", - " num_parallel_tree=None, random_state=None, ...)" - ] - }, - "execution_count": 54, - "metadata": {}, - "output_type": "execute_result" - } + "data": { + "text/html": [ + "
SVR()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], - "source": [ - "# Train the model\n", - "model4.fit(X_train, y_train)" + "text/plain": [ + "SVR()" ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": { - "id": "Jj9DXdUPhh9V" - }, - "outputs": [], - "source": [ - "rmse, mae, mape = evaluate_model(model4, X_test_scaled, y_test)\n", - "metrics[\"Model\"].append(\"GBM\")\n", - "metrics[\"RMSE\"].append(rmse)\n", - "metrics[\"MAE\"].append(mae)\n", - "metrics[\"MAPE\"].append(mape)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "d8nSGoyuh9dx" - }, - "source": [ - "## 5. Extreme Gradient Boosting (XGBoost)" - ] - }, + }, + "execution_count": 165, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Train the model\n", + "model2.fit(X_train_scaled, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": { + "id": "OQ1nL4oYfkAC" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 58, - "metadata": { - "id": "DyhhdlZAhx94" - }, - "outputs": [], - "source": [ - "import xgboost as xgb\n", - "# Create an XGBoost model\n", - "model5 = xgb.XGBRegressor()" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "RMSE: 17.574809673127547\n", + "MAE: 6.278157692070486\n", + "MAPE: 0.09040265035344064\n", + "\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model2, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"SVR\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hcIfVMWdgcKt" + }, + "source": [ + "## 3. 
Random Forest" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": { + "id": "f7raXT_hf2ij" + }, + "outputs": [], + "source": [ + "model3 = RandomForestRegressor()" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 74 }, + "id": "fF002Yepgk55", + "outputId": "d148c589-4879-4e2d-8b0f-5b5ca01a2a53" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 60, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 248 - }, - "id": "RAIwxIp5iH9Z", - "outputId": "d2b4aa97-7e07-4015-c308-76a292b0929f" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
-              "             colsample_bylevel=None, colsample_bynode=None,\n",
-              "             colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
-              "             enable_categorical=False, eval_metric=None, feature_types=None,\n",
-              "             gamma=None, grow_policy=None, importance_type=None,\n",
-              "             interaction_constraints=None, learning_rate=None, max_bin=None,\n",
-              "             max_cat_threshold=None, max_cat_to_onehot=None,\n",
-              "             max_delta_step=None, max_depth=None, max_leaves=None,\n",
-              "             min_child_weight=None, missing=nan, monotone_constraints=None,\n",
-              "             multi_strategy=None, n_estimators=None, n_jobs=None,\n",
-              "             num_parallel_tree=None, random_state=None, ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" - ], - "text/plain": [ - "XGBRegressor(base_score=None, booster=None, callbacks=None,\n", - " colsample_bylevel=None, colsample_bynode=None,\n", - " colsample_bytree=None, device=None, early_stopping_rounds=None,\n", - " enable_categorical=False, eval_metric=None, feature_types=None,\n", - " gamma=None, grow_policy=None, importance_type=None,\n", - " interaction_constraints=None, learning_rate=None, max_bin=None,\n", - " max_cat_threshold=None, max_cat_to_onehot=None,\n", - " max_delta_step=None, max_depth=None, max_leaves=None,\n", - " min_child_weight=None, missing=nan, monotone_constraints=None,\n", - " multi_strategy=None, n_estimators=None, n_jobs=None,\n", - " num_parallel_tree=None, random_state=None, ...)" - ] - }, - "execution_count": 60, - "metadata": {}, - "output_type": "execute_result" - } + "data": { + "text/html": [ + "
RandomForestRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], - "source": [ - "# Train the model\n", - "model5.fit(X_train, y_train)" + "text/plain": [ + "RandomForestRegressor()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "rmse, mae, mape = evaluate_model(model5, X_test_scaled, y_test)\n", - "metrics[\"Model\"].append(\"XGBoost\")\n", - "metrics[\"RMSE\"].append(rmse)\n", - "metrics[\"MAE\"].append(mae)\n", - "metrics[\"MAPE\"].append(mape)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "A_J776rtiovq" - }, - "source": [ - "## 6. AdaBoostRegressor" - ] - }, + }, + "execution_count": 167, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Train the model\n", + "model3.fit(X_train_scaled, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": { + "id": "8nRU_pzEgnCt" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 64, - "metadata": { - "id": "HNq66cXRiYPJ" - }, - "outputs": [], - "source": [ - "model6 = AdaBoostRegressor()" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "RMSE: 2.2053909891328036\n", + "MAE: 1.2608162799481166\n", + "MAPE: 0.008015308194076972\n", + "\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model3, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"Random Forest\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mZsLwLivhLGH" + }, + "source": [ + "## 4. 
Gradient Boosting Models (GBM)" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": { + "id": "TI8idoxOg6jF" + }, + "outputs": [], + "source": [ + "model4 = GradientBoostingRegressor()" + ] + }, + { + "cell_type": "code", + "execution_count": 169, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 248 }, + "id": "2gpbDxshhexj", + "outputId": "b2b1a681-7ede-4d66-be5d-1a8606d0f470" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 66, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 74 - }, - "id": "P0oB5wjQivBr", - "outputId": "8726c583-6782-4504-b0ac-d2ef4ccbca4c" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
AdaBoostRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" - ], - "text/plain": [ - "AdaBoostRegressor()" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } + "data": { + "text/html": [ + "
GradientBoostingRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], - "source": [ - "# Train the model\n", - "model6.fit(X_train, y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": { - "id": "Bf1m5ukOi2VM" - }, - "outputs": [], - "source": [ - "rmse, mae, mape = evaluate_model(model6, X_test_scaled, y_test)\n", - "metrics[\"Model\"].append(\"AdaBoost Regressor\")\n", - "metrics[\"RMSE\"].append(rmse)\n", - "metrics[\"MAE\"].append(mae)\n", - "metrics[\"MAPE\"].append(mape)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Q9DzOt3CkWFX" - }, - "source": [ - "## 7. Decision Tree" + "text/plain": [ + "GradientBoostingRegressor()" ] - }, + }, + "execution_count": 169, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Train the model\n", + "model4.fit(X_train_scaled, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": { + "id": "Jj9DXdUPhh9V" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 70, - "metadata": { - "id": "23DZ2biSjF9a" - }, - "outputs": [], - "source": [ - "model7 = DecisionTreeRegressor()" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "RMSE: 2.6985863368468084\n", + "MAE: 1.692542658558929\n", + "MAPE: 0.011883244132236716\n", + "\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model4, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"GBM\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d8nSGoyuh9dx" + }, + "source": [ + "## 5. 
Extreme Gradient Boosting (XGBoost)" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": { + "id": "DyhhdlZAhx94" + }, + "outputs": [], + "source": [ + "import xgboost as xgb\n", + "# Create an XGBoost model\n", + "model5 = xgb.XGBRegressor()" + ] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 248 }, + "id": "RAIwxIp5iH9Z", + "outputId": "d2b4aa97-7e07-4015-c308-76a292b0929f" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 72, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 74 - }, - "id": "6mQEQf-ykc9F", - "outputId": "f1a62020-4125-4aea-e7e4-11acffdc5169" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
DecisionTreeRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" - ], - "text/plain": [ - "DecisionTreeRegressor()" - ] - }, - "execution_count": 72, - "metadata": {}, - "output_type": "execute_result" - } + "data": { + "text/html": [ + "
XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
+       "             colsample_bylevel=None, colsample_bynode=None,\n",
+       "             colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
+       "             enable_categorical=False, eval_metric=None, feature_types=None,\n",
+       "             gamma=None, grow_policy=None, importance_type=None,\n",
+       "             interaction_constraints=None, learning_rate=None, max_bin=None,\n",
+       "             max_cat_threshold=None, max_cat_to_onehot=None,\n",
+       "             max_delta_step=None, max_depth=None, max_leaves=None,\n",
+       "             min_child_weight=None, missing=nan, monotone_constraints=None,\n",
+       "             multi_strategy=None, n_estimators=None, n_jobs=None,\n",
+       "             num_parallel_tree=None, random_state=None, ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], - "source": [ - "# Train the model\n", - "model7.fit(X_train, y_train)" + "text/plain": [ + "XGBRegressor(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=None, device=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None, feature_types=None,\n", + " gamma=None, grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=None, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=None, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " multi_strategy=None, n_estimators=None, n_jobs=None,\n", + " num_parallel_tree=None, random_state=None, ...)" ] - }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": { - "id": "BFJ9q_tvkgRC" - }, - "outputs": [], - "source": [ - "rmse, mae, mape = evaluate_model(model7, X_test_scaled, y_test)\n", - "metrics[\"Model\"].append(\"Decision Tree\")\n", - "metrics[\"RMSE\"].append(rmse)\n", - "metrics[\"MAE\"].append(mae)\n", - "metrics[\"MAPE\"].append(mape)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "LH-B-Xd6k5UD" - }, - "source": [ - "## 8. 
KNeighborsRegressor(KNN)" - ] - }, + }, + "execution_count": 171, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Train the model\n", + "model5.fit(X_train_scaled, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": 76, - "metadata": { - "id": "JVDSed7yktFY" - }, - "outputs": [], - "source": [ - "# Create a KNN model\n", - "model8 = KNeighborsRegressor()" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "RMSE: 2.733930065274145\n", + "MAE: 1.502457380471909\n", + "MAPE: 0.010026410639661481\n", + "\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model5, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"XGBoost\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A_J776rtiovq" + }, + "source": [ + "## 6. AdaBoostRegressor" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": { + "id": "HNq66cXRiYPJ" + }, + "outputs": [], + "source": [ + "model6 = AdaBoostRegressor()" + ] + }, + { + "cell_type": "code", + "execution_count": 173, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 74 }, + "id": "P0oB5wjQivBr", + "outputId": "8726c583-6782-4504-b0ac-d2ef4ccbca4c" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 78, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 74 - }, - "id": "9fn64o-ZlBka", - "outputId": "dc5e6af2-de37-46ee-cde7-e0a3baa31a1f" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
KNeighborsRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" - ], - "text/plain": [ - "KNeighborsRegressor()" - ] - }, - "execution_count": 78, - "metadata": {}, - "output_type": "execute_result" - } + "data": { + "text/html": [ + "
AdaBoostRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], - "source": [ - "# Train the model\n", - "model8.fit(X_train, y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "metadata": { - "id": "hbfbbjcSlDn7" - }, - "outputs": [], - "source": [ - "rmse, mae, mape = evaluate_model(model8, X_test_scaled, y_test)\n", - "metrics[\"Model\"].append(\"KNN\")\n", - "metrics[\"RMSE\"].append(rmse)\n", - "metrics[\"MAE\"].append(mae)\n", - "metrics[\"MAPE\"].append(mape)" + "text/plain": [ + "AdaBoostRegressor()" ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "X5XtlzMXljps" - }, - "source": [ - "## 9. Artificial Neural Networks (ANN)" - ] - }, - { - "cell_type": "code", - "execution_count": 84, - "metadata": { - "id": "vd1fDjQiltP4" - }, - "outputs": [], - "source": [ - "# Create an ANN model\n", - "model9 = Sequential()\n", - "model9.add(Dense(32, activation='relu', input_shape=(X_train.shape[1],)))\n", - "model9.add(Dense(16, activation='relu'))\n", - "model9.add(Dense(1, activation='linear'))" - ] - }, - { - "cell_type": "code", - "execution_count": 85, - "metadata": { - "id": "ZIf94WLMlv04" - }, - "outputs": [], - "source": [ - "# Compile the model\n", - "model9.compile(loss='mean_squared_error', optimizer='adam')" - ] - }, + }, + "execution_count": 173, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Train the model\n", + "model6.fit(X_train_scaled, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": { + "id": "Bf1m5ukOi2VM" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 86, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "FX5DTKqslxWf", - "outputId": "9253b26c-1a79-4390-975e-d14c28a5e2a8" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 86, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Train the model\n", - "model9.fit(X_train_scaled, y_train, epochs=100, batch_size=32, verbose=0)" - ] + "name": 
"stdout", + "output_type": "stream", + "text": [ + "RMSE: 9.175482477551942\n", + "MAE: 7.527617905792734\n", + "MAPE: 0.1858930099598583\n", + "\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model6, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"AdaBoost Regressor\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q9DzOt3CkWFX" + }, + "source": [ + "## 7. Decision Tree" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": { + "id": "23DZ2biSjF9a" + }, + "outputs": [], + "source": [ + "model7 = DecisionTreeRegressor()" + ] + }, + { + "cell_type": "code", + "execution_count": 175, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 74 }, + "id": "6mQEQf-ykc9F", + "outputId": "f1a62020-4125-4aea-e7e4-11acffdc5169" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 87, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "OVW2qpNsmGVq", - "outputId": "34343782-f560-4dee-c307-ff0d0c52ab5a" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "45/45 [==============================] - 0s 1ms/step\n" - ] - } + "data": { + "text/html": [ + "
DecisionTreeRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], - "source": [ - "rmse, mae, mape = evaluate_model(model9, X_test_scaled, y_test)\n", - "metrics[\"Model\"].append(\"ANN\")\n", - "metrics[\"RMSE\"].append(rmse)\n", - "metrics[\"MAE\"].append(mae)\n", - "metrics[\"MAPE\"].append(mape)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vjSMQNcOnFPJ" - }, - "source": [ - "## 10. LSTM(Long Short term Memory)" - ] - }, - { - "cell_type": "code", - "execution_count": 92, - "metadata": { - "id": "uACvajfImrbB" - }, - "outputs": [], - "source": [ - "# Reshape the input data for LSTM\n", - "n_features = X_train_scaled.shape[1]\n", - "n_steps = 10\n", - "n_samples_train = X_train_scaled.shape[0] - n_steps + 1\n", - "n_samples_test = X_test_scaled.shape[0] - n_steps + 1\n", - "\n", - "# Reshape the input data\n", - "X_train_reshaped = np.array([X_train_scaled[i:i+n_steps, :] for i in range(n_samples_train)])\n", - "X_test_reshaped = np.array([X_test_scaled[i:i+n_steps, :] for i in range(n_samples_test)])\n" - ] - }, - { - "cell_type": "code", - "execution_count": 93, - "metadata": { - "id": "r066pVYpnXH5" - }, - "outputs": [], - "source": [ - "# Create an LSTM model\n", - "model = Sequential()\n", - "model.add(LSTM(64, activation='relu', input_shape=(n_steps, n_features)))\n", - "model.add(Dense(1))\n" - ] - }, - { - "cell_type": "code", - "execution_count": 94, - "metadata": { - "id": "YpSfHu6gov35" - }, - "outputs": [], - "source": [ - "# Compile the model\n", - "model.compile(loss='mean_squared_error', optimizer='adam')\n" + "text/plain": [ + "DecisionTreeRegressor()" ] - }, + }, + "execution_count": 175, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Train the model\n", + "model7.fit(X_train_scaled, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": { + "id": "BFJ9q_tvkgRC" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 95, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": 
"0vHjcluaoxzP", - "outputId": "1eaafd31-9f91-4655-f437-e9199c0f7933" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 95, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Train the model\n", - "model.fit(X_train_reshaped, y_train[n_steps-1:], epochs=100, batch_size=32, verbose=0)" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "RMSE: 3.12966540689625\n", + "MAE: 1.6497286032971983\n", + "MAPE: 0.010286427942970355\n", + "\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model7, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"Decision Tree\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LH-B-Xd6k5UD" + }, + "source": [ + "## 8. KNeighborsRegressor(KNN)" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": { + "id": "JVDSed7yktFY" + }, + "outputs": [], + "source": [ + "# Create a KNN model\n", + "model8 = KNeighborsRegressor()" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 74 }, + "id": "9fn64o-ZlBka", + "outputId": "dc5e6af2-de37-46ee-cde7-e0a3baa31a1f" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 96, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "gEE06_TjozYv", - "outputId": "30306af7-2ec8-4733-db96-d3416a7fc6d4" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "44/44 [==============================] - 0s 4ms/step\n" - ] - } + "data": { + "text/html": [ + "
KNeighborsRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], - "source": [ - "rmse, mae, mape = evaluate_model(model10, X_test_scaled, y_test)\n", - "metrics[\"Model\"].append(\"LSTM\")\n", - "metrics[\"RMSE\"].append(rmse)\n", - "metrics[\"MAE\"].append(mae)\n", - "metrics[\"MAPE\"].append(mape)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Create a DataFrame for metrics\n", - "metrics_df = pd.DataFrame(metrics)\n", - "\n", - "# Plot RMSE, MAE, and MAPE for each model\n", - "plt.figure(figsize=(15, 5))\n", - "\n", - "# RMSE Plot\n", - "plt.subplot(1, 3, 1)\n", - "plt.bar(metrics_df['Model'], metrics_df['RMSE'], color='lightblue')\n", - "plt.xlabel('RMSE')\n", - "plt.title('RMSE for Different Models')\n", - "plt.tight_layout()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# MAE Plot\n", - "plt.subplot(1, 3, 2)\n", - "plt.bar(metrics_df['Model'], metrics_df['MAE'], color='lightgreen')\n", - "plt.xlabel('MAE')\n", - "plt.title('MAE for Different Models')\n", - "plt.tight_layout()\n", - "plt.show()" + "text/plain": [ + "KNeighborsRegressor()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# MAPE Plot\n", - "plt.subplot(1, 3, 3)\n", - "plt.bar(metrics_df['Model'], metrics_df['MAPE'], color='salmon')\n", - "plt.xlabel('MAPE')\n", - "plt.title('MAPE for Different Models')\n", - "plt.tight_layout()\n", - "plt.show()" - ] - }, + }, + "execution_count": 177, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Train the model\n", + "model8.fit(X_train_scaled, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": { + "id": "hbfbbjcSlDn7" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Using of LightGBM and CatBoost For Optimizing the model accuracy and time complexity" - ] + "name": "stdout", + "output_type": 
"stream", + "text": [ + "RMSE: 3.0274590148039873\n", + "MAE: 1.7525904376439672\n", + "MAPE: 0.013668115353592272\n", + "\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model8, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"KNN\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "X5XtlzMXljps" + }, + "source": [ + "## 9. Artificial Neural Networks (ANN)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create an ANN model\n", + "model9 = Sequential()\n", + "model9.add(Dense(32, activation='relu', input_shape=(X_train.shape[1],)))\n", + "model9.add(Dense(16, activation='relu'))\n", + "model9.add(Dense(1, activation='linear'))" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": { + "id": "ZIf94WLMlv04" + }, + "outputs": [], + "source": [ + "# Compile the model\n", + "model9.compile(loss='mean_squared_error', optimizer='adam')" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "FX5DTKqslxWf", + "outputId": "9253b26c-1a79-4390-975e-d14c28a5e2a8" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Import necessary libraries\n", - "import lightgbm as lgb\n", - "from catboost import CatBoostRegressor\n", - "from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, accuracy_score, precision_score, confusion_matrix, recall_score, f1_score\n", - "\n", - "# Function to train and evaluate a model\n", - "def train_and_evaluate_model(model, X_train, X_test, y_train, y_test):\n", - " model.fit(X_train, y_train)\n", - " pred = model.predict(X_test)\n", - " rmse = np.sqrt(mean_squared_error(y_test, pred))\n", - " mae = 
mean_absolute_error(y_test, pred)\n", - " mape = mean_absolute_percentage_error(y_test, pred)\n", - " accuracy = accuracy_score(y_test > pred, y_test > pred.round())\n", - " precision = precision_score(y_test > pred, y_test > pred.round())\n", - " confusion = confusion_matrix(y_test > pred, y_test > pred.round())\n", - " recall = recall_score(y_test > pred, y_test > pred.round())\n", - " f1 = f1_score(y_test > pred, y_test > pred.round())\n", - " return rmse, mae, mape, accuracy, precision, confusion, recall, f1\n", - "\n", - "# Train and evaluate LightGBM model for from this directly print accuracy \n", - "model_lightgbm = lgb.LGBMRegressor()\n", - "metrics_lightgbm = train_and_evaluate_model(model_lightgbm, X_train, X_test, y_train, y_test)\n", - "print(\"LightGBM Metrics:\", metrics_lightgbm)\n", - "\n", - "# Train and evaluate CatBoost model\n", - "model_catboost = CatBoostRegressor(verbose=0)\n", - "metrics_catboost = train_and_evaluate_model(model_catboost, X_train, X_test, y_train, y_test)\n", - "print(\"CatBoost Metrics:\", metrics_catboost)" + "data": { + "text/plain": [ + "" ] + }, + "execution_count": 126, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Train the model\n", + "model9.fit(X_train_scaled, y_train, epochs=100, batch_size=32, verbose=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "OVW2qpNsmGVq", + "outputId": "34343782-f560-4dee-c307-ff0d0c52ab5a" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1m45/45\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 3ms/step\n", + "RMSE: 2.801001091255311\n", + "MAE: 1.7605365826618848\n", + "MAPE: 0.0126215060590655\n", + "\n" + ] } - ], - "metadata": { + ], + "source": [ + "rmse, mae, mape = 
evaluate_model(model9, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"ANN\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vjSMQNcOnFPJ" + }, + "source": [ + "## 10. LSTM (Long Short-Term Memory)" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "metadata": { + "id": "uACvajfImrbB" + }, + "outputs": [], + "source": [ + "# Reshape the input data for LSTM\n", + "n_features = X_train_scaled.shape[1]\n", + "n_steps = 10\n", + "n_samples_train = X_train_scaled.shape[0] - n_steps + 1\n", + "n_samples_test = X_test_scaled.shape[0] - n_steps + 1\n", + "\n", + "# Reshape the input data\n", + "X_train_reshaped = np.array([X_train_scaled[i:i+n_steps, :] for i in range(n_samples_train)])\n", + "X_test_reshaped = np.array([X_test_scaled[i:i+n_steps, :] for i in range(n_samples_test)])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create an LSTM model\n", + "model = Sequential()\n", + "model.add(LSTM(64, activation='relu', input_shape=(n_steps, n_features)))\n", + "model.add(Dense(1))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "metadata": { + "id": "YpSfHu6gov35" + }, + "outputs": [], + "source": [ + "# Compile the model\n", + "model.compile(loss='mean_squared_error', optimizer='adam')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "metadata": { "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" + "base_uri": "https://localhost:8080/" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" + "id": "0vHjcluaoxzP", + "outputId":
"1eaafd31-9f91-4655-f437-e9199c0f7933" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 137, + "metadata": {}, + "output_type": "execute_result" } + ], + "source": [ + "# Train the model\n", + "model.fit(X_train_reshaped, y_train[n_steps-1:], epochs=100, batch_size=32, verbose=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rmse, mae, mape = evaluate_model(model10, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"LSTM\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a DataFrame for metrics\n", + "metrics_df = pd.DataFrame(metrics)\n", + "\n", + "# Plot RMSE, MAE, and MAPE for each model\n", + "plt.figure(figsize=(15, 5))\n", + "\n", + "# RMSE Plot\n", + "plt.subplot(1, 3, 1)\n", + "plt.bar(metrics_df['Model'], metrics_df['RMSE'], color='lightblue')\n", + "plt.xlabel('RMSE')\n", + "plt.title('RMSE for Different Models')\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# MAE Plot\n", + "plt.subplot(1, 3, 2)\n", + "plt.bar(metrics_df['Model'], metrics_df['MAE'], color='lightgreen')\n", + "plt.xlabel('MAE')\n", + "plt.title('MAE for Different Models')\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# MAPE Plot\n", + "plt.subplot(1, 3, 3)\n", + "plt.bar(metrics_df['Model'], metrics_df['MAPE'], color='salmon')\n", + "plt.xlabel('MAPE')\n", + "plt.title('MAPE for Different Models')\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Using of LightGBM and CatBoost For Optimizing 
the model accuracy and time complexity" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import necessary libraries\n", + "import lightgbm as lgb\n", + "from catboost import CatBoostRegressor\n", + "from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, accuracy_score, precision_score, confusion_matrix, recall_score, f1_score\n", + "\n", + "# Function to train and evaluate a model\n", + "def train_and_evaluate_model(model, X_train, X_test, y_train, y_test):\n", + " model.fit(X_train, y_train)\n", + " pred = model.predict(X_test)\n", + " rmse = np.sqrt(mean_squared_error(y_test, pred))\n", + " mae = mean_absolute_error(y_test, pred)\n", + " mape = mean_absolute_percentage_error(y_test, pred)\n", + " accuracy = accuracy_score(y_test > pred, y_test > pred.round())\n", + " precision = precision_score(y_test > pred, y_test > pred.round())\n", + " confusion = confusion_matrix(y_test > pred, y_test > pred.round())\n", + " recall = recall_score(y_test > pred, y_test > pred.round())\n", + " f1 = f1_score(y_test > pred, y_test > pred.round())\n", + " return rmse, mae, mape, accuracy, precision, confusion, recall, f1\n", + "\n", + "# Train and evaluate the LightGBM model and print its metrics directly\n", + "model_lightgbm = lgb.LGBMRegressor()\n", + "metrics_lightgbm = train_and_evaluate_model(model_lightgbm, X_train, X_test, y_train, y_test)\n", + "print(\"LightGBM Metrics:\", metrics_lightgbm)\n", + "\n", + "# Train and evaluate CatBoost model\n", + "model_catboost = CatBoostRegressor(verbose=0)\n", + "metrics_catboost = train_and_evaluate_model(model_catboost, X_train, X_test, y_train, y_test)\n", + "print(\"CatBoost Metrics:\", metrics_catboost)" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0
+ "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 }