-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBankniftyprediction.py
140 lines (115 loc) · 5.29 KB
/
Bankniftyprediction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from xgboost import XGBRegressor
import matplotlib.pyplot as plt
# Load the dataset
data = pd.read_csv("C:/Users/snehj/OneDrive/Desktop/Forest final/bank-nifty-1h-data (1).csv")
data['Date'] = pd.to_datetime(data['Date'], dayfirst=True)
data.set_index('Date', inplace=True)
print("Dataset Head:")
print(data.head())
# Feature engineering
data['High-Low'] = data['High'] - data['Low']
data['Open-Close'] = data['Open'] - data['Close']
data['MA_10'] = data['Close'].rolling(window=10).mean()
data['MA_50'] = data['Close'].rolling(window=50).mean()
# Target variable: Shift to predict the next day's close
data['Next_Day_Close'] = data['Close'].shift(-1)
# Drop rows with NaN values
data = data.dropna()
# Define features and target
features = ['Open', 'High', 'Low', 'High-Low', 'Open-Close', 'MA_10', 'MA_50']
X = data[features]
y = data['Next_Day_Close']
# Normalize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
# Train XGBoost Model
model = XGBRegressor(n_estimators=200, learning_rate=0.1, max_depth=5, random_state=42)
model.fit(X_train, y_train)
# Predictions on the test set
y_pred = model.predict(X_test)
# Evaluate model
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
print(f"Root Mean Squared Error: {rmse:.2f}")
print(f"R² Score: {r2:.2f}")
print(f"Mean Absolute Error: {mae:.2f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
# Visualization for model performance
plt.figure(figsize=(10, 5))
plt.plot(y_test.values[:50], label="Actual Next Day Price", color="blue")
plt.plot(y_pred[:50], label="Predicted Next Day Price", color="red")
plt.title("Next Day Stock Price Prediction")
plt.xlabel("Data Points")
plt.ylabel("Stock Price")
plt.legend()
plt.show()
importances = model.feature_importances_
features_importance_df = pd.DataFrame({
'Feature': features,
'Importance': importances
}).sort_values(by='Importance', ascending=False)
plt.figure(figsize=(12, 6))
plt.bar(features_importance_df['Feature'], features_importance_df['Importance'])
plt.xticks(rotation=45)
plt.title('Feature Importances')
plt.show()
# Visualization to check for overfitting
plt.figure(figsize=(10, 5))
plt.plot(model.predict(X_train)[:50], label="Training Predictions", color="green")
plt.plot(y_train.values[:50], label="Training Actuals", color="orange")
plt.plot(y_test.values[:50], label="Testing Actuals", color="blue")
plt.plot(y_pred[:50], label="Testing Predictions", color="red")
plt.title("Overfitting Check: Training vs Testing Predictions")
plt.xlabel("Data Points")
plt.ylabel("Stock Price")
plt.legend()
plt.show()
# Prediction for user-specified date
# Prediction for user-specified date
# Prediction for user-specified date
# Prediction for user-specified date
# Prediction for user-specified date
# Prediction for user-specified date
# Prediction for user-specified date
# Convert the input date to datetime
# Prompt the user for input date
# Prompt the user for input date
input_date = input("Enter the date (DD-MM-YYYY) to predict the next day's stock price: ")
try:
# Convert the input date to datetime
input_date_dt = pd.to_datetime(input_date, dayfirst=True)
# Check if the date exists in the dataset
if input_date_dt not in data.index:
print(f"Date {input_date} is not in the dataset.")
else:
# Extract the features for the input date as a Series
features_for_date = data.loc[input_date_dt, features]
# If there are multiple rows for the date, select the first one explicitly
if isinstance(features_for_date, pd.DataFrame):
features_for_date = features_for_date.iloc[0]
# Reshape and scale the extracted features
features_for_date = features_for_date.values.reshape(1, -1) # Reshape to (1, 7)
scaled_features_for_date = scaler.transform(features_for_date)
# Predict the next day's stock price
predicted_price = model.predict(scaled_features_for_date)[0]
# Use shift to get the actual price for the next day (if it exists)
next_day_data = data.shift(-1)
next_day_actual_price = next_day_data.loc[input_date_dt, 'Close'] if input_date_dt in next_day_data.index else None
if next_day_actual_price is not None:
# Access the scalar value from the Series before formatting
next_day_actual_price = next_day_actual_price.iloc[0] if isinstance(next_day_actual_price, pd.Series) else next_day_actual_price
print(f"Predicted Next Day Close Price: {predicted_price:.2f}")
print(f"Actual Next Day Close Price: {next_day_actual_price:.2f}")
else:
print("No data available for the next day to compare.")
except Exception as e:
print(f"An error occurred: {e}")