From d5fd7cdd227eff8b6108b068da2551f77cf0a672 Mon Sep 17 00:00:00 2001 From: jhuang2026 Date: Mon, 8 Apr 2024 17:47:44 -0400 Subject: [PATCH] Added Stat Arbitrage and ML (Ryan Gilbert) --- app/app.py | 277 +++++++++++++++++++++++++++++++++- app/helpers.py | 60 ++++++++ app/templates/about.html | 53 ++++--- app/templates/formbase.html | 142 +++++++++++++++++ app/templates/index.html | 154 +++++-------------- app/templates/mlbase.html | 182 ++++++++++++++++++++++ app/templates/mlindex.html | 8 + app/templates/mlresults.html | 12 ++ app/templates/montecarlo.html | 134 ++++++++++++++++ app/templates/results.html | 10 ++ app/templates/statarb.html | 8 + 11 files changed, 897 insertions(+), 143 deletions(-) create mode 100644 app/helpers.py create mode 100644 app/templates/formbase.html create mode 100644 app/templates/mlbase.html create mode 100644 app/templates/mlindex.html create mode 100644 app/templates/mlresults.html create mode 100644 app/templates/montecarlo.html create mode 100644 app/templates/results.html create mode 100644 app/templates/statarb.html diff --git a/app/app.py b/app/app.py index dd86943..81cdd9f 100644 --- a/app/app.py +++ b/app/app.py @@ -13,9 +13,9 @@ # Specify the folder from which Flask will serve static files app.static_folder = 'static' -@app.route('/') -def index(): - return render_template('index.html') +@app.route('/montecarlo') +def montecarlo(): + return render_template('montecarlo.html') @app.route('/about') def about(): @@ -126,5 +126,276 @@ def get_plots(ticker, days): 'date': days }), 200 +# Second Section ~ +from flask import Flask, render_template, request, flash, redirect, url_for +import matplotlib.pyplot as plt +import yfinance as yf +import time +from sklearn.ensemble import RandomForestRegressor +import os +from helpers import plot_helper, ml_plot_helper +import pandas as pd + +app.secret_key = os.getenv('flasksecret') + +# Define routes +@app.route('/') +def index(): + form_results = {} + return 
render_template('index.html', results=form_results) + +@app.route('/statarb') +def statarb(): + form_results = {} + return render_template('statarb.html', results=form_results) + +@app.route('/statresults', methods=['POST']) +def results(): + form_results={} + # Extract user inputs from the form + equity1 = request.form['equity1'] + equity2 = request.form['equity2'] + timeframe = request.form['timeframe'] + period = request.form['period'] + window_size = request.form['window_size'] + multiplier = request.form['multiplier'] + std_mult = request.form['std_mult'] + # confirm inputs are valid + if not equity1 or not equity2 or not timeframe or not window_size or not multiplier: + flash('All inputs must be provided', 'error') + return redirect(url_for('statarb')) + # try to cast window_size and multiplier to int, float + try: + window_size = int(window_size) + except ValueError: + flash('Window size must be an integer', 'error') + return redirect(url_for('statarb')) + try: + multiplier = float(multiplier) + except ValueError: + flash('Multiplier must be a float', 'error') + return redirect(url_for('statarb')) + + try: + std_mult = int(std_mult) + except ValueError: + flash('Standard deviation multiplier must be an integer', 'error') + return redirect(url_for('statarb')) + + form_results = { + 'equity1': equity1, + 'equity2': equity2, + 'timeframe': timeframe, + 'period': period, + 'window_size': window_size, + 'multiplier': multiplier, + 'std_mult': std_mult + } + + equity1_df = yf.download(equity1, interval=timeframe, period=period) + equity2_df = yf.download(equity2, interval=timeframe, period=period) + + if len(equity1_df) == 0: + flash(f'No data found for {equity1}', 'error') + return redirect(url_for('statarb')) + if len(equity2_df) == 0: + flash(f'No data found for {equity2}', 'error') + return redirect(url_for('statarb')) + + # join the dfs with prefix eq1 and eq2 + equity1_df = equity1_df.add_prefix('eq1_') + equity2_df = equity2_df.add_prefix('eq2_') + + # 
merge the dataframes + df = equity1_df.join(equity2_df, how='outer') + df['Difference'] = df['eq1_Close'] - df['eq2_Close'] + + df[f'{window_size}_MA_Difference'] = df['Difference'].rolling(window=window_size).mean() + df[f'{window_size}_MA_Difference_Difference'] = df['Difference'] - df[f'{window_size}_MA_Difference'] + + df['Upper_Band'] = df[f'{window_size}_MA_Difference_Difference'].rolling(window=window_size*std_mult).std() * multiplier + df['Lower_Band'] = -df[f'{window_size}_MA_Difference_Difference'].rolling(window=window_size*std_mult).std() * multiplier + # add marks for when the difference difference is outside the bands + df['Outside_Upper'] = df[f'{window_size}_MA_Difference_Difference'] > df['Upper_Band'] + df['Outside_Lower'] = df[f'{window_size}_MA_Difference_Difference'] < df['Lower_Band'] + + # iterate through df, short the spread when the difference difference is above the upper band and long the spread when the difference difference is below the lower band + position = 0 + df['return'] = 0 + df['position'] = 0 + for i in range(len(df)): + if position == 0 and df[f'{window_size}_MA_Difference_Difference'].iloc[i] > df['Upper_Band'].iloc[i]: + position = -1 + entry = df['Difference'].iloc[i] + entry_cost = df['eq1_Close'].iloc[i] + df['eq2_Close'].iloc[i] + df['position'].iloc[i] = -1 + elif position == 0 and df[f'{window_size}_MA_Difference_Difference'].iloc[i] < df['Lower_Band'].iloc[i]: + position = 1 + entry = df['Difference'].iloc[i] + entry_cost = df['eq1_Close'].iloc[i] + df['eq2_Close'].iloc[i] + df['position'].iloc[i] = 1 + + elif position == -1 and df[f'{window_size}_MA_Difference_Difference'].iloc[i] < df['Lower_Band'].iloc[i]: + position = 0 + exitv = df['Difference'].iloc[i] + df['return'].iloc[i] = (entry - exitv) / entry_cost + + elif position == 1 and df[f'{window_size}_MA_Difference_Difference'].iloc[i] > df['Upper_Band'].iloc[i]: + position = 0 + exitv = df['Difference'].iloc[i] + df['return'].iloc[i] = (exitv - entry) / 
entry_cost + + + imgdata = plot_helper(df, window_size) + return render_template('results.html', results=form_results, data = imgdata) + +@app.route('/machinelearning') +def machinelearning(): + form_results = {} + return render_template('mlindex.html', results=form_results) + +@app.route('/mlresults', methods=['POST','GET']) +def mlresults(): + # get form data + # these three same as before + equity1 = request.form['equity1'] + timeframe = request.form['timeframe'] + period = request.form['period'] + # new + estimators = request.form['estimators'] # 1 to 100 + features = request.form.getlist('features') # a list of the features keep in mind + shift = request.form['shift'] # 0 to 10 + threshold = request.form['threshold'] # 0 to 1 + + # confirm inputs are valid + if not equity1 or not timeframe or not period or not estimators or not features or not shift or not threshold: + flash('All inputs must be provided', 'error') + return redirect(url_for('machinelearning')) + + # try to cast estimators, shift to int, threshold to float + try: + estimators = int(estimators) + except ValueError: + flash('Estimators must be an integer', 'error') + return redirect(url_for('machinelearning')) + + try: + shift = int(shift) + except ValueError: + flash('Shift must be an integer', 'error') + return redirect(url_for('machinelearning')) + + try: + threshold = float(threshold) + except ValueError: + flash('Threshold must be a float', 'error') + return redirect(url_for('machinelearning')) + + form_results = { + 'equity1': equity1, + 'timeframe': timeframe, + 'period': period, + 'estimators': estimators, + 'features': features, + 'shift': shift, + 'threshold': threshold + } + + # Fetch data using yfinance + df = yf.download(equity1, interval=timeframe, period=period) + # check if data was found + if len(df) == 0: + flash(f'No data found for {equity1}', 'error') + return redirect(url_for('machinelearning')) + + # create features + feature_list = [] + for feature in features: + if feature 
== 'rsi': + # calculate rsi from the close price + delta = df['Close'].diff() + gain = (delta.where(delta > 0, 0)).rolling(14).mean() + loss = (-delta.where(delta < 0, 0)).rolling(14).mean() + rs = gain / loss + df['rsi'] = 100 - (100 / (1 + rs)) + feature_list.append('rsi') + elif feature == 'macd': + # calculate the macd + df['ema12'] = df['Close'].ewm(span=12, adjust=False).mean() + df['ema26'] = df['Close'].ewm(span=26, adjust=False).mean() + df['macd'] = df['ema12'] - df['ema26'] + feature_list.append('macd') + elif feature == 'pctfrom100ma': + # calculate the percent from the 100 day moving average + df['100ma'] = df['Close'].rolling(window=100).mean() + df['pctfrom100ma'] = (df['Close'] - df['100ma']) / df['100ma'] + feature_list.append('pctfrom100ma') + elif feature == 'prevreturn': + # calculate the previous day return + df['prevreturn'] = df['Close'].pct_change() + feature_list.append('prevreturn') + + # shift the features and add to list + shifted_features = [] + for feature in feature_list: + df[f'{feature}_shift{shift}'] = df[feature].shift(shift) + shifted_features.append(f'{feature}_shift{shift}') + + # add shifted names to feature list + for name in shifted_features: + feature_list.append(name) + + # drop na + df = df.dropna() + + # create target next (shift -1) Close higher than Open + df['return'] = df['Close'].shift(-1) / df['Open'].shift(-1) + df['target'] = df['return'] > 1 + df['target'] = df['target'].astype(int) + + # create X and y + X = df[feature_list] + y = df['target'] + + # get time index for split at 80% of data + split_index = int(len(df) * 0.8) + split_date = df.index[split_index] + + # split the data + X_train = X[:split_date] + X_test = X[split_date:] + y_train = y[:split_date] + y_test = y[split_date:] + + # print date rate for train and test + print(f'Train date range: {X_train.index[0]} to {X_train.index[-1]}') + print(f'Test date range: {X_test.index[0]} to {X_test.index[-1]}') + + # create the model + model = 
RandomForestRegressor(n_estimators=estimators, random_state=42) + model.fit(X_train, y_train) + + # get the predictions + predictions = model.predict(X_test) + + # create the predictions dataframe + preds = pd.DataFrame({'predictions': predictions, 'actual': y_test}) + + # add back in return column + preds['return'] = df['return'][split_date:] + preds['met'] = preds['predictions'] > threshold + preds['met'] = preds['met'].astype(int) + + # calculate the accuracy where threshold is met + accuracy = preds[preds['predictions'] > threshold]['actual'].mean() + + # calculate returns, 1 if not met, return if met + preds['return_strat'] = 1 + ((preds['return']-1) * preds['met']) + + data = ml_plot_helper(preds) + + # Render results template with the calculated results + return render_template('mlresults.html', results=form_results, imgdata=data, accuracy=accuracy, count=preds['met'].sum()) + if __name__ == '__main__': app.run(debug=True) diff --git a/app/helpers.py b/app/helpers.py new file mode 100644 index 0000000..81085fc --- /dev/null +++ b/app/helpers.py @@ -0,0 +1,60 @@ +import matplotlib.pyplot as plt +import mplfinance as mpf + +from io import BytesIO +import base64 +def plot_helper(df, window_size): + + # Create a figure and axes for the main plot + plt.switch_backend('Agg') + fig, (ax1, ax2, ax3) = plt.subplots(3, 1, sharex=True, figsize=(15, 15)) + + # Plot the main data on the first subplot + ax1.plot(df[f'{window_size}_MA_Difference_Difference'], label=f'{window_size} Period MA Difference Difference', color='b') + ax1.plot(df['Upper_Band'], label='Upper Band', color='g') + ax1.plot(df['Lower_Band'], label='Lower Band', color='r') + ax1.axhline(0, color='red', linestyle='--') + ax1.set_ylabel('Difference Difference') + ax1.legend(loc='upper left') + + # Plot the additional data on the second subplot + ax2.plot(df['Difference'], label='Actual Difference', color='orange') + ax2.set_xlabel('Time') + ax2.set_ylabel('Actual Difference') + ax2.legend(loc='upper 
right') + ax2.grid(True) + # Add marks for when the conditions are true on the bottom subplot + ax2.scatter(df.index[df['position']==-1], df['Difference'][df['position']==-1], color='red', label='Outside Upper') + ax2.scatter(df.index[df['position']==1], df['Difference'][df['position']==1], color='green', label='Outside Lower') + + ax2.plot(df[f'{window_size}_MA_Difference'], label=f'{window_size} Period MA Difference', color='b') + + ax3.plot((1+df['return']).cumprod(), label='Cumulative Return', color='green') + ax3.set_xlabel('Time') + ax3.set_ylabel('Cumulative Return') + ax3.legend(loc='upper left') + ax3.grid(True) + + # Adjust layout + plt.tight_layout() + + buf = BytesIO() + plt.savefig(buf, format='png') + data = base64.b64encode(buf.getbuffer()).decode("ascii") + return data + + +def ml_plot_helper(df): + # plot cumulative return + plt.figure(figsize=(15, 7)) + plt.plot((df['return_strat']).cumprod(), label='Cumulative Return (Strategy)', color='green') + plt.plot((df['return']).cumprod(), label='Cumulative Return (Buy & Hold)', color='blue') + plt.xlabel('Time') + plt.ylabel('Cumulative Return') + plt.legend(loc='upper left') + plt.grid(True) + + buf = BytesIO() + plt.savefig(buf, format='png') + data = base64.b64encode(buf.getbuffer()).decode("ascii") + return data \ No newline at end of file diff --git a/app/templates/about.html b/app/templates/about.html index 88b8905..eae9181 100644 --- a/app/templates/about.html +++ b/app/templates/about.html @@ -9,36 +9,35 @@
-

About Stock Price Prediction using Monte Carlo Methods

- -

This website allows you to predict future stock prices using Monte Carlo simulation methods.

- -

How to Use

-

To use this website, enter a valid stock ticker symbol and the number of days you want to predict into the form on the homepage. Click the "Simulate" button, and the website will generate a prediction plot and display relevant stock data.

- -

What is Monte Carlo Simulation?

-

Monte Carlo simulation is a computational technique used to approximate the probability of certain outcomes by running multiple simulations with random variables. In the context of stock price prediction, Monte Carlo simulation can be used to generate possible future price scenarios based on historical data and statistical models.

- -

About Us

-

This website was originally created by Ryan Nie.

+

About Us

+

This website was originally created by Ryan Nie. It was then modified by Jason Huang. Additional features and functionality were added by incorporating the work of Ryan Gilbert. The platform is designed to display the work done by Quant Seniors at BU Alpha.

+

The website is managed by BU Alpha, a subdivision of BU Finance & Investment Club, a student organization at Boston University dedicated to promoting financial literacy and fostering a community of aspiring finance professionals.

+

Our long-term goal is to simplify complex financial concepts and empower individuals to make informed decisions in the financial markets. We aim to help users enhance their trading strategies and optimize their investment portfolios.

diff --git a/app/templates/formbase.html b/app/templates/formbase.html new file mode 100644 index 0000000..33fec99 --- /dev/null +++ b/app/templates/formbase.html @@ -0,0 +1,142 @@ + + + + + + + Statistical Arbitrage Trading + + + + + +
+ {% with messages = get_flashed_messages() %} + {% if messages %} + + {% endif %} + + {% endwith %} +
+ +
+

Statistical Arbitrage Trading

+
+
+ + +

This is the first equity symbol. Example: KO, V, AUDUSD=X.


+
+
+ + +

This is the second equity symbol. Example: PEP, M, NZDUSD=X.


+
+
+ + +

This is the granularity of historical data retrieved.


+
+
+ + +

How far back to fetch data.


+
+
+ + +

This is used to smooth out and normalize the spread.


+
+
+ + +

By using a dynamic threshold, we can account for varying volatility. This multiplier scales the rolling standard deviation of the spread to set how far the upper and lower threshold bands sit from zero.


+
+
+ + +

This is used to smooth out the standard deviation further: the standard deviation is computed over a rolling window of (window size × this value) periods.


+
+ +
+

In this trading strategy, known as statistical arbitrage, statistical models are leveraged to identify and exploit pricing disparities between two equities, which are referred to as Equity 1 and Equity 2. By analyzing historical price data and applying statistical techniques such as mean reversion or cointegration analysis, the strategy aims to identify pairs of equities that exhibit a stable relationship over time.


+ +

Once a suitable pair has been identified, an entry and exit strategy is used to capitalize on pricing disparities. Positions are initiated when no open position exists and the spread between the two equities exceeds a predefined threshold. If the spread indicates that one equity is overvalued relative to the other, the strategy will short the overvalued asset and long the undervalued asset, anticipating a mean reversion towards their historical relationship.


+ +

Conversely, if the spread indicates that the other equity is overvalued, the positions are adjusted accordingly. The exit strategy is triggered when the spread reverts back and reaches the opposite side of the threshold, allowing us to exit the positions and lock in profits.


+ +

By dynamically adjusting the positions based on statistical signals and effectively managing risk, the strategy aims to exploit short-term pricing inefficiencies between Equity 1 and Equity 2, while maximizing returns.


+
+ +
+ {% block content %} + {% endblock %} +
+ + + + + + + + + diff --git a/app/templates/index.html b/app/templates/index.html index ed367db..a785bb1 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -1,127 +1,55 @@ + - Stock Price Prediction using Monte Carlo Methods - - - + Statistical Arbitrage Trading + + + -
-

Stock Price Prediction using Monte Carlo Methods

- -
-
- - -
- -
- - -
- - -
- - -
-
+
+

Quant Inform

+

Welcome to Our Platform

+

Discover the world of quantitative finance with us. Our platform is dedicated to simplifying complex financial concepts and empowering individuals to make informed trading decisions.

+

Understanding Statistical Arbitrage

+

Statistical arbitrage is a strategy that involves exploiting pricing inefficiencies between assets based on statistical models. We provide insights into how this approach can be used to identify trading opportunities and maximize returns.

+

Harnessing Machine Learning for Trading Strategies

+

Explore the power of machine learning in predicting market trends and optimizing trading strategies. Our platform offers resources to help you leverage ML algorithms for more effective decision-making in the financial markets.

+

Make Stock Price Predictions using Monte Carlo Methods

+

This website enables users to predict future stock prices by employing advanced Monte Carlo simulation methods, leveraging historical data and statistical models.

- + + - // Check if table data is present in response - if (data.table_data) { - var tableContainer = document.getElementById('tableContainer'); - tableContainer.innerHTML = '

Stock Data

'; - var table = document.createElement('table'); - table.classList.add('table'); - var tbody = document.createElement('tbody'); - data.table_data.forEach(function(row) { - var tr = document.createElement('tr'); - row.forEach(function(cell) { - var td = document.createElement('td'); - td.textContent = cell; - tr.appendChild(td); - }); - tbody.appendChild(tr); - }); - table.appendChild(tbody); - tableContainer.appendChild(table); - } - }) - .catch(error => { - console.error('Error:', error); - }); - } - diff --git a/app/templates/mlbase.html b/app/templates/mlbase.html new file mode 100644 index 0000000..d1c3f20 --- /dev/null +++ b/app/templates/mlbase.html @@ -0,0 +1,182 @@ + + + + + + + Statistical Arbitrage Trading + + + + + +
+ {% with messages = get_flashed_messages() %} + {% if messages %} +
    + {% for message in messages %} +
    + + {% endfor %} +
+ {% endif %} + + {% endwith %} +
+ +
+

Machine Learning Trading

+
+

Asset Data

+
+ + +

This is the equity symbol to be analyzed. Example: SPY, AAPL, MSFT.


+
+ +
+ + +

This is the granularity of historical data retrieved.


+
+
+ + +

How far back to fetch data.


+
+ +

Machine Learning Parameters

+ +
+ + +

Number of estimators for the Random Forest.


+
+ +
+ + +

Features to use for the Random Forest. Select multiple.


+
+ + + + +
+ + +

Creates additional features by taking the value of each selected feature from this many intervals ago. May provide the model with valuable information.


+
+ +

Strategy Threshold

+ +
+ + +

Because the model predicts a value between 0 and 1, we can act only on predictions above this threshold, filtering for stronger predictions with a higher probability of winning.


+
+ + +
+

This strategy predicts whether the next hour/day/week will have a positive (1) or negative (0) gain. Because the model predicts a value between 0 and 1, we can filter for stronger predictions to get a higher probability of winning. Note the data split: the model is trained on roughly the first 80% of the data and evaluated on the remainder. This is an important part of machine learning, as we must validate the model's prediction accuracy.


+
+ + +
+ + {% block content %} + {% endblock %} +
+ + + + + + + + + diff --git a/app/templates/mlindex.html b/app/templates/mlindex.html new file mode 100644 index 0000000..346a45d --- /dev/null +++ b/app/templates/mlindex.html @@ -0,0 +1,8 @@ +{% extends 'mlbase.html' %} + +{% block title %}Home - {{ super() }}{% endblock %} + +{% block content %} + + +{% endblock %} diff --git a/app/templates/mlresults.html b/app/templates/mlresults.html new file mode 100644 index 0000000..40e7dad --- /dev/null +++ b/app/templates/mlresults.html @@ -0,0 +1,12 @@ +{% extends 'mlbase.html' %} + +{% block title %}Results - {{ super() }}{% endblock %} + +{% block content %} +
+ +

Results

+

Accuracy: {{accuracy}} | Count: {{count}}

+ +
+{% endblock %} \ No newline at end of file diff --git a/app/templates/montecarlo.html b/app/templates/montecarlo.html new file mode 100644 index 0000000..6fe7ed9 --- /dev/null +++ b/app/templates/montecarlo.html @@ -0,0 +1,134 @@ + + + + + + Stock Price Prediction using Monte Carlo Methods + + + + + +
+

Stock Price Prediction using Monte Carlo Methods

+ +
+
+ + +
+ +
+ + +
+ + +
+ + +
+
+
+ + + + diff --git a/app/templates/results.html b/app/templates/results.html new file mode 100644 index 0000000..77cce7c --- /dev/null +++ b/app/templates/results.html @@ -0,0 +1,10 @@ +{% extends 'formbase.html' %} + +{% block title %}Results - {{ super() }}{% endblock %} + +{% block content %} +
+

Results

+ +
+{% endblock %} \ No newline at end of file diff --git a/app/templates/statarb.html b/app/templates/statarb.html new file mode 100644 index 0000000..8c22a36 --- /dev/null +++ b/app/templates/statarb.html @@ -0,0 +1,8 @@ +{% extends 'formbase.html' %} + +{% block title %}Home - {{ super() }}{% endblock %} + +{% block content %} + + +{% endblock %}