PIM-Data-Science · Athi-Matyumza · Aug 5, 2023 · Aug 5, 2023
diff --git a/solution_skeleton.py b/solution_skeleton.py
@@ -3,6 +3,7 @@
 import pandas as pd
 import datetime
 import plotly.express as px
+import tensorflow as tf
 
 
 print('---Python script Start---', str(datetime.datetime.now()))
@@ -19,19 +20,19 @@
 
 def equalise_weights(df: pd.DataFrame):
 
-    '''
-        Function to generate the equal weights, i.e. 1/p for each active stock within a month
+    # '''
+    #     Function to generate the equal weights, i.e. 1/p for each active stock within a month
 
-        Args:
-            df: A return data frame. First column is month end and remaining columns are stocks
+    #     Args:
+    #         df: A return data frame. First column is month end and remaining columns are stocks
 
-        Returns:
-            A dataframe of the same dimension but with values 1/p on active funds within a month
+    #     Returns:
+    #         A dataframe of the same dimension but with values 1/p on active funds within a month
 
-    '''
+    # '''
 
     # create df to house weights
-    n_length = len(df)
+    n_length = len(df) #number of rows
     df_returns = df
     df_weights = df_returns[:n_length].copy()
     df_weights.set_index('month_end', inplace=True)
@@ -84,22 +85,51 @@ def generate_portfolio(df_train: pd.DataFrame, df_test: pd.DataFrame):
     # strategy to generate portfolio weights.
     # Use the latest available data at that point in time
 
-    for i in range(len(df_test)):
+    # Define the neural network model for portfolio optimization
+    def create_portfolio_model(input_shape, num_stocks):
+        model = tf.keras.Sequential([
+            tf.keras.layers.Dense(64, activation='relu', input_shape=input_shape),
+            tf.keras.layers.Dense(32, activation='relu'),
+            tf.keras.layers.Dense(num_stocks, activation='softmax')
+        ])
+        return model
+
+    # New function to calculate portfolio weights using the trained model
+    def get_portfolio_weights(model, df_latest):
+        returns_data = np.array(df_latest.drop(columns=['month_end']))
+        predictions = model.predict(returns_data)
+        normalized_weights = np.clip(predictions, 0, 0.1)  # Clip weights to ensure no stock > 10%
+        weights_sum = np.sum(normalized_weights, axis=1, keepdims=True)
+        portfolio_weights = normalized_weights / weights_sum
+        return portfolio_weights
+
+    # New function for training the portfolio model using backpropagation
+    def train_portfolio_model(df_train, epochs=3000, batch_size=25):
+        num_stocks = len(df_train.columns) - 1
+        input_shape = (num_stocks,)
+        model = create_portfolio_model(input_shape, num_stocks)
+
+        x_train = np.array(df_train.drop(columns=['month_end']))
+        y_train = x_train  # Input and output are the same for this self-supervised learning
+
+        model.compile(optimizer='adam', loss='mse')
+        model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0)
+        return model
+
+    # Create and train the portfolio model
+    model = train_portfolio_model(df_train)
 
-        # latest data at this point
+    for i in range(len(df_test)):
         df_latest = df_returns[(df_returns['month_end'] < df_test.loc[i, 'month_end'])]
-
-        # vol calc
-        df_w = pd.DataFrame()
-        df_w['vol'] = df_latest.std(numeric_only=True)          # calculate stock volatility
-        df_w['inv_vol'] = 1/df_w['vol']                         # calculate the inverse volatility
-        df_w['tot_inv_vol'] = df_w['inv_vol'].sum()             # calculate the total inverse volatility
-        df_w['weight'] = df_w['inv_vol']/df_w['tot_inv_vol']    # calculate weight based on inverse volatility
-        df_w.reset_index(inplace=True, names='name')
-
-        # add to all weights
-        df_this = pd.DataFrame(data=[[df_test.loc[i, 'month_end']] + df_w['weight'].to_list()], columns=df_latest.columns)
+
+        # Get portfolio weights from the model
+        portfolio_weights = get_portfolio_weights(model, df_latest)
+
+        # Convert weights to DataFrame format
+        df_this = pd.DataFrame(data=[[df_test.loc[i, 'month_end']] + portfolio_weights.tolist()[0]],
+                               columns=df_latest.columns)
         df_weights = pd.concat(objs=[df_weights, df_this], ignore_index=True)
+
 
     # <<--------------------- YOUR CODE GOES ABOVE THIS LINE --------------------->>
 
@@ -171,3 +201,24 @@ def plot_total_return(df_returns: pd.DataFrame, df_weights_index: pd.DataFrame,
 df_returns, df_weights_portfolio = generate_portfolio(df_returns_train, df_returns_test)
 fig1, df_rtn = plot_total_return(df_returns, df_weights_index=df_weights_index, df_weights_portfolio=df_weights_portfolio)
 fig1
+
+# %%
+
+
+# Using an Artificial Neural Network (ANN) and backpropagation to generate the weights for the portfolio was the approach we used and for the following reasons we chose it:
+
+# 1. Non-linearity: ANNs can capture complex and non-linear relationships in the data. The stock market often exhibits non-linear patterns, and an ANN can better model these intricate interactions among different stocks.
+
+# 2. Flexibility: ANNs can handle various types of data, including both numerical and categorical variables. This flexibility allows them to incorporate additional information, such as macroeconomic indicators or sector-specific data, which can improve the portfolio weight generation process.
+
+# 3. Adaptability: The stock market is dynamic, and the relationships between different stocks may change over time. ANNs, especially when combined with backpropagation, can adapt and update the weights based on new incoming data, allowing the portfolio to adjust to changing market conditions.
+
+# 4. Risk Management: ANNs can be integrated into the portfolio optimization process to consider risk factors beyond volatility. By training the ANN on historical data, it can learn to account for factors like downside risk, correlation between stocks, and other risk metrics, resulting in a more robust and risk-aware portfolio.
+
+# 5. Portfolio Diversification: ANNs can optimize for diversification by learning to allocate weights in a way that minimizes the correlation among stocks. Diversification is a key aspect of risk reduction in a portfolio, and ANNs can help achieve this more efficiently.
+
+# 6. Speed and Efficiency: Once the ANN is trained, generating portfolio weights for each time period is computationally efficient. It can quickly process large datasets and produce weight allocations, making it suitable for real-time or frequent rebalancing strategies.
+
+# 7. Adoption of New Information: As new data becomes available, the ANN can continuously update the portfolio weights, allowing it to adapt to market changes and incorporate the most recent information into the investment decisions.
+
+# However, it's important to note that using an ANN for portfolio optimization also comes with challenges, such as model complexity, data overfitting, and the need for appropriate hyperparameter tuning. This model also may require bigger training data but the following solutions we produced was able to surpass the benchmark and inverse volatility approach.