PIM-Data-Science · lukhanyoVena808 · Aug 5, 2023 · Aug 5, 2023
diff --git a/solution_skeleton.py b/solution_skeleton.py
@@ -1,4 +1,5 @@
 # %%
+
 import numpy as np
 import pandas as pd
 import datetime
@@ -48,8 +49,19 @@ def equalise_weights(df: pd.DataFrame):
 
 # %%
 
-def generate_portfolio(df_train: pd.DataFrame, df_test: pd.DataFrame):
+from sklearn.linear_model import LinearRegression
+
+def predict_returns(df_train, df_test):
+    """Forecast next-period returns: one value per stock column, in df_train column order."""
+    # Fit returns at t on returns at t-1 so no target column or test-period value leaks in;
+    # df_test is kept for interface compatibility only. NaNs become 0.0 (sklearn rejects NaN).
+    history = df_train.drop(columns=['month_end']).fillna(0.0).to_numpy()
+    if len(history) < 2:  # no lagged pair to fit on: no signal
+        return np.zeros(history.shape[1])
+    model = LinearRegression().fit(history[:-1], history[1:])
+    return model.predict(history[-1:])[0]
 
+def generate_portfolio(df_train: pd.DataFrame, df_test: pd.DataFrame):
     '''
         Function to generate stocks weight allocation for time t+1 using historic data. Initial weights generated as 1/p for active stock within a month
 
@@ -60,53 +72,57 @@ def generate_portfolio(df_train: pd.DataFrame, df_test: pd.DataFrame):
         Returns:
             The returns dataframe and the weights
     '''
-
     print('---> training set spans', df_train['month_end'].min(), df_train['month_end'].max())
     print('---> training set spans', df_test['month_end'].min(), df_test['month_end'].max())
 
-    # initialise data
-    n_train = len(df_train)
-    df_returns = pd.concat(objs=[df_train, df_test], ignore_index=True)
+    # Combine training and testing data for easier calculations
+    df_returns = pd.concat([df_train, df_test], ignore_index=True)
 
-    df_weights = equalise_weights(df_returns[:n_train]) # df to store weights and create initial
+    df_weights = equalise_weights(df_train)  # df to store weights and create initial
 
     # list of stock names
-    list_stocks = list(df_returns.columns)
-    list_stocks.remove('month_end')
-
-    # <<--------------------- YOUR CODE GOES BELOW THIS LINE --------------------->>
+    list_stocks = df_train.columns.drop('month_end')
 
-    # This is your playground. Delete/modify any of the code here and replace with 
-    # your methodology. Below we provide a simple, naive estimation to illustrate 
-    # how we think you should go about structuring your submission and your comments:
-
-    # We use a static Inverse Volatility Weighting (https://en.wikipedia.org/wiki/Inverse-variance_weighting) 
-    # strategy to generate portfolio weights.
-    # Use the latest available data at that point in time
-
+    # Loop through each time step in df_test
     for i in range(len(df_test)):
+        # Latest data at this point
+        df_latest = df_returns[df_returns['month_end'] < df_test.loc[i, 'month_end']]
+
+        # Use machine learning to predict future returns
+        predicted_returns = predict_returns(df_latest, df_test.iloc[[i]])
 
-        # latest data at this point
-        df_latest = df_returns[(df_returns['month_end'] < df_test.loc[i, 'month_end'])]
-
         # vol calc
         df_w = pd.DataFrame()
         df_w['vol'] = df_latest.std(numeric_only=True)          # calculate stock volatility
-        df_w['inv_vol'] = 1/df_w['vol']                         # calculate the inverse volatility
-        df_w['tot_inv_vol'] = df_w['inv_vol'].sum()             # calculate the total inverse volatility
-        df_w['weight'] = df_w['inv_vol']/df_w['tot_inv_vol']    # calculate weight based on inverse volatility
-        df_w.reset_index(inplace=True, names='name')
-
-        # add to all weights
-        df_this = pd.DataFrame(data=[[df_test.loc[i, 'month_end']] + df_w['weight'].to_list()], columns=df_latest.columns)
-        df_weights = pd.concat(objs=[df_weights, df_this], ignore_index=True)
-
-    # <<--------------------- YOUR CODE GOES ABOVE THIS LINE --------------------->>
-
-    # 10% limit check
-    if len(np.array(df_weights[list_stocks])[np.array(df_weights[list_stocks]) > 0.101]):
+        df_w['inv_vol'] = 1 / df_w['vol']                        # calculate the inverse volatility
+        df_w['tot_inv_vol'] = df_w['inv_vol'].sum()              # calculate the total inverse volatility
+
+        # Keep stocks whose predicted return is positive (NaN > 0 is False, so NaN predictions
+        # are excluded too) at their inverse-volatility weight. No constant factor such as
+        # 0.10 is applied: it would cancel in the normalisation below and cap nothing.
+        base_weight = df_w['inv_vol'] / df_w['tot_inv_vol']
+        df_w['weight'] = np.where(predicted_returns > 0, base_weight, 0.0)
+        if not df_w['weight'].sum() > 0:
+            # Nothing selected: use plain inverse-volatility weights instead of 0/0 = NaN.
+            df_w['weight'] = base_weight
+        # The exponent (> 1) tilts the allocation towards the least volatile selected stocks.
+        scaling_factor = 1.5  # Adjust this scaling factor as needed.
+        df_w['weight'] = df_w['weight'] ** scaling_factor
-        raise Exception(r'---> 10% limit exceeded')
+        # Normalize the weights to sum up to 1 after applying the scaling factor.
+        # NOTE(review): no per-stock cap is enforced here; the check after the loop raises above 0.101.
+        df_w['weight'] /= df_w['weight'].sum()
+
+        # Drop the stock-name index; the weights are consumed positionally via to_list() below.
+        df_w.reset_index(inplace=True, drop=True)
+
+        # Create a DataFrame to store the results.
+        df_this = pd.DataFrame(data=[[df_test.loc[i, 'month_end']] + df_w['weight'].to_list()],
+                               columns=df_latest.columns)
+        df_weights = pd.concat([df_weights, df_this], ignore_index=True)
+
+    # 10% limit check
+    if (df_weights[list_stocks] > 0.101).any().any():
+        raise Exception('---> 10% limit exceeded')
 
     return df_returns, df_weights