# %%

import numpy as np
import pandas as pd
import datetime

# NOTE: `equalise_weights(df)` is defined elsewhere in solution_skeleton.py
# (outside the portion of the file shown here) and is used unchanged below.

# %%

# Hard per-stock allocation limit enforced when building the weights.
_MAX_WEIGHT = 0.10
# Exponent applied to the inverse-volatility shares; values above 1 tilt the
# allocation further towards the least volatile selected stocks.
_TILT_POWER = 1.5


def predict_returns(df_train: pd.DataFrame, df_test: pd.DataFrame) -> np.ndarray:
    '''
    Forecast next-period returns for every stock with a lag-1 linear model.

    An ordinary least squares regression (with intercept) of each month's
    cross-section of returns on the previous month's cross-section is fitted
    on `df_train`. The forecast for a row of `df_test` uses the row that
    immediately precedes it (the last training row for the first test row),
    so the returns of the month being forecast never enter its own
    prediction.

    Args:
        df_train: historic returns, one row per month, with a 'month_end'
            column and one column per stock, ordered oldest to newest.
        df_test: rows to forecast, with the same columns as `df_train`.

    Returns:
        Array of shape (len(df_test), n_stocks); columns follow the stock
        column order of `df_train`. All zeros when fewer than two training
        rows are available (no lagged pair to fit on).
    '''
    stocks = df_train.columns.drop('month_end')
    history = df_train[stocks].to_numpy(dtype=float)
    upcoming = df_test[stocks].to_numpy(dtype=float)

    # NOTE(review): missing returns are treated as 0.0 so the regression can
    # be solved -- confirm this matches how inactive stocks are encoded.
    history = np.where(np.isnan(history), 0.0, history)
    upcoming = np.where(np.isnan(upcoming), 0.0, upcoming)

    n_test = len(upcoming)
    if len(history) < 2 or n_test == 0:
        return np.zeros((n_test, len(stocks)))

    # Regress r_t on [1, r_{t-1}]. lstsq returns the minimum-norm solution
    # when there are more stocks than months, so this never fails on rank.
    lagged = np.column_stack([np.ones(len(history) - 1), history[:-1]])
    coefficients, *_ = np.linalg.lstsq(lagged, history[1:], rcond=None)

    # Predictor for each test row is the row just before it in time.
    previous = np.vstack([history[-1:], upcoming[:-1]])
    design = np.column_stack([np.ones(n_test), previous])
    return design @ coefficients


def _cap_weights(raw, cap: float) -> np.ndarray:
    '''
    Scale non-negative scores to sum to 1 with no entry above `cap`.

    Excess weight above the cap is redistributed proportionally among the
    stocks still below it. If too few stocks carry weight to absorb the full
    budget (fewer than 1 / cap), the remainder is left uninvested rather than
    breaching the cap. Non-finite or negative scores count as zero.
    '''
    raw = np.asarray(raw, dtype=float)
    weights = np.where(np.isfinite(raw) & (raw > 0), raw, 0.0)
    total = weights.sum()
    if total <= 0:
        return weights
    weights = weights / total

    capped = np.zeros(weights.shape, dtype=bool)
    # Every pass caps at least one new stock, so len(weights) passes suffice.
    for _ in range(len(weights)):
        newly_over = (weights > cap) & ~capped
        if not newly_over.any():
            break
        capped |= newly_over
        weights[capped] = cap
        free = ~capped
        free_total = weights[free].sum()
        if free_total <= 0:
            break
        budget = max(1.0 - cap * capped.sum(), 0.0)
        weights[free] *= budget / free_total
    return weights


def _target_weights(vol, forecast, cap: float = _MAX_WEIGHT,
                    power: float = _TILT_POWER) -> np.ndarray:
    '''
    Combine inverse-volatility shares with the return forecast.

    Stocks with a positive forecast receive their inverse-volatility share
    raised to `power`; all others receive zero. If no stock has a positive
    forecast the plain inverse-volatility shares are used, and if no stock
    has a usable volatility the shares are equal. The result is capped at
    `cap` per stock.
    '''
    vol = np.asarray(vol, dtype=float)
    forecast = np.asarray(forecast, dtype=float)

    with np.errstate(divide='ignore', invalid='ignore'):
        inv_vol = 1.0 / vol
    # Zero or missing volatility would give inf/NaN weights; drop those stocks.
    inv_vol = np.where(np.isfinite(inv_vol) & (inv_vol > 0), inv_vol, 0.0)

    if inv_vol.sum() > 0:
        base = inv_vol / inv_vol.sum()
    elif len(vol):
        base = np.full(len(vol), 1.0 / len(vol))
    else:
        base = inv_vol

    tilted = np.where(forecast > 0, base, 0.0) ** power
    if tilted.sum() <= 0:
        # Nothing selected: fall back instead of dividing by zero.
        tilted = base
    return _cap_weights(tilted, cap)


def generate_portfolio(df_train: pd.DataFrame, df_test: pd.DataFrame):
    '''
    Generate stock weight allocations for each month in `df_test` using only
    data available before that month.

    Initial weights for the training period come from `equalise_weights`.
    For every test month the weights are inverse-volatility shares of the
    stocks with a positive forecast return (see `predict_returns`), tilted by
    a power transform and capped at 10% per stock.

    Args:
        df_train: The training set of returns (with a 'month_end' column).
        df_test: The testing set of returns, same columns as `df_train`.

    Returns:
        The returns dataframe (train and test concatenated) and the weights
        dataframe (training-period weights followed by one row per test
        month).

    Raises:
        Exception: if any weight exceeds the 10% limit (with 0.1% tolerance).
    '''
    print('---> testing set spans', df_test['month_end'].min(), df_test['month_end'].max())

    # Combine training and testing data so history can be sliced by date.
    df_returns = pd.concat([df_train, df_test], ignore_index=True)

    # df to store weights, seeded with the training-period weights.
    df_weights = equalise_weights(df_train)

    # Stock column names.
    list_stocks = df_train.columns.drop('month_end')

    new_rows = []
    for i, month_end in enumerate(df_test['month_end']):
        # Latest data strictly before the month being allocated.
        df_latest = df_returns[df_returns['month_end'] < month_end]

        forecast = predict_returns(df_latest, df_test.iloc[[i]])[0]
        vol = df_latest[list_stocks].std()
        weights = _target_weights(vol.to_numpy(), forecast)

        # Key by column name so the row does not depend on column position.
        new_rows.append({'month_end': month_end, **dict(zip(list_stocks, weights))})

    if new_rows:
        df_new = pd.DataFrame(new_rows, columns=df_returns.columns)
        df_weights = pd.concat([df_weights, df_new], ignore_index=True)

    # 10% limit check
    if (df_weights[list_stocks] > 0.101).any().any():
        raise Exception('---> 10% limit exceeded')

    return df_returns, df_weights