Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 118 additions & 14 deletions solution_skeleton.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
df_returns_train['month_end'] = pd.to_datetime(arg=df_returns_train['month_end']).apply(lambda d: d.date())
df_returns_test['month_end'] = pd.to_datetime(arg=df_returns_test['month_end']).apply(lambda d: d.date())

df_returns_train
df_returns_test

# %%

def equalise_weights(df: pd.DataFrame):
Expand Down Expand Up @@ -44,6 +47,7 @@ def equalise_weights(df: pd.DataFrame):
df_weights[list_stocks] = 1/len(list_stocks)

return df_weights



# %%
Expand Down Expand Up @@ -74,6 +78,8 @@ def generate_portfolio(df_train: pd.DataFrame, df_test: pd.DataFrame):
list_stocks = list(df_returns.columns)
list_stocks.remove('month_end')



# <<--------------------- YOUR CODE GOES BELOW THIS LINE --------------------->>

# This is your playground. Delete/modify any of the code here and replace with
Expand All @@ -84,23 +90,121 @@ def generate_portfolio(df_train: pd.DataFrame, df_test: pd.DataFrame):
# strategy to generate portfolio weights.
# Use the latest available data at that point in time

for i in range(len(df_test)):

import tensorflow as tf
from tensorflow import keras
from tensorboard.plugins.hparams import api as hp


# Drop the leading date column so only the numeric return columns remain.
# NOTE(review): the 1:56 slice hard-codes the number of return columns --
# confirm it matches the data set.
df_train_Clean = df_returns_train.iloc[:, 1:56]
df_test_Clean = df_returns_test.iloc[:, 1:56]

# Stack train on top of test so a single array can be standardised and then
# split back by row position.
df_returns = pd.concat([df_train_Clean, df_test_Clean])
returns_array = df_returns.values

# Estimate the scaling statistics on the training rows only. Computing them
# over train + test would let information from the test months leak into
# every standardised value, i.e. it would not be "latest available data".
n_train_rows = len(df_train_Clean)
returns_mean = np.mean(returns_array[:n_train_rows], axis=0)
returns_std = np.std(returns_array[:n_train_rows], axis=0)
# A column with no variation has zero std; divide by 1 instead so the
# standardised values stay finite.
returns_std = np.where(returns_std == 0, 1.0, returns_std)
returns_array_normalized = (returns_array - returns_mean) / returns_std

# Split the standardised rows back into their train and test parts.
df_returns_train_normalized = returns_array_normalized[:n_train_rows]
df_returns_test_normalized = returns_array_normalized[n_train_rows:]

# latest data at this point
df_latest = df_returns[(df_returns['month_end'] < df_test.loc[i, 'month_end'])]

# vol calc
df_w = pd.DataFrame()
df_w['vol'] = df_latest.std(numeric_only=True) # calculate stock volatility
df_w['inv_vol'] = 1/df_w['vol'] # calculate the inverse volatility
df_w['tot_inv_vol'] = df_w['inv_vol'].sum() # calculate the total inverse volatility
df_w['weight'] = df_w['inv_vol']/df_w['tot_inv_vol'] # calculate weight based on inverse volatility
df_w.reset_index(inplace=True, names='name')


# add to all weights
df_this = pd.DataFrame(data=[[df_test.loc[i, 'month_end']] + df_w['weight'].to_list()], columns=df_latest.columns)
df_weights = pd.concat(objs=[df_weights, df_this], ignore_index=True)

# Hyperparameter search space: hidden-layer widths, L2 penalty range and the
# candidate activation functions.
HP_Layer_1_Nodes = hp.HParam('Layer_1_Nodes', hp.Discrete([20, 40, 60, 80, 100]))
HP_Layer_2_Nodes = hp.HParam('Layer_2_Nodes', hp.Discrete([20, 40, 60, 80, 100]))
HP_L2 = hp.HParam('l2_regulariser', hp.RealInterval(0.01,0.02))
HP_ACTIVATIONS = hp.HParam('Activations', hp.Discrete(['sigmoid','elu','tanh','softmax','softplus','relu']))

# Record the search-space configuration through a summary writer that logs
# to logs/hparam_tuning.
hparam_writer = tf.summary.create_file_writer('logs/hparam_tuning')
with hparam_writer.as_default():
    hp.hparams_config(
        hparams=[HP_Layer_1_Nodes, HP_Layer_2_Nodes, HP_L2, HP_ACTIVATIONS],
    )



# Define the neural network model
def create_model(hparams):
    """Build the feed-forward network described by one hyperparameter set.

    Args:
        hparams: Mapping keyed by the ``HP_*`` objects giving the node count
            of each hidden layer, the hidden activation name and the L2
            penalty strength.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model whose softmax output has
        one unit per column of ``df_returns``.
    """
    # Read the concrete values out of ``hparams``. The HP_* objects only
    # describe the search space; they are not valid layer arguments.
    activation = hparams[HP_ACTIVATIONS]
    l2_strength = float(hparams[HP_L2])

    def hidden_layer(units):
        # One regularised hidden layer; the L2 penalty is the tuned HP_L2.
        return tf.keras.layers.Dense(
            units,
            activation=activation,
            kernel_regularizer=tf.keras.regularizers.l2(l2_strength),
        )

    return tf.keras.Sequential([
        hidden_layer(hparams[HP_Layer_1_Nodes]),
        hidden_layer(hparams[HP_Layer_2_Nodes]),
        tf.keras.layers.Dense(len(df_returns.columns), activation='softmax'),
    ])


def cap_weights(weights, cap=0.1):
    """Limit every weight to ``cap`` while keeping each row summing to 1.

    Args:
        weights: 2-D array of non-negative weights, one row per month, each
            row summing to 1.
        cap: Largest weight allowed for a single column.

    Returns:
        Array of the same shape with no entry above ``cap``.

    Raises:
        ValueError: If there are too few columns for the cap to be feasible.
    """
    weights = np.asarray(weights, dtype=float)
    if weights.shape[1] * cap < 1:
        raise ValueError("cap is too small for the weights to sum to 1")
    clipped = np.minimum(weights, cap)
    # Whatever was cut off is handed to the columns still below the cap, in
    # proportion to their remaining headroom. Because the total headroom is
    # never smaller than the amount cut off, no column is pushed past the cap.
    excess = 1.0 - clipped.sum(axis=1, keepdims=True)
    headroom = cap - clipped
    total_headroom = headroom.sum(axis=1, keepdims=True)
    share = np.divide(
        excess,
        total_headroom,
        out=np.zeros_like(excess),
        where=total_headroom > 0,
    )
    # Final minimum guards against floating-point overshoot of the cap.
    return np.minimum(clipped + headroom * share, cap)


# Prepare training data and labels: each month's standardised returns are the
# input and the following month's best-performing stock is the target.
Past_train = df_returns_train_normalized[:-1]
Future_train = df_returns_train_normalized[1:]

# One-hot encode the target: 1 for the highest value in each row, 0 elsewhere.
Future_train_onehot = np.zeros_like(Future_train)
Future_train_onehot[np.arange(len(Future_train)), np.argmax(Future_train, axis=1)] = 1

# Hold out the most recent fifth of the training pairs for validation so the
# hyperparameters are never tuned on the test months.
n_val = max(1, len(Past_train) // 5)
inputs_fit, outputs_fit = Past_train[:-n_val], Future_train_onehot[:-n_val]
inputs_val, outputs_val = Past_train[-n_val:], Future_train_onehot[-n_val:]


def train_val_model(hparams):
    """Train one candidate model and score it on the held-out months.

    Args:
        hparams: Mapping keyed by the ``HP_*`` objects, as accepted by
            ``create_model``.

    Returns:
        Tuple ``(mae, mse)`` measured on the validation pairs.
    """
    model = create_model(hparams)
    model.compile(optimizer="adam", loss='mean_squared_error', metrics=['mean_absolute_error'])
    # verbose=0: the search trains many models, per-epoch logs would swamp
    # the output.
    model.fit(inputs_fit, outputs_fit, epochs=100, batch_size=8, verbose=0)
    loss_and_metrics = model.evaluate(inputs_val, outputs_val, verbose=0)
    # evaluate() returns [loss, metric]: index 0 is the MSE loss and index 1
    # the MAE metric. There is no index 2.
    return loss_and_metrics[1], loss_and_metrics[0]


# Grid search over hyperparameter combinations
from itertools import product

session_num = 0
best_combined_metric = float('inf')
best_hparams = {}
search_grid = product(
    HP_Layer_1_Nodes.domain.values,
    HP_Layer_2_Nodes.domain.values,
    np.linspace(HP_L2.domain.min_value, HP_L2.domain.max_value, num=5),
    HP_ACTIVATIONS.domain.values,
)
for Layer_1_Nodes, Layer_2_Nodes, l2_regulariser, Activations in search_grid:
    hparams = {
        HP_Layer_1_Nodes: Layer_1_Nodes,
        HP_Layer_2_Nodes: Layer_2_Nodes,
        HP_L2: float(l2_regulariser),
        HP_ACTIVATIONS: Activations,
    }
    mae, mse = train_val_model(hparams)
    session_num += 1
    # NOTE(review): MAE + MSE is used as the combined score -- confirm this
    # is the intended way to rank candidates.
    combined_metric = mae + mse
    if combined_metric < best_combined_metric:
        best_combined_metric = combined_metric
        best_hparams = hparams

if not best_hparams:
    raise RuntimeError("hyperparameter search produced no usable model")

# Build and train the final model with the best hyperparameters found.
model = create_model(best_hparams)
model.compile(optimizer='adam', loss="mean_absolute_error", metrics=['accuracy'])
model.fit(Past_train, Future_train_onehot, epochs=100, batch_size=8)

# Predict one row of weights per test month from the month just before it.
# The slice starts at the last training month, so the first test month is
# covered and no test month sees its own returns.
n_train_rows = len(df_returns_train_normalized)
Past_test = returns_array_normalized[n_train_rows - 1:-1]
Future_test_predicted = model.predict(Past_test)

# Normalize the predicted weights so that they sum to 1 for each month
Future_test_normalized = Future_test_predicted / np.sum(Future_test_predicted, axis=1, keepdims=True)

# Enforce the 10% limit per stock. The softmax outputs are already weights,
# so they are not rescaled with the return mean/std.
capped_weights = cap_weights(Future_test_normalized, cap=0.1)

# One row per test month: the date followed by one weight column per stock.
df_weights = pd.DataFrame(capped_weights, columns=df_returns.columns)
df_weights.insert(0, 'month_end', df_returns_test['month_end'].to_list())

# The resulting df_weights holds the generated portfolio weights for the test-set months





# <<--------------------- YOUR CODE GOES ABOVE THIS LINE --------------------->>

# 10% limit check
Expand Down