PIM-Data-Science · kojuno83 · Aug 5, 2023
diff --git a/solution_skeleton.py b/solution_skeleton.py
@@ -15,6 +15,9 @@
 df_returns_train['month_end'] = pd.to_datetime(arg=df_returns_train['month_end']).apply(lambda d: d.date())
 df_returns_test['month_end'] = pd.to_datetime(arg=df_returns_test['month_end']).apply(lambda d: d.date())
 
+df_returns_train
+df_returns_test
+
 # %%
 
 def equalise_weights(df: pd.DataFrame):
@@ -44,6 +47,7 @@ def equalise_weights(df: pd.DataFrame):
     df_weights[list_stocks] = 1/len(list_stocks)
 
     return df_weights
+
 
 
 # %%
@@ -74,6 +78,8 @@ def generate_portfolio(df_train: pd.DataFrame, df_test: pd.DataFrame):
     list_stocks = list(df_returns.columns)
     list_stocks.remove('month_end')
 
+
+
     # <<--------------------- YOUR CODE GOES BELOW THIS LINE --------------------->>
 
     # This is your playground. Delete/modify any of the code here and replace with 
@@ -84,23 +90,121 @@ def generate_portfolio(df_train: pd.DataFrame, df_test: pd.DataFrame):
     # strategy to generate portfolio weights.
     # Use the latest available data at that point in time
 
-    for i in range(len(df_test)):
+
+    import tensorflow as tf
+    from tensorflow import keras
+    from tensorboard.plugins.hparams import api as hp
+
+
+    #remove dates column 
+    df_test_Clean = df_returns_test.iloc[:, 1:56]
+    df_train_Clean = df_returns_train.iloc[:, 1:56]
+    df_test_Clean
+    df_train_Clean
+
+
+    # Combine train and test data for preprocessing
+    df_returns = pd.concat([df_train_Clean, df_test_Clean])
+    df_returns
+
+
+    # Normalize the returns data
+    returns_array = df_returns.values
+    returns_mean = np.mean(returns_array, axis=0)
+    returns_std = np.std(returns_array, axis=0)
+    returns_array_normalized = (returns_array - returns_mean) / returns_std
+
+    # Split the data back into train and test sets
+    df_returns_train_normalized = returns_array_normalized[:len(df_returns_train)]
+    df_returns_test_normalized = returns_array_normalized[len(df_returns_train):]
 
-        # latest data at this point
-        df_latest = df_returns[(df_returns['month_end'] < df_test.loc[i, 'month_end'])]
-
-        # vol calc
-        df_w = pd.DataFrame()
-        df_w['vol'] = df_latest.std(numeric_only=True)          # calculate stock volatility
-        df_w['inv_vol'] = 1/df_w['vol']                         # calculate the inverse volatility
-        df_w['tot_inv_vol'] = df_w['inv_vol'].sum()             # calculate the total inverse volatility
-        df_w['weight'] = df_w['inv_vol']/df_w['tot_inv_vol']    # calculate weight based on inverse volatility
-        df_w.reset_index(inplace=True, names='name')
+
 
-        # add to all weights
-        df_this = pd.DataFrame(data=[[df_test.loc[i, 'month_end']] + df_w['weight'].to_list()], columns=df_latest.columns)
-        df_weights = pd.concat(objs=[df_weights, df_this], ignore_index=True)
+    # Hyperparameter search space, declared with the TensorBoard HParams API (imported above as `hp`).
+    HP_L2 = hp.HParam('l2_regulariser', hp.RealInterval(0.01,0.02))  # real-valued interval from 0.01 to 0.02
+    HP_ACTIVATIONS = hp.HParam('Activations', hp.Discrete(['sigmoid','elu','tanh','softmax','softplus','relu']))  # candidate activation names
+    HP_Layer_1_Nodes = hp.HParam('Layer_1_Nodes', hp.Discrete([20, 40, 60, 80, 100]))  # candidate unit counts, first Dense layer of create_model
+    HP_Layer_2_Nodes = hp.HParam('Layer_2_Nodes', hp.Discrete([20, 40, 60, 80, 100]))  # candidate unit counts, second Dense layer of create_model
+
+    # Register the four hyperparameters with a summary writer rooted at logs/hparam_tuning.
+    with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
+      hp.hparams_config(
+        hparams=[HP_Layer_1_Nodes, HP_Layer_2_Nodes, HP_L2 ,HP_ACTIVATIONS],
+
+      )
+
+
+
+    # Define the neural network model
+    def create_model(hparams):
+        model = tf.keras.Sequential([
+                tf.keras.layers.Dense(HP_Layer_1_Nodes, activation=HP_ACTIVATIONS),
+                tf.keras.layers.Dense(HP_Layer_2_Nodes, activation=HP_ACTIVATIONS ),
+                tf.keras.layers.Dense(len(df_returns.columns), activation='softmax')
+                ])
+
+        return model 
+
 
+    def train_val_model(hparams):
+        model = create_model(hparams)
+        model.compile(optimizer="adam", loss='mean_squared_error', metrics=['mean_absolute_error'])
+        model.fit(df_returns_train_normalized, df_returns_test_normalized, epochs=100, batch_size=8)
+        loss_and_metrics = model.evaluate(inputs_val, outputs_val)
+        return loss_and_metrics[1], loss_and_metrics[2]  # Return MAE and MSE
+
+
+    # Grid search over hyperparameter combinations: both layer widths, five evenly spaced L2 values, and the activation.
+    session_num = 0
+    best_combined_metric = float('inf')
+    best_hparams = {}
+    for Layer_1_Nodes in HP_Layer_1_Nodes.domain.values:
+        for Layer_2_Nodes in HP_Layer_2_Nodes.domain.values:
+            for l2_regulariser in np.linspace(HP_L2.domain.min_value, HP_L2.domain.max_value, num=5):
+                for Activations in HP_ACTIVATIONS.domain.values:
+                    hparams = {
+                        HP_Layer_1_Nodes: Layer_1_Nodes,
+                        HP_Layer_2_Nodes: Layer_2_Nodes,
+                        HP_L2: l2_regulariser,
+                        HP_ACTIVATIONS: Activations, 
+                     }
+    # NOTE(review): the loop body only builds `hparams`; nothing visible here calls train_val_model(),
+    # so session_num, best_combined_metric and best_hparams keep their initial values. Confirm this is intended.
+
+    # Compile the model
+    # NOTE(review): no assignment to `model` is visible in this scope (only inside create_model/train_val_model); confirm it is defined before this line.
+    model.compile(optimizer='adam', loss="mean_absolute_error", metrics=['accuracy'])
+
+    # Prepare training data and labels: each row is paired with the row that follows it
+    Past_train = df_returns_train_normalized[:-1]  # every training row except the last
+    Future_train = df_returns_train_normalized[1:]   # every training row except the first
+
+    # One-hot encode the targets: per row, 1 in the column holding the largest normalized value and 0 elsewhere
+    Future_train_onehot = np.zeros_like(Future_train)
+    Future_train_onehot[np.arange(len(Future_train)), np.argmax(Future_train, axis=1)] = 1
+
+    # Train the model
+    model.fit(Past_train, Future_train_onehot, epochs=100, batch_size=8)
+
+    # Use the model to predict portfolio weights for the test set
+    Past_test = df_returns_test_normalized[:-1]  # all test rows except the last, so one fewer prediction row than test rows
+    Future_test_predicted = model.predict(Past_test)
+
+    # Normalize the predicted weights so that each row sums to 1
+    Future_test_normalized = Future_test_predicted / np.sum(Future_test_predicted, axis=1, keepdims=True)
+
+    # Multiply by the per-column std and add the per-column mean of the returns
+    df_weights = Future_test_normalized * returns_std + returns_mean
+    # NOTE(review): after this step the rows no longer necessarily sum to 1; confirm that rescaling weights with return statistics is intended.
+    # Cap every weight at 0.1 (10%); the rows are not re-normalized after the cap
+    df_weights[df_weights > 0.1] = 0.1
+
+    # df_weights now holds the generated weights, one row per row of Past_test (presumably a NumPy array, not a DataFrame; verify against the checks below)
+
+
+
+
+
     # <<--------------------- YOUR CODE GOES ABOVE THIS LINE --------------------->>
 
     # 10% limit check