Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 88 additions & 12 deletions solution.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,60 @@
# Load necessary libraries
rm(list=ls())
library(dplyr)
library(lubridate)
library(ggplot2)
library(randomForest)
library(tidyr)
library(pacman)
# pacman::p_load(randomForest)
library(h2o)


# Initialize the H2O cluster
h2o.init(nthreads = -1)

# Define parameters
start_train <- as.Date("2017-01-01")
end_train <- as.Date("2023-11-30")
start_test <- as.Date("2024-01-01")
end_test <- as.Date("2024-06-30")

n_buys <- 10
verbose <- FALSE



# Load sector data
df_sectors <- read.csv('data/data0.csv')

# Load price and financial data
df_data <- read.csv('data/data1.csv')
df_data$date <- as.Date(df_data$date)

df_x <- df_data %>% select(date, security, price, return30, ratio_pe, ratio_pcf, ratio_de, ratio_roe, ratio_roa)
df_y <- df_data %>% select(date, security, label)

list_vars1 <- c('price', 'return30', 'ratio_pe', 'ratio_pcf', 'ratio_de', 'ratio_roe', 'ratio_roa')

# Create signals DataFrame
df_signals <- data.frame(date = unique(df_x$date[df_x$date >= start_test & df_x$date <= end_test]))
df_signals <- df_signals %>% arrange(date)

# Initialize an empty list for storing accuracy results
df_signals$acc_total <- NA
df_signals$acc_current <- NA



# Load necessary libraries
library(dplyr)
library(lubridate)
library(ggplot2)
library(tidyr)
library(h2o)

print('---> R Script Start')

# Initialize the H2O cluster
h2o.init(nthreads = -1)

# Define parameters
start_train <- as.Date("2017-01-01")
end_train <- as.Date("2023-11-30")
Expand Down Expand Up @@ -55,7 +101,7 @@ for (i in seq_len(nrow(df_signals))) {
df_testx <- df_x %>% filter(date >= df_signals$date[i])
df_testy <- df_y %>% filter(date >= df_signals$date[i])

# Scale data
# Scale data (optional, h2o handles scaling internally)
for (col in list_vars1) {
scaler <- function(x) (x - min(x)) / (max(x) - min(x)) * 2 - 1
df_trainx[[col]] <- scaler(df_trainx[[col]])
Expand All @@ -64,15 +110,33 @@ for (i in seq_len(nrow(df_signals))) {

df_trainy <- df_trainy %>% mutate(label = as.factor(label))

# Fit the Random Forest classifier
if (i == 1) {
clf <- randomForest(x = df_trainx[list_vars1], y = df_trainy$label, ntree = 10, mtry = sqrt(length(list_vars1)), nodesize = 1000)
# Convert to h2o frames
train_h2o <- as.h2o(cbind(df_trainx[list_vars1], label = df_trainy$label))
test_h2o <- as.h2o(df_testx[list_vars1])

# Build and train the H2O deep learning model trains every 10 days
if (i %in% seq(1, nrow(df_signals), 10)) {
clf <- h2o.deeplearning(
x = list_vars1,
y = "label",
training_frame = train_h2o,
activation = "RectifierWithDropout",
hidden = c(64, 32),
epochs = 20,
input_dropout_ratio = 0.2,
hidden_dropout_ratios = c(0.5, 0.5),
balance_classes = TRUE,
stopping_metric = "misclassification",
stopping_rounds = 5,
verbose = verbose
)
}

# Predictions and accuracy
pred_probs <- predict(clf, newdata = df_testx[list_vars1], type = "prob")
df_testy$signal <- pred_probs[, 2]
df_testy$pred <- ifelse(pred_probs[, 2] > 0.5, 1, 0)
pred_probs <- h2o.predict(clf, newdata = test_h2o)
pred_probs <- as.data.frame(pred_probs)
df_testy$signal <- pred_probs$p1
df_testy$pred <- ifelse(pred_probs$p1 > 0.5, 1, 0)
df_testy$count <- 1

df_current <- df_testy %>% filter(date == df_signals$date[i])
Expand All @@ -89,6 +153,19 @@ for (i in seq_len(nrow(df_signals))) {
df_signals[i, df_current$security] <- df_current$signal
}

# Continue with your signal plotting, buy matrix, and payoff calculations

# Shutdown H2O cluster after completion
h2o.shutdown(prompt = FALSE)

print('---> R Script End')

# Continue with your signal plotting, buy matrix, and payoff calculations

# Shutdown H2O cluster after completion
h2o.shutdown(prompt = FALSE)


# Create buy matrix for payoff plot
df_signals$`10th` <- apply(df_signals[df_sectors$security], 1, function(x) sort(x, decreasing = TRUE)[n_buys])
df_index <- df_signals[df_sectors$security] >= df_signals$`10th`
Expand Down Expand Up @@ -137,7 +214,7 @@ plot_payoff <- function(df_buys) {

# df_payoff <- df[, 1, drop = FALSE]
df <- df * (1 / n_buys) # equally weighted

df_payoff <- NULL
arr_ret <- as.matrix(df_returns[-1] + 1)
df_payoff$payoff <- diag(df %*% t(arr_ret-1))
Expand All @@ -157,4 +234,3 @@ df_payoff <- plot_payoff(df_buys)
ggplot(df_payoff, aes(x = date, y = tri)) + geom_line() + ggtitle("Payoff Over Time")


print('---> R Script End')