-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: rcpm_comp.R
More file actions
executable file
·86 lines (76 loc) · 2.93 KB
/
rcpm_comp.R
File metadata and controls
executable file
·86 lines (76 loc) · 2.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#################################################################
# Ridge-regression CPM (connectome-based predictive modeling)
# fitted on subjects with a complete (non-missing) phenotype.
#
# Args:
#   all_edges: numeric matrix, subjects x edges (features).
#   all_behav: data frame / matrix of phenotypes, subjects x measures.
#   id:        column (index or name) of the phenotype to predict.
#   threshold: p-value cutoff for univariate edge selection.
#   alpha:     glmnet mixing parameter (0 = ridge regression).
#   lambda:    candidate regularization path. NOTE(review): currently
#              unused -- cv.glmnet builds its own path; kept in the
#              signature for backward compatibility.
#   nfold:     number of outer cross-validation folds.
#   seed:      RNG seed for the CV split.
#
# Returns:
#   Cross-validated accuracy: mean over folds of sqrt(max(R^2, 0)),
#   where R^2 = 1 - MSE/var(test phenotype). Also printed via cat().
rcpm_comp <- function(all_edges, all_behav,
id, threshold = 0.01, alpha = 0,
lambda = 0.5*10^seq(-2, 10, length.out = 100), nfold = 10,
seed = 123){
  # Hard dependencies: fail loudly if missing (require() would only warn).
  library(glmnet)
  library(cvTools)
  # NOTE(review): these helpers are not used by this function; the source()
  # calls are kept because callers may rely on their side effects.
  source("impute_missing.R")
  source("nrmse.R")

  # Keep only subjects with an observed phenotype in column `id`.
  compIndex <- which(!is.na(all_behav[, id]))
  edge_comp <- all_edges[compIndex, , drop = FALSE]
  behav_comp <- all_behav[compIndex, , drop = FALSE]

  # Preallocate per-fold results instead of growing vectors.
  y.predict <- numeric(length(compIndex))
  mse_fold <- numeric(nfold)
  r_2_fold <- numeric(nfold)

  # Reproducible outer CV split.
  set.seed(seed)
  folds <- cvFolds(length(compIndex), K = nfold)

  for (i_fold in seq_len(nfold)) {
    testIndexes <- folds$subsets[folds$which == i_fold]
    # drop = FALSE keeps matrix shape even for a single-row fold or a
    # single selected edge (otherwise %*% / cv.glmnet would fail).
    test_mat <- edge_comp[testIndexes, , drop = FALSE]
    train_mat <- edge_comp[-testIndexes, , drop = FALSE]
    test_behav <- behav_comp[testIndexes, id]
    train_behav <- behav_comp[-testIndexes, id]

    # Univariate feature selection: keep edges whose Pearson correlation
    # with the training phenotype reaches p <= threshold.
    cor_pvalue <- apply(train_mat, 2, function(x) {
      cor.test(x, train_behav, method = "pearson")$p.value
    })
    edges_1 <- which(cor_pvalue <= threshold)

    # Inner CV picks the regularization strength (lambda.1se rule).
    model.cv <- cv.glmnet(x = train_mat[, edges_1, drop = FALSE],
                          y = train_behav,
                          family = "gaussian", alpha = alpha, nfolds = 10)
    lambda_idx <- which(model.cv$lambda == model.cv$lambda.1se)
    beta_fold <- as.vector(model.cv$glmnet.fit$beta[, lambda_idx])
    intercept_fold <- model.cv$glmnet.fit$a0[lambda_idx]

    # Predict the held-out fold with the selected-edge ridge model.
    y.predict[testIndexes] <- test_mat[, edges_1, drop = FALSE] %*%
      matrix(beta_fold) + intercept_fold

    mse_fold[i_fold] <- mean((y.predict[testIndexes] - test_behav)^2)
    r_2_fold[i_fold] <- 1 - mse_fold[i_fold] / var(test_behav)
  }

  # Clip negative out-of-sample R^2 to 0 so sqrt() cannot produce NaN
  # for folds that predict worse than the mean.
  r_2_fold[r_2_fold < 0] <- 0
  r_2 <- mean(sqrt(r_2_fold))
  cat("seed:", seed, "/r:", r_2, "\n")
  return(r_2)
}