-
Notifications
You must be signed in to change notification settings - Fork 0
/
RMSSD_Calculator_2.R
194 lines (154 loc) · 8.06 KB
/
RMSSD_Calculator_2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
#
# |**********************************************************************;
# * Project : SRMSSD_Calculator
# *
# * Program name : SRMSSD_Calculator.R
# *
# * Author : Alex Pavlides
# *
# * Date created : 12/08/2016
# *
# * Purpose : Calculate RMSSD from IBT values, for time windows related to stimulus events of participants.
# Output includes a table of RMSSD value per participant and per stimulus event called RMSSD_Table.csv
# Output also includes a figure showing the mean RMSSD over participants for events greater than 15 seconds.
# *
# |**********************************************************************;
# * Parameters :
# * path - path to data folder
# * num_cores - number of cores to use in parallelised code
# *
# * Example call :
# * setwd("D:/APavlides/Documents/Kaggle/GitHub Data") -set working directory that contains function
# * source("RMSSD_Calculator_2.R") - load function
# * RMSSD_Calculator_2("D:/APavlides/Documents/Kaggle/GitHub Data/join_us-correction/data_engineer/data", 7) -- run function
RMSSD_Calculator_2 <- function(path, num_cores){
#Packages
library('plyr')
library('foreach')
library('doSNOW')
cl<-makeCluster(num_cores) #number of CPU cores to use
registerDoSNOW(cl)
#Retrieve paths for data and the ID's of participants
files <- list.files(path,recursive = TRUE,full.names=F)
files <- files[order(as.numeric(sub("([0-9]*).*", "\\1", files)))]
file_ID <- list.files(path,full.names=F)
file_ID <-file_ID[order(as.numeric(sub("([0-9]*).*", "\\1", file_ID)))]
#init vectors
file_ID_4Deletion <- vector(mode="integer", length=0)
stim_time_window <- vector(mode="numeric", length=0)
RMSSD <- vector(mode="numeric", length=0)
names_header <- vector(mode="character", length=0)
names_header_filtered <- vector(mode="character", length=0)
#initialise list
RMSSD_Store <- list()
RMSSD_Store_filtered <- list()
count_ID <- 1
#remove incomplete data where there is not both the sensor AND stimulation data.
for (i in seq(1,length(file_ID),1)){
FindString <- paste(paste ("^",file_ID[i], sep ="", collapse = NULL), "/", sep="", collapse = NULL)
occurrence <- length(grep(FindString, files))
if (occurrence < 2){
ToRemove <- grep(FindString, files)
files <-files[-ToRemove]
file_ID_4Deletion[count_ID] <- as.integer(file_ID[i])
count_ID = count_ID + 1
}
}
#remove subject ID's with incomplete data
file_ID <- file_ID[! file_ID %in% file_ID_4Deletion]
#init number of participants
Number_of_participants <- (length(files)/2)
comb <- function(x, ...) {
lapply(seq_along(x), function(i) c(x[[i]], lapply(list(...), function(y) y[[i]])))
}
#main loop for calculating RMSSD from IBT
output <- foreach(m = 1:Number_of_participants,.combine='comb', .multicombine=TRUE,
.init=list(list(), list())) %dopar% {
#retrieve data for subject
hr_gsr_path <- paste (path,files[2*m-1], sep ="/", collapse = NULL)
stimulus_path <- paste (path,files[2*m], sep ="/", collapse = NULL)
hr_gsr = read.csv( hr_gsr_path, header = TRUE)
stimulus = read.csv( stimulus_path, header = TRUE)
#convert time formats
#convert unix utc to time, using Central European Time Zone
hr_gsr$unix_utc <- as.POSIXct(as.numeric(as.character(hr_gsr$unix_utc)),origin="1970-01-01",tz="GMT")
names(hr_gsr)[names(hr_gsr) == 'unix_utc'] <- 'sensor_time'
#remove atypical IBT
lowerNormal <- which(hr_gsr[,3] < 600)
higherNormal <- which(hr_gsr[,3] >1200)
removeAtypical <- c(lowerNormal,higherNormal)
# catch integer(0) error
if (length(removeAtypical) != 0){
hr_gsr <- hr_gsr[-removeAtypical,]}
#change stimulus time format
stimulus$server_time <- as.POSIXct(stimulus$server_time,origin="1970-01-01",tz="GMT")
names(stimulus)[names(stimulus) == 'server_time'] <- 'stimulus_time'
#find no. of rows
Nrows_hr_gsr <- nrow(hr_gsr)
Nrows_stimulus <- nrow(stimulus)
stim_count <- 1
#Get time window length (sec) of stimulus
for (i in seq(1,Nrows_stimulus,2)){
stim_time_window[stim_count] <- as.numeric(stimulus[i+1,1] - stimulus[i,1])
stim_count = stim_count + 1
}
#Calculate RMSSD for each stimulus window
count_RMSSD <- 1
for (i in seq(1,Nrows_stimulus,2)){
start_time <- stimulus[i,1]
end_time <- stimulus[i+1,1]
RMSSD_sum <- 0
for (n in seq(1,Nrows_hr_gsr-1,1)){
#if sensor time is between stimulation times calculate MRSSD_sum
if (hr_gsr[n,1] >= start_time && hr_gsr[n,1] < end_time){RMSSD_sum <- RMSSD_sum + (hr_gsr[n+1,3] - hr_gsr[n,3])^2}
#if sensor time is less than start of stimulation time, continue to next time step
else if(hr_gsr[n,1] < start_time){next}
#if sensor time is greater than stimulation time, break and calculate RMSSD
else break
}
#calculate the RMSSD using RMSSD_sum
RMSSD[count_RMSSD] <- sqrt(RMSSD_sum*(1/(Nrows_hr_gsr-1)))
#reset sum
RMSSD_sum <- 0
#iterate count
count_RMSSD <- count_RMSSD + 1
}
#list of lists (foreach can only output a list so bundle two lists together for output)
output <- list(RMSSD, RMSSD[stim_time_window>15])
} # end of foreach
stopCluster(cl)
#extract first list, RMSSD
RMSSD_Store <- output[[1]]
## Add NA values to list elements
l1 <- lapply(RMSSD_Store , `length<-`, max(lengths(RMSSD_Store )))
output_RMSSD_Store <- as.data.frame(l1,stringsAsFactors=FALSE)
#extract second list, RMSSD where stimulation events longer than 15 seconds
RMSSD_Store_filtered <- output[[2]]
## Add NA values to list elements
l2 <- lapply(RMSSD_Store_filtered , `length<-`, max(lengths(RMSSD_Store_filtered )))
output_RMSSD_Store_filtered <- as.data.frame(l2,stringsAsFactors=FALSE)
#create headers for data frame
for (k in seq(1,Number_of_participants,1)){
names_header[k] <- paste ('RMSSD', file_ID[k], sep =":", collapse = NULL)
}
#add column headers to data frame
names(output_RMSSD_Store) <- names_header
names(output_RMSSD_Store_filtered) <- names_header
#calculate mean RMSSD by subject for each stimulation event, avoiding NA by setting na.rm = T
output_RMSSD_Store_filtered[output_RMSSD_Store_filtered == 0] <- NA #remove what is most likely spurious zeros in the data.
output_RMSSD_Store_filtered <- output_RMSSD_Store_filtered[rowSums(is.na(output_RMSSD_Store_filtered))!=ncol(output_RMSSD_Store_filtered), ] #trim all NA rows
stim_mean <- vector(mode="numeric", length=0)
for (n in 1:nrow(output_RMSSD_Store_filtered)){
stim_mean[n] <- rowMeans(output_RMSSD_Store_filtered[n,], na.rm = TRUE)
}
#output to .csv
write.csv(output_RMSSD_Store, file = "RMSSD_Table.csv")
#create line chart
OutputRowN <- nrow(output_RMSSD_Store_filtered)
stim_events <- seq(1,OutputRowN,1)
xrange <- range(stim_events)
yrange <- range(stim_mean)
plot(xrange, yrange, type="n", xlab="Stimulation Event", ylab="Mean RMSSD of Event" )
lines(stim_events, stim_mean, type="b", lwd=1.5)
title("Mean RMSSD across participants associated with Stimulation Events > 15 sec", cex.lab=0.75)
}