-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstep_02_combine_data.R
44 lines (33 loc) · 1.15 KB
/
step_02_combine_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# This file loads all of the cleaned subject data into a single data frame.
library(tidyverse)
library(conflicted)
# Load every data/data_*.tsv.bz2 file together with its matching
# data/events_*.tsv.bz2 file and stack the results into one data frame.

# First get a list of all the data files.
data_files <- Sys.glob("data/data_*.tsv.bz2")

# Read one data file, tag each row with the file it came from, and attach the
# matching events by joining on the shared "index" column.
#
# data_file: path to a single data_*.tsv.bz2 file.
# Returns the data rows with a `filename` column plus the event columns.
# Stops with an explicit error when the matching events file does not exist.
load_data_file <- function(data_file) {
  print(paste("Processing", data_file))

  # Build the events path from the file name only, so a "data_" that appears
  # elsewhere in the path can never be rewritten by mistake.
  events_file <- file.path(
    dirname(data_file),
    sub("^data_", "events_", basename(data_file))
  )
  if (!file.exists(events_file)) {
    stop(
      "Missing events file for ", data_file, ": ", events_file,
      call. = FALSE
    )
  }

  # Read the data and record which file it came from.
  dat_temp <- read_tsv(data_file) %>%
    mutate(filename = data_file)

  # Read the events for the same recording.
  events_temp <- read_tsv(events_file)

  # Keep every data row; columns present in both tables keep their name for
  # the data and get an "_event" suffix for the events.
  # NOTE(review): assumes "index" is unique within each events file, otherwise
  # the join duplicates data rows - confirm.
  dat_temp %>%
    left_join(events_temp, by = "index", suffix = c("", "_event"))
}

print("Loading data")
print(paste("Found", length(data_files), "files"))
print("=====================")

# Collect the per-file results in a list and bind them once at the end;
# growing the data frame with rbind() inside a loop copies all previously
# loaded rows on every iteration. bind_rows() matches columns by name and
# fills columns missing from a file with NA.
dat <- bind_rows(lapply(data_files, load_data_file))
print(head(dat))
# Save the combined data frame as a single bzip2-compressed TSV file; readr
# selects the compression from the ".bz2" extension of the output path.
write_tsv(dat, "data/combined_data.tsv.bz2")