-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path02-wrangle_data.R
69 lines (57 loc) · 1.93 KB
/
02-wrangle_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
library(readr)
library(dplyr)
library(fs)
library(purrr)
library(janitor)
# List the files found in the usage and registration report folders.
usage_reports <- path("data", "usage") |>
  dir_ls()
reg_reports <- path("data", "registration") |>
  dir_ls()
# Read one usage report CSV and return its participant table with meeting
# details attached.
#
# The first line of the file decides how it is parsed:
# * If it contains "Topic", the file is treated as starting with a
#   meeting-information block. That block is read from the top of the file
#   (a single data row, with `ID` kept as character), the participant table
#   is read after skipping the first 3 lines, and the two are bound
#   column-wise.
# * Otherwise a warning naming the file is raised, the whole file is read as
#   the participant table, and an `id` column derived from the file name
#   (extension and "participants_" removed) is inserted as the first column.
#
# In both layouts the column names are cleaned with `clean_names()` and
# `name_original_name` / `total_duration_minutes` are renamed to `name` /
# `attendance_duration`.
#
# usage_report: path to a single usage report CSV file.
# Returns: a data frame with one row per row of the participant table.
# Errors: if the file contains no lines at all.
read_wrangle_usage <- function(usage_report) {
  first_line <- readLines(usage_report, n = 1)

  # An empty file yields character(0), which would make `if` fail with the
  # unhelpful "argument is of length zero"; fail with the file name instead.
  if (length(first_line) == 0L) {
    stop("Usage report is empty: ", usage_report, call. = FALSE)
  }
  has_meeting_info <- grepl(pattern = "Topic", first_line)

  # The participant table is parsed the same way in both layouts; only the
  # number of leading lines to skip differs.
  read_participants <- function(skip) {
    read_csv(usage_report, skip = skip) |>
      clean_names() |>
      rename(
        name = name_original_name,
        attendance_duration = total_duration_minutes
      )
  }

  if (!has_meeting_info) {
    # Name the file so the warning is actionable when mapping over many
    # reports.
    warning(
      "Usage report doesn't contain meeting information: ", usage_report,
      call. = FALSE
    )
    participants <- read_participants(skip = 0)

    # Fall back to the meeting ID embedded in the file name.
    file_id <- path_file(usage_report) |>
      path_ext_remove() |>
      gsub("participants_", "", x = _)

    return(mutate(participants, id = file_id, .before = everything()))
  }

  participants <- read_participants(skip = 3)

  # Only the first data row is read; `ID` is forced to character so it is not
  # guessed as a number.
  meeting_info <- read_csv(
    usage_report,
    n_max = 1,
    col_types = cols(ID = col_character())
  ) |>
    clean_names() |>
    rename(meeting_duration = duration_minutes)

  # The single meeting-information row is repeated for every participant row.
  bind_cols(meeting_info, participants)
}
# Parse every usage report and stack the results into one table, then print it.
usage <- usage_reports |>
  map(read_wrangle_usage) |>
  list_rbind()
usage
# Read one registration report CSV and tidy it.
#
# Column names are cleaned with `clean_names()`, columns whose names contain
# certain phrases are given short names (`college`, `department`,
# `career_stage`, `how_discovered`, `mailing_list`), and an `id` column is
# derived from the source file name by dropping the extension and the
# "_RegistrationReport" suffix.
#
# reg_report: path to a single registration report CSV file.
# Returns: the tidied data frame, without the helper `file` column.
read_wrangle_registration <- function(reg_report) {
  # `id = "file"` stores the source path in a `file` column, used below to
  # build the meeting ID.
  report <- clean_names(read_csv(reg_report, id = "file"))

  # Give the long question-style column names short, stable names.
  report <- rename(
    report,
    college = contains("college"),
    department = contains("department"),
    career_stage = contains("career_stage"),
    how_discovered = contains("how_did_you_hear"),
    mailing_list = contains("mailing_list")
  )

  # Meeting ID = file name without its extension and report suffix.
  report <- mutate(
    report,
    id = gsub(
      "_RegistrationReport",
      "",
      fs::path_ext_remove(path_file(file))
    ),
    .before = file
  )

  # Drop the helper column and, when present, `x11`
  # (presumably an unnamed trailing column -- TODO confirm).
  select(report, -any_of(c("x11", "file")))
}
# Parse every registration report and stack the results into one table.
registration <- reg_reports |>
  map(read_wrangle_registration) |>
  list_rbind()

# Attach registration details to the usage rows. No `by` is given, so the
# join uses every column name the two tables have in common.
attended <- usage |>
  left_join(registration)

# Save the combined table.
attended |>
  write_csv("data/attended.csv")