
Commit 7ef9103

committed
fix: nhsn archive builder
1 parent cc54d1b commit 7ef9103

1 file changed (+22, -21 lines)


scripts/build_nhsn_archive.R

Lines changed: 22 additions & 21 deletions
@@ -90,33 +90,27 @@ get_last_raw_update_at <- function(type = c("raw", "prelim"), missing_value = MI
 #'
 #' @param verbose Whether to print verbose output.
 update_nhsn_data_raw <- function() {
-  current_time <- with_tz(Sys.time(), tzone = "UTC")
-  # WARNING: These Socrata metadata fields have been unreliable. If they fail, they
+  # WARNING: Socrata metadata fields have been unreliable. If they fail, they
   # default to current time, which will trigger a download and then we compare
   # with hash archive.
-  raw_update_at <- get_socrata_updated_at(config$raw_metadata_url, missing_value = current_time)
-  prelim_update_at <- get_socrata_updated_at(config$prelim_metadata_url, missing_value = current_time)
-  # Get the last time the raw data was updated from S3.
-  last_raw_file_update_at <- get_last_raw_update_at("raw")
-  last_prelim_file_update_at <- get_last_raw_update_at("prelim")

-  # Some derived values for logging and file naming.
-  raw_update_at_local <- with_tz(raw_update_at)
-  raw_update_at_formatted <- format(raw_update_at, "%Y-%m-%d_%H-%M-%OS5")
-  raw_file <- glue("{config$raw_file_name_prefix}_{raw_update_at_formatted}.parquet")
-  local_file_path <- here::here(config$local_raw_cache_path, raw_file)
-  prelim_update_at_local <- with_tz(prelim_update_at)
-  prelim_update_at_formatted <- format(prelim_update_at, "%Y-%m-%d_%H-%M-%OS5")
-  prelim_file <- glue("{config$raw_file_name_prefix}_{prelim_update_at_formatted}_prelim.parquet")
-  local_prelim_file_path <- here::here(config$local_raw_cache_path, prelim_file)
-  hash_archive_path <- here::here(config$local_raw_cache_path, config$hash_archive_file)
+  # Get the current time in UTC for logging.
+  current_time <- with_tz(Sys.time(), tzone = "UTC")

   # Open the hash archive file.
+  hash_archive_path <- here::here(config$local_raw_cache_path, config$hash_archive_file)
   hash_archive <- nanoparquet::read_parquet(hash_archive_path)

+  # Get the last time the raw data was updated from Socrata.
+  raw_update_at <- get_socrata_updated_at(config$raw_metadata_url, missing_value = current_time)
+  last_raw_file_update_at <- get_last_raw_update_at("raw")
   # If the raw data has been updated or there was a failure getting metadata,
   # download it.
   if (raw_update_at > last_raw_file_update_at) {
+    raw_update_at_local <- with_tz(raw_update_at)
+    raw_update_at_formatted <- format(raw_update_at, "%Y-%m-%d_%H-%M-%OS5")
+    raw_file <- glue("{config$raw_file_name_prefix}_{raw_update_at_formatted}.parquet")
+    local_file_path <- here::here(config$local_raw_cache_path, raw_file)
     cli_inform("The raw data has been updated at {raw_update_at_local} (UTC: {raw_update_at}).")
     cli_inform("Downloading the raw data... {raw_file}")
     read_csv(config$raw_query_url) %>% write_parquet(local_file_path)
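
Note on the hunk above: the freshness check relies on get_socrata_updated_at() falling back to current_time when the Socrata metadata request fails, which forces a download that the hash archive later deduplicates. The sketch below only illustrates that fallback behavior and is not code from this repo; the rowsUpdatedAt field name is an assumption about the Socrata view metadata.

# Hypothetical sketch of the fallback described in the WARNING comment above;
# the real get_socrata_updated_at() lives elsewhere in the repo and may differ.
get_socrata_updated_at_sketch <- function(metadata_url, missing_value) {
  tryCatch(
    {
      meta <- jsonlite::fromJSON(metadata_url)
      # rowsUpdatedAt is assumed to be a Unix epoch (seconds) in the view metadata.
      as.POSIXct(meta$rowsUpdatedAt, origin = "1970-01-01", tz = "UTC")
    },
    # On any failure, return the caller-supplied default (current_time above),
    # which is always newer than the last archived file and so triggers a download.
    error = function(e) missing_value
  )
}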
@@ -126,11 +120,11 @@ update_nhsn_data_raw <- function() {

     # If the raw file hash is not in the archive, add it to S3 and local file.
     if (!raw_file_hash %in% hash_archive$hash) {
-      hash_archive <- bind_rows(hash_archive, tibble(file = raw_file, hash = raw_file_hash))
+      hash_archive <- bind_rows(hash_archive, tibble(files = raw_file, hash = raw_file_hash))
       cli_inform("Adding raw file to S3 and local cache.")

       # Back up the raw file to S3.
-      # s3write_using(write_parquet, object = raw_file, bucket = config$s3_bucket)
+      put_object(file = local_file_path, object = raw_file, bucket = config$s3_bucket)

       # Write the hash archive back to the file.
       write_parquet(hash_archive, hash_archive_path)
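
Note on the backup change above: the commented-out s3write_using() call is replaced with aws.s3::put_object(), which uploads the parquet file that was just written to the local cache rather than re-serializing an in-memory object. A hedged comparison, with raw_data standing in as a hypothetical in-memory data frame:

library(aws.s3)

# Old (commented-out) style: serialize an in-memory object straight to S3.
# s3write_using(raw_data, FUN = nanoparquet::write_parquet,
#               object = raw_file, bucket = config$s3_bucket)

# Style adopted by this commit: upload the locally cached file as-is, so the
# S3 object is byte-identical to the local archive copy.
put_object(file = local_file_path, object = raw_file, bucket = config$s3_bucket)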
@@ -140,9 +134,16 @@ update_nhsn_data_raw <- function() {
     }
   }

+  # Get the last time the prelim data was updated from Socrata.
+  prelim_update_at <- get_socrata_updated_at(config$prelim_metadata_url, missing_value = current_time)
+  last_prelim_file_update_at <- get_last_raw_update_at("prelim")
   # If the prelim data has been updated or there was a failure getting metadata,
   # download it.
   if (prelim_update_at > last_prelim_file_update_at) {
+    prelim_update_at_local <- with_tz(prelim_update_at)
+    prelim_update_at_formatted <- format(prelim_update_at, "%Y-%m-%d_%H-%M-%OS5")
+    prelim_file <- glue("{config$raw_file_name_prefix}_{prelim_update_at_formatted}_prelim.parquet")
+    local_prelim_file_path <- here::here(config$local_raw_cache_path, prelim_file)
     cli_inform("The prelim data has been updated at {prelim_update_at_local} (UTC: {prelim_update_at}).")
     cli_inform("Downloading the prelim data... {prelim_file}")
     read_csv(config$prelim_query_url) %>% write_parquet(local_prelim_file_path)
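
Note on the hunk above: the derived prelim values are now computed inside the if block, so file names are only built when a download actually happens. For reference, the format string yields names like the example below; "nhsn" is a placeholder standing in for config$raw_file_name_prefix.

library(glue)
library(lubridate)

# Illustration of the file-name scheme. %OS5 keeps fractional seconds to five
# decimal places, so two updates within the same second still get distinct names.
prelim_update_at <- with_tz(Sys.time(), tzone = "UTC")
prelim_update_at_formatted <- format(prelim_update_at, "%Y-%m-%d_%H-%M-%OS5")
glue("nhsn_{prelim_update_at_formatted}_prelim.parquet")
#> nhsn_2025-01-07_18-42-03.12345_prelim.parquet   (example output)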
@@ -152,11 +153,11 @@ update_nhsn_data_raw <- function() {

     # If the prelim file hash is not in the archive, add it to S3 and local file.
     if (!prelim_file_hash %in% hash_archive$hash) {
-      hash_archive <- bind_rows(hash_archive, tibble(file = prelim_file, hash = prelim_file_hash))
+      hash_archive <- bind_rows(hash_archive, tibble(files = prelim_file, hash = prelim_file_hash))
       cli_inform("Adding prelim file to S3 and local cache.")

       # Back up the prelim file to S3.
-      # s3write_using(write_parquet, object = prelim_file, bucket = config$s3_bucket)
+      put_object(file = local_prelim_file_path, object = prelim_file, bucket = config$s3_bucket)

       # Write the hash archive back to the file.
       write_parquet(hash_archive, hash_archive_path)
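
Note on the two hash-gate hunks: both branches now add the new row with a files column (previously file), presumably to match the schema of the stored hash archive, and only upload when the hash is unseen. A condensed sketch of that gate follows; hashing with tools::md5sum() is an assumption, since the hash computation is outside this diff.

# Condensed sketch of the dedup gate under the assumptions above.
prelim_file_hash <- unname(tools::md5sum(local_prelim_file_path))  # assumed hashing method
if (!prelim_file_hash %in% hash_archive$hash) {
  # Record the new snapshot in the archive...
  hash_archive <- dplyr::bind_rows(
    hash_archive,
    tibble::tibble(files = prelim_file, hash = prelim_file_hash)
  )
  # ...back it up to S3...
  aws.s3::put_object(file = local_prelim_file_path, object = prelim_file,
                     bucket = config$s3_bucket)
  # ...and persist the updated archive locally.
  nanoparquet::write_parquet(hash_archive, hash_archive_path)
}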
