```diff
@@ -90,33 +90,27 @@ get_last_raw_update_at <- function(type = c("raw", "prelim"), missing_value = MI
 #'
 #' @param verbose Whether to print verbose output.
 update_nhsn_data_raw <- function() {
-  current_time <- with_tz(Sys.time(), tzone = "UTC")
-  # WARNING: These Socrata metadata fields have been unreliable. If they fail, they
+  # WARNING: Socrata metadata fields have been unreliable. If they fail, they
   # default to current time, which will trigger a download and then we compare
   # with hash archive.
-  raw_update_at <- get_socrata_updated_at(config$raw_metadata_url, missing_value = current_time)
-  prelim_update_at <- get_socrata_updated_at(config$prelim_metadata_url, missing_value = current_time)
-  # Get the last time the raw data was updated from S3.
-  last_raw_file_update_at <- get_last_raw_update_at("raw")
-  last_prelim_file_update_at <- get_last_raw_update_at("prelim")

-  # Some derived values for logging and file naming.
-  raw_update_at_local <- with_tz(raw_update_at)
-  raw_update_at_formatted <- format(raw_update_at, "%Y-%m-%d_%H-%M-%OS5")
-  raw_file <- glue("{config$raw_file_name_prefix}_{raw_update_at_formatted}.parquet")
-  local_file_path <- here::here(config$local_raw_cache_path, raw_file)
-  prelim_update_at_local <- with_tz(prelim_update_at)
-  prelim_update_at_formatted <- format(prelim_update_at, "%Y-%m-%d_%H-%M-%OS5")
-  prelim_file <- glue("{config$raw_file_name_prefix}_{prelim_update_at_formatted}_prelim.parquet")
-  local_prelim_file_path <- here::here(config$local_raw_cache_path, prelim_file)
-  hash_archive_path <- here::here(config$local_raw_cache_path, config$hash_archive_file)
+  # Get the current time in UTC for logging.
+  current_time <- with_tz(Sys.time(), tzone = "UTC")

   # Open the hash archive file.
+  hash_archive_path <- here::here(config$local_raw_cache_path, config$hash_archive_file)
   hash_archive <- nanoparquet::read_parquet(hash_archive_path)

+  # Get the last time the raw data was updated from Socrata.
+  raw_update_at <- get_socrata_updated_at(config$raw_metadata_url, missing_value = current_time)
+  last_raw_file_update_at <- get_last_raw_update_at("raw")
   # If the raw data has been updated or there was a failure getting metadata,
   # download it.
   if (raw_update_at > last_raw_file_update_at) {
+    raw_update_at_local <- with_tz(raw_update_at)
+    raw_update_at_formatted <- format(raw_update_at, "%Y-%m-%d_%H-%M-%OS5")
+    raw_file <- glue("{config$raw_file_name_prefix}_{raw_update_at_formatted}.parquet")
+    local_file_path <- here::here(config$local_raw_cache_path, raw_file)
     cli_inform("The raw data has been updated at {raw_update_at_local} (UTC: {raw_update_at}).")
     cli_inform("Downloading the raw data... {raw_file}")
     read_csv(config$raw_query_url) %>% write_parquet(local_file_path)
```
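For reference, a minimal sketch of the fallback behavior the WARNING comment describes. This is how I would expect `get_socrata_updated_at()` to behave; the `jsonlite` call and the `rowsUpdatedAt` field name are assumptions, not confirmed details of this repo's helper:

```r
# Hypothetical sketch, not this repo's implementation: fetch a Socrata
# metadata endpoint and fall back to `missing_value` on any failure.
get_socrata_updated_at_sketch <- function(metadata_url, missing_value) {
  tryCatch(
    {
      metadata <- jsonlite::fromJSON(metadata_url)
      # Socrata reports rowsUpdatedAt as epoch seconds; convert to UTC POSIXct.
      as.POSIXct(metadata$rowsUpdatedAt, origin = "1970-01-01", tz = "UTC")
    },
    error = function(e) missing_value
  )
}
```

Because the fallback is `current_time`, a metadata failure makes the `raw_update_at > last_raw_file_update_at` check pass, so the download still runs and duplicate content is caught downstream by the hash archive.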
```diff
@@ -126,11 +120,11 @@ update_nhsn_data_raw <- function() {

     # If the raw file hash is not in the archive, add it to S3 and local file.
     if (!raw_file_hash %in% hash_archive$hash) {
-      hash_archive <- bind_rows(hash_archive, tibble(file = raw_file, hash = raw_file_hash))
+      hash_archive <- bind_rows(hash_archive, tibble(files = raw_file, hash = raw_file_hash))
       cli_inform("Adding raw file to S3 and local cache.")

       # Back up the raw file to S3.
-      # s3write_using(write_parquet, object = raw_file, bucket = config$s3_bucket)
+      put_object(file = local_file_path, object = raw_file, bucket = config$s3_bucket)

       # Write the hash archive back to the file.
       write_parquet(hash_archive, hash_archive_path)
```
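Isolated as a sketch, the dedup-then-upload step looks like this. `aws.s3::put_object()` matches the call introduced above; the `digest::digest()` file hash is an assumption, since the hash computation happens outside this hunk:

```r
library(aws.s3)  # put_object()
library(digest)  # digest()
library(dplyr)   # bind_rows()
library(tibble)  # tibble()

# Sketch: record the file's content hash and upload to S3 only if the
# hash is new; returns the possibly-extended archive.
archive_and_upload <- function(local_path, object_name, hash_archive, bucket) {
  file_hash <- digest(local_path, algo = "md5", file = TRUE)
  if (!file_hash %in% hash_archive$hash) {
    hash_archive <- bind_rows(hash_archive, tibble(files = object_name, hash = file_hash))
    put_object(file = local_path, object = object_name, bucket = bucket)
  }
  hash_archive
}
```

Switching from the commented-out `s3write_using()` to `put_object()` uploads the already-written local parquet byte-for-byte, so the S3 copy and the local cache cannot drift apart through a second serialization.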
```diff
@@ -140,9 +134,16 @@ update_nhsn_data_raw <- function() {
     }
   }

+  # Get the last time the prelim data was updated from Socrata.
+  prelim_update_at <- get_socrata_updated_at(config$prelim_metadata_url, missing_value = current_time)
+  last_prelim_file_update_at <- get_last_raw_update_at("prelim")
   # If the prelim data has been updated or there was a failure getting metadata,
   # download it.
   if (prelim_update_at > last_prelim_file_update_at) {
+    prelim_update_at_local <- with_tz(prelim_update_at)
+    prelim_update_at_formatted <- format(prelim_update_at, "%Y-%m-%d_%H-%M-%OS5")
+    prelim_file <- glue("{config$raw_file_name_prefix}_{prelim_update_at_formatted}_prelim.parquet")
+    local_prelim_file_path <- here::here(config$local_raw_cache_path, prelim_file)
     cli_inform("The prelim data has been updated at {prelim_update_at_local} (UTC: {prelim_update_at}).")
     cli_inform("Downloading the prelim data... {prelim_file}")
     read_csv(config$prelim_query_url) %>% write_parquet(local_prelim_file_path)
```
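An illustrative run of the naming scheme above, with a made-up `nhsn` standing in for `config$raw_file_name_prefix` (note the prelim file reuses the raw prefix and is distinguished only by the `_prelim` suffix):

```r
library(glue)

# Illustrative values only; "nhsn" stands in for config$raw_file_name_prefix.
prelim_update_at <- as.POSIXct("2025-01-08 14:30:05", tz = "UTC")
prelim_update_at_formatted <- format(prelim_update_at, "%Y-%m-%d_%H-%M-%OS5")
glue("nhsn_{prelim_update_at_formatted}_prelim.parquet")
#> nhsn_2025-01-08_14-30-05.00000_prelim.parquet
```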
```diff
@@ -152,11 +153,11 @@ update_nhsn_data_raw <- function() {

     # If the prelim file hash is not in the archive, add it to S3 and local file.
     if (!prelim_file_hash %in% hash_archive$hash) {
-      hash_archive <- bind_rows(hash_archive, tibble(file = prelim_file, hash = prelim_file_hash))
+      hash_archive <- bind_rows(hash_archive, tibble(files = prelim_file, hash = prelim_file_hash))
       cli_inform("Adding prelim file to S3 and local cache.")

       # Back up the prelim file to S3.
-      # s3write_using(write_parquet, object = prelim_file, bucket = config$s3_bucket)
+      put_object(file = local_prelim_file_path, object = prelim_file, bucket = config$s3_bucket)

       # Write the hash archive back to the file.
       write_parquet(hash_archive, hash_archive_path)
```
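Both branches assume the hash archive parquet already exists at `hash_archive_path`. A hypothetical one-time initializer, assuming the `files`/`hash` schema this diff appends to:

```r
library(nanoparquet)
library(tibble)

# Hypothetical setup helper: create an empty hash archive with the two
# columns the update code appends to (`files`, `hash`).
init_hash_archive <- function(hash_archive_path) {
  if (!file.exists(hash_archive_path)) {
    write_parquet(tibble(files = character(), hash = character()), hash_archive_path)
  }
}
```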