@@ -87,7 +87,10 @@ get_last_raw_update_at <- function(type = c("raw", "prelim"), missing_value = MI
 #'
 #' @param verbose Whether to print verbose output.
 update_nhsn_data_raw <- function() {
+  # If this request fails (which occurs surprisingly often, eyeroll), we
+  # will just return a future date (2040-01-01) and download anyway.
   raw_update_at <- get_socrata_updated_at(config$raw_metadata_url)
+  # Same here.
   prelim_update_at <- get_socrata_updated_at(config$prelim_metadata_url)
   last_raw_file_update_at <- get_last_raw_update_at("raw")
   last_prelim_file_update_at <- get_last_raw_update_at("prelim")
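
`get_socrata_updated_at` is defined elsewhere, so its body isn't shown in this diff. A minimal sketch of the fallback behavior the new comments describe, assuming Socrata's view-metadata JSON exposes a `rowsUpdatedAt` Unix-epoch field (the function and config names are from the diff; the implementation here is a guess):

```r
get_socrata_updated_at_sketch <- function(metadata_url) {
  tryCatch(
    {
      metadata <- jsonlite::fromJSON(metadata_url)
      # Assumption: Socrata reports rowsUpdatedAt as a Unix epoch (seconds, UTC).
      as.POSIXct(metadata$rowsUpdatedAt, origin = "1970-01-01", tz = "UTC")
    },
    error = function(e) {
      # On failure, return a far-future date so it compares newer than any
      # stored timestamp and the download proceeds anyway.
      as.POSIXct("2040-01-01", tz = "UTC")
    }
  )
}
```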
@@ -109,6 +112,11 @@ update_nhsn_data_raw <- function() {
     cli_inform("Downloading the prelim data... {prelim_file}")
     read_csv(config$prelim_query_url) %>% s3write_using(write_parquet, object = prelim_file, bucket = config$s3_bucket)
   }
+
+  # Since we may have downloaded a duplicate file above, filter out the ones
+  # that have the same ETag. (I don't feel like rederiving AWS S3's ETag field
+  # and computing it ahead of time.)
+  delete_duplicates_from_s3_by_etag(config$s3_bucket, config$raw_file_name_prefix, dry_run = FALSE)
 }

 #' Process Raw NHSN Data File
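
`delete_duplicates_from_s3_by_etag` is also defined elsewhere. A rough sketch of the dedup-by-ETag idea, assuming the `aws.s3` package (already in use above via `s3write_using`); everything below except the call's name and arguments is an assumption:

```r
delete_duplicates_from_s3_by_etag_sketch <- function(bucket, prefix, dry_run = TRUE) {
  # List every object under the prefix; each entry carries Key, ETag, LastModified.
  objects <- aws.s3::get_bucket(bucket, prefix = prefix, max = Inf)
  # Sort by LastModified so the earliest copy of each ETag is the one we keep.
  objects <- objects[order(sapply(objects, function(x) x$LastModified))]
  seen <- character(0)
  for (obj in objects) {
    if (obj$ETag %in% seen) {
      if (dry_run) {
        cli::cli_inform("Would delete duplicate: {obj$Key}")
      } else {
        aws.s3::delete_object(obj$Key, bucket = bucket)
      }
    } else {
      seen <- c(seen, obj$ETag)
    }
  }
}
```

Keeping the earliest copy per ETag matches the "downloaded a duplicate above" scenario: the redundant re-download is the one removed.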
@@ -182,7 +190,9 @@ update_nhsn_data_archive <- function() {
     return(invisible(NULL))
   }

-  cli_inform("New datasets available at, adding {nrow(new_data_files_latest_per_day)} new NHSN datasets to the archive.")
+  cli_inform(
+    "New datasets available, adding {nrow(new_data_files_latest_per_day)} new NHSN datasets to the archive."
+  )

   # Process each new dataset snapshot
   new_data <- new_data_files_latest_per_day$Key %>%
@@ -211,4 +221,4 @@ update_nhsn_data <- function(verbose = FALSE) {
   update_nhsn_data_archive()
 }

-update_nhsn_data(verbose = TRUE)
+update_nhsn_data(verbose = TRUE)