@@ -90,8 +90,7 @@ add_pop_and_density <-
   )
 }
 if (!("agg_level" %in% names(original_dataset))) {
-  original_dataset %<>%
-    mutate(agg_level = ifelse(grepl("[0-9]{2}", geo_value), "hhs_region", ifelse(("us" == geo_value) | ("usa" == geo_value), "nation", "state")))
+  original_dataset %<>% add_agg_level()
 }
 original_dataset %>%
   mutate(year = year(time_value)) %>%
@@ -106,6 +105,15 @@ add_pop_and_density <-
     fill(population, density)
 }

+add_agg_level <- function(data) {
+  data %>%
+    mutate(agg_level = case_when(
+      grepl("[0-9]{2}", geo_value) ~ "hhs_region",
+      geo_value %in% c("us", "usa") ~ "nation",
+      .default = "state"
+    ))
+}
+
 gen_pop_and_density_data <-
   function(apportion_filename = here::here("aux_data", "flusion_data", "apportionment.csv"),
            state_code_filename = here::here("aux_data", "flusion_data", "state_codes_table.csv"),
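
For reference, a minimal sketch of how the new add_agg_level() helper classifies geographies, assuming the functions in this file are already sourced; the geo_value codes below are made-up illustrations, not data from this repo:

library(dplyr)
tibble::tibble(geo_value = c("06", "ca", "us")) %>%
  add_agg_level()
# expected agg_level: "06" -> "hhs_region", "ca" -> "state", "us" -> "nation"
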
@@ -188,13 +196,15 @@ gen_pop_and_density_data <-
 daily_to_weekly <- function(epi_df, agg_method = c("sum", "mean"), keys = "geo_value", values = c("value")) {
   agg_method <- arg_match(agg_method)
   epi_df %>%
+    arrange(across(all_of(c(keys, "time_value")))) %>%
     mutate(epiweek = epiweek(time_value), year = epiyear(time_value)) %>%
     group_by(across(any_of(c(keys, "epiweek", "year")))) %>%
     summarize(
       across(all_of(values), ~ sum(.x, na.rm = TRUE)),
       time_value = floor_date(max(time_value), "weeks", week_start = 7) + 3,
       .groups = "drop"
     ) %>%
+    arrange(across(all_of(c(keys, "time_value")))) %>%
     select(-epiweek, -year)
 }

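A hedged usage sketch for daily_to_weekly() after this change; the toy input is an assumption (a plain tibble stands in for the epi_df) and is not part of the PR:

library(dplyr)
library(lubridate)
daily <- tibble::tibble(
  geo_value = "ca",
  time_value = as.Date("2024-01-07") + 0:13, # two epiweeks of daily data
  value = 1
)
# One row per geo_value/epiweek: values summed, time_value moved to midweek,
# and rows returned in a deterministic key/time order thanks to the added arrange() calls.
daily_to_weekly(daily, values = "value")
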
@@ -336,9 +346,7 @@ add_hhs_region_sum <- function(archive_data_raw, hhs_region_table) {
   archive_data_raw %<>%
     filter(agg_level != "state") %>%
     mutate(hhs_region = hhs) %>%
-    bind_rows(
-      hhs_region_agg_state
-    )
+    bind_rows(hhs_region_agg_state)
   if (need_agg_level) {
     archive_data_raw %<>% select(-agg_level)
   }
@@ -401,11 +409,32 @@ get_health_data <- function(as_of, disease = c("covid", "flu")) {
     # Get something sort of compatible with that by summing to national with
     # na.omit = TRUE. As otherwise we have some NAs from probably territories
     # propagated to US level.
-    bind_rows(
-      (.) %>%
-        group_by(time_value) %>%
-        summarize(geo_value = "us", hhs = sum(hhs, na.rm = TRUE))
-    )
+    append_us_aggregate("hhs")
+}
+
+#' Append a national aggregate to a dataframe
+#'
+#' Computes national values by summing all the values per group_keys.
+#' Removes pre-existing national values.
+#'
+#' @param df A dataframe with a `geo_value` column.
+#' @param cols A character vector of column names to aggregate.
+#' @param group_keys A character vector of column names to group by.
+#' @return A dataframe with a `geo_value` column.
+append_us_aggregate <- function(df, cols = NULL, group_keys = c("time_value")) {
+  if (!(is.data.frame(df))) {
+    cli::cli_abort("df must be a data.frame", call = rlang::caller_env())
+  }
+  national_col_names <- c("us", "usa", "national", "nation", "US", "USA")
+  df1 <- df %>% filter(geo_value %nin% national_col_names)
+  if (is.null(cols)) {
+    df2 <- df1 %>%
+      summarize(geo_value = "us", across(where(is.numeric), ~ sum(.x, na.rm = TRUE)), .by = all_of(group_keys))
+  } else {
+    df2 <- df1 %>%
+      summarize(geo_value = "us", across(all_of(cols), ~ sum(.x, na.rm = TRUE)), .by = all_of(group_keys))
+  }
+  bind_rows(df1, df2)
 }

 calculate_burden_adjustment <- function(flusurv_latest) {
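
A small usage sketch of append_us_aggregate(), assuming the helper above (and its %nin% operator) is loaded; the toy tibble and its values are invented here purely for illustration:

library(dplyr)
df <- tibble::tibble(
  geo_value = c("ca", "ny", "us"),
  time_value = as.Date("2024-01-03"),
  hhs = c(10, 20, 999)
)
# The stale "us" row is dropped and recomputed as the per-time_value sum of
# the remaining rows, so the national value comes back as 30, not 999.
append_us_aggregate(df, cols = "hhs")
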
@@ -718,7 +747,7 @@ up_to_date_nssp_state_archive <- function(disease = c("covid", "influenza")) {
     wait_seconds = 1,
     fn = pub_covidcast,
     source = "nssp",
-    signal = glue::glue("pct_ed_visits_{disease}"),
+    signals = glue::glue("pct_ed_visits_{disease}"),
     time_type = "week",
     geo_type = "state",
     geo_values = "*",
@@ -727,7 +756,7 @@ up_to_date_nssp_state_archive <- function(disease = c("covid", "influenza")) {
   nssp_state %>%
     select(geo_value, time_value, issue, nssp = value) %>%
     as_epi_archive(compactify = TRUE) %>%
-    `$`("DT") %>%
+    extract2("DT") %>%
     # End of week to midweek correction.
     mutate(time_value = time_value + 3) %>%
     as_epi_archive(compactify = TRUE)