Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,17 @@ prod-flu:
export TAR_RUN_PROJECT=flu_hosp_prod; \
Rscript scripts/run.R

prod: prod-covid prod-flu update_site netlify
prod: prod-covid prod-flu update-site netlify

explore-covid:
export TAR_RUN_PROJECT=covid_hosp_explore; \
Rscript scripts/run.R

explore-flu:
export TAR_RUN_PROJECT=flu_hosp_explore; \
Rscript scripts/run.R

explore: explore-covid explore-flu update-site netlify

submit-covid:
cd ../covid19-forecast-hub; \
Expand Down
53 changes: 41 additions & 12 deletions R/aux_data_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,7 @@ add_pop_and_density <-
)
}
if (!("agg_level" %in% names(original_dataset))) {
original_dataset %<>%
mutate(agg_level = ifelse(grepl("[0-9]{2}", geo_value), "hhs_region", ifelse(("us" == geo_value) | ("usa" == geo_value), "nation", "state")))
original_dataset %<>% add_agg_level()
}
original_dataset %>%
mutate(year = year(time_value)) %>%
Expand All @@ -106,6 +105,15 @@ add_pop_and_density <-
fill(population, density)
}

add_agg_level <- function(data) {
data %>%
mutate(agg_level = case_when(
grepl("[0-9]{2}", geo_value) ~ "hhs_region",
geo_value %in% c("us", "usa") ~ "nation",
.default = "state"
))
}

gen_pop_and_density_data <-
function(apportion_filename = here::here("aux_data", "flusion_data", "apportionment.csv"),
state_code_filename = here::here("aux_data", "flusion_data", "state_codes_table.csv"),
Expand Down Expand Up @@ -188,13 +196,15 @@ gen_pop_and_density_data <-
daily_to_weekly <- function(epi_df, agg_method = c("sum", "mean"), keys = "geo_value", values = c("value")) {
agg_method <- arg_match(agg_method)
epi_df %>%
arrange(across(all_of(c(keys, "time_value")))) %>%
mutate(epiweek = epiweek(time_value), year = epiyear(time_value)) %>%
group_by(across(any_of(c(keys, "epiweek", "year")))) %>%
summarize(
across(all_of(values), ~ sum(.x, na.rm = TRUE)),
time_value = floor_date(max(time_value), "weeks", week_start = 7) + 3,
.groups = "drop"
) %>%
arrange(across(all_of(c(keys, "time_value")))) %>%
select(-epiweek, -year)
}

Expand Down Expand Up @@ -336,9 +346,7 @@ add_hhs_region_sum <- function(archive_data_raw, hhs_region_table) {
archive_data_raw %<>%
filter(agg_level != "state") %>%
mutate(hhs_region = hhs) %>%
bind_rows(
hhs_region_agg_state
)
bind_rows(hhs_region_agg_state)
if (need_agg_level) {
archive_data_raw %<>% select(-agg_level)
}
Expand Down Expand Up @@ -401,11 +409,32 @@ get_health_data <- function(as_of, disease = c("covid", "flu")) {
# Get something sort of compatible with that by summing to national with
# na.omit = TRUE. As otherwise we have some NAs from probably territories
# propagated to US level.
bind_rows(
(.) %>%
group_by(time_value) %>%
summarize(geo_value = "us", hhs = sum(hhs, na.rm = TRUE))
)
append_us_aggregate("hhs")
}

#' Append a national aggregate to a dataframe
#'
#' Computes national values by summing all the values per group_keys.
#' Removes pre-existing national values.
#'
#' @param df A dataframe with a `geo_value` column.
#' @param cols A character vector of column names to aggregate.
#' @param group_keys A character vector of column names to group by.
#' @return A dataframe with a `geo_value` column.
append_us_aggregate <- function(df, cols = NULL, group_keys = c("time_value")) {
if (!(is.data.frame(df))) {
cli::cli_abort("df must be a data.frame", call = rlang::caller_env())
}
national_col_names <- c("us", "usa", "national", "nation", "US", "USA")
df1 <- df %>% filter(geo_value %nin% national_col_names)
if (is.null(cols)) {
df2 <- df1 %>%
summarize(geo_value = "us", across(where(is.numeric), ~ sum(.x, na.rm = TRUE)), .by = all_of(group_keys))
} else {
df2 <- df1 %>%
summarize(geo_value = "us", across(all_of(cols), ~ sum(.x, na.rm = TRUE)), .by = all_of(group_keys))
}
bind_rows(df1, df2)
}

calculate_burden_adjustment <- function(flusurv_latest) {
Expand Down Expand Up @@ -718,7 +747,7 @@ up_to_date_nssp_state_archive <- function(disease = c("covid", "influenza")) {
wait_seconds = 1,
fn = pub_covidcast,
source = "nssp",
signal = glue::glue("pct_ed_visits_{disease}"),
signals = glue::glue("pct_ed_visits_{disease}"),
time_type = "week",
geo_type = "state",
geo_values = "*",
Expand All @@ -727,7 +756,7 @@ up_to_date_nssp_state_archive <- function(disease = c("covid", "influenza")) {
nssp_state %>%
select(geo_value, time_value, issue, nssp = value) %>%
as_epi_archive(compactify = TRUE) %>%
`$`("DT") %>%
extract2("DT") %>%
# End of week to midweek correction.
mutate(time_value = time_value + 3) %>%
as_epi_archive(compactify = TRUE)
Expand Down
7 changes: 2 additions & 5 deletions R/forecasters/data_transforms.R
Original file line number Diff line number Diff line change
Expand Up @@ -104,17 +104,14 @@ rolling_sd <- function(epi_data, sd_width = 29L, mean_width = NULL, cols_to_sd =
#' @importFrom tidyr drop_na
#' @importFrom epiprocess as_epi_df
#' @export
clear_lastminute_nas <- function(epi_data, outcome, extra_sources) {
clear_lastminute_nas <- function(epi_data, cols) {
meta_data <- attr(epi_data, "metadata")
if (extra_sources == c("")) {
extra_sources <- character(0L)
}
as_of <- attributes(epi_data)$metadata$as_of
other_keys <- attributes(epi_data)$metadata$other_keys %||% character()
epi_data %>% na.omit()
# make sure at least one column is not NA
epi_data %<>%
filter(if_any(c(!!outcome, !!extra_sources), ~ !is.na(.x))) %>%
filter(if_any(c(!!cols), ~ !is.na(.x))) %>%
as_epi_df(as_of = as_of, other_keys = other_keys)
attr(epi_data, "metadata") <- meta_data
return(epi_data)
Expand Down
17 changes: 15 additions & 2 deletions R/forecasters/data_validation.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ sanitize_args_predictors_trainer <- function(epi_data,
if (!is.null(trainer) && !epipredict:::is_regression(trainer)) {
cli::cli_abort("{trainer} must be a `{parsnip}` model of mode 'regression'.")
} else if (inherits(trainer, "rand_forest") && trainer$engine == "grf_quantiles") {
trainer %<>%
set_engine("grf_quantiles", quantiles = args_list$quantile_levels)
trainer %<>% set_engine("grf_quantiles", quantiles = args_list$quantile_levels)
} else if (inherits(trainer, "quantile_reg")) {
# add all quantile_levels to the trainer and update args list
quantile_levels <- sort(epipredict:::compare_quantile_args(
Expand Down Expand Up @@ -97,3 +96,17 @@ filter_extraneous <- function(epi_data, filter_source, filter_agg_level) {
}
return(epi_data)
}

#' Unwrap an argument if it's a list of length 1
#'
#' Many of our arguments to the forecasters come as lists not because we expect
#' them that way, but as a byproduct of tibble and expand_grid.
unwrap_argument <- function(arg, default_trigger = "", default = character(0L)) {
if (is.list(arg) && length(arg) == 1) {
arg <- arg[[1]]
}
if (identical(arg, default_trigger)) {
return(default)
}
return(arg)
}
2 changes: 2 additions & 0 deletions R/forecasters/forecaster_climatological.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ climate_linear_ensembled <- function(epi_data,
nonlin_method <- arg_match(nonlin_method)

epi_data <- validate_epi_data(epi_data)
extra_sources <- unwrap_argument(extra_sources)
trainer <- unwrap_argument(trainer)

args_list <- list(...)
ahead <- as.integer(ahead / 7)
Expand Down
5 changes: 4 additions & 1 deletion R/forecasters/forecaster_flatline.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,13 @@ flatline_fc <- function(epi_data,
filter_agg_level = "",
...) {
epi_data <- validate_epi_data(epi_data)
extra_sources <- unwrap_argument(extra_sources)
trainer <- unwrap_argument(trainer)

# perform any preprocessing not supported by epipredict
epi_data %<>% filter_extraneous(filter_source, filter_agg_level)
# this is a temp fix until a real fix gets put into epipredict
epi_data <- clear_lastminute_nas(epi_data, outcome, extra_sources)
epi_data <- clear_lastminute_nas(epi_data, cols = c(outcome, extra_sources))
# one that every forecaster will need to handle: how to manage max(time_value)
# that's older than the `as_of` date
c(epi_data, ahead) %<-% extend_ahead(epi_data, ahead)
Expand Down
2 changes: 2 additions & 0 deletions R/forecasters/forecaster_flusion.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ flusion <- function(epi_data,
derivative_estimator <- arg_match(derivative_estimator)

epi_data <- validate_epi_data(epi_data)
extra_sources <- unwrap_argument(extra_sources)
trainer <- unwrap_argument(trainer)

# perform any preprocessing not supported by epipredict
args_input <- list(...)
Expand Down
6 changes: 4 additions & 2 deletions R/forecasters/forecaster_no_recent_outcome.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ no_recent_outcome <- function(epi_data,
week_method <- arg_match(week_method)

epi_data <- validate_epi_data(epi_data)
extra_sources <- unwrap_argument(extra_sources)
trainer <- unwrap_argument(trainer)

# this is for the case where there are multiple sources in the same column
epi_data %<>% filter_extraneous(filter_source, filter_agg_level)
Expand Down Expand Up @@ -62,10 +64,10 @@ no_recent_outcome <- function(epi_data,
args_input[["quantile_levels"]] <- quantile_levels
args_list <- do.call(default_args_list, args_input)
# if you want to hardcode particular predictors in a particular forecaster
if (identical(extra_sources[[1]], "")) {
if (identical(extra_sources, character(0L))) {
predictors <- character()
} else {
predictors <- extra_sources[[1]]
predictors <- extra_sources
}
c(args_list, tmp_pred, trainer) %<-% sanitize_args_predictors_trainer(epi_data, outcome, predictors, trainer, args_list)

Expand Down
4 changes: 3 additions & 1 deletion R/forecasters/forecaster_scaled_pop.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ scaled_pop <- function(epi_data,
nonlin_method <- arg_match(nonlin_method)

epi_data <- validate_epi_data(epi_data)
extra_sources <- unwrap_argument(extra_sources)
trainer <- unwrap_argument(trainer)

# perform any preprocessing not supported by epipredict
#
Expand Down Expand Up @@ -94,7 +96,7 @@ scaled_pop <- function(epi_data,
args_input[["nonneg"]] <- scale_method == "none"
args_list <- inject(default_args_list(!!!args_input))
# if you want to hardcode particular predictors in a particular forecaster
predictors <- c(outcome, extra_sources[[1]])
predictors <- c(outcome, extra_sources)
c(args_list, predictors, trainer) %<-% sanitize_args_predictors_trainer(epi_data, outcome, predictors, trainer, args_list)
# end of the copypasta
# finally, any other pre-processing (e.g. smoothing) that isn't performed by
Expand Down
10 changes: 4 additions & 6 deletions R/forecasters/forecaster_scaled_pop_seasonal.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,9 @@ scaled_pop_seasonal <- function(epi_data,
nonlin_method <- arg_match(nonlin_method)

epi_data <- validate_epi_data(epi_data)
extra_sources <- unwrap_argument(extra_sources)
trainer <- unwrap_argument(trainer)

# TODO: handle this when creating param grid?
if (typeof(seasonal_method) == "list") {
seasonal_method <- seasonal_method[[1]]
}
if (all(seasonal_method == c("none", "flu", "covid", "indicator", "window", "climatological"))) {
seasonal_method <- "none"
}
Expand All @@ -71,7 +69,7 @@ scaled_pop_seasonal <- function(epi_data,
# this is for the case where there are multiple sources in the same column
epi_data %<>% filter_extraneous(filter_source, filter_agg_level)
# this is a temp fix until a real fix gets put into epipredict
epi_data <- clear_lastminute_nas(epi_data, outcome, extra_sources)
epi_data <- clear_lastminute_nas(epi_data, cols = c(outcome, extra_sources))
# this next part is basically unavoidable boilerplate you'll want to copy
args_input <- list(...)
# edge case where there is no data or less data than the lags; eventually epipredict will handle this
Expand Down Expand Up @@ -100,7 +98,7 @@ scaled_pop_seasonal <- function(epi_data,
args_input[["seasonal_forward_window"]] <- seasonal_forward_window + ahead
args_list <- inject(default_args_list(!!!args_input))
# if you want to hardcode particular predictors in a particular forecaster
predictors <- c(outcome, extra_sources[[1]])
predictors <- c(outcome, extra_sources)
c(args_list, predictors, trainer) %<-% sanitize_args_predictors_trainer(epi_data, outcome, predictors, trainer, args_list)

if ("season_week" %nin% names(epi_data)) {
Expand Down
8 changes: 4 additions & 4 deletions R/forecasters/forecaster_smoothed_scaled.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,15 @@ smoothed_scaled <- function(epi_data,
nonlin_method <- arg_match(nonlin_method)

epi_data <- validate_epi_data(epi_data)
extra_sources <- unwrap_argument(extra_sources)
trainer <- unwrap_argument(trainer)

# perform any preprocessing not supported by epipredict
#
# this is for the case where there are multiple sources in the same column
epi_data %<>% filter_extraneous(filter_source, filter_agg_level)
# this is a temp fix until a real fix gets put into epipredict
epi_data <- clear_lastminute_nas(epi_data, outcome, extra_sources)
epi_data <- clear_lastminute_nas(epi_data, cols = c(outcome, extra_sources))
# see latency_adjusting for other examples
args_input <- list(...)
# edge case where there is no data or less data than the lags; eventually epipredict will handle this
Expand All @@ -106,14 +108,12 @@ smoothed_scaled <- function(epi_data,
args_list <- inject(default_args_list(!!!args_input))
# `extra_sources` sets which variables beyond the outcome are lagged and used as predictors
# any which are modified by `rolling_mean` or `rolling_sd` have their original values dropped later
predictors <- c(outcome, extra_sources[[1]])
predictors <- predictors[predictors != ""]
predictors <- c(outcome, extra_sources)
# end of the copypasta
# finally, any other pre-processing (e.g. smoothing) that isn't performed by
# epipredict



#######################
# robust whitening
#######################
Expand Down
Loading