|
| 1 | +--- |
| 2 | +title: "Left truncation with delay_min" |
| 3 | +description: "Using delay_min to exclude delays below a threshold" |
| 4 | +output: |
| 5 | + bookdown::html_document2: |
| 6 | + fig_caption: yes |
| 7 | + code_folding: show |
| 8 | + number_sections: true |
| 9 | +pkgdown: |
| 10 | + as_is: true |
| 11 | +link-citations: true |
| 12 | +vignette: > |
| 13 | + %\VignetteIndexEntry{Left truncation with delay_min} |
| 14 | + %\VignetteEngine{knitr::rmarkdown} |
| 15 | + %\VignetteEncoding{UTF-8} |
| 16 | +bibliography: references.bib |
| 17 | +--- |
| 18 | + |
| 19 | +```{r setup, include=FALSE} |
| 20 | +knitr::opts_chunk$set( |
| 21 | + fig.path = file.path("figures", "left-truncation-"), |
| 22 | + collapse = TRUE, |
| 23 | + comment = "#>", |
| 24 | + message = FALSE, |
| 25 | + warning = FALSE, |
| 26 | + error = FALSE |
| 27 | +) |
| 28 | +``` |
| 29 | + |
| 30 | +Some delay distributions have a natural lower bound above zero. |
| 31 | +For example, generation intervals (time between successive infections) are often defined to exclude day zero, as same-day transmission may not be meaningful for a given pathogen. |
| 32 | +The `delay_min` parameter in `as_epidist_marginal_model()` supports this by left-truncating the delay distribution at a specified minimum value. |
| 33 | +This is passed as the `L` parameter to the [`primarycensored`](https://primarycensored.epinowcast.org/) likelihood. |
| 34 | + |
| 35 | +In this vignette, we demonstrate how to use `delay_min` by simulating data with a known left truncation point and fitting models with and without the truncation adjustment. |
| 36 | + |
| 37 | +# Setup |
| 38 | + |
| 39 | +```{r load-packages} |
| 40 | +library(epidist) |
| 41 | +library(ggplot2) |
| 42 | +library(dplyr) |
| 43 | +library(tidybayes) |
| 44 | +``` |
| 45 | + |
| 46 | +# Simulate data with left truncation |
| 47 | + |
| 48 | +We simulate delay data from a lognormal distribution, then remove all observations with delays below a threshold to mimic left truncation. |
| 49 | +This is a simplified version of how generation interval data might look when same-day events are excluded. |
| 50 | + |
| 51 | +```{r simulate} |
| 52 | +set.seed(42) |
| 53 | +n <- 500 |
| 54 | +true_meanlog <- 1.5 |
| 55 | +true_sdlog <- 0.6 |
| 56 | +delay_min <- 1 |
| 57 | +
|
| 58 | +# Simulate delays from lognormal, removing those below delay_min |
| 59 | +delays_raw <- rlnorm(n * 2, meanlog = true_meanlog, sdlog = true_sdlog) |
| 60 | +delays <- delays_raw[delays_raw >= delay_min][seq_len(n)] |
| 61 | +
|
| 62 | +# Create linelist-style data with daily censoring |
| 63 | +obs_time <- 100 |
| 64 | +sim_data <- data.frame( |
| 65 | + ptime_lwr = runif(n, 0, obs_time - max(delays)), |
| 66 | + delay = delays |
| 67 | +) |> |
| 68 | + mutate( |
| 69 | + ptime_upr = ptime_lwr + 1, |
| 70 | + stime_lwr = floor(ptime_lwr + delay), |
| 71 | + stime_upr = stime_lwr + 1, |
| 72 | + obs_time = obs_time |
| 73 | + ) |> |
| 74 | + filter(stime_upr <= obs_time) |
| 75 | +``` |
| 76 | + |
| 77 | +The observed delay distribution is visibly truncated at `delay_min = `r delay_min``: |
| 78 | + |
| 79 | +```{r hist, fig.cap="Observed delays are truncated below the minimum delay threshold (dashed line)."} |
| 80 | +ggplot(sim_data, aes(x = stime_lwr - ptime_lwr)) + |
| 81 | + geom_histogram( |
| 82 | + aes(y = after_stat(density)), |
| 83 | + binwidth = 1, fill = "#56B4E9", alpha = 0.7 |
| 84 | + ) + |
| 85 | + geom_vline( |
| 86 | + xintercept = delay_min, linetype = "dashed", linewidth = 0.8 |
| 87 | + ) + |
| 88 | + labs(x = "Observed delay (days)", y = "Density") + |
| 89 | + theme_minimal() |
| 90 | +``` |
| 91 | + |
| 92 | +# Prepare data |
| 93 | + |
| 94 | +We convert the simulated data into an `epidist` linelist and then prepare marginal models with and without the `delay_min` adjustment. |
| 95 | + |
| 96 | +```{r prepare} |
| 97 | +linelist <- as_epidist_linelist_data( |
| 98 | + sim_data, |
| 99 | + ptime_lwr = "ptime_lwr", |
| 100 | + ptime_upr = "ptime_upr", |
| 101 | + stime_lwr = "stime_lwr", |
| 102 | + stime_upr = "stime_upr", |
| 103 | + obs_time = "obs_time" |
| 104 | +) |
| 105 | +
|
| 106 | +# Without left truncation adjustment |
| 107 | +marginal_no_trunc <- as_epidist_marginal_model(linelist) |
| 108 | +
|
| 109 | +# With left truncation adjustment |
| 110 | +marginal_trunc <- as_epidist_marginal_model( |
| 111 | + linelist, delay_min = delay_min |
| 112 | +) |
| 113 | +``` |
| 114 | + |
| 115 | +# Fit models |
| 116 | + |
| 117 | +We fit two marginal models: one ignoring left truncation and one accounting for it. |
| 118 | + |
| 119 | +```{r fit} |
| 120 | +fit_no_trunc <- epidist( |
| 121 | + marginal_no_trunc, |
| 122 | + chains = 4, cores = 2, refresh = ifelse(interactive(), 250, 0) |
| 123 | +) |
| 124 | +
|
| 125 | +fit_trunc <- epidist( |
| 126 | + marginal_trunc, |
| 127 | + chains = 4, cores = 2, refresh = ifelse(interactive(), 250, 0) |
| 128 | +) |
| 129 | +``` |
| 130 | + |
| 131 | +# Compare parameter estimates |
| 132 | + |
| 133 | +We extract the estimated parameters and compare them to the true values. |
| 134 | + |
| 135 | +```{r compare-params} |
| 136 | +params_no_trunc <- predict_delay_parameters(fit_no_trunc) |
| 137 | +params_trunc <- predict_delay_parameters(fit_trunc) |
| 138 | +
|
| 139 | +true_params <- data.frame( |
| 140 | + parameter = c("meanlog", "sdlog"), |
| 141 | + true_value = c(true_meanlog, true_sdlog), |
| 142 | + stringsAsFactors = FALSE |
| 143 | +) |
| 144 | +
|
| 145 | +param_summary <- bind_rows( |
| 146 | + mutate(params_no_trunc, model = "No truncation adjustment"), |
| 147 | + mutate(params_trunc, model = "With delay_min") |
| 148 | +) |> |
| 149 | + filter(parameter %in% c("meanlog", "sdlog")) |
| 150 | +``` |
| 151 | + |
| 152 | +```{r params-plot, fig.cap="Posterior estimates of the lognormal parameters. The model accounting for left truncation (orange) recovers the true values (dashed lines) while the unadjusted model (blue) is biased."} |
| 153 | +ggplot(param_summary, aes(x = mean, y = model, col = model)) + |
| 154 | + geom_point(size = 3) + |
| 155 | + geom_linerange(aes(xmin = q5, xmax = q95)) + |
| 156 | + geom_vline( |
| 157 | + data = true_params, |
| 158 | + aes(xintercept = true_value), |
| 159 | + linetype = "dashed" |
| 160 | + ) + |
| 161 | + facet_wrap(~parameter, scales = "free_x") + |
| 162 | + scale_colour_manual(values = c( |
| 163 | + "No truncation adjustment" = "#56B4E9", |
| 164 | + "With delay_min" = "#E69F00" |
| 165 | + )) + |
| 166 | + labs(x = "Estimate", y = "", col = "") + |
| 167 | + theme_minimal() + |
| 168 | + theme(legend.position = "bottom") |
| 169 | +``` |
| 170 | + |
| 171 | +# Compare fitted distributions |
| 172 | + |
| 173 | +We can also compare the fitted delay distributions by generating predictions from each model. |
| 174 | + |
| 175 | +```{r predict} |
| 176 | +pred_data <- data.frame( |
| 177 | + relative_obs_time = Inf, pwindow = 0, swindow = 0, |
| 178 | + delay_upr = NA, delay_min = 0 |
| 179 | +) |
| 180 | +
|
| 181 | +pred_data_trunc <- data.frame( |
| 182 | + relative_obs_time = Inf, pwindow = 0, swindow = 0, |
| 183 | + delay_upr = NA, delay_min = delay_min |
| 184 | +) |
| 185 | +
|
| 186 | +draws_no_trunc <- add_predicted_draws( |
| 187 | + pred_data, fit_no_trunc, ndraws = 1000 |
| 188 | +) |
| 189 | +
|
| 190 | +draws_trunc <- add_predicted_draws( |
| 191 | + pred_data_trunc, fit_trunc, ndraws = 1000 |
| 192 | +) |
| 193 | +``` |
| 194 | + |
| 195 | +```{r pdf-plot, fig.cap="Predicted delay distributions compared with the true lognormal density (black line). The left-truncated model correctly recovers the distribution shape above the truncation point."} |
| 196 | +draws_combined <- bind_rows( |
| 197 | + mutate(draws_no_trunc, model = "No truncation adjustment"), |
| 198 | + mutate(draws_trunc, model = "With delay_min") |
| 199 | +) |
| 200 | +
|
| 201 | +ggplot(draws_combined, aes(x = .prediction)) + |
| 202 | + geom_density(aes(col = model), linewidth = 0.8) + |
| 203 | + geom_function( |
| 204 | + fun = dlnorm, |
| 205 | + args = list(meanlog = true_meanlog, sdlog = true_sdlog), |
| 206 | + linewidth = 1, linetype = "solid" |
| 207 | + ) + |
| 208 | + scale_colour_manual(values = c( |
| 209 | + "No truncation adjustment" = "#56B4E9", |
| 210 | + "With delay_min" = "#E69F00" |
| 211 | + )) + |
| 212 | + coord_cartesian(xlim = c(0, 30)) + |
| 213 | + labs(x = "Delay (days)", y = "Density", col = "") + |
| 214 | + theme_minimal() + |
| 215 | + theme(legend.position = "bottom") |
| 216 | +``` |
| 217 | + |
| 218 | +# Using delay_min with aggregate data |
| 219 | + |
| 220 | +Left truncation also works with aggregate data. |
| 221 | +If your data is already aggregated, `delay_min` can be passed through the same interface. |
| 222 | + |
| 223 | +```{r aggregate} |
| 224 | +agg_data <- as_epidist_aggregate_data(linelist) |
| 225 | +
|
| 226 | +marginal_agg <- as_epidist_marginal_model( |
| 227 | + agg_data, delay_min = delay_min |
| 228 | +) |
| 229 | +
|
| 230 | +head(marginal_agg[, c("delay_lwr", "delay_upr", "delay_min", "n")]) |
| 231 | +``` |
| 232 | + |
| 233 | +# Using a per-observation delay_min column |
| 234 | + |
| 235 | +For cases where the truncation point varies across observations, you can provide `delay_min` as a column in the data. |
| 236 | + |
| 237 | +```{r per-obs} |
| 238 | +linelist_varying <- linelist |
| 239 | +linelist_varying$my_min <- sample( |
| 240 | + c(0, 1), nrow(linelist_varying), replace = TRUE |
| 241 | +) |
| 242 | +
|
| 243 | +marginal_varying <- as_epidist_marginal_model( |
| 244 | + linelist_varying, delay_min = "my_min" |
| 245 | +) |
| 246 | +
|
| 247 | +table(marginal_varying$delay_min) |
| 248 | +``` |
| 249 | + |
| 250 | +# Summary |
| 251 | + |
| 252 | +The `delay_min` parameter provides a simple way to account for left truncation when estimating delay distributions. |
| 253 | +When delays below a threshold are excluded from the data (as is common for generation intervals), ignoring this truncation biases parameter estimates. |
| 254 | +Setting `delay_min` corrects for this by adjusting the likelihood via the [`primarycensored`](https://primarycensored.epinowcast.org/) package. |
0 commit comments