@@ -45,39 +45,39 @@ library(tidybayes)
4545
4646# Simulate data with left truncation
4747
48- We simulate delay data from a lognormal distribution, then remove all observations with delays below a threshold to mimic left truncation.
49- This is a simplified version of how generation interval data might look when same-day events are excluded.
48+ We use the package's simulation tools to generate outbreak data, then remove all observations with delays below a threshold to mimic left truncation.
5049
5150``` {r simulate}
52- set.seed(42)
53- n <- 500
5451true_meanlog <- 1.5
5552true_sdlog <- 0.6
5653delay_min <- 1
5754
58- # Simulate delays from lognormal, removing those below delay_min
59- delays_raw <- rlnorm(n * 2, meanlog = true_meanlog, sdlog = true_sdlog)
60- delays <- delays_raw[delays_raw >= delay_min][seq_len(n)]
55+ outbreak <- simulate_gillespie(r = 0.2, seed = 101)
56+ obs <- simulate_secondary(
57+ outbreak,
58+ dist = rlnorm,
59+ meanlog = true_meanlog,
60+ sdlog = true_sdlog
61+ )
6162
62- # Create linelist-style data with daily censoring
63- obs_time <- 100
64- sim_data <- data.frame(
65- ptime_lwr = runif(n, 0, obs_time - max(delays)),
66- delay = delays
67- ) |>
63+ # Apply left truncation: remove delays below threshold
64+ obs_trunc <- obs |>
65+ filter(delay >= delay_min) |>
6866 mutate(
67+ ptime_lwr = floor(ptime),
6968 ptime_upr = ptime_lwr + 1,
70- stime_lwr = floor(ptime_lwr + delay ),
69+ stime_lwr = floor(stime ),
7170 stime_upr = stime_lwr + 1,
72- obs_time = obs_time
71+ obs_time = max(stime_upr)
7372 ) |>
74- filter(stime_upr <= obs_time)
73+ filter(stime_upr <= obs_time) |>
74+ slice_sample(n = 200)
7575```
7676
7777The observed delay distribution is visibly truncated at ` delay_min = ` r delay_min``:
7878
7979``` {r hist, fig.cap="Observed delays are truncated below the minimum delay threshold (dashed line)."}
80- ggplot(sim_data , aes(x = stime_lwr - ptime_lwr)) +
80+ ggplot(obs_trunc , aes(x = stime_lwr - ptime_lwr)) +
8181 geom_histogram(
8282 aes(y = after_stat(density)),
8383 binwidth = 1, fill = "#56B4E9", alpha = 0.7
@@ -89,62 +89,50 @@ ggplot(sim_data, aes(x = stime_lwr - ptime_lwr)) +
8989 theme_minimal()
9090```
9191
92- # Prepare data
92+ # Prepare data and fit models
9393
94- We convert the simulated data into an ` epidist ` linelist and then prepare marginal models with and without the ` delay_min ` adjustment.
94+ We convert the simulated data into an ` epidist ` linelist and prepare marginal models with and without the ` delay_min ` adjustment.
9595
96- ``` {r prepare}
96+ ``` {r prepare-and-fit }
9797linelist <- as_epidist_linelist_data(
98- sim_data,
99- ptime_lwr = "ptime_lwr",
100- ptime_upr = "ptime_upr",
101- stime_lwr = "stime_lwr",
102- stime_upr = "stime_upr",
103- obs_time = "obs_time"
98+ obs_trunc$ptime_lwr,
99+ ptime_upr = obs_trunc$ptime_upr,
100+ stime_lwr = obs_trunc$stime_lwr,
101+ stime_upr = obs_trunc$stime_upr,
102+ obs_time = obs_trunc$obs_time
104103)
105104
106105# Without left truncation adjustment
107- marginal_no_trunc <- as_epidist_marginal_model(linelist)
108-
109- # With left truncation adjustment
110- marginal_trunc <- as_epidist_marginal_model(
111- linelist, delay_min = delay_min
112- )
113- ```
114-
115- # Fit models
116-
117- We fit two marginal models: one ignoring left truncation and one accounting for it.
118-
119- ``` {r fit}
120106fit_no_trunc <- epidist(
121- marginal_no_trunc ,
107+ as_epidist_marginal_model(linelist) ,
122108 chains = 4, cores = 2, refresh = ifelse(interactive(), 250, 0)
123109)
124110
111+ # With left truncation adjustment
125112fit_trunc <- epidist(
126- marginal_trunc ,
113+ as_epidist_marginal_model(linelist, delay_min = delay_min) ,
127114 chains = 4, cores = 2, refresh = ifelse(interactive(), 250, 0)
128115)
129116```
130117
131118# Compare parameter estimates
132119
133- We extract the estimated parameters and compare them to the true values.
134-
135120``` {r compare-params}
136- params_no_trunc <- predict_delay_parameters(fit_no_trunc)
137- params_trunc <- predict_delay_parameters(fit_trunc)
138-
139121true_params <- data.frame(
140122 parameter = c("meanlog", "sdlog"),
141123 true_value = c(true_meanlog, true_sdlog),
142124 stringsAsFactors = FALSE
143125)
144126
145127param_summary <- bind_rows(
146- mutate(params_no_trunc, model = "No truncation adjustment"),
147- mutate(params_trunc, model = "With delay_min")
128+ mutate(
129+ predict_delay_parameters(fit_no_trunc),
130+ model = "No truncation adjustment"
131+ ),
132+ mutate(
133+ predict_delay_parameters(fit_trunc),
134+ model = "With delay_min"
135+ )
148136) |>
149137 filter(parameter %in% c("meanlog", "sdlog"))
150138```
@@ -170,8 +158,6 @@ ggplot(param_summary, aes(x = mean, y = model, col = model)) +
170158
171159# Compare fitted distributions
172160
173- We can also compare the fitted delay distributions by generating predictions from each model.
174-
175161``` {r predict}
176162pred_data <- data.frame(
177163 relative_obs_time = Inf, pwindow = 0, swindow = 0,
@@ -218,7 +204,6 @@ ggplot(draws_combined, aes(x = .prediction)) +
218204# Using delay_min with aggregate data
219205
220206Left truncation also works with aggregate data.
221- If your data is already aggregated, ` delay_min ` can be passed through the same interface.
222207
223208``` {r aggregate}
224209agg_data <- as_epidist_aggregate_data(linelist)
@@ -230,9 +215,9 @@ marginal_agg <- as_epidist_marginal_model(
230215head(marginal_agg[, c("delay_lwr", "delay_upr", "delay_min", "n")])
231216```
232217
233- # Using a per -observation delay_min column
218+ # Per -observation delay_min
234219
235- For cases where the truncation point varies across observations, you can provide ` delay_min ` as a column in the data .
220+ When the truncation point varies across observations, provide ` delay_min ` as a column name .
236221
237222``` {r per-obs}
238223linelist_varying <- linelist
0 commit comments