Skip to content

Commit 76a755d

Browse files
Copilotkaiemjoy
andauthored
Stabilize multi-way cluster SE diagnostics
Co-authored-by: kaiemjoy <16113030+kaiemjoy@users.noreply.github.com>
1 parent 1a361a9 commit 76a755d

8 files changed

Lines changed: 306 additions & 44 deletions

NEWS.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,10 @@
2929
their snapshots, differ between macOS, Windows, and Linux). Simulated
3030
values change slightly as a result of this fix. (#447)
3131
* Cluster-robust standard errors now treat multiple `cluster_var` columns as
32-
multi-way clustering instead of collapsing them to a single interaction, and
33-
they are no longer allowed to be smaller than the corresponding model-based
34-
standard errors. (#543)
32+
multi-way clustering instead of collapsing them to a single interaction.
33+
One-way subset terms now use the CR1 small-sample correction by default,
34+
the floor to model-based variance is optional, and debug output now exposes
35+
the multi-way variance decomposition used in the final standard error. (#543)
3536
* Corrected default axis labels in `strat_ests_barplot()` (`xlab`) and
3637
`strat_ests_scatterplot()` (`ylab`) to say "seroincidence" rather than
3738
"seroconversion"/"incidence".

R/compute_cluster_robust_var.R

Lines changed: 72 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,14 @@
1616
.compute_cluster_robust_var <- function(
1717
fit,
1818
cluster_var,
19-
stratum_var = NULL) {
19+
stratum_var = NULL,
20+
small_sample = c("none", "CR1"),
21+
floor_to_standard = FALSE,
22+
debug_cluster = FALSE) {
23+
small_sample <- match.arg(small_sample)
2024
pop_data_list <- attr(fit, "pop_data")
2125
pop_data_combined <- do.call(rbind, pop_data_list)
22-
standard_var_log_lambda <- 1 / fit$hessian |> as.numeric()
26+
standard_var_log_lambda <- as.numeric(1 / fit$hessian)[1]
2327

2428
cluster_var_combinations <- unlist(
2529
lapply(seq_along(cluster_var), function(n_vars) {
@@ -29,7 +33,7 @@
2933
)
3034

3135
n_vars_per_subset <- vapply(cluster_var_combinations, length, integer(1))
32-
robust_var_log_lambda <- 0
36+
decomp_rows <- vector("list", length(cluster_var_combinations))
3337

3438
for (i in seq_along(cluster_var_combinations)) {
3539
cluster_vars_subset <- cluster_var_combinations[[i]]
@@ -46,20 +50,73 @@
4650
subset_var_log_lambda <- .compute_cluster_var_oneway(
4751
fit = fit,
4852
cluster_ids = cluster_ids,
49-
pop_data_combined = pop_data_combined
53+
pop_data_combined = pop_data_combined,
54+
small_sample = small_sample
55+
)
56+
subset_sign <- (-1)^(n_vars_per_subset[i] + 1)
57+
decomp_rows[[i]] <- tibble::tibble(
58+
subset = paste(cluster_vars_subset, collapse = " + "),
59+
order = n_vars_per_subset[i],
60+
sign = subset_sign,
61+
subset_variance = subset_var_log_lambda,
62+
signed_term = subset_sign * subset_var_log_lambda
5063
)
51-
robust_var_log_lambda <- robust_var_log_lambda +
52-
(-1)^(n_vars_per_subset[i] + 1) * subset_var_log_lambda
5364
}
5465

55-
# Use a conservative floor so cluster-robust variance does not fall below the
56-
# model-based variance when clustering adds no effective dependence (for
57-
# example, singleton clusters) or finite-sample noise would otherwise shrink
58-
# the standard error.
59-
robust_var_log_lambda <- max(
60-
standard_var_log_lambda,
61-
robust_var_log_lambda
62-
)
66+
decomp_terms <- dplyr::bind_rows(decomp_rows)
67+
robust_raw <- sum(decomp_terms$signed_term)
68+
floor_applied <- isTRUE(floor_to_standard) &&
69+
robust_raw < standard_var_log_lambda
70+
robust_final <- if (isTRUE(floor_to_standard)) {
71+
max(standard_var_log_lambda, robust_raw)
72+
} else {
73+
robust_raw
74+
}
75+
76+
if (debug_cluster) {
77+
cli::cli_inform("Cluster-robust variance decomposition:")
78+
79+
for (i in seq_len(nrow(decomp_terms))) {
80+
term_label <- if (
81+
length(cluster_var) == 2 &&
82+
decomp_terms$order[i] == 2
83+
) {
84+
"V_intersection"
85+
} else {
86+
paste0(
87+
"V_",
88+
gsub(" \\+ ", "_", decomp_terms$subset[i])
89+
)
90+
}
91+
term_message <- glue::glue(
92+
"{term_label} ({decomp_terms$subset[i]}) = ",
93+
"{signif(decomp_terms$subset_variance[i], 6)}"
94+
)
95+
96+
cli::cli_inform(term_message)
97+
}
6398

64-
return(robust_var_log_lambda)
99+
cli::cli_inform("V_raw = {signif(robust_raw, 6)}")
100+
cli::cli_inform("V_final = {signif(robust_final, 6)}")
101+
102+
if (isTRUE(floor_to_standard)) {
103+
floor_message <- glue::glue(
104+
"Variance floor relative to standard variance ",
105+
"{signif(standard_var_log_lambda, 6)}: ",
106+
"{if (floor_applied) 'applied' else 'not applied'}"
107+
)
108+
cli::cli_inform(floor_message)
109+
}
110+
}
111+
112+
structure(
113+
robust_final,
114+
cluster_decomp = list(
115+
standard_var = standard_var_log_lambda,
116+
robust_raw = robust_raw,
117+
robust_final = robust_final,
118+
terms = decomp_terms,
119+
floor_applied = floor_applied
120+
)
121+
)
65122
}

R/compute_cluster_var_oneway.R

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
.compute_cluster_var_oneway <- function(
1111
fit,
1212
cluster_ids,
13-
pop_data_combined) {
13+
pop_data_combined,
14+
small_sample = c("none", "CR1")) {
15+
small_sample <- match.arg(small_sample)
1416
pop_data_list <- attr(fit, "pop_data")
1517
sr_params_list <- attr(fit, "sr_params")
1618
noise_params_list <- attr(fit, "noise_params")
@@ -19,7 +21,8 @@
1921
epsilon <- 1e-6
2022

2123
unique_clusters <- unique(cluster_ids)
22-
cluster_scores <- numeric(length(unique_clusters))
24+
n_clusters <- length(unique_clusters)
25+
cluster_scores <- numeric(n_clusters)
2326

2427
for (i in seq_along(unique_clusters)) {
2528
cluster_id <- unique_clusters[i]
@@ -57,7 +60,17 @@
5760
}
5861

5962
score_variance <- sum(cluster_scores^2)
60-
hessian <- fit$hessian
63+
hessian <- as.numeric(fit$hessian)[1]
6164

62-
score_variance / (hessian^2)
65+
if (!is.finite(hessian) || hessian <= 0) {
66+
return(NA_real_)
67+
}
68+
69+
var_log_lambda <- score_variance / (hessian^2)
70+
71+
if (small_sample == "CR1" && n_clusters > 1) {
72+
var_log_lambda <- var_log_lambda * (n_clusters / (n_clusters - 1))
73+
}
74+
75+
var_log_lambda
6376
}

R/summary.seroincidence.R

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,13 @@
88
#' @param object a [list()] outputted by [stats::nlm()] or [est_seroincidence()]
99
#' @param coverage desired confidence interval coverage probability
1010
#' @param verbose whether to produce verbose messaging
11+
#' @param small_sample small-sample correction for cluster-robust variance.
12+
#' Use `"CR1"` to apply the one-way CR1 factor to each subset term, or
13+
#' `"none"` to leave the one-way terms unadjusted.
14+
#' @param floor_to_standard whether to floor cluster-robust variance at the
15+
#' model-based variance.
16+
#' @param debug_cluster whether to print the cluster-robust variance
17+
#' decomposition when clustering is used.
1118
#' @param ... unused
1219
#'
1320
#' @return a [tibble::tibble()] containing the following:
@@ -71,7 +78,11 @@ summary.seroincidence <- function(
7178
object,
7279
coverage = .95,
7380
verbose = TRUE,
81+
small_sample = c("none", "CR1"),
82+
floor_to_standard = FALSE,
83+
debug_cluster = FALSE,
7484
...) {
85+
small_sample <- match.arg(small_sample)
7586
start <- object |> attr("lambda_start")
7687
antigen_isos <- object |> attr("antigen_isos")
7788
cluster_var <- object |> attr("cluster_var")
@@ -106,13 +117,17 @@ summary.seroincidence <- function(
106117
var_log_lambda <- .compute_cluster_robust_var(
107118
fit = object,
108119
cluster_var = cluster_var,
109-
stratum_var = stratum_var
120+
stratum_var = stratum_var,
121+
small_sample = small_sample,
122+
floor_to_standard = floor_to_standard,
123+
debug_cluster = debug_cluster
110124
)
111125
} else {
112126
# Standard variance from Hessian
113127
var_log_lambda <- 1 / object$hessian |> as.vector()
114128
}
115129

130+
cluster_decomp <- attr(var_log_lambda, "cluster_decomp")
116131
# Ensure var_log_lambda is a scalar
117132
var_log_lambda <- as.numeric(var_log_lambda)[1]
118133
se_log_lambda <- sqrt(var_log_lambda)
@@ -140,5 +155,9 @@ summary.seroincidence <- function(
140155
"summary.seroincidence" |>
141156
union(class(to_return))
142157

158+
if (!is.null(cluster_decomp)) {
159+
attr(to_return, "cluster_decomp") <- cluster_decomp
160+
}
161+
143162
return(to_return)
144163
}

R/summary.seroincidence.by.R

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#' (see help for [optim()] for details).
1818
#' Default = `FALSE`.
1919
#' @param confidence_level desired confidence interval coverage probability
20+
#' @inheritParams summary.seroincidence
2021
#' @return
2122
#' A `summary.seroincidence.by` object, which is a [tibble::tibble],
2223
#' with the following columns:
@@ -69,7 +70,11 @@ summary.seroincidence.by <- function(
6970
show_deviance = TRUE,
7071
show_convergence = TRUE,
7172
verbose = FALSE,
73+
small_sample = c("none", "CR1"),
74+
floor_to_standard = FALSE,
75+
debug_cluster = FALSE,
7276
...) {
77+
small_sample <- match.arg(small_sample)
7378
alpha <- 1 - confidence_level
7479
quantiles <- c(alpha / 2, 1 - alpha / 2)
7580

@@ -87,7 +92,10 @@ summary.seroincidence.by <- function(
8792
lapply(
8893
FUN = summary.seroincidence,
8994
coverage = confidence_level,
90-
verbose = verbose
95+
verbose = verbose,
96+
small_sample = small_sample,
97+
floor_to_standard = floor_to_standard,
98+
debug_cluster = debug_cluster
9199
) |>
92100
bind_rows(.id = "Stratum")
93101

man/summary.seroincidence.Rd

Lines changed: 19 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/summary.seroincidence.by.Rd

Lines changed: 13 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)