|
17 | 17 | fit, |
18 | 18 | cluster_var, |
19 | 19 | stratum_var = NULL) { |
20 | | - # Extract stored data (already split by antigen_iso) |
21 | 20 | pop_data_list <- attr(fit, "pop_data") |
22 | | - sr_params_list <- attr(fit, "sr_params") |
23 | | - noise_params_list <- attr(fit, "noise_params") |
24 | | - antigen_isos <- attr(fit, "antigen_isos") |
25 | | - |
26 | | - # Get MLE estimate |
27 | | - log_lambda_mle <- fit$estimate |
28 | | - |
29 | | - # Combine pop_data list back into a single data frame |
30 | | - # to get cluster info |
31 | 21 | pop_data_combined <- do.call(rbind, pop_data_list) |
32 | | - |
33 | | - # Compute score (gradient) using numerical differentiation |
34 | | - # The score is the derivative of log-likelihood w.r.t. log(lambda) |
35 | | - epsilon <- 1e-6 |
36 | | - |
37 | | - # For each observation, compute the contribution to the score |
38 | | - # We need to identify which cluster each observation belongs to |
39 | | - |
40 | | - # Handle multiple clustering levels by creating composite cluster ID |
41 | | - if (length(cluster_var) == 1) { |
42 | | - cluster_ids <- pop_data_combined[[cluster_var]] |
43 | | - } else { |
44 | | - # Create composite cluster ID from multiple variables |
45 | | - cluster_ids <- interaction( |
46 | | - pop_data_combined[, cluster_var, drop = FALSE], |
47 | | - drop = TRUE, |
48 | | - sep = "_" |
49 | | - ) |
50 | | - } |
51 | | - |
52 | | - # Get unique clusters |
53 | | - unique_clusters <- unique(cluster_ids) |
54 | | - n_clusters <- length(unique_clusters) |
55 | | - |
56 | | - # Compute cluster-level scores |
57 | | - cluster_scores <- numeric(n_clusters) |
58 | | - |
59 | | - for (i in seq_along(unique_clusters)) { |
60 | | - cluster_id <- unique_clusters[i] |
61 | | - |
62 | | - # Get observations in this cluster |
63 | | - cluster_mask <- cluster_ids == cluster_id |
64 | | - |
65 | | - # Create temporary pop_data with only this cluster |
66 | | - pop_data_cluster <- pop_data_combined[cluster_mask, , drop = FALSE] |
67 | | - |
68 | | - # Split by antigen |
69 | | - pop_data_cluster_list <- split( |
70 | | - pop_data_cluster, |
71 | | - pop_data_cluster$antigen_iso |
72 | | - ) |
73 | | - |
74 | | - # Ensure all antigen_isos are represented |
75 | | - # (add empty data frames if missing) |
76 | | - for (ag in antigen_isos) { |
77 | | - if (!ag %in% names(pop_data_cluster_list)) { |
78 | | - # Create empty data frame with correct structure |
79 | | - pop_data_cluster_list[[ag]] <- pop_data_list[[ag]][0, , drop = FALSE] |
80 | | - } |
| 22 | + standard_var_log_lambda <- 1 / fit$hessian |> as.numeric() |
| 23 | + |
| 24 | + subset_cluster_vars <- unlist( |
| 25 | + lapply(seq_along(cluster_var), function(n_vars) { |
| 26 | + utils::combn(cluster_var, n_vars, simplify = FALSE) |
| 27 | + }), |
| 28 | + recursive = FALSE |
| 29 | + ) |
| 30 | + |
| 31 | + cluster_var_terms <- vapply(subset_cluster_vars, length, integer(1)) |
| 32 | + robust_var_log_lambda <- 0 |
| 33 | + |
| 34 | + for (i in seq_along(subset_cluster_vars)) { |
| 35 | + cluster_vars_subset <- subset_cluster_vars[[i]] |
| 36 | + if (length(cluster_vars_subset) == 1) { |
| 37 | + cluster_ids <- pop_data_combined[[cluster_vars_subset]] |
| 38 | + } else { |
| 39 | + cluster_ids <- interaction( |
| 40 | + pop_data_combined[, cluster_vars_subset, drop = FALSE], |
| 41 | + drop = TRUE, |
| 42 | + sep = "_" |
| 43 | + ) |
81 | 44 | } |
82 | 45 |
|
83 | | - # Compute log-likelihood for this cluster at MLE |
84 | | - ll_cluster_mle <- -(.nll( |
85 | | - log.lambda = log_lambda_mle, |
86 | | - pop_data = pop_data_cluster_list, |
87 | | - antigen_isos = antigen_isos, |
88 | | - curve_params = sr_params_list, |
89 | | - noise_params = noise_params_list, |
90 | | - verbose = FALSE |
91 | | - )) |
92 | | - |
93 | | - # Compute log-likelihood at MLE + epsilon |
94 | | - ll_cluster_plus <- -(.nll( |
95 | | - log.lambda = log_lambda_mle + epsilon, |
96 | | - pop_data = pop_data_cluster_list, |
97 | | - antigen_isos = antigen_isos, |
98 | | - curve_params = sr_params_list, |
99 | | - noise_params = noise_params_list, |
100 | | - verbose = FALSE |
101 | | - )) |
102 | | - |
103 | | - # Numerical derivative (score for this cluster) |
104 | | - cluster_scores[i] <- (ll_cluster_plus - ll_cluster_mle) / epsilon |
| 46 | + robust_var_log_lambda <- robust_var_log_lambda + |
| 47 | + (-1)^(cluster_var_terms[[i]] + 1) * |
| 48 | + .compute_cluster_var_oneway( |
| 49 | + fit = fit, |
| 50 | + cluster_ids = cluster_ids, |
| 51 | + pop_data_combined = pop_data_combined |
| 52 | + ) |
105 | 53 | } |
106 | 54 |
|
107 | | - # Compute B matrix (middle of sandwich) |
108 | | - # B = sum of outer products of cluster scores |
109 | | - b_matrix <- sum(cluster_scores^2) # nolint: object_name_linter |
110 | | - |
111 | | - # Get Hessian (already computed by nlm) |
112 | | - h_matrix <- fit$hessian # nolint: object_name_linter |
113 | | - |
114 | | - # Sandwich variance: V = H^(-1) * B * H^(-1) |
115 | | - # Since we have a scalar parameter, this simplifies to: |
116 | | - var_log_lambda_robust <- b_matrix / (h_matrix^2) |
| 55 | + robust_var_log_lambda <- max( |
| 56 | + standard_var_log_lambda, |
| 57 | + robust_var_log_lambda |
| 58 | + ) |
117 | 59 |
|
118 | | - return(var_log_lambda_robust) |
| 60 | + return(robust_var_log_lambda) |
119 | 61 | } |
0 commit comments