|
6 | 6 | #' This function can also generate love plots of the same quantities. |
7 | 7 | #' |
8 | 8 | #' @inheritParams stand |
| 9 | +#' @inheritParams optimize_controls |
9 | 10 | #' @param X a data frame containing the covariates in the columns over which balance is desired. The number |
10 | 11 | #' of rows should equal the length of \code{z}. |
| 12 | +#' @param treated which treatment value should be considered the treated units. This |
| 13 | +#' must be one of the values of \code{z}. |
| 14 | +#' @param control which treatment value should be considered the control units. This |
| 15 | +#' must be one of the values of \code{z}. |
11 | 16 | #' @param selected a boolean vector including whether each unit was selected as part of the treated and control |
12 | 17 | #' groups for analysis. Should be the same length as \code{z} and typically comes from the results of |
13 | 18 | #' \code{\link{optimize_controls}()}. |
|
65 | 70 | #' selected = results$selected, |
66 | 71 | #' plot = TRUE) |
67 | 72 |
|
68 | | - |
69 | | -check_balance <- function(z, X, st, selected, denom_variance = "treated", plot = FALSE, message = TRUE) { |
| 73 | +check_balance <- function(z, X, st, selected, treated = 1, control = 0, |
| 74 | + denom_variance = "treated", plot = FALSE, message = TRUE) { |
70 | 75 |
|
71 | 76 | if (plot && !requireNamespace("ggplot2", quietly = TRUE) && !requireNamespace("rlang", quietly = TRUE)) { |
72 | | - stop("Packages \"ggplot2\" and \"rlang\" needed if \"plot\" argument set to \"TRUE\". Please |
| 77 | + stop("Packages \"ggplot2\" and \"rlang\" needed if \"plot\" argument set to \"TRUE\". Please |
73 | 78 | install these or switch the \"plot\" argument to \"FALSE\".", |
74 | | - call. = FALSE) |
| 79 | + call. = FALSE) |
75 | 80 | } |
76 | 81 |
|
77 | 82 | st <- as.factor(st) |
78 | 83 | X[, sapply(X, is.logical)] <- sapply(X[, sapply(X, is.logical)], as.numeric) |
79 | 84 | dummies <- dummyVars( ~ ., data = X, levelsOnly = FALSE) |
80 | 85 | full_X <- predict(dummies, newdata = X) |
81 | 86 |
|
82 | | - sd_across <- get_stand_diffs(full_X, z, selected, denom_variance = denom_variance) |
| 87 | + sd_across <- get_stand_diffs(full_X, z, selected, treated = treated, control = control, |
| 88 | + denom_variance = denom_variance) |
83 | 89 |
|
84 | 90 | sd_strata <- NULL |
85 | 91 | for (ist in levels(st)) { |
86 | 92 | sd_strata <- rbind(sd_strata, cbind(get_stand_diffs(full_X, z, selected, st, ist, |
| 93 | + treated = treated, control = control, |
87 | 94 | denom_variance = denom_variance), ist)) |
88 | 95 | } |
89 | 96 | colnames(sd_strata)[4] <- "stratum" |
90 | 97 |
|
91 | | - q_s <- sapply(levels(st), function(ist) {sum( !z & selected & st == ist )}) |
92 | | - n_s <- sapply(levels(st), function(ist) {sum( !z & st == ist )}) |
| 98 | + q_s <- sapply(levels(st), function(ist) {sum( z == control & selected & st == ist )}) |
| 99 | + n_s <- sapply(levels(st), function(ist) {sum( z == control & st == ist )}) |
93 | 100 |
|
94 | | - fr_tab <- table(z, st) |
95 | 101 | sd_strata_avg <- sd_across |
96 | 102 | sd_strata_avg[1:dim(sd_strata_avg)[1], 1:2] <- NA |
97 | 103 | for (cov in row.names(sd_strata_avg)) { |
98 | 104 | sd_strata_avg[cov, 1] <- sum(sapply(levels(st), function(ist) { |
99 | | - sd_strata[sd_strata$covariate == cov & sd_strata$stratum == ist, 1] * (n_s[ist] - sum(is.na(X[!z & st == ist, cov]))) })) / (sum(n_s) - sum(is.na(X[!z, cov]))) |
| 105 | + sd_strata[sd_strata$covariate == cov & sd_strata$stratum == ist, 1] * |
| 106 | + (n_s[ist] - sum(is.na(X[z == 0 & st == ist, cov]))) })) / |
| 107 | + (sum(n_s) - sum(is.na(X[z == 0, cov]))) |
100 | 108 | sd_strata_avg[cov, 2] <- sum(sapply(levels(st), function(ist) { |
101 | | - sd_strata[sd_strata$covariate == cov & sd_strata$stratum == ist, 2] * (q_s[ist] - sum(is.na(X[!z & st == ist & selected, cov]))) })) / (sum(q_s) - sum(is.na(X[!z & selected, cov]))) |
| 109 | + sd_strata[sd_strata$covariate == cov & sd_strata$stratum == ist, 2] * |
| 110 | + (q_s[ist] - sum(is.na(X[z == 0 & st == ist & selected, cov]))) })) / |
| 111 | + (sum(q_s) - sum(is.na(X[z == 0 & selected, cov]))) |
102 | 112 | } |
103 | 113 |
|
104 | 114 | if (message) { |
@@ -169,41 +179,50 @@ check_balance <- function(z, X, st, selected, denom_variance = "treated", plot = |
169 | 179 | #' choosing a subset of controls, and one for after. The rows pertain to covariates. |
170 | 180 | #' @keywords internal |
171 | 181 |
|
172 | | -get_stand_diffs <- function(data, z, selected, st = NULL, ist = NULL, denom_variance = "treated") { |
| 182 | +get_stand_diffs <- function(data, z, selected, st = NULL, ist = NULL, |
| 183 | + treated = 1, control = 0, denom_variance = "treated") { |
| 184 | + if (is.vector(z)) { |
| 185 | + z <- as.factor(z) |
| 186 | + } |
173 | 187 | if (!is.null(ist)) { |
174 | 188 | ind <- st == ist |
175 | 189 | } else { |
176 | 190 | ind <- rep(TRUE, length(z)) |
177 | 191 | } |
178 | | - treatedmat_full <- data[z == 1, , drop = FALSE] |
179 | | - treatedmat <- data[z == 1 & ind, , drop = FALSE] |
180 | 192 | # Standardized differences before matching |
181 | | - controlmat_before_full <- data[z == 0, , drop = FALSE] |
182 | | - controlmat_before <- data[z == 0 & ind, , drop = FALSE] |
| 193 | + treatedmat_before_full <- data[z == treated, , drop = FALSE] |
| 194 | + treatedmat_before <- data[z == treated & ind, , drop = FALSE] |
| 195 | + treatedmean_before <- apply(treatedmat_before, 2, mean, na.rm = TRUE) |
| 196 | + controlmat_before_full <- data[z == control, , drop = FALSE] |
| 197 | + controlmat_before <- data[z == control & ind, , drop = FALSE] |
183 | 198 | controlmean_before <- apply(controlmat_before, 2, mean, na.rm = TRUE) |
184 | | - treatmean <- apply(treatedmat, 2, mean, na.rm = TRUE) |
185 | | - treatvar <- apply(treatedmat_full, 2, var, na.rm = TRUE) |
186 | | - controlvar <- apply(controlmat_before_full, 2, var, na.rm = TRUE) |
187 | | - if (dim(treatedmat_full)[1] == 1) { |
188 | | - treatvar[1:length(treatvar)] <- 0.0 |
| 199 | + variances <- sapply(levels(z), function(group) { |
| 200 | + return(apply(data[z == group, , drop = FALSE], 2, var, na.rm = TRUE)) |
| 201 | + }) |
| 202 | + if (is.vector(variances)) { |
| 203 | + variances <- matrix(variances, ncol = 1) |
189 | 204 | } |
| 205 | + variances[is.na(variances)] <- 0 |
190 | 206 | if (denom_variance == "pooled") { |
191 | | - denom <- sqrt((treatvar + controlvar) / 2) |
| 207 | + denom <- sqrt(rowMeans(variances)) |
192 | 208 | } else { |
193 | | - denom <- sqrt(treatvar) |
194 | | - denom[treatvar == 0] <- sqrt(controlvar[treatvar == 0] / 2) |
| 209 | + denom <- sqrt(variances[, levels(z) == treated]) |
| 210 | + denom[denom == 0] <- |
| 211 | + sqrt(rowMeans(variances)[denom == 0]) |
195 | 212 | } |
196 | | - stand_diff_before <- rep(NA, length(treatvar)) |
197 | | - names(stand_diff_before) <- names(treatvar) |
198 | | - stand_diff_before <- (treatmean - controlmean_before) / denom |
199 | | - stand_diff_before[treatmean == controlmean_before] <- 0.0 |
| 213 | + stand_diff_before <- rep(NA, nrow(variances)) |
| 214 | + names(stand_diff_before) <- dimnames(variances)[[1]] |
| 215 | + stand_diff_before <- (treatedmean_before - controlmean_before) / denom |
| 216 | + stand_diff_before[treatedmean_before == controlmean_before] <- 0.0 |
200 | 217 | # Standardized differences after matching |
201 | | - controlmat_after <- data[selected & z == 0 & ind, , drop = FALSE] |
| 218 | + controlmat_after <- data[selected & z == control & ind, , drop = FALSE] |
202 | 219 | controlmean_after <- apply(controlmat_after, 2, mean, na.rm = TRUE) |
203 | | - stand_diff_after <- rep(NA, length(treatvar)) |
204 | | - names(stand_diff_after) <- names(treatvar) |
205 | | - stand_diff_after <- (treatmean - controlmean_after) / denom |
206 | | - stand_diff_after[treatmean == controlmean_after] <- 0.0 |
| 220 | + treatedmat_after <- data[selected & z == treated & ind, , drop = FALSE] |
| 221 | + treatedmean_after <- apply(treatedmat_after, 2, mean, na.rm = TRUE) |
| 222 | + stand_diff_after <- rep(NA, nrow(variances)) |
| 223 | + names(stand_diff_after) <- dimnames(variances)[[1]] |
| 224 | + stand_diff_after <- (treatedmean_after - controlmean_after) / denom |
| 225 | + stand_diff_after[treatedmean_after == controlmean_after] <- 0.0 |
207 | 226 | sd_matrix <- data.frame(abs_stand_diff_before = abs(stand_diff_before), |
208 | 227 | abs_stand_diff_after = abs(stand_diff_after)) |
209 | 228 | if (!is.null(ist)) { |
@@ -329,8 +348,8 @@ plot_stand_diffs <- function(sds, type) { |
329 | 348 | stratum = sds$sd_strata$stratum) |
330 | 349 |
|
331 | 350 | p <- apply(as.array(unique(sds$sd_strata$stratum)), 1, function(x) { |
332 | | - ggplot(plot_dataframe[plot_dataframe$stratum == x,], |
333 | | - aes(x = .data$abs_stand_diff, y = .data$covariates)) + |
| 351 | + ggplot(plot_dataframe[plot_dataframe$stratum == x,], |
| 352 | + aes(x = .data$abs_stand_diff, y = .data$covariates)) + |
334 | 353 | geom_point(size = 5, aes(shape = .data$type)) + |
335 | 354 | scale_shape_manual(values = c(4, 1)) + |
336 | 355 | geom_vline(xintercept = c(.1,.2), lty = 2) + |
|
0 commit comments