Skip to content

Commit 3d1fb68

Browse files
committed
benchmarking
1 parent fac3690 commit 3d1fb68

File tree

7 files changed

+233
-25
lines changed

7 files changed

+233
-25
lines changed

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ S3method(vcov,ei_est)
2828
S3method(weights,ei_ridge)
2929
S3method(weights,ei_riesz)
3030
S3method(weights,ei_spec)
31+
export(ei_bench)
3132
export(ei_est)
3233
export(ei_est_local)
3334
export(ei_proportions)

R/ei_sens.R

Lines changed: 126 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,21 @@
88
#' \eqn{\alpha} is the true Riesz representer and \eqn{\alpha_s} is the Riesz
99
#' representer with the observed covariates. The RR can be equivalently
1010
#' expressed as \deqn{
11-
#' \alpha = \partial_x \log f(X\mid Z, U),
11+
#' \alpha = \partial_{\bar x_j} \log f(\bar x_j\mid z, u),
1212
#' } where \eqn{U} is the unobserved confounder and \eqn{f} is the conditional
1313
#' density. The corresponding `c_predictor` is then \deqn{
1414
#' 1 - R^2_{\alpha\sim\alpha_s} = 1 - \
15-
#' \frac{\mathbb{E}[(\partial_x \log f(X\mid Z))^2]}{
16-
#' \mathbb{E}[(\partial_x \log f(X\mid Z, U))^2]}.
17-
#' } When \eqn{X\mid Z,U} and \eqn{X\mid Z} are homoscedastic Gaussian, this
18-
#' simplifies to \deqn{
19-
#' 1 - R^2_{\alpha\sim\alpha_s} =
20-
#' 1 - \frac{\mathbb{E}[X - \mathbb{E}[X\mid Z, U]]^2}{
21-
#' \mathbb{E}[X - \mathbb{E}[X\mid Z]]^2}
22-
#' = R^2_{X\sim U\mid Z}.
15+
#' \frac{\mathbb{E}[(\partial_{\bar x_j} \log f(\bar x_j\mid z))^2]}{
16+
#' \mathbb{E}[(\partial_{\bar x_j} \log f(\bar x_j\mid z, u))^2]}.
2317
#' }
18+
#' # When \eqn{\bar X_j\mid Z,U} and \eqn{\bar X_j\mid Z} are homoscedastic
19+
#' # Gaussian with the same variance, and each unit has the same population, this
20+
#' # simplifies to \deqn{
21+
#' # 1 - R^2_{\alpha\sim\alpha_s} =
22+
#' # 1 - \frac{\mathbb{E}[X - \mathbb{E}[X\mid Z, U]]^2}{
23+
#' # \mathbb{E}[X - \mathbb{E}[X\mid Z]]^2}
24+
#' # = R^2_{X\sim U\mid Z}.
25+
#' # }
2426
#'
2527
#' The bounds here are plug-in estimates and do not incorporate sampling
2628
#' uncertainty. As such, they may fail to cover the true value in finite
@@ -128,7 +130,12 @@ ei_sens <- function(est, c_outcome=seq(0, 1, 0.01)^2, c_predictor=seq(0, 1, 0.01
128130

129131
#' Robustness values for ecological inference
130132
#'
131-
#' TODO fill in...
133+
#' The robustness value is the minimum bound for both `c_outcome` and
134+
#' `c_predictor` in [ei_sens()] such that the bias bound is a certain value.
135+
#' For example, if the provided bias bound is 0.5, meaning a bias of magnitude
136+
#' 0.5 would be considered problematic, then the robustness value is the minimum
137+
#' amount of confounding of outcome and predictor (more specifically, the Riesz
138+
#' representer) that would lead to bias of magnitude 0.5.
132139
#'
133140
#' @param bias_bound <[`data-masking`][rlang::args_data_masking]> A bias bound:
134141
#' an amount of bias which is considered substantial. Evaluated in the context
@@ -178,7 +185,11 @@ ei_sens_rv <- function(est, bias_bound, confounding=1) {
178185

179186
#' Bias contour plot for ecological inference estimates
180187
#'
181-
#' TODO fill in...
188+
#' Displays bias bound as a function of `c_outcome` and `c_predictor` in
189+
#' [ei_sens()] on a contour plot. Bounds on the outcome, and standard errors of
190+
#' the point estimate, can be overlaid as contours on the plot to aid in
191+
#' interpretation. Benchmarked values of `c_outcome` and `c_predictor` based on
192+
#' the observed covariates can also be overlaid.
182193
#'
183194
#' @param x An [ei_sens] object
184195
#' @param y An outcome variable, as a character vector. Defaults to first.
@@ -189,6 +200,7 @@ ei_sens_rv <- function(est, bias_bound, confounding=1) {
189200
#' they will be inferred from the outcome variable: if it is contained within
190201
#' \eqn{[0, 1]}, for instance, then the bounds will be `c(0, 1)`. Setting
191202
#' `bounds = FALSE` forces unbounded estimates.
203+
#' @param bench A data frame of benchmark values, from [ei_bench()], to plot.
192204
#' @param plot_se A vector of multiples of the standard error to plot as contours.
193205
#' @param ... Additional arguments passed on to [contour()]
194206
#' @param lwd A scaling factor for the contour line widths
@@ -210,8 +222,10 @@ ei_sens_rv <- function(est, bias_bound, confounding=1) {
210222
#'
211223
#' plot(sens)
212224
#'
225+
#' plot(sens, bench = ei_bench(spec), plot_se=NULL)
213226
#' @export
214-
plot.ei_sens <- function(x, y=NULL, predictor=NULL, bounds=NULL, plot_se=1:3, ..., lwd=1) {
227+
plot.ei_sens <- function(x, y=NULL, predictor=NULL, bounds=NULL, bench=NULL,
228+
plot_se=1:3, ..., lwd=1) {
215229
if (is.null(y)) y = x$outcome[1]
216230
if (is.null(predictor)) predictor = x$predictor[1]
217231

@@ -269,7 +283,7 @@ plot.ei_sens <- function(x, y=NULL, predictor=NULL, bounds=NULL, plot_se=1:3, ..
269283
labels[dists < 0.05] = ""
270284
graphics::contour(
271285
cx, cy, cz, levels=breaks, labels=labels,
272-
lwd = lwd * c(rep(c(1.6, 1.0), n_om), 1.6),
286+
lwd = lwd * c(rep(c(1.4, 1.0), n_om), 1.4),
273287
labcex=0.8, col = "#444", add=TRUE, method="edge"
274288
)
275289
graphics::contour(
@@ -286,7 +300,106 @@ plot.ei_sens <- function(x, y=NULL, predictor=NULL, bounds=NULL, plot_se=1:3, ..
286300
add=TRUE, method="edge"
287301
)
288302
}
303+
if (!missing(bench)) {
304+
if (!inherits(bench, "ei_bench")) {
305+
cli_abort("{.arg bench} must be an {.cls ei_bench} object.",
306+
call = parent.frame())
307+
}
308+
309+
bench = bench[bench$outcome == y & bench$predictor == predictor, ]
310+
points(bench$c_predictor, bench$c_outcome, pch=3, cex=1.5)
311+
}
289312
graphics::par(mar = oldmar)
290313
}
291314

315+
#' Benchmark sensitivity parameters from observed covariates
316+
#'
317+
#' Produces a table of benchmark values for `c_outcome` and `c_predictor` in
318+
#' [ei_sens()] for each covariate, following the methodology of Chernozhukov
319+
#' et al. (2022).
320+
#'
321+
#' @param spec An [ei_spec] object.
322+
#' @param preproc An optional function which takes in a `ei_spec` object (`spec`
323+
#' with one covariate removed) and returns a modified object that includes
324+
#' modified object. Useful to apply any preprocessing, such as a basis
325+
#' transformation, as part of the benchmarking process.
326+
#'
327+
#' @references
328+
#' Chernozhukov, V., Cinelli, C., Newey, W., Sharma, A., & Syrgkanis, V. (2022).
329+
#' *Long story short: Omitted variable bias in causal machine learning*
330+
#' (No. w30302). National Bureau of Economic Research.
331+
#'
332+
#' @examples
333+
#' data(elec_1968)
334+
#'
335+
#' spec = ei_spec(elec_1968, vap_white:vap_other, pres_ind_wal,
336+
#' total = pres_total, covariates = c(educ_elem, pop_urban, farm))
337+
#'
338+
#' ei_bench(spec)
339+
#'
340+
#' # preprocess to add all 2-way interactions
341+
#' ei_bench(spec, preproc = function(s) {
342+
#' z_cols = match(attr(s, "ei_z"), names(s))
343+
#' s_out = s[-z_cols]
344+
#' z_new = model.matrix(~ .^2 - 1, data = s[z_cols])
345+
#' s_out = cbind(s_out, z_new)
346+
#' ei_spec(s_out, white:other, pres_ind_wal,
347+
#' total = attr(s, "ei_n"), covariates = colnames(z_new))
348+
#' })
349+
#' @export
350+
ei_bench <- function(spec, preproc = NULL) {
351+
validate_ei_spec(spec)
352+
353+
if (!missing(preproc)) {
354+
if (!is.function(preproc)) {
355+
cli_abort("{.arg preproc} must be a function.")
356+
}
357+
} else {
358+
preproc = function(x) x
359+
}
360+
361+
n_x = length(attr(spec, "ei_x"))
362+
n_y = length(attr(spec, "ei_y"))
363+
var_resid = function(regr) {
364+
apply(regr$y - regr$fitted, 2, var)
365+
}
366+
367+
spec_proc = preproc(spec)
368+
regr0 = ei_ridge(spec_proc)
369+
riesz0 = ei_riesz(spec_proc, penalty=regr$penalty)
370+
est0 = ei_est(regr0, riesz0, spec_proc)
371+
vy = apply(regr0$y, 2, var)
372+
var_resid0 = var_resid(regr0)
373+
r2_out0 = 1 - var_resid0 / vy
374+
375+
covs = attr(spec, "ei_z")
376+
benches = lapply(covs, function(cv) {
377+
spec_loo = reconstruct_ei_spec(spec[setdiff(names(spec), cv)], spec)
378+
spec_loo = preproc(spec_loo)
379+
380+
regr_loo = ei_ridge(spec_loo)
381+
riesz_loo = ei_riesz(spec_loo, penalty=regr$penalty)
382+
est_loo = ei_est(regr_loo, riesz_loo, spec_loo)
383+
var_resid_loo = var_resid(regr_loo)
384+
r2_out_loo = 1 - var_resid_loo / vy
385+
r2_riesz = riesz_loo$nu2 / riesz0$nu2
386+
387+
c_outcome = pmin((r2_out0 - r2_out_loo) / r2_out0, 1)
388+
c_predictor = pmin((1 - r2_riesz) / r2_riesz, 1)
389+
est_chg = est_loo$estimate - est0$estimate
390+
confounding = est_chg / rep(sqrt(var_resid_loo - var_resid0), each=n_x) /
391+
rep(sqrt(riesz0$nu2 - riesz_loo$nu2), n_y)
392+
confounding = pmax(pmin(confounding, 1), -1)
393+
394+
est_loo$covariate = cv
395+
est_loo = est_loo[c("covariate", "predictor", "outcome")]
396+
est_loo$c_outcome = rep(c_outcome, each=n_x)
397+
est_loo$c_predictor = rep(c_predictor, n_y)
398+
est_loo$confounding = confounding
399+
est_loo$est_chg = est_chg
400+
est_loo
401+
})
402+
403+
tibble::new_tibble(do.call(rbind, benches), class="ei_bench")
404+
}
292405

R/spec_form.R

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,31 @@ validate_ei_spec = function(x) {
123123
invisible(x)
124124
}
125125

126+
reconstruct_ei_spec = function(data, old) {
127+
if (!missing(old)) {
128+
if (is.null(attr(data, "ei_x"))) {
129+
attr(data, "ei_x") = attr(old, "ei_x")
130+
}
131+
if (is.null(attr(data, "ei_y"))) {
132+
attr(data, "ei_y") = attr(old, "ei_y")
133+
}
134+
if (is.null(attr(data, "ei_z"))) {
135+
attr(data, "ei_z") = attr(old, "ei_z")
136+
}
137+
if (is.null(attr(data, "ei_n"))) {
138+
attr(data, "ei_n") = attr(old, "ei_n")
139+
}
140+
}
141+
142+
attr(data, "ei_z") = intersect(attr(data, "ei_z"), names(data))
143+
144+
if (!inherits(data, "ei_spec")) {
145+
class(data) = c("ei_spec", class(data))
146+
}
147+
148+
data
149+
}
150+
126151

127152
#' @export
128153
print.ei_spec = function(x, ..., n=5) {

man/ei_bench.Rd

Lines changed: 44 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/ei_sens.Rd

Lines changed: 13 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/ei_sens_rv.Rd

Lines changed: 6 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/plot.ei_sens.Rd

Lines changed: 18 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)