88# ' \eqn{\alpha} is the true Riesz representer and \eqn{\alpha_s} is the Riesz
99# ' representer with the observed covariates. The RR can be equivalently
1010# ' expressed as \deqn{
11- # ' \alpha = \partial_x \log f(X\ mid Z, U ),
11+ # ' \alpha = \partial_{\bar x_j} \log f(\bar x_j\ mid z, u ),
1212# ' } where \eqn{U} is the unobserved confounder and \eqn{f} is the conditional
1313# ' density. The corresponding `c_predictor` is then \deqn{
1414# ' 1 - R^2_{\alpha\sim\alpha_s} = 1 - \
15- # ' \frac{\mathbb{E}[(\partial_x \log f(X\mid Z))^2]}{
16- # ' \mathbb{E}[(\partial_x \log f(X\mid Z, U))^2]}.
17- # ' } When \eqn{X\mid Z,U} and \eqn{X\mid Z} are homoscedastic Gaussian, this
18- # ' simplifies to \deqn{
19- # ' 1 - R^2_{\alpha\sim\alpha_s} =
20- # ' 1 - \frac{\mathbb{E}[X - \mathbb{E}[X\mid Z, U]]^2}{
21- # ' \mathbb{E}[X - \mathbb{E}[X\mid Z]]^2}
22- # ' = R^2_{X\sim U\mid Z}.
15+ # ' \frac{\mathbb{E}[(\partial_{\bar x_j} \log f(\bar x_j\mid z))^2]}{
16+ # ' \mathbb{E}[(\partial_{\bar x_j} \log f(\bar x_j\mid z, u))^2]}.
2317# ' }
18+ # ' # When \eqn{\bar X_j\mid Z,U} and \eqn{\bar X_j\mid Z} are homoscedastic
19+ # ' # Gaussian with the same variance, and each unit has the same population, this
20+ # ' # simplifies to \deqn{
21+ # ' # 1 - R^2_{\alpha\sim\alpha_s} =
22+ # ' # 1 - \frac{\mathbb{E}[X - \mathbb{E}[X\mid Z, U]]^2}{
23+ # ' # \mathbb{E}[X - \mathbb{E}[X\mid Z]]^2}
24+ # ' # = R^2_{X\sim U\mid Z}.
25+ # ' # }
2426# '
2527# ' The bounds here are plug-in estimates and do not incorporate sampling
2628# ' uncertainty. As such, they may fail to cover the true value in finite
@@ -128,7 +130,12 @@ ei_sens <- function(est, c_outcome=seq(0, 1, 0.01)^2, c_predictor=seq(0, 1, 0.01
128130
129131# ' Robustness values for ecological inference
130132# '
131- # ' TODO fill in...
133+ # ' The robustness value is the minimum bound for both `c_outcome` and
134+ # ' `c_predictor` in [ei_sens()] such that the bias bound is a certain value.
135+ # ' For example, if the provided bias bound is 0.5, meaning a bias of magnitude
136+ # ' 0.5 would be considered problematic, then the robustness value is the minimum
137+ # ' amount of confounding of outcome and predictor (more specifically, the Riesz
138+ # ' representer) that would lead to bias of magnitude 0.5.
132139# '
133140# ' @param bias_bound <[`data-masking`][rlang::args_data_masking]> A bias bound:
134141# ' an amount of bias which is considered substantial. Evaluated in the context
@@ -178,7 +185,11 @@ ei_sens_rv <- function(est, bias_bound, confounding=1) {
178185
179186# ' Bias contour plot for ecological inference estimates
180187# '
181- # ' TODO fill in...
188+ # ' Displays bias bound as a function of `c_outcome` and `c_predictor` in
189+ # ' [ei_sens()] on a contour plot. Bounds on the outcome, and standard errors of
190+ # ' the point estimate, can be overlaid as contours on the plot to aid in
191+ # ' interpretation. Benchmarked values of `c_outcome` and `c_predictor` based on
192+ # ' the observed covariates can also be overlaid.
182193# '
183194# ' @param x An [ei_sens] object
184195# ' @param y An outcome variable, as a character vector. Defaults to first.
@@ -189,6 +200,7 @@ ei_sens_rv <- function(est, bias_bound, confounding=1) {
189200# ' they will be inferred from the outcome variable: if it is contained within
190201# ' \eqn{[0, 1]}, for instance, then the bounds will be `c(0, 1)`. Setting
191202# ' `bounds = FALSE` forces unbounded estimates.
203+ # ' @param bench A data frame of benchmark values, from [ei_bench()], to plot.
192204# ' @param plot_se A vector of multiples of the standard error to plot as contours.
193205# ' @param ... Additional arguments passed on to [contour()]
194206# ' @param lwd A scaling factor for the contour line widths
@@ -210,8 +222,10 @@ ei_sens_rv <- function(est, bias_bound, confounding=1) {
210222# '
211223# ' plot(sens)
212224# '
225+ # ' plot(sens, bench = ei_bench(spec), plot_se=NULL)
213226# ' @export
214- plot.ei_sens <- function (x , y = NULL , predictor = NULL , bounds = NULL , plot_se = 1 : 3 , ... , lwd = 1 ) {
227+ plot.ei_sens <- function (x , y = NULL , predictor = NULL , bounds = NULL , bench = NULL ,
228+ plot_se = 1 : 3 , ... , lwd = 1 ) {
215229 if (is.null(y )) y = x $ outcome [1 ]
216230 if (is.null(predictor )) predictor = x $ predictor [1 ]
217231
@@ -269,7 +283,7 @@ plot.ei_sens <- function(x, y=NULL, predictor=NULL, bounds=NULL, plot_se=1:3, ..
269283 labels [dists < 0.05 ] = " "
270284 graphics :: contour(
271285 cx , cy , cz , levels = breaks , labels = labels ,
272- lwd = lwd * c(rep(c(1.6 , 1.0 ), n_om ), 1.6 ),
286+ lwd = lwd * c(rep(c(1.4 , 1.0 ), n_om ), 1.4 ),
273287 labcex = 0.8 , col = " #444" , add = TRUE , method = " edge"
274288 )
275289 graphics :: contour(
@@ -286,7 +300,106 @@ plot.ei_sens <- function(x, y=NULL, predictor=NULL, bounds=NULL, plot_se=1:3, ..
286300 add = TRUE , method = " edge"
287301 )
288302 }
303+ if (! missing(bench )) {
304+ if (! inherits(bench , " ei_bench" )) {
305+ cli_abort(" {.arg bench} must be an {.cls ei_bench} object." ,
306+ call = parent.frame())
307+ }
308+
309+ bench = bench [bench $ outcome == y & bench $ predictor == predictor , ]
310+ points(bench $ c_predictor , bench $ c_outcome , pch = 3 , cex = 1.5 )
311+ }
289312 graphics :: par(mar = oldmar )
290313}
291314
315+ # ' Benchmark sensitivity parameters from observed covariates
316+ # '
317+ # ' Produces a table of benchmark values for `c_outcome` and `c_predictor` in
318+ # ' [ei_sens()] for each covariate, following the methodology of Chernozhukov
319+ # ' et al. (2022).
320+ # '
321+ # ' @param spec An [ei_spec] object.
322+ # ' @param preproc An optional function which takes in a `ei_spec` object (`spec`
323+ # ' with one covariate removed) and returns a modified object that includes
324+ # ' modified object. Useful to apply any preprocessing, such as a basis
325+ # ' transformation, as part of the benchmarking process.
326+ # '
327+ # ' @references
328+ # ' Chernozhukov, V., Cinelli, C., Newey, W., Sharma, A., & Syrgkanis, V. (2022).
329+ # ' *Long story short: Omitted variable bias in causal machine learning*
330+ # ' (No. w30302). National Bureau of Economic Research.
331+ # '
332+ # ' @examples
333+ # ' data(elec_1968)
334+ # '
335+ # ' spec = ei_spec(elec_1968, vap_white:vap_other, pres_ind_wal,
336+ # ' total = pres_total, covariates = c(educ_elem, pop_urban, farm))
337+ # '
338+ # ' ei_bench(spec)
339+ # '
340+ # ' # preprocess to add all 2-way interactions
341+ # ' ei_bench(spec, preproc = function(s) {
342+ # ' z_cols = match(attr(s, "ei_z"), names(s))
343+ # ' s_out = s[-z_cols]
344+ # ' z_new = model.matrix(~ .^2 - 1, data = s[z_cols])
345+ # ' s_out = cbind(s_out, z_new)
346+ # ' ei_spec(s_out, white:other, pres_ind_wal,
347+ # ' total = attr(s, "ei_n"), covariates = colnames(z_new))
348+ # ' })
349+ # ' @export
350+ ei_bench <- function (spec , preproc = NULL ) {
351+ validate_ei_spec(spec )
352+
353+ if (! missing(preproc )) {
354+ if (! is.function(preproc )) {
355+ cli_abort(" {.arg preproc} must be a function." )
356+ }
357+ } else {
358+ preproc = function (x ) x
359+ }
360+
361+ n_x = length(attr(spec , " ei_x" ))
362+ n_y = length(attr(spec , " ei_y" ))
363+ var_resid = function (regr ) {
364+ apply(regr $ y - regr $ fitted , 2 , var )
365+ }
366+
367+ spec_proc = preproc(spec )
368+ regr0 = ei_ridge(spec_proc )
369+ riesz0 = ei_riesz(spec_proc , penalty = regr $ penalty )
370+ est0 = ei_est(regr0 , riesz0 , spec_proc )
371+ vy = apply(regr0 $ y , 2 , var )
372+ var_resid0 = var_resid(regr0 )
373+ r2_out0 = 1 - var_resid0 / vy
374+
375+ covs = attr(spec , " ei_z" )
376+ benches = lapply(covs , function (cv ) {
377+ spec_loo = reconstruct_ei_spec(spec [setdiff(names(spec ), cv )], spec )
378+ spec_loo = preproc(spec_loo )
379+
380+ regr_loo = ei_ridge(spec_loo )
381+ riesz_loo = ei_riesz(spec_loo , penalty = regr $ penalty )
382+ est_loo = ei_est(regr_loo , riesz_loo , spec_loo )
383+ var_resid_loo = var_resid(regr_loo )
384+ r2_out_loo = 1 - var_resid_loo / vy
385+ r2_riesz = riesz_loo $ nu2 / riesz0 $ nu2
386+
387+ c_outcome = pmin((r2_out0 - r2_out_loo ) / r2_out0 , 1 )
388+ c_predictor = pmin((1 - r2_riesz ) / r2_riesz , 1 )
389+ est_chg = est_loo $ estimate - est0 $ estimate
390+ confounding = est_chg / rep(sqrt(var_resid_loo - var_resid0 ), each = n_x ) /
391+ rep(sqrt(riesz0 $ nu2 - riesz_loo $ nu2 ), n_y )
392+ confounding = pmax(pmin(confounding , 1 ), - 1 )
393+
394+ est_loo $ covariate = cv
395+ est_loo = est_loo [c(" covariate" , " predictor" , " outcome" )]
396+ est_loo $ c_outcome = rep(c_outcome , each = n_x )
397+ est_loo $ c_predictor = rep(c_predictor , n_y )
398+ est_loo $ confounding = confounding
399+ est_loo $ est_chg = est_chg
400+ est_loo
401+ })
402+
403+ tibble :: new_tibble(do.call(rbind , benches ), class = " ei_bench" )
404+ }
292405
0 commit comments