Skip to content

Commit e1837a2

Browse files
committed
global bounds; incorporate into local CIs
1 parent bd238c9 commit e1837a2

File tree

9 files changed

+137
-78
lines changed

9 files changed

+137
-78
lines changed

DESCRIPTION

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@ Authors@R: c(
88
comment = c(ORCID = "0000-0002-5687-2647"))
99
)
1010
Description: Efficient and user-friendly routines for modern ecological
11-
inference. Implements the methods described in McCartan & Kuriwaki (2025)
12-
<arxiv:2507.XXXXX>, which generalize ecological regression as introduced by
11+
inference. Implements the methods described in McCartan & Kuriwaki (2025+)
12+
<arxiv:2509.20194>, which generalize ecological regression as introduced by
1313
Goodman (1953) <doi:10.2307/2088121>. Includes routines for preprocessing,
1414
synthetic data generation, double/debiased machine learning (DML)
15-
estimation, and sensitivity analysis.
15+
estimation, partial identification bounds, and sensitivity analysis.
1616
Depends:
1717
R (>= 3.5.0)
1818
Imports:
@@ -41,7 +41,7 @@ RoxygenNote: 7.3.3
4141
LazyData: true
4242
SystemRequirements: C++17
4343
Config/testthat/edition: 3
44-
URL: https://github.com/CoryMcCartan/seine, https://corymccartan.com/seine/
44+
URL: https://corymccartan.com/seine/, https://github.com/CoryMcCartan/seine
4545
BugReports: https://github.com/CoryMcCartan/seine/issues
4646
VignetteBuilder: knitr
4747
Config/build/compilation-database: true

NEWS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# seine 0.1.0
22

33
* Double/debiased ML for ecological inference
4+
* Sensitivity analysis for EI
5+
* Partial identification bounds
6+
* Local (unit-level) estimates and confidence intervals
47
* Preprocessing helper functions
58
* Synthetic EI data

R/ei_bounds.R

Lines changed: 59 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -18,28 +18,34 @@
1818
#' containing the total number of observations in each aggregate unit. For
1919
#' example, the column containing the total number of voters. Required for
2020
#' computing weights unless `x` is an [ei_spec()] object.
21+
#' @param global If `TRUE`, aggregate the bounds across units to produce bounds
22+
#' on the global estimands.
2123
#' @param ... Not currently used, but required for extensibility.
2224
#'
2325
#' @returns A data frame with bounds. The `.row` column in the output
24-
#' corresponds to the observation index in the input. The `wt` column contains
25-
#' the product of the predictor variable and total for each observation.
26-
#' Taking a weighted average of the bounds against this column will produce
27-
#' global bounds. The `min` and `max` columns contain the minimum and maximum
28-
#' values for each local estimand. It has class `ei_bounds`.
26+
#' corresponds to the observation index in the input. The `min` and `max`
27+
#' columns contain the minimum and maximum values for each local estimand.
28+
#' The `wt` column contains the product of the predictor variable and total
29+
#' for each observation. Taking a weighted average of the bounds against this
30+
#' column will produce global bounds. It has class `ei_bounds`.
2931
#'
3032
#' @examples
3133
#' data(elec_1968)
3234
#'
3335
#' spec = ei_spec(elec_1968, vap_white:vap_other, pres_dem_hum:pres_abs,
3436
#' total = pres_total, covariates = c(state, pop_urban, farm))
3537
#'
36-
#' bounds = ei_bounds(spec, bounds = c(0, 1))
37-
#' print(bounds)
38+
#' ei_bounds(spec, bounds = c(0, 1))
39+
#' ei_bounds(spec, bounds = c(0, 1), global = TRUE)
3840
#'
39-
#' # aggregate min/max
41+
#' # Infer bounds
42+
#' ei_bounds(pres_ind_wal ~ vap_white, data = elec_1968, total = pres_total, bounds = NULL)
43+
#'
44+
#' # manually aggregate min/max
4045
#' # easier with dplyr:
4146
#' # summarize(across(min:max, ~ weighted.mean(.x, wt)), .by=c(predictor, outcome))
42-
#' do.call(rbind, lapply(split(bounds, ~ predictor + outcome), function(b) {
47+
#' grp_units = split(ei_bounds(spec, bounds = c(0, 1)), ~ predictor + outcome)
48+
#' do.call(rbind, lapply(grp_units, function(b) {
4349
#' tibble::tibble(
4450
#' predictor = b$predictor[1],
4551
#' outcome = b$outcome[1],
@@ -48,16 +54,14 @@
4854
#' )
4955
#' }))
5056
#'
51-
#' # Infer bounds
52-
#' ei_bounds(pres_ind_wal ~ vap_white, data = elec_1968, total = pres_total, bounds = NULL)
5357
#' @export
54-
ei_bounds <- function(x, ..., total, contrast = NULL, bounds = c(0, 1)) {
58+
ei_bounds <- function(x, ..., total, contrast = NULL, bounds = c(0, 1), global = FALSE) {
5559
UseMethod("ei_bounds")
5660
}
5761

5862
#' @export
5963
#' @rdname ei_bounds
60-
ei_bounds.ei_spec <- function(x, total, contrast = NULL, bounds = c(0, 1), ...) {
64+
ei_bounds.ei_spec <- function(x, total, contrast = NULL, bounds = c(0, 1), global = FALSE, ...) {
6165
spec = x
6266
validate_ei_spec(spec)
6367

@@ -75,12 +79,12 @@ ei_bounds.ei_spec <- function(x, total, contrast = NULL, bounds = c(0, 1), ...)
7579
total = as.numeric(eval_tidy(enquo(total), spec))
7680
}
7781

78-
ei_bounds_impl(x_mat, y_mat, total, contrast, bounds)
82+
ei_bounds_bridge(x_mat, y_mat, total, contrast, bounds, global)
7983
}
8084

8185
#' @export
8286
#' @rdname ei_bounds
83-
ei_bounds.formula <- function(formula, data, total, contrast = NULL, bounds = c(0, 1), ...) {
87+
ei_bounds.formula <- function(formula, data, total, contrast = NULL, bounds = c(0, 1), global = FALSE, ...) {
8488
forms = ei_forms(formula)
8589
form_preds = terms(rlang::new_formula(lhs = NULL, rhs = forms$predictors))
8690
form_out = terms(rlang::new_formula(forms$outcome, rhs = NULL))
@@ -103,13 +107,13 @@ ei_bounds.formula <- function(formula, data, total, contrast = NULL, bounds = c(
103107

104108
total = as.numeric(eval_tidy(enquo(total), data))
105109

106-
ei_bounds_impl(x, y, total, contrast, processed$blueprint$bounds)
110+
ei_bounds_bridge(x, y, total, contrast, processed$blueprint$bounds, global)
107111
}
108112

109113

110114
#' @export
111115
#' @rdname ei_bounds
112-
ei_bounds.data.frame <- function(x, y, total, contrast = NULL, bounds = c(0, 1), ...) {
116+
ei_bounds.data.frame <- function(x, y, total, contrast = NULL, bounds = c(0, 1), global = FALSE, ...) {
113117
x_mat = as.matrix(x)
114118
check_preds(x_mat, call = rlang::new_call(rlang::sym("ei_bounds")))
115119
y_mat = as.matrix(y)
@@ -122,13 +126,13 @@ ei_bounds.data.frame <- function(x, y, total, contrast = NULL, bounds = c(0, 1),
122126

123127
bounds = check_bounds(bounds, y_mat)
124128

125-
ei_bounds_impl(x_mat, y_mat, total, contrast, bounds)
129+
ei_bounds_bridge(x_mat, y_mat, total, contrast, bounds, global)
126130
}
127131

128132
#' @export
129133
#' @rdname ei_bounds
130-
ei_bounds.matrix <- function(x, y, total, contrast = NULL, bounds = c(0, 1), ...) {
131-
ei_bounds.data.frame(x, y, total, contrast, bounds, ...)
134+
ei_bounds.matrix <- function(x, y, total, contrast = NULL, bounds = c(0, 1), global = FALSE, ...) {
135+
ei_bounds.data.frame(x, y, total, contrast, bounds, global, ...)
132136
}
133137

134138
#' @export
@@ -142,37 +146,55 @@ ei_bounds.default <- function(x, ...) {
142146

143147
# Implementation --------------------------------------------------------------
144148

145-
ei_bounds_impl <- function(x, y, total, contrast, bounds) {
149+
ei_bounds_bridge <- function(x, y, total, contrast, bounds, global = FALSE) {
146150
n = nrow(x)
147151
n_x = ncol(x)
148152
n_y = ncol(y)
149153

150-
if (!is.null(contrast)) {
151-
cli_abort("{.arg contrast} is not yet implemented for {.fn ei_bounds}.")
152-
}
153-
154154
if (identical(bounds, c(-Inf, Inf))) {
155155
cli_abort("At least one bound must be provided for {.fn ei_bounds}.", call=parent.frame())
156156
}
157157
if (any(is.na(x))) cli_abort("Missing values found in predictors.", call=parent.frame())
158158
if (any(is.na(y))) cli_abort("Missing values found in outcome.", call=parent.frame())
159159

160-
result = R_bounds_lp(x, y, bounds)
160+
result = ei_bounds_impl(x, y, total, contrast, bounds)
161161

162162
x_nm = colnames(x)
163163
y_nm = colnames(y)
164164

165-
tibble::new_tibble(
166-
list(
167-
.row = rep(seq_len(n), n_x * n_y),
168-
predictor = rep(rep(x_nm, each = n), n_y),
169-
outcome = rep(y_nm, each = n * n_x),
170-
wt = if (is.null(contrast)) rep(c(x * total), n_y) else NULL,
171-
min = c(result$min),
172-
max = c(result$max)
173-
),
174-
class = "ei_bounds"
175-
)
165+
if (isTRUE(global)) {
166+
wt = total * (matrix(1, 1, n_y) %x% x)
167+
wt = scale_cols(wt, 1 / colSums(wt))
168+
tibble::new_tibble(
169+
list(
170+
predictor = rep(x_nm, n_y),
171+
outcome = rep(y_nm, each = n_x),
172+
min = colSums(result$min * wt),
173+
max = colSums(result$max * wt)
174+
),
175+
class = "ei_bounds"
176+
)
177+
} else {
178+
tibble::new_tibble(
179+
list(
180+
.row = rep(seq_len(n), n_x * n_y),
181+
predictor = rep(rep(x_nm, each = n), n_y),
182+
outcome = rep(y_nm, each = n * n_x),
183+
wt = if (is.null(contrast)) rep(c(x * total), n_y) else NULL,
184+
min = c(result$min),
185+
max = c(result$max)
186+
),
187+
class = "ei_bounds"
188+
)
189+
}
190+
}
191+
192+
ei_bounds_impl <- function(x, y, total, contrast, bounds) {
193+
if (!is.null(contrast)) {
194+
cli_abort("{.arg contrast} is not yet implemented for {.fn ei_bounds}.")
195+
}
196+
197+
R_bounds_lp(x, y, as.double(bounds))
176198
}
177199

178200
#' @describeIn ei_bounds Format bounds as an array with dimensions

R/ei_local.R

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
#' corresponds to the observation index in the input. The `wt` column contains
6161
#' the product of the predictor variable and total for each observation.
6262
#' Taking a weighted average of the estimate against this column will produce
63-
#' a global estimate. It has class `ei_est_local`, supporting several methods.
63+
#' a global estimate. It has class `ei_est_local`.
6464
#'
6565
#' @inherit ei_est references
6666
#'
@@ -118,6 +118,7 @@ ei_est_local = function(
118118
if (is.null(sum_one) && all(bounds == c(0, 1))) {
119119
sum_one = isTRUE(all.equal(rowSums(y), rep(1, nrow(y))))
120120
}
121+
has_bounds = !identical(bounds, c(-Inf, Inf))
121122

122123
if (missing(total)) {
123124
if (inherits(data, "ei_spec")) {
@@ -145,11 +146,7 @@ ei_est_local = function(
145146
eta_proj = eta_proj %*% contr$m
146147
}
147148

148-
sds = if (!isFALSE(conf_level)) {
149-
local_sds(rl$x, b_cov, rl$vcov_u, regr$sigma2, contr$m, !is.null(contrast))
150-
} else {
151-
NULL
152-
}
149+
sds = local_sds(rl$x, b_cov, rl$vcov_u, regr$sigma2, contr$m, !is.null(contrast))
153150

154151
ests = list(
155152
.row = rep(seq_len(n), length(x_nm)),
@@ -169,20 +166,25 @@ ei_est_local = function(
169166
class = "ei_est_local"
170167
)
171168

169+
if (has_bounds && is.null(contrast)) {
170+
bb = ei_bounds_bridge(rl$x, y, total, contrast, bounds)
171+
fac = if (isTRUE(unimodal)) sqrt(1/12) else 0.5
172+
ests$std.error = pmin(ests$std.error, fac * (bb$max - bb$min))
173+
}
174+
172175
if (!isFALSE(conf_level)) {
173176
fac = if (isTRUE(unimodal)) 4 / 9 else 1
174177
chebyshev = sqrt(fac / (1 - conf_level))
175178
ests$conf.low = ests$estimate - chebyshev * ests$std.error
176179
ests$conf.high = ests$estimate + chebyshev * ests$std.error
177180

178-
if (is.null(contrast)) {
179-
ests$conf.low[ests$conf.low < bounds[1]] = bounds[1]
180-
ests$conf.high[ests$conf.high < bounds[1]] = bounds[1]
181-
ests$conf.low[ests$conf.low > bounds[2]] = bounds[2]
182-
ests$conf.high[ests$conf.high > bounds[2]] = bounds[2]
181+
if (has_bounds && is.null(contrast)) {
182+
ests$conf.low = pmax(ests$conf.low, bb$min)
183+
ests$conf.low = pmin(ests$conf.low, bb$max)
184+
ests$conf.high = pmax(ests$conf.high, bb$min)
185+
ests$conf.high = pmin(ests$conf.high, bb$max)
183186
}
184187
}
185-
ests$std.error = NULL
186188

187189
ests
188190
}

explore/local.R

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,13 @@ spec = ei_spec(elec_1968, c(vap_white, vap_black, vap_other), c(pres_dem_hum, pr
1515
m = ei_ridge(spec, bounds=F, sum_one=F)
1616
rr = ei_riesz(spec, penalty = m$penalty)
1717

18+
left_join(ei_est(m, rr, spec), ei_bounds(spec, bounds=0:1, global=TRUE)) |>
19+
mutate(
20+
within = min <= estimate & estimate <= max,
21+
se_max = (max - min) / sqrt(12),
22+
se_within = std.error <= se_max
23+
)
24+
1825
(colMeans(est_check_regr(m, spec, nrow(spec), NULL, 4, T)$vcov) |>
1926
matrix(3, 3) * m$sigma2[1]) |>
2027
diag() |>
@@ -40,8 +47,8 @@ ei_est_local(m, spec, b_cov = b_cov2, bounds=c(0, 1), sum_one = TRUE) |>
4047

4148

4249
ei_est_local(m, spec, b_cov = b_cov2, bounds = c(0, 1), sum_one = TRUE, conf_level = 0.95, regr_var = T) |>
43-
# dplyr::filter(.row == 592) |>
44-
dplyr::filter(.row == 1) |>
50+
dplyr::filter(.row == 592) |>
51+
# dplyr::filter(.row == 1) |>
4552
ggplot(aes(estimate, paste0(outcome, " | ", predictor))) +
4653
geom_pointrange(aes(xmin = conf.low, xmax = conf.high), size=0.25)
4754

man/ei_bounds.Rd

Lines changed: 29 additions & 16 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/ei_est_local.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)