CoryMcCartan
diff --git a/‎R/ei_est.R‎
Lines changed: 6 additions & 0 deletions b/‎R/ei_est.R‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎R/ei_local.R‎
Lines changed: 108 additions & 42 deletions b/‎R/ei_local.R‎
Lines changed: 108 additions & 42 deletions
diff --git a/‎R/ei_ridge.R‎
Lines changed: 12 additions & 5 deletions b/‎R/ei_ridge.R‎
Lines changed: 12 additions & 5 deletions
diff --git a/‎R/rr_impl.R‎
Lines changed: 16 additions & 6 deletions b/‎R/rr_impl.R‎
Lines changed: 16 additions & 6 deletions
diff --git a/‎explore/local.R‎
Lines changed: 28 additions & 13 deletions b/‎explore/local.R‎
Lines changed: 28 additions & 13 deletions
diff --git a/‎man/ei-impl.Rd‎
Lines changed: 2 additions & 1 deletion b/‎man/ei-impl.Rd‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎man/ei_est_local.Rd‎
Lines changed: 9 additions & 2 deletions b/‎man/ei_est_local.Rd‎
Lines changed: 9 additions & 2 deletions
diff --git a/‎man/ei_ridge.Rd‎
Lines changed: 2 additions & 1 deletion b/‎man/ei_ridge.Rd‎
Lines changed: 2 additions & 1 deletion
@@ -327,6 +327,12 @@ est_check_regr = function(regr, data, n, xcols, n_y, sd = FALSE) {
 
     preds = list()
     sds = if (sd) matrix(nrow = n, ncol = n_x^2) else NULL
+    if (sd && is.null(regr$vcov_u)) {
+        cli_abort(c(
+            "Standard errors not available for this {.arg regr} object.",
+            ">"="Call {.fn ei_ridge} with {.arg vcov = TRUE} to enable."
+        ), call=parent.frame())
+    }
     for (group in seq_along(xcols)) {
         use = c(group, n_x + p*(group-1) + seq_len(p))
         preds[[xcols[group]]] = z %*% regr$coef[use, ]
 
@@ -25,7 +25,9 @@
 #'   violations of the accounting identity. If `bounds = NULL`, they will be
 #'   inferred from the outcome variable: if it is contained within \eqn{[0, 1]},
 #'   for instance, then the bounds will be `c(0, 1)`. Setting `bounds = FALSE`
-#'   forces unbounded estimates.
+#'   forces unbounded estimates. The default uses the bounds stored in
+#'   `regr$blueprint`, if available, or infers them from the outcome otherwise.
+#' @inheritParams ei_ridge
 #' @param conf_level A numeric specifying the level for confidence intervals.
 #'   If `FALSE` (the default), no confidence intervals are calculated.
 #'   For `regr` arguments from [ei_wrap_model()], confidence intervals will not
@@ -50,24 +52,41 @@
 #' suppressWarnings(ei_est_local(m, spec, bounds=c(0.01, 0.2)))
 #' }
 #' @export
-ei_est_local = function(regr, data, r_cov=NULL, bounds=NULL, conf_level=FALSE, unimodal=TRUE) {
+ei_est_local = function(
+    regr,
+    data,
+    r_cov = NULL,
+    bounds = regr$blueprint$bounds,
+    sum_one = NULL,
+    conf_level = FALSE,
+    unimodal = TRUE
+) {
     y = est_check_outcome(regr, data, NULL)
     n = nrow(y)
     n_y = ncol(y)
 
-    cli_warn("Local confidence intervals do not yet incorporate prediction uncertainty.",
-             .frequency="regularly", .frequency_id="ei_est_local_temp")
+    cli_warn(
+        "Local confidence intervals do not yet incorporate prediction uncertainty.",
+        .frequency = "regularly",
+        .frequency_id = "ei_est_local_temp"
+    )
 
     rl = est_check_regr(regr, data, n, NULL, n_y, sd = TRUE)
     rl <<- rl
     n_x = length(rl$preds)
     if (inherits(regr, "ei_wrapped") && !isFALSE(conf_level)) {
-        cli_warn("Local confidence intervals with wrapped model objects
+        cli_warn(
+            "Local confidence intervals with wrapped model objects
                   do not incorporate prediction uncertainty.",
-                 .frequency="regularly", .frequency_id="ei_est_local")
+            .frequency = "regularly",
+            .frequency_id = "ei_est_local"
+        )
     }
 
-    bounds = ei_bounds(bounds, y)
+    bounds = ei_bounds(bounds, y, clamp = 1e-8)
+    if (is.null(sum_one) && all(bounds == c(0, 1))) {
+        sum_one = isTRUE(all.equal(rowSums(y), rep(1, nrow(y))))
+    }
 
     # Process r_cov; TODO: heteroskedastic model
     if (is.null(r_cov)) {
@@ -86,25 +105,32 @@ ei_est_local = function(regr, data, r_cov=NULL, bounds=NULL, conf_level=FALSE, u
     r_cov = lapply(r_cov, chol)
 
     ests = list()
+    # eta: one column per predictor-outcome pair, ordered (x1y1, x1y2, ..., x2y1, ...)
+    eta = do.call(cbind, rl$preds)
+    eps = y - rl$yhat # residuals, one column per outcome
+    R_cov = diag(n_x * n_y) # joint factor passed to local_proj; blocks filled below
     for (k in seq_len(n_y)) {
-        eta = vapply(rl$preds, function(p) p[, k], numeric(n))
-        eta <<- eta
-        eta_proj = local_proj(rl$x, eta, y[, k] - rl$yhat[, k], r_cov[[k]], bounds)
-        eta_proj <<- eta_proj
-
-        ests[[k]] = tibble::new_tibble(list(
-            .row = rep(seq_len(n), n_x),
-            predictor = rep(colnames(rl$x), each=n),
-            outcome = rep(colnames(y)[k], n * n_x),
-            estimate = c(eta_proj),
-            std.error = NA #sqrt(c(proj[[2]]))
-        ), class="ei_est_local")
+        idx = (k - 1) * n_x + seq_len(n_x) # NOTE(review): outcome-major blocks, but eta's columns are predictor-major; confirm ordering
+        R_cov[idx, idx] = r_cov[[k]]
     }
-
-    ests = do.call(rbind, ests)
+    eta_proj = local_proj(rl$x, eta, eps, R_cov, bounds, sum_one) # all outcomes projected jointly
+    ests = lapply(seq_len(n_y), function(k) {
+        tibble::new_tibble(
+            list(
+                .row = rep(seq_len(n), n_x),
+                predictor = rep(colnames(rl$x), each = n),
+                outcome = rep(colnames(y)[k], n * n_x),
+                estimate = c(eta_proj[, k + seq(0, by=n_y, length.out=n_x)]), # columns of outcome k, one per predictor
+                std.error = NA #sqrt(c(proj[[2]]))
+            ),
+            class = "ei_est_local"
+        )
+    }) |>
+        do.call(rbind, args = _)
+    attr(ests, "proj_misses") = attr(eta_proj, "misses") # rows where local_proj fell back
 
     if (!isFALSE(conf_level)) {
-        fac = if (isTRUE(unimodal)) 4/9 else 1
+        fac = if (isTRUE(unimodal)) 4 / 9 else 1
         chebyshev = sqrt(fac / (1 - conf_level))
         ests$conf.low = ests$estimate - chebyshev * ests$std.error
         ests$conf.high = ests$estimate + chebyshev * ests$std.error
@@ -135,38 +161,78 @@ as.array.ei_est_local = function(x, ...) {
 
 # Solve QP to project estimates onto tomography plane and into bounds
 # Not the fastest possible implementation (pure C++ would be better), but fast enough
-local_proj = function(x, eta, eps, r_cov, bounds) {
+local_proj = function(x, eta, eps, r_cov, bounds, sum_one) { # returns eta + displacement, with a "misses" attribute
     n = nrow(eta)
     n_x = ncol(x)
-    eta_diff = matrix(nrow = n, ncol = n_x)
-
-    zeros = rep(0, n_x)
-    Amat = cbind(zeros)
-    b0 = cbind(eps)
+    n_y = ncol(eps)
+    sum_one = isTRUE(sum_one) # anything other than TRUE (incl. NULL) disables the constraint
+    eta_diff = matrix(nrow = n, ncol = n_x * n_y)
+
+    # avoid overflow: rescale so that crossprod(r_cov) has unit spectral norm
+    r_cov = r_cov / sqrt(norm(crossprod(r_cov), "2"))
+
+    # parameters are the displacement in each estimate
+    # (x1y1, x1y2, x1y3, x2y1, x2y2, x2y3, ...)
+    # minimize overall displacement st x-weighted displacement = residual
+    # and (optionally) bounds and sum-to-one constraints are satisfied
+    zeros = rep(0, n_x * n_y)
+    Amat = matrix(0, nrow = n_x * n_y, ncol = n_y * 2) # i-specific, filled later
+    b0 = cbind(eps, -eps) # right-hand sides for the +/- residual constraint pair
+    if (sum_one) {
+        if (n_y == 1 || all(bounds == c(-Inf, Inf))) {
+            cli_abort(
+                "Using {.arg sum_one} requires multiple bounded outcomes.",
+                call = parent.frame()
+            )
+        }
+        rs_mat = diag(n_x) %x% rep(1, n_y) # column j sums the n_y outcome entries of predictor j
+        Amat = cbind(rs_mat, Amat)
+        b0 = cbind(1 - eta %*% rs_mat, b0)
+    }
     if (!is.infinite(bounds[1])) {
-        Amat = cbind(Amat, diag(n_x))
+        Amat = cbind(Amat, diag(n_x * n_y))
         b0 = cbind(b0, bounds[1] - eta)
     }
     if (!is.infinite(bounds[2])) {
-        Amat = cbind(Amat, -diag(n_x))
+        Amat = cbind(Amat, -diag(n_x * n_y))
         b0 = cbind(b0, -bounds[2] + eta)
     }
 
+    idx_eps = sum_one * n_x + seq_len(2*n_y) # columns of Amat holding the residual constraints
+    patt_eps = cbind(diag(n_y), -diag(n_y))
+
+    constr_pt = function(Dmat, bvec, tol) {
+        bvec[idx_eps] = bvec[idx_eps] - tol # loosen both residual inequalities by tol
+        quadprog::solve.QP(
+            Dmat = Dmat, # distance metric
+            dvec = zeros,
+            Amat = Amat,
+            bvec = bvec,
+            meq = sum_one * n_x, # sum-to-one columns come first and are the only equalities
+            factorized = TRUE
+        )$solution
+    }
+
+    misses = integer(0)
     for (i in seq_len(n)) {
-        Amat[, 1] = x[i, ]
-        eta_diff[i, ] = tryCatch({
-            quadprog::solve.QP(
-                Dmat = r_cov,
-                dvec = zeros,
-                Amat = Amat,
-                bvec = b0[i, ],
-                meq = 1,
-                factorized = TRUE
-            )$solution
-        }, error = \(e) eps[i])
+        Amat[, idx_eps] = x[i, ] %x% patt_eps
+        tol = 1e-12
+        repeat {
+            ans = tryCatch(constr_pt(r_cov, b0[i, ], tol), error = \(e) NULL)
+            if (!is.null(ans)) break
+            if (tol > 0.005) {
+                misses = c(misses, i) # local assignment (not `<<-`) so the attribute below is populated
+                ans = rep(eps[i, ], n_x) # fallback: shift each estimate by its outcome's residual
+                break
+            }
+            tol = tol * 1000 # retry with a looser residual tolerance
+        }
+        eta_diff[i, ] = ans
     }
 
-    eta + eta_diff
+    out = eta + eta_diff
+    attr(out, "misses") = misses
+    out
 }
 
 local_basis = function(x) {
 
@@ -96,7 +96,8 @@
 #'   are `c(0, 1)` the outcome variables sum to 1.
 #' @param scale If `TRUE`, scale covariates `z` to have unit variance.
 #' @param vcov If `TRUE`, calculate and return the covariance matrix of the
-#'    estimated coefficients. Ignored when `bounds` are provided.
+#'    estimated coefficients. When `bounds` are provided, the covariance matrix
+#'    for the unbounded estimate is returned as a conservative approximation.
 #' @param ... Not currently used, but required for extensibility.
 #'
 #' @returns An `ei_ridge` object, which supports various [ridge-methods].
@@ -327,21 +328,27 @@ ei_ridge_impl <- function(x, y, z, weights=rep(1, nrow(x)),
 
     vcov = isTRUE(vcov)
     enforce = is.finite(bounds)
-    fit = if (!any(enforce)) { # unbounded
+    if (!any(enforce)) { # unbounded
         if (isTRUE(sum_one)) {
             cli_abort("{.fn ei_ridge} cannot enforce sum-to-one constraint when outcome is unbounded.")
         }
-        if (is.null(penalty)) {
+        fit = if (is.null(penalty)) {
             ridge_auto(udv, y, sqrt_w, vcov)
         } else {
             ridge_svd(udv, y, sqrt_w, penalty, vcov)
         }
     } else {
+        if (is.null(penalty) || vcov) {
+            unb_fit = ridge_auto(udv, y, sqrt_w, vcov)
+        }
         if (is.null(penalty)) {
-            penalty = ridge_auto(udv, y, sqrt_w, FALSE)$penalty
+            penalty = unb_fit$penalty
         }
 
-        ridge_bounds(xz, z, y, weights, bounds, sum_one, penalty)
+        fit = ridge_bounds(xz, z, y, weights, bounds, sum_one, penalty)
+        if (vcov) {
+            fit$vcov_u = unb_fit$vcov_u
+        }
     }
 
     rownames(fit$coef) = colnames(xz)
 
@@ -163,13 +163,23 @@ ridge_bounds <- function(xz, z, y, weights, bounds, sum_one=FALSE, penalty=0) {
         }
 
         # relax to inequality constraint if sum-to-one fails
-        fit <- tryCatch(
-            do_fit(n * n_x),
-            error = \(e_outer) {
-                cli_warn("Relaxing sum-to-one constraint to inequality to achieve feasible solution.", call=NULL)
-                tryCatch(do_fit(0), error = fit_err)
+        eq_constr = n * n_x
+        repeat {
+            fit = tryCatch(do_fit(eq_constr), error = \(e) NULL)
+            if (!is.null(fit)) break
+            if (eq_constr > 0) {
+                eq_constr = max(eq_constr - n, 0) # reduce by one group
+            } else {
+                fit_err()
+                break
             }
-        )
+        }
+        if (eq_constr < n * n_x) {
+            cli_warn(
+                "Relaxing {n * n_x - eq_constr} sum-to-one constraint{?s} to inequality to achieve feasible solution.",
+                call = NULL
+            )
+        }
         coefs = matrix(fit$solution, nrow = nrow(dvecs), ncol = ncol(dvecs))
     }
 
 
@@ -2,25 +2,40 @@ devtools::load_all(".")
 library(tidyverse)
 
 data(elec_1968)
-elec_1968$vap_nonwhite = 1 - elec_1968$vap_white
+elec_1968 = elec_1968 |>
+    mutate(vap_nonwhite = 1 - vap_white, pres_abs = pmax(1e-6, pres_abs)) |>
+    ei_proportions(pres_dem_hum, pres_rep_nix, pres_ind_wal, pres_abs, clamp = 1e-12) |>
+    select(-.total)
 
-spec = ei_spec(elec_1968, c(vap_white, vap_nonwhite), pres_ind_wal,
+spec = ei_spec(elec_1968, c(vap_white, vap_black, vap_other), c(pres_dem_hum, pres_rep_nix, pres_ind_wal, pres_abs),
                total = pres_total, covariates = c(state, pop_urban, pop_rural, educ_elem:educ_coll, farm, inc_00_03k:inc_25_99k))
 
-m = ei_ridge(spec)
+m = ei_ridge(spec, bounds=0:1, sum_one=F)
+m = ei_ridge(spec, bounds=F)
 rr = ei_riesz(spec, penalty = m$penalty)
 
 # mean(c(y - rowSums(eta * x)) * weights(rr)[, 2])
-wx = x * weights(spec) / rep(colMeans(x * weights(spec)), each=n)
-
-ei_est(m, data = spec)
-ei_est(m, rr, data = spec)
-eif = eta_proj * wx
-est = colMeans(eif)
-vcov = crossprod(shift_cols(eif, est)) / (n - 1)^2
-cbind(estimate=est, std.error=sqrt(diag(vcov)))
-
-ei_est_local(m, spec, conf_level = 0.95)
+# wx = x * weights(spec) / rep(colMeans(x * weights(spec)), each=n)
+
+ei_est(m, data = spec) |>
+    summarize(err = sum(estimate) - 1, .by = predictor)
+ei_est(m, rr, data = spec) |>
+    summarize(err = sum(estimate) - 1, .by = predictor)
+
+# eif = eta_proj * wx
+# est = colMeans(eif)
+# vcov = crossprod(shift_cols(eif, est)) / (n - 1)^2
+# cbind(estimate=est, std.error=sqrt(diag(vcov)))
+
+ei_est_local(m, spec, conf_level = 0.95, bounds=c(0, 1), sum_one = F) |>
+# ei_est_local(m, spec, conf_level = 0.95, bounds=F, sum_one = F) |>
+    (\(x) { print(attr(x, "proj_misses")); x })() |>
+    # dplyr::filter(estimate < -1e-6 | estimate > 1)
+    # print()
+    summarize(err = sum(estimate) - 1, .by = c(.row, predictor)) |>
+    # arrange(-err)
+    pull() |>
+    hist()
 
 k = 1
 n = nrow(spec)