#' rich basis transformation of the covariates and predictors; a missing
#' `preproc` will lead to a warning.
#'
#' The test is a Kennedy-Cade (1996) style permutation test on a Wald statistic
#' for the coefficients not included in the "reduced" model that would be fit
#' by [ei_ridge()].
#' The test is carried out by fitting a regression on a fully basis-expanded
#' combination of covariates and predictors, and calculating a Wald statistic
#' for the coefficients beyond those in the reduced model.
#'
#' @param spec An `ei_spec` object created with [ei_spec()]. The object
#'   should use the `preproc` argument to specify a rich basis expansion of the
#'   covariates and predictors.
#' @inheritParams ei_ridge
#' @param iter The number of permutations to use when estimating the null
#'   distribution. Ignored when `use_chisq = TRUE`.
#' @param use_chisq If `TRUE`, use the asymptotic chi-squared distribution for
#'   the Wald test statistic instead of conducting a permutation test. Only
#'   appropriate for larger sample sizes (Helwig 2022 recommends at least 200
#'   when a single predictor is used).
#'
#' @returns A tibble with one row per outcome variable and columns describing
#'   the test results. The `p.value` column contains the p-values for the test.
#'   P-values are not adjusted by default; pass them to [stats::p.adjust()] if
#'   desired.
#'
#' @references
#' Helwig, N. E. (2022). Robust Permutation Tests for Penalized Splines.
#' _Stats_, 5(3), 916-933.
#'
#' Kennedy, P. E., & Cade, B. S. (1996). Randomization tests for multiple
#' regression. _Communications in Statistics-Simulation and Computation_,
#' 25(4), 923-936.
#'
#' McCartan, C., & Kuriwaki, S. (2025+). Identification and semiparametric
#' estimation of conditional means from aggregate data.
#' Working paper [arXiv:2509.20194](https://arxiv.org/abs/2509.20194).
#'
#' @examples
#' data(elec_1968)
#'
#' # basis expansion: poly() with degree=2 not recommended in practice
#' preproc = if (requireNamespace("bases", quietly = TRUE)) {
#'     ~ bases::b_bart(.x, trees = 100)
#' } else {
#'     ~ poly(as.matrix(.x), degree = 2, simple = TRUE)
#' }
#'
#' spec = ei_spec(
#'     data = elec_1968,
#'     predictors = vap_white:vap_other,
#'     outcome = pres_dem_hum:pres_abs,
#'     total = pres_total,
#'     covariates = c(pop_city:pop_rural, farm:educ_coll, starts_with("inc_")),
#'     preproc = preproc
#' )
#'
#' ei_test_car(spec, iter = 19) # use a larger number in practice
#'
#' @export
ei_test_car <- function(spec, weights, iter = 1000, use_chisq = FALSE) {
    validate_ei_spec(spec)
    n = nrow(spec)
    x_col = attr(spec, "ei_x")
    z_col = attr(spec, "ei_z")
    x = spec[, x_col, drop = FALSE]
    z_proc = attr(spec, "ei_z_proc")

    # Reduced design: raw predictors crossed with processed covariates.
    # Full design: the user-supplied basis expansion of predictors + covariates.
    int_scale = 1e5
    xz0 = row_kronecker(as.matrix(x), z_proc, int_scale)
    xzf = run_preproc(spec, z_col = c(x_col, z_col))

    if (missing(weights)) {
        weights = rep(1, n)
    } else {
        weights = eval_tidy(enquo(weights), spec)
    }
    # Normalize so weights have mean 1; sqrt is applied to design rows below
    sqrt_w = sqrt(weights / mean(weights))

    y = as.matrix(spec[, attr(spec, "ei_y"), drop = FALSE])
    n_y = ncol(y)

    # First, residualize the outcomes and the full design against the reduced
    # (ridge-penalized) design xz0, using its hat matrix H0.
    udv0 = svd(xz0 * sqrt_w)
    fit0 = ridge_auto(udv0, y, sqrt_w, vcov = FALSE)
    pen = fit0$penalty
    d_pen_h = udv0$d^2 / (udv0$d^2 + pen)
    H0 = tcrossprod(scale_cols(udv0$u, d_pen_h), udv0$u)
    res = y - H0 %*% y
    udv = svd((xzf - H0 %*% xzf) * sqrt_w)

    # Moore-Penrose pseudo-inverse of a symmetric PSD matrix; eigenvalues at
    # or below tolerance are truncated to zero (not merely left un-inverted),
    # and the numerical rank is attached as an attribute.
    pinv_sym = function(M) {
        eig = eigen(M, symmetric = TRUE)
        rank = sum(eig$values > 1e-10)
        inv_vals = numeric(length(eig$values)) # null-space entries stay 0
        idx = seq_len(rank)
        inv_vals[idx] = 1 / eig$values[idx]
        out = tcrossprod(scale_cols(eig$vectors, inv_vals), eig$vectors)
        attr(out, "rank") = rank
        out
    }

    fit = ridge_svd(udv, res, sqrt_w, pen, vcov = TRUE)
    inv_vcov = pinv_sym(fit$vcov_u)

    # Wald statistic for each outcome column: coef' V^- coef / sigma^2
    calc_wald = function(fit) {
        colSums((inv_vcov %*% fit$coef) * fit$coef) / fit$sigma2
    }
    W0 = calc_wald(fit)

    if (!isTRUE(use_chisq)) {
        # NOTE: original condition `!is.numeric(iter) && iter > 0` could never
        # reject a bad positive value and errored on non-numeric input;
        # the intended check is "not a positive number".
        if (!is.numeric(iter) || length(iter) != 1 || iter < 1) {
            cli_abort("{.arg iter} must be a positive integer.")
        }

        # Null distribution: permute the reduced-model residuals and refit
        W = matrix(nrow = n_y, ncol = iter)
        pb = cli::cli_progress_bar("Running permutations", total = iter)
        for (i in seq_len(iter)) {
            res_p = res[sample.int(n), , drop = FALSE]
            fit_p = ridge_svd(udv, res_p, sqrt_w, pen, vcov = FALSE)
            W[, i] = calc_wald(fit_p)
            cli::cli_progress_update(id = pb)
        }
        cli::cli_progress_done(id = pb)
        # add-one correction keeps p-values in (0, 1]
        p_val = (rowSums(W >= W0) + 1) / (iter + 1)
    } else {
        p_val = pchisq(W0, df = attr(inv_vcov, "rank"), lower.tail = FALSE)
    }

    tibble::new_tibble(list(
        outcome = colnames(y),
        W = W0,
        df = attr(inv_vcov, "rank"),
        p.value = p_val
    ))
}