1111# '
1212# ' The test is a Kennedy-Cade (1996) style permutation test on a Wald statistic
1313# ' for the coefficients not included in the "reduced" model that would be fit
14- # ' by [ei_ridge()].
15- # ' The test is carried out by fitting a regression on a fully basis-expanded
16- # ' combination of covariates and predictors, and calculating a Wald statistic
17- # ' for the
14+ # ' by [ei_ridge()]. The test statistic is asymptotically chi-squared under the
15+ # ' null and may be anti-conservative in small samples, especially when the
16+ # ' dimensionality of the basis expansion is large.
1817# '
1918# ' @param spec An `ei_spec` object created with [ei_spec()]. The object
2019# ' should use the `preproc` argument to specify a rich basis expansion of the
2120# ' covariates and predictors.
2221# ' @inheritParams ei_ridge
2322# ' @param iter The number of permutations to use when estimating the null
24- # ' distribution. Ignored when `use_chisq = TRUE`.
23+ # ' distribution, including the original identity permutation.
24+ # ' Ignored when `use_chisq = TRUE`.
25+ # ' @param undersmooth A value to divide the estimated ridge penalty by when
26+ # ' partialling out the partially linear component of the model. A larger
27+ # ' value will smooth the partially linear component less, which may improve
28+ # ' Type I error control in finite samples at the cost of power.
2529# ' @param use_chisq If `TRUE`, use the asymptotic chi-squared distribution for
2630# ' the Wald test statistic instead of conducting a permutation test. Only
2731# ' appropriate for larger sample sizes (Helwig 2022 recommends at least 200
2832# ' when a single predictor is used).
33+ # ' @param use_hc If `TRUE`, use a heteroskedasticity-consistent covariance estimate.
34+ # ' More computationally intensive, but may make a difference in small samples
35+ # ' or when there is substantial heteroskedasticity.
2936# '
3037# ' @returns A tibble with one row per outcome variable and columns describing
3138# ' the test results. The `p.value` column contains the p-values for the test.
6269# ' preproc = preproc
6370# ' )
6471# '
65- # ' ei_test_car(spec, iter=19 ) # use a larger number in practice
72+ # ' ei_test_car(spec, iter=20 ) # use a larger number in practice
6673# '
6774# ' @export
68- ei_test_car <- function (spec , weights , iter = 1000 , use_chisq = FALSE ) {
75+ ei_test_car <- function (spec , weights , iter = 1000 , undersmooth = 1.5 , use_chisq = nrow( spec ) > = 2000 , use_hc = FALSE ) {
6976 validate_ei_spec(spec )
77+ if (! has_preproc(spec )) {
78+ cli_warn(c(
79+ " {.arg preproc} was not specified in your {.cls ei_spec} object" ,
80+ " i" = " The {.fn ei_test_car} function relies on a rich basis expansion of the covariates and predictors." ,
81+ " x" = " Without a basis expansion in {.arg preproc}, the test will not be able to detect violations of the CAR assumption." ,
82+ " >" = " Consider basis expansions from the {.pkg bases} or {.pkg splines} package."
83+ ))
84+ }
85+
7086 n = nrow(spec )
7187 x_col = attr(spec , " ei_x" )
7288 z_col = attr(spec , " ei_z" )
@@ -91,7 +107,7 @@ ei_test_car <- function(spec, weights, iter = 1000, use_chisq = FALSE) {
91107 # first, residualize out xz0
92108 udv0 = svd(xz0 * sqrt_w )
93109 fit0 = ridge_auto(udv0 , y , sqrt_w , vcov = FALSE )
94- pen = fit0 $ penalty
110+ pen = fit0 $ penalty / undersmooth
95111 d_pen_h = udv0 $ d ^ 2 / (udv0 $ d ^ 2 + pen )
96112 H0 = tcrossprod(scale_cols(udv0 $ u , d_pen_h ), udv0 $ u )
97113 res = y - H0 %*% y
@@ -107,37 +123,64 @@ ei_test_car <- function(spec, weights, iter = 1000, use_chisq = FALSE) {
107123 out
108124 }
109125
126+ # observed test stat value
110127 fit = ridge_svd(udv , res , sqrt_w , pen , vcov = TRUE )
111- inv_vcov = pinv_sym(fit $ vcov_u )
112128
113- calc_wald = function (fit ) {
114- colSums((inv_vcov %*% fit $ coef ) * fit $ coef ) / fit $ sigma2
129+ # set up Wald stat calculation
130+ if (isTRUE(use_hc )) {
131+ xzr = (xzf - H0 %*% xzf )
132+ Sig_inv = solve((crossprod(xzr ) + diag(ncol(xzr ))* pen ) / n )
133+
134+ calc_wald = function (fit ) {
135+ W = numeric (n_y )
136+ df = numeric (n_y )
137+ for (j in seq_len(n_y )) {
138+ Omega = crossprod((res - fit $ fitted )[, j ] * xzr ) / n
139+ inv_vcov2 = pinv_sym(Sig_inv %*% Omega %*% Sig_inv )
140+ df [j ] = attr(inv_vcov2 , " rank" )
141+ W [j ] = n * crossprod(fit $ coef [, j ], inv_vcov2 %*% fit $ coef [, j ])
142+ }
143+ attr(W , " df" ) = df
144+ W
145+ }
146+
147+ W0 = calc_wald(fit )
148+ df = attr(W0 , " df" )
149+ } else {
150+ inv_vcov = pinv_sym(fit $ vcov_u )
151+ df = rep(attr(inv_vcov , " rank" ), n_y )
152+
153+ calc_wald = function (fit ) {
154+ colSums(fit $ coef * (inv_vcov %*% fit $ coef )) / fit $ sigma2
155+ }
156+
157+ W0 = calc_wald(fit )
115158 }
116- W0 = calc_wald(fit )
117159
118160 if (! isTRUE(use_chisq )) {
119- if (! is.numeric(iter ) && iter > 0 ) {
161+ if (! is.numeric(iter ) && iter > 1 ) {
120162 cli_abort(" {.arg iter} must be a positive integer." )
121163 }
122164
123165 W = matrix (nrow = ncol(y ), ncol = iter )
166+ W [, 1 ] = W0
124167 pb = cli :: cli_progress_bar(" Running permutations" , total = iter )
125- for (i in seq_len( iter )) {
168+ for (i in seq( 2 , iter , 1 )) {
126169 res_p = res [sample.int(n ), , drop = FALSE ]
127170 fit_p = ridge_svd(udv , res_p , sqrt_w , pen , vcov = FALSE )
128171 W [, i ] = calc_wald(fit_p )
129172 cli :: cli_progress_update(id = pb )
130173 }
131174 cli :: cli_progress_done(id = pb )
132- p_val = ( rowSums(W > = W0 ) + 1 ) / ( iter + 1 )
175+ p_val = rowSums(W > = W0 ) / iter
133176 } else {
134- p_val = pchisq(W0 , df = attr( inv_vcov , " rank " ) , lower.tail = FALSE )
177+ p_val = pchisq(W0 , df = df , lower.tail = FALSE )
135178 }
136179
137180 tibble :: new_tibble(list (
138181 outcome = colnames(y ),
139182 W = W0 ,
140- df = attr( inv_vcov , " rank " ) ,
183+ df = df ,
141184 p.value = p_val
142185 ))
143186}
0 commit comments