diff --git a/Project.toml b/Project.toml index 3db052b..e0b814c 100644 --- a/Project.toml +++ b/Project.toml @@ -6,6 +6,8 @@ version = "2.0.0" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MixedModels = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316" +ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d" @@ -13,6 +15,7 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" [compat] MixedModels = "4" +ProgressMeter = "1" StatsBase = "0.33, 0.34" StatsModels = "0.7.3" Tables = "1" diff --git a/src/MixedModelsExtras.jl b/src/MixedModelsExtras.jl index 476e874..1f73319 100644 --- a/src/MixedModelsExtras.jl +++ b/src/MixedModelsExtras.jl @@ -2,11 +2,13 @@ module MixedModelsExtras using LinearAlgebra using MixedModels +using Random using Statistics using StatsBase using StatsModels using Tables +using MixedModels: replicate using StatsModels: termnames, vif, gvif export termnames, gvif, vif @@ -24,4 +26,7 @@ export partial_fitted include("shrinkage.jl") export shrinkagenorm, shrinkagetables +include("bootstrap.jl") +export bootstrap_lrt + end diff --git a/src/bootstrap.jl b/src/bootstrap.jl new file mode 100644 index 0000000..4f9e0a3 --- /dev/null +++ b/src/bootstrap.jl @@ -0,0 +1,90 @@ +""" + bootstrap_lrt(rng::AbstractRNG, n::Integer, m0::MixedModel, ms::MixedModel...; + optsum_overrides=(;), progress=true) + +Bootstrapped likelihood ratio test applied to a set of nested models. + +The first model is used to simulate `n` dataset replicates, where the ground truth is that +specified by the first model. Each of the other models is then refit to those +null data and the underlying distribution of deviance differences is then captured. +For final computation of the p-values, the observed deviance is differencce between the +original models is compared against this null distribution. + +!!! note + The precision of the resulting p-value cannot exceed ``1/n``. + +!!! warn + This method is **not** thread safe. For efficiency , the models are modified + during bootstrapping and the original fits are only restored at the end. + +!!! note + The nesting of the models is not checked. It is incumbent on the user + to check this. This differs from `StatsModels.lrtest` as nesting in + mixed models, especially in the random effects specification, may be non obvious. + +This functionality may be deprecated in the future in favor of `StatsModels.lrtest`. +""" +function bootstrap_lrt(rng::AbstractRNG, n::Integer, m0::MixedModel, ms::MixedModel...; + optsum_overrides=(;), progress=true) + y0 = copy(response(m0)) + ys = [copy(response(m)) for m in ms] + local models + local devs + local dofs + local dofs + local formulas + local dofdiffs + local devdiffs + local pvals + try + models = [m0; ms...] + dofs = dof.(models) + formulas = string.(formula.(models)) + ord = sortperm(dofs) + dofs = dofs[ord] + formulas = formulas[ord] + devs = deviance.(models)[ord] + dofdiffs = diff(dofs) + devdiffs = .-(diff(devs)) + + for (key, val) in pairs(optsum_overrides) + setfield!(m0.optsum, key, val) + for m in ms + setfield!(m.optsum, key, val) + end + end + nulldist = replicate(n; hide_progress=!progress) do + simulate!(rng, m0) + refit!(m0; progress=false) + for m in ms + refit!(m, response(m0); progress=false) + end + return [deviance(m) for m in models] + end + nulldist = stack(nulldist; dims=1) + nulldist = diff(nulldist; dims=2) + pvals = map(enumerate(devdiffs)) do (idx, dev) + if dev > 0 + mean(>=(dev), view(nulldist, :, idx)) + else + NaN + end + end + catch ex + rethrow(ex) + finally + # restore the original fits + if progress + @info "Bootstrapping complete, cleaning up..." + end + refit!(m0, y0; progress=false) + for (m, y) in zip(ms, ys) + refit!(m, y; progress=false) + end + end + return MixedModels.LikelihoodRatioTest(formulas, + (dof=dofs, deviance=devs), + (dofdiff=dofdiffs, deviancediff=devdiffs, + pvalues=pvals), + first(models) isa LinearMixedModel) +end diff --git a/test/bootstrap.jl b/test/bootstrap.jl new file mode 100644 index 0000000..e372a76 --- /dev/null +++ b/test/bootstrap.jl @@ -0,0 +1,18 @@ +using MixedModels +using MixedModelsExtras +using Random +using Test + +using MixedModels: likelihoodratiotest + +progress = false +sleepstudy = MixedModels.dataset(:sleepstudy) +fm0 = fit(MixedModel, @formula(reaction ~ 1 + days + (1 | subj)), + sleepstudy; progress) +fm1 = fit(MixedModel, @formula(reaction ~ 1 + days + (1 + days | subj)), + sleepstudy; progress) +fmzc = fit(MixedModel, @formula(reaction ~ 1 + days + zerocorr(1 + days | subj)), + sleepstudy; progress) + +lrt = likelihoodratiotest(fm0, fm1, fmzc) +boot = bootstrap_lrt(MersenneTwister(42), 1000, fm0, fm1, fmzc)