-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathadd_ci_negbin.R
164 lines (145 loc) · 5.85 KB
/
add_ci_negbin.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
## Copyright (C) 2017 Institute for Defense Analyses
##
## This file is part of ciTools.
##
## ciTools is free software: you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## ciTools is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with ciTools. If not, see <http://www.gnu.org/licenses/>.
#' Confidence Intervals for Negative Binomial Linear Model Predictions
#'
#' This function is one of the methods for \code{add_ci}, and is
#' called automatically when \code{add_ci} is used on a \code{fit} of
#' class \code{negbin}.
#'
#' The default link function is log-link. Confidence Intervals are
#' determined by making an interval on the scale of the linear
#' predictor, then applying the inverse link function from the model
#' fit to transform the linear level confidence intervals to the
#' response level. Alternatively, bootstrap confidence intervals may
#' be formed. The bootstrap intervals are formed by first resampling
#' cases from the data frame used to calculate \code{fit}, then bias
#' corrected and accelerated intervals are calculated. See
#' \code{boot::boot.ci} for more details.
#'
#' @param df A data frame of new data.
#' @param fit An object of class \code{negbin}.
#' @param alpha A real number between 0 and 1. Controls the confidence
#' level of the interval estimates.
#' @param names \code{NULL} or character vector of length two. If
#' \code{NULL}, confidence bounds automatically will be named by
#' \code{add_ci}, otherwise, the lower confidence bound will be
#' named \code{names[1]} and the upper confidence bound will be
#' named \code{names[2]}.
#' @param yhatName A string. Name of the vector of predictions made
#' for each observation in df
#' @param type A string. Must be \code{type = "parametric"} or
#' \code{type = "boot"}. \code{type} determines the method used to
#' compute the confidence intervals.
#' @param response A logical. The default is \code{TRUE}. If
#' \code{TRUE}, the confidence intervals will be determined for
#' the expected response; if \code{FALSE}, confidence intervals
#' will be made on the scale of the linear predictor.
#' @param nSims An integer. Number of simulations to perform if the
#' bootstrap method is used.
#' @param ... Additional arguments.
#'
#' @return A dataframe, \code{df}, with predicted values, upper and lower
#' confidence bounds attached.
#'
#' @seealso \code{\link{add_pi.negbin}} for prediction intervals for
#' \code{negbin} objects, \code{\link{add_probs.negbin}} for conditional
#' probabilities of \code{negbin} objects, and
#' \code{\link{add_quantile.negbin}} for response quantiles of
#' \code{negbin} objects.
#'
#' @examples
#' x1 <- rnorm(300, mean = 1)
#' y <- MASS::rnegbin(n = 300, mu = exp(5 + 0.5 * x1), theta = 2)
#' df <- data.frame(x1 = x1, y = y)
#' fit <- MASS::glm.nb(y ~ x1, data = df)
#' df <- df[sample(100),]
#' add_ci(df, fit, names = c("lcb", "ucb"))
#' \dontrun{
#' add_ci(df, fit, names = c("lcb", "ucb"), type = "boot")
#' }
#' @export
add_ci.negbin <- function(df, fit, alpha = 0.05, names = NULL, yhatName = "pred",
response = TRUE, type = "parametric", nSims = 2000, ...){
if (!(fit$converged))
warning ("coverage probabilities may be inaccurate if the model did not converge")
if (is.null(names)){
names[1] <- paste("LCB", alpha/2, sep = "")
names[2] <- paste("UCB", 1 - alpha/2, sep = "")
}
if ((names[1] %in% colnames(df))) {
warning ("These CIs may have already been appended to your dataframe. Overwriting.")
}
if (type == "boot")
boot_ci_negbin(df, fit, alpha, names, yhatName, response, nSims)
else if (type == "parametric")
parametric_ci_negbin(df, fit, alpha, names, yhatName, response)
else
stop("Incorrect interval type specified!")
}
parametric_ci_negbin <- function(df, fit, alpha, names, yhatName, response){
out <- predict(fit, df, se.fit = TRUE, type = "link")
crit_val <- qt(p = 1 - alpha/2, df = fit$df.residual)
inverselink <- fit$family$linkinv
if (response){
pred <- inverselink(out$fit)
upr <- inverselink(out$fit + crit_val * out$se.fit)
lwr <- inverselink(out$fit - crit_val * out$se.fit)
if(fit$family$link %in% c("inverse", "1/mu^2")){
upr1 <- lwr
lwr <- upr
upr <- upr1
}
}else{
upr <- out$fit + crit_val * out$se.fit
lwr <- out$fit - crit_val * out$se.fit
pred <- out$fit
}
if(is.null(df[[yhatName]]))
df[[yhatName]] <- pred
df[[names[1]]] <- lwr
df[[names[2]]] <- upr
data.frame(df)
}
boot_ci_negbin <- function(df, fit, alpha, names, yhatName, response, nSims){
if (response){
lvl <- "response"
}
else{
lvl <- "link"
}
out <- predict(fit, df, type = lvl)
boot_obj <- boot(data = fit$model,
statistic = boot_fit,
R = nSims,
fit = fit,
df = df,
lvl = lvl)
temp_mat <- matrix(0, ncol = 2, nrow = NROW(df))
for (i in 1:NROW(df)){
temp_mat[i,] <- boot.ci(boot_obj,
type = "bca",
conf = 1 - alpha,
index = i)$bca[4:5]
}
lwr <- temp_mat[,1]
upr <- temp_mat[,2]
if(is.null(df[[yhatName]]))
df[[yhatName]] <- out
df[[names[1]]] <- lwr
df[[names[2]]] <- upr
data.frame(df)
}