hyunjimoon
diff --git a/‎NAMESPACE‎
Lines changed: 9 additions & 0 deletions b/‎NAMESPACE‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎R/backend-cached.R‎
Lines changed: 1 addition & 1 deletion b/‎R/backend-cached.R‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/backend-mock.R‎
Lines changed: 1 addition & 0 deletions b/‎R/backend-mock.R‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎R/binary-calibration-tests.R‎
Lines changed: 85 additions & 9 deletions b/‎R/binary-calibration-tests.R‎
Lines changed: 85 additions & 9 deletions
diff --git a/‎R/plot.R‎
Lines changed: 2 additions & 0 deletions b/‎R/plot.R‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎R/var_attributes.R‎
Lines changed: 2 additions & 0 deletions b/‎R/var_attributes.R‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎man/SBC_backend_function.Rd‎
Lines changed: 3 additions & 2 deletions b/‎man/SBC_backend_function.Rd‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎man/SBC_backend_postprocess_cached_fit.Rd‎
Lines changed: 3 additions & 0 deletions b/‎man/SBC_backend_postprocess_cached_fit.Rd‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎man/SBC_generator_function.Rd‎
Lines changed: 4 additions & 1 deletion b/‎man/SBC_generator_function.Rd‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎man/binary-calibration-tests.Rd‎
Lines changed: 56 additions & 0 deletions b/‎man/binary-calibration-tests.Rd‎
Lines changed: 56 additions & 0 deletions
@@ -42,6 +42,7 @@ S3method(SBC_fit,SBC_backend_rstan_sample)
 S3method(SBC_fit_to_BFBayesFactor,SBC_fit_lmBF)
 S3method(SBC_fit_to_bridge_sampler,SBC_backend_brms)
 S3method(SBC_fit_to_bridge_sampler,SBC_backend_cached)
+S3method(SBC_fit_to_bridge_sampler,SBC_backend_mock)
 S3method(SBC_fit_to_bridge_sampler,SBC_backend_rstan_sample)
 S3method(SBC_fit_to_bridge_sampler,default)
 S3method(SBC_fit_to_diagnostics,CmdStanMCMC)
@@ -145,12 +146,15 @@ export(attribute_present)
 export(attribute_present_stats)
 export(binary_calibration_base)
 export(binary_calibration_from_stats)
+export(binary_miscalibration)
 export(binary_probabilities_from_stats)
 export(binary_var_attribute)
 export(bind_datasets)
 export(bind_derived_quantities)
 export(bind_generated_quantities)
 export(bind_results)
+export(brier_resampling_p)
+export(brier_resampling_test)
 export(cached_fit_filename)
 export(calculate_prior_sd)
 export(calculate_ranks_draws_matrix)
@@ -170,12 +174,17 @@ export(derived_quantities)
 export(draws_rvars_to_standata)
 export(draws_rvars_to_standata_single)
 export(empirical_coverage)
+export(gaffke_ci)
+export(gaffke_p)
+export(gaffke_test)
 export(generate_datasets)
 export(generated_quantities)
 export(get_diagnostic_messages)
 export(get_stats_for_submodel)
 export(hidden_var_attribute)
 export(inf_valid_var_attribute)
+export(miscalibration_resampling_p)
+export(miscalibration_resampling_test)
 export(na_valid_var_attribute)
 export(plot_binary_calibration)
 export(plot_binary_calibration_diff)
 
@@ -118,7 +118,7 @@ SBC_backend_postprocess_cached_fit <- function(backend, generated, fit) {
 }
 
 #' @export
-#' @rdname SBC_backend_preprocess_fit_for_cache
+#' @rdname SBC_backend_postprocess_cached_fit
 SBC_backend_preprocess_fit_for_cache <- function(backend, generated, fit) {
   UseMethod("SBC_backend_preprocess_fit_for_cache")
 }
 
@@ -39,6 +39,7 @@ SBC_fit.SBC_backend_mock <- function(backend, generated, cores) {
   backend$result
 }
 
+#' @export
 SBC_fit_to_bridge_sampler.SBC_backend_mock <- function(backend, ...)  {
   return(backend$bridgesampler)
 }
 
@@ -2,15 +2,38 @@ brier_score <- function(x, y) {
   sum((x-y)^2)
 }
 
-brier_resampling_p <- function(x, y, B = 10000) {
-  actual_brier <- brier_score(x, y)
-  brier_null <- replicate(B, {
-    yrep <- rbinom(length(x), size = 1, prob = x)
-    brier_score(x, yrep)
-  })
-  max(mean(actual_brier <= brier_null), 0.5/B)
-}
-
+#' @title Binary calibration tests
+#'
+#' @param x the predicted success probabilities
+#' @param y the actual observed outcomes (just 0 or 1)
+#' @param alpha the type I error rate for the test
+#' @param B number of bootstrap samples for the null distribution
+#'
+#' @description Dimitriadis et al. propose several tests based on
+#' comparing actual predictions to predictions when the probabilities are
+#' calibrated. This yields several possible tests of correctly calibrated
+#' predictions (i.e. that the expected proportion of true values matches the
+#' predicted probability).
+#'
+#' @details
+#' The `brier_` functions represent a test based on the Brier score, while
+#' the `miscalibration_` functions represent a test based on miscalibration.
+#' In both cases we evaluate the null distribution via bootstrapping.
+#'
+#' @returns `brier_resampling_test` and `miscalibration_resampling_test` return
+#' an object of class `htest`, `brier_resampling_p` and `miscalibration_resampling_p`
+#' return just the p-value (for easier use with automated workflows).
+#' `binary_miscalibration` computes just the miscalibration component using
+#' the PAV (pool adjacent violators) algorithm.
+#'
+#'
+#' @references T. Dimitriadis, T. Gneiting, & A.I. Jordan,
+#' Stable reliability diagrams for probabilistic classifiers,
+#' Proc. Natl. Acad. Sci. U.S.A. 118 (8) e2016191118,
+#' https://doi.org/10.1073/pnas.2016191118 (2021).
+#'
+#' @rdname binary-calibration-tests
+#' @export
 brier_resampling_test <- function(x, y, alpha = 0.05, B = 10000) {
   dname <- paste0("x = ", deparse1(substitute(x)), ", y = ", deparse1(substitute(y)))
 
@@ -35,6 +58,19 @@ brier_resampling_test <- function(x, y, alpha = 0.05, B = 10000) {
   class = "htest")
 }
 
+#' @rdname binary-calibration-tests
+#' @export
+brier_resampling_p <- function(x, y, B = 10000) {
+  actual_brier <- brier_score(x, y)  # observed statistic: sum((x - y)^2)
+  brier_null <- replicate(B, {  # B simulated scores form the null distribution
+    yrep <- rbinom(length(x), size = 1, prob = x)  # 0/1 outcomes drawn with success probabilities x
+    brier_score(x, yrep)
+  })
+  max(mean(actual_brier <= brier_null), 0.5/B)  # share of null scores >= observed; floored at 0.5/B so p is never 0
+}
+
+#' @rdname binary-calibration-tests
+#' @export
 binary_miscalibration <- function(x,y) {
   require_package_version("monotone", "0.1.2", "miscalibration computations")
   ord <- order(x, -y)
@@ -56,12 +92,15 @@ miscalibration_resampling_nulldist <- function(x,y, B = 1000) {
   })
 }
 
+#' @rdname binary-calibration-tests
+#' @export
 miscalibration_resampling_p <- function(x,y, B = 10000) {
   actual_miscalibration <- binary_miscalibration(x,y)
   misc_null <- miscalibration_resampling_nulldist(x, y, B)
   max(mean(actual_miscalibration <= misc_null), 0.5/B)
 }
 
+#' @rdname binary-calibration-tests
 #' @export
 miscalibration_resampling_test <- function(x, y, alpha = 0.05, B = 10000) {
   dname <- paste0("x = ", deparse1(substitute(x)), ", y = ", deparse1(substitute(y)))
@@ -110,6 +149,8 @@ gaffke_ci_from_m <- function(m, alpha = 0.05) {
   ))
 }
 
+#' @rdname gaffke_test
+#' @export
 gaffke_ci <- function(probs, B = 10000, alpha = 0.05) {
   m <- gaffke_m(probs, B, alpha)
   gaffke_ci_from_m(m, alpha)
@@ -140,6 +181,8 @@ gaffke_p_from_m <- function(m, mu, B, alternative = c("two.sided", "less", "grea
   }
 }
 
+#' @rdname gaffke_test
+#' @export
 gaffke_p <- function(probs, mu = 0.5, alpha = 0.05, B = 10000, alternative = c("two.sided", "less", "greater")) {
   alternative <- match.arg(alternative)
 
@@ -148,13 +191,46 @@ gaffke_p <- function(probs, mu = 0.5, alpha = 0.05, B = 10000, alternative = c("
 }
 
 #' Non-parametric test for the mean of a bounded variable.
+#'
+#' @param x a vector of observed values
+#' @param mu the mean under null hypothesis
+#' @param alpha the level of the test
+#' @param lb the lower bound for `x`
+#' @param ub the upper bound for `x`
+#' @param B number of bootstrap samples for the null distribution
+#' @param alternative the alternative for the test.
+#'
+#' @details The test is expected to be valid for any bounded distribution without further
+#' assumptions. The test has been proven valid only for special cases but
+#' no counterexample is known despite some efforts in the literature to find
+#' one.
+#'
+#' @description Test a null hypothesis about the mean of i.i.d. samples.
+#' The test is based on Gaffke 2005, though a more detailed analysis and
+#' exposition can be found in Learned-Miller & Thomas 2020.
+#'
+#' @returns `gaffke_test` returns an object of class `htest`, `gaffke_p` and
+#' `gaffke_ci` return just the p-value / CI as numeric for easier use in batch
+#' workflows.
+#'
+#' @references Gaffke, N. (2005).
+#' “Three test statistics for a nonparametric one-sided hypothesis on the mean
+#' of a nonnegative variable.” Mathematical Methods of Statistics, 14(4): 451–467.
+#'
+#' Learned-Miller, E. and Thomas, P. S. (2020).
+#' “A New Confidence Interval for the Mean of a Bounded Random Variable.”
+#' https://arxiv.org/abs/1905.06208
+#'
+#' @rdname gaffke_test
 #' @export
 gaffke_test <- function(x, mu = 0.5, alpha = 0.05, lb = 0, ub = 1, B = 10000, alternative = c("two.sided", "less", "greater")) {
   dname <- deparse1(substitute(x))
   alternative <- match.arg(alternative)
 
   stopifnot(length(lb) == 1)
   stopifnot(length(ub) == 1)
+  stopifnot(is.finite(lb))
+  stopifnot(is.finite(ub))
   stopifnot(all(x >= lb))
   stopifnot(all(x <= ub))
   stopifnot(length(B) == 1 && B > 1)
 
@@ -607,6 +607,7 @@ plot_contraction.SBC_results <- function(x, prior_sd, variables = NULL, scale =
 #' @param show_hidden Show variables marked with [hidden_var_attribute()]
 #'    (by default, those are not shown, available only when `x` is a data.frame)
 #' @export
+#' @rdname plot_contraction
 plot_contraction.data.frame <- function(x, prior_sd, variables = NULL, scale = "sd", alpha = 0.8, show_hidden = FALSE, parameters = NULL) {
   if(!is.null(parameters)) {
     warning("The `parameters` argument is deprecated use `variables` instead.")
@@ -700,6 +701,7 @@ plot_sim_estimated.SBC_results <- function(x, variables = NULL, estimate = "mean
 #' @param show_hidden Show variables marked with [hidden_var_attribute()]
 #'    (by default, those are not shown, available only when `x` is a data.frame)
 #' @export
+#' @rdname plot_sim_estimated
 plot_sim_estimated.data.frame <- function(x, variables = NULL, estimate = "mean",
                                           uncertainty = c("q5", "q95"),
                                           alpha = NULL,
 
@@ -171,6 +171,8 @@ var_attributes_to_attributes_column <- function(var_attr, variables) {
 }
 
 
+#' Combine multiple sets of variable attributes.
+#'
 #' @details
 #' It is currently by design that multiple copies of an attribute are kept
 #'
Original file line number	Diff line number	Diff line change
`@@ -118,7 +118,7 @@ SBC_backend_postprocess_cached_fit <- function(backend, generated, fit) {`
`118`	`118`	`}`
`119`	`119`
`120`	`120`	`#' @export`
`121`		`-#' @rdname SBC_backend_preprocess_fit_for_cache`
	`121`	`+#' @rdname SBC_backend_postprocess_cached_fit`
`122`	`122`	`SBC_backend_preprocess_fit_for_cache <- function(backend, generated, fit) {`
`123`	`123`	`UseMethod("SBC_backend_preprocess_fit_for_cache")`
`124`	`124`	`}`
Original file line number	Diff line number	Diff line change
`@@ -39,6 +39,7 @@ SBC_fit.SBC_backend_mock <- function(backend, generated, cores) {`
`39`	`39`	`backend$result`
`40`	`40`	`}`
`41`	`41`
	`42`	`+#' @export`
`42`	`43`	`SBC_fit_to_bridge_sampler.SBC_backend_mock <- function(backend, ...) {`
`43`	`44`	`return(backend$bridgesampler)`
`44`	`45`	`}`
Original file line number	Diff line number	Diff line change
`@@ -171,6 +171,8 @@ var_attributes_to_attributes_column <- function(var_attr, variables) {`
`171`	`171`	`}`
`172`	`172`
`173`	`173`
	`174`	`+#' Combine multiple sets of variable attributes.`
	`175`	`+#'`
`174`	`176`	`#' @details`
`175`	`177`	`#' It is currently by design that multiple copies of an attribute are kept`
`176`	`178`	`#'`