|
| 1 | +#' Two-Sample Test for Persistence Homology Data |
| 2 | +#' |
| 3 | +#' This function performs a two-sample test for persistence homology data using |
| 4 | +#' the theory of permutation hypothesis testing. The input data can take on |
| 5 | +#' various forms: |
| 6 | +#' - A persistence set, which is a collection of persistence diagrams. |
| 7 | +#' - A distance matrix, which is a pairwise distance matrix between persistence |
| 8 | +#' diagrams. |
| 9 | +#' - One of the PH vectorizations available in the [{tdarec}]() package. |
| 10 | +#' |
| 11 | +#' @param x An object of class `persistence_set` typically produced by |
| 12 | +#' [`phutil::as_persistence_set()`] or of class `dist` typically produced by |
| 13 | +#' [`phutil::bottleneck_pairwise_distances()`] or |
| 14 | +#' [`phutil::wasserstein_pairwise_distances()`]. If `x` is a persistence set, |
| 15 | +#' then `y` must be either a vector of two integers (sample sizes) or another |
| 16 | +#' persistence set. If `x` is a distance matrix, then `y` must be a vector of |
| 17 | +#' two integers (sample sizes). |
| 18 | +#' @param y An object of class `persistence_set` typically produced by |
| 19 | +#' [`phutil::as_persistence_set()`] or a vector of two integers. If `x` is a |
| 20 | +#' persistence set, then `y` must be either a vector of two integers (sample |
| 21 | +#' sizes) or another persistence set. If `x` is a distance matrix, then `y` |
| 22 | +#' must be a vector of two integers (sample sizes). |
| 23 | +#' @param dimension An integer value specifying the homology dimension to use. |
| 24 | +#' Defaults to `0L`, which corresponds to the 0-dimensional homology. |
| 25 | +#' @param p An integer value specifying the p-norm to use for the Wasserstein |
| 26 | +#' distance. Defaults to `2L`, which corresponds to the Euclidean distance. If |
| 27 | +#' `p` is set to `Inf`, then the Bottleneck distance is used. |
| 28 | +#' @param ncores An integer value specifying the number of cores to use when |
| 29 | +#' computing the pairwise distance matrix between all combined persistence |
| 30 | +#' diagrams. Defaults to `1L`, which means that the computation is done |
| 31 | +#' sequentially. |
| 32 | +#' @param B An integer value specifying the number of permutations to use for |
| 33 | +#' the permutation hypothesis test. Defaults to `1000L`. |
| 34 | +#' @param npc A string specifying the non-parametric combination method to use. |
| 35 | +#' Choices are either `"tippett"` (default) or `"fisher"`. The former |
| 36 | +#' corresponds to the Tippet's method, while the latter corresponds to |
| 37 | +#' Fisher's method. |
| 38 | +#' @param verbose A boolean value indicating whether to print some information |
| 39 | +#' about the progress of the computation. Defaults to `FALSE`. |
| 40 | +#' |
| 41 | +#' @returns A p-value from the two-sample test where the null hypothesis is that |
| 42 | +#' the two samples come from the same distribution. |
| 43 | +#' |
| 44 | +#' @export |
| 45 | +#' @examples |
| 46 | +#' two_sample_test(trefoils1[1:5], trefoils2[1:5], B = 100L) |
| 47 | +#' two_sample_test(trefoils1[1:5], archspirals[1:5], B = 100L) |
| 48 | +two_sample_test <- function( |
| 49 | + x, |
| 50 | + y, |
| 51 | + dimension = 0L, |
| 52 | + p = 2L, |
| 53 | + ncores = 1L, |
| 54 | + B = 1000L, |
| 55 | + npc = "tippett", |
| 56 | + verbose = FALSE |
| 57 | +) { |
| 58 | + if (verbose) cli::cli_alert_info("Parsing inputs...") |
| 59 | + l <- parse_inputs( |
| 60 | + x = x, |
| 61 | + y = y, |
| 62 | + dimension = dimension, |
| 63 | + p = p, |
| 64 | + ncores = ncores |
| 65 | + ) |
| 66 | + D <- l$D |
| 67 | + sample_sizes <- l$sample_sizes |
| 68 | + if (verbose) cli::cli_alert_info("Setting up the plausibility function...") |
| 69 | + # We could use alternative statistics for PH vectorizations |
| 70 | + pf <- flipr::PlausibilityFunction$new( |
| 71 | + null_spec = null_spec, |
| 72 | + stat_functions = list(flipr::stat_t_ip, flipr::stat_f_ip), |
| 73 | + stat_assignments = list(mean = 1, sd = 2), |
| 74 | + D, |
| 75 | + sample_sizes[1], |
| 76 | + seed = 1234 |
| 77 | + ) |
| 78 | + pf$alternative <- "right_tail" |
| 79 | + pf$nperms <- B |
| 80 | + pf$aggregator <- npc |
| 81 | + if (verbose) cli::cli_alert_info("Calculating the p-value...") |
| 82 | + pf$get_value(c(0, 1)) |
| 83 | +} |
0 commit comments