pnnl
diff --git a/‎DESCRIPTION‎
Lines changed: 2 additions & 2 deletions b/‎DESCRIPTION‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎NAMESPACE‎
Lines changed: 1 addition & 0 deletions b/‎NAMESPACE‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎R/RcppExports.R‎
Lines changed: 47 additions & 17 deletions b/‎R/RcppExports.R‎
Lines changed: 47 additions & 17 deletions
diff --git a/‎R/utils.R‎
Lines changed: 20 additions & 68 deletions b/‎R/utils.R‎
Lines changed: 20 additions & 68 deletions
diff --git a/‎man/figures/README-figure-1.png‎
-1.26 KB b/‎man/figures/README-figure-1.png‎
-1.26 KB
diff --git a/‎man/figures/README-figure-2.png‎
461 Bytes b/‎man/figures/README-figure-2.png‎
461 Bytes
diff --git a/‎simulation/data/fast-ssGSEA_timing_results_BLAS.rds‎
-3 Bytes b/‎simulation/data/fast-ssGSEA_timing_results_BLAS.rds‎
-3 Bytes
diff --git a/‎simulation/data/fast-ssGSEA_timing_results_BLAS_for_comparison.rds‎
14 Bytes b/‎simulation/data/fast-ssGSEA_timing_results_BLAS_for_comparison.rds‎
14 Bytes
diff --git a/‎simulation/data/fast-ssGSEA_timing_results_OpenBLAS.rds‎
8 Bytes b/‎simulation/data/fast-ssGSEA_timing_results_OpenBLAS.rds‎
8 Bytes
diff --git a/‎simulation/data/fast-ssGSEA_timing_results_OpenBLAS_for_comparison.rds‎
-5 Bytes b/‎simulation/data/fast-ssGSEA_timing_results_OpenBLAS_for_comparison.rds‎
-5 Bytes
@@ -1,8 +1,8 @@
 Package: fast.ssgsea
 Type: Package
 Title: Fast Single-Sample Gene Set Enrichment Analysis (ssGSEA)
-Version: 0.1.0.9016
-Date: 2025-09-04
+Version: 0.1.0.9017
+Date: 2025-09-14
 Authors@R: 
       person(given = "Tyler", family = "Sagendorf", 
              email = "tyler.sagendorf@pnnl.gov", 
 
@@ -11,6 +11,7 @@ importFrom(data.table,chmatch)
 importFrom(data.table,data.table)
 importFrom(data.table,frank)
 importFrom(data.table,rbindlist)
+importFrom(data.table,setattr)
 importFrom(data.table,setorderv)
 importFrom(dqrng,dqsample.int)
 importFrom(dqrng,dqset.seed)
 
@@ -16,6 +16,31 @@
 #' @noRd
 NULL
 
+#' @title Extract Information About Permutation Enrichment Scores
+#'
+#' @param x sorted vector of true enrichment scores. Missing values not
+#'   allowed.
+#' @param y vector of permutation enrichment scores (not necessarily sorted).
+#'
+#' @returns A named list with 3 components, each a vector with the same length
+#'   as \code{x}:
+#'
+#' \describe{
+#'   \item{"n_same_sign_b"}{integer vector; the number of permutation ES in
+#'   \code{y} with the same sign as the corresponding ES in \code{x}.}
+#'   \item{"n_as_extreme_b"}{integer vector; the number of permutation ES in
+#'   \code{y} that were at least as extreme as the corresponding ES in
+#'   \code{x}. At most \code{"n_same_sign_b"}.}
+#'   \item{"sum_ES_perm_b"}{numeric vector; the absolute value of the sum of
+#'   the permutation ES in \code{y} that have the same sign as the
+#'   corresponding ES in \code{x}.}
+#' }
+#'
+#' @author Tyler Sagendorf
+#'
+#' @noRd
+NULL
+
 #' @title Fast Vector Indexing
 #'
 #' @param x a numeric or integer vecctor.
@@ -149,33 +174,38 @@ NULL
     .Call(`_fast_ssgsea_Rcpp_calcESPermCore`, alpha, Y_perm, R_perm, sumRanks_i, A_perm, theta_m_i, theta_w_i)
 }
 
-#' @title Extract Information About Permutation Enrichment Scores
+#' @title Extract Information from a Permutation Enrichment Score Matrix
 #'
-#' @param x sorted vector of true enrichment scores. Missing values not
-#'   allowed.
-#' @param y vector of permutation enrichment scores (not necessarily sorted).
-#'   All values may be missing.
+#' @description Extract information from a matrix of permutation enrichment
+#'   scores run as a single batch.
 #'
-#' @returns A named list with 3 components, each vectors with length
-#'   `length(x)`:
+#' @param ES_ls list of sorted true enrichment scores grouped by gene set size.
+#' @param ES_perm matrix of permutation ES. The number of rows is equal to the
+#'   length of \code{ES_ls}, while the number of columns is at most the total
+#'   number of permutations: more likely, it is a fraction of the total number
+#'   of permutations. See the \code{batch_size} parameter of
+#'   \code{\link{fast_ssgsea}} for more details.
+#'
+#' @returns A list of \code{data.table} objects, each with 3 columns:
 #'
 #' \describe{
-#'   \item{"n_same_sign_b"}{integer vector; the number of permutation ES in
-#'   \code{y} with the same sign as the corresponding ES in \code{x}.}
-#'   \item{"n_as_extreme_b"}{integer vector; the number of permutation ES in
-#'   \code{y} that were at least as extreme as the corresponding ES in
-#'   \code{x}. At most \code{NSameSign.b}.}
-#'   \item{"sum_ES_perm_b"}{numeric vector; the absolute value of the sum of
-#'   the permutation ES in \code{y} that have the same sign as the
-#'   corresponding ES in \code{x}.}
+#'   \item{"n_same_sign_b"}{integer; the number of permutation ES in each
+#'   row of \code{ES_perm} with the same sign as the corresponding ES in
+#'   \code{ES_ls}.}
+#'   \item{"n_as_extreme_b"}{integer; the number of permutation ES in
+#'   each row of \code{ES_perm} that were at least as extreme as the
+#'   corresponding ES in \code{ES_ls}. At most \code{"n_same_sign_b"}.}
+#'   \item{"sum_ES_perm_b"}{numeric; the sum of the absolute values of the
+#'   permutation ES that have the same sign as the corresponding ES in
+#'   \code{ES_ls}.}
 #' }
 #'
 #' @author Tyler Sagendorf
 #'
 #' @noRd
 #'
-.Rcpp_extractPermInfo <- function(x, y) {
-    .Call(`_fast_ssgsea_Rcpp_extractPermInfo`, x, y)
+.Rcpp_extractPermInfo <- function(ES_ls, ES_perm) {
+    .Call(`_fast_ssgsea_Rcpp_extractPermInfo`, ES_ls, ES_perm)
 }
 
 #' @title Generate a dense incidence matrix for permutation testing
 
@@ -142,7 +142,7 @@
 #'
 #' @author Tyler Sagendorf
 #'
-#' @importFrom data.table data.table := chmatch
+#' @importFrom data.table data.table := chmatch setattr
 #' @importFrom Matrix sparseMatrix
 #'
 #' @noRd
@@ -170,7 +170,7 @@
     stringsAsFactors = TRUE
   )
 
-  dt[, sets := rep(
+  dt[, sets := rep.int(
     names(gene_sets),
     lengths(gene_sets)
   )]
@@ -181,10 +181,10 @@
   # Strip information about direction of change. This may reduce the number of
   # levels if an element is both "up" and "down": "gene;u" and "gene;d" become
   # "gene".
-  levels(dt$elements) <- sub(";[ud]{1}$", "", levels(dt$elements))
+  setattr(dt$elements, "levels", sub(";[ud]{1}$", "", levels(dt[["elements"]])))
 
   # Do not chain with previous line, since the number of levels may change.
-  unique_elements <- levels(dt$elements)
+  unique_elements <- levels(dt[["elements"]])
 
   # Convert to characters to use chmatch()
   dt[, elements := as.character.factor(elements)]
@@ -202,9 +202,9 @@
   dt[, i := chmatch(elements, unique_elements, nomatch = 0L)]
 
   # Remove elements not in the background
-  dt <- subset(dt, subset = i != 0L)
+  dt <- dt[i != 0L]
 
-  unique_sets <- unique(dt[["sets"]])
+  unique_sets <- unique.default(dt[["sets"]])
 
   # Column indices for sparse matrix
   dt[, j := chmatch(sets, unique_sets)]
@@ -213,7 +213,7 @@
   dims <- lengths(dim_names)
 
   # Keep genes expected to be "up"
-  dt_u <- subset(dt, subset = direction_down == FALSE)
+  dt_u <- dt[direction_down == FALSE]
 
   # Incidence matrix where a 1 indicates that the element is in the set. If x
   # is a directional database, then A will only contain elements that are
@@ -231,12 +231,12 @@
   # In the unlikely event where an element appears multiple times in the same
   # set, some values of A will be > 1. Replace all values with 1. Could also
   # use the use.last.ij parameter in sparseMatrix(), but this is faster.
-  attr(A, which = "x") <- rep(1, length(attr(A, which = "x")))
+  attr(A, which = "x") <- rep.int(1, length(attr(A, which = "x")))
 
   A_d <- NULL # default
 
   if (nrow(dt_u) < nrow(dt)) {
-    dt_d <- subset(dt, subset = direction_down == TRUE)
+    dt_d <- dt[direction_down == TRUE]
 
     # Incidence matrix where a 1 indicates that a feature is expected to be
     # down in the set.
@@ -250,7 +250,7 @@
       use.last.ij = FALSE
     )
 
-    attr(A_d, which = "x") <- rep(1, length(attr(A_d, which = "x")))
+    attr(A_d, which = "x") <- rep.int(1, length(attr(A_d, which = "x")))
 
     # The Hadamard product A * A.d should be a matrix of zeros
     if (length(attr(A * A_d, which = "x"))) {
@@ -617,11 +617,11 @@
     n_batches <- ceiling(nperm / batch_size)
 
     batch_sizes <- c(
-      rep(batch_size, n_batches - 1L),
+      rep.int(batch_size, n_batches - 1L),
       nperm - batch_size * (n_batches - 1L)
     )
 
-    batch_id <- rep(seq_len(n_batches), batch_sizes)
+    batch_id <- rep.int(seq_len(n_batches), batch_sizes)
 
     # Seeds for permutations
     set.seed(seed)
@@ -684,7 +684,7 @@
 #' \describe{
 #'   \item{"rep_idx"}{a vector with length \eqn{\geq} \code{ncol(A_perm)} that
 #'   maps each row of \code{A_perm} to the corresponding entry of \code{m_i}.
-#'   This is used by \code{.extractPermInfo}.}
+#'   This is used by \code{.makeResultsTable}.}
 #'
 #'   \item{"A_perm"}{dense incidence matrix where the number of rows is the
 #'   number of unique gene set sizes and the number of columns is the size of
@@ -811,56 +811,6 @@
 }
 
 
-#' @title Extract Information from a Permutation Enrichment Score Matrix
-#'
-#' @description Extract information from a matrix of permutation enrichment
-#'   scores run as a single batch.
-#'
-#' @param ES_ls list of enrichment scores grouped by gene set size.
-#' @param ES_perm lis of permutation ES. The length of the list is equal to the
-#'   length of \code{ES}, while the length of each vector is at most the total
-#'   number of permutations: more likely, it is a fraction of the total number
-#'   of permutations. See the \code{batch_size} parameter of
-#'   \code{\link{fast_ssgsea}} for more details.
-#'
-#' @returns A \code{data.table} with 3 columns:
-#'
-#' \describe{
-#'   \item{"n_same_sign_b"}{integer; the number of permutation ES in each
-#'   row of \code{ES_perm} with the same sign as the corresponding ES in
-#'   \code{ES}.}
-#'   \item{"n_as_extreme_b"}{integer; the number of permutation ES in
-#'   each row of \code{ES_perm} that were at least as extreme as the
-#'   corresponding ES in \code{ES}. At most \code{"n_same_sign_b"}.}
-#'   \item{"sum_ES_perm_b"}{integer; the sum of the absolute values of the
-#'   permutation ES that have the same sign as the corresponding ES in
-#'   \code{ES}.}
-#' }
-#'
-#' @author Tyler Sagendorf
-#'
-#' @importFrom data.table data.table := setorderv rbindlist
-#'
-#' @noRd
-.extractPermInfo <- function(ES_ls,
-                             ES_perm) {
-  out <- lapply(seq_along(ES_ls), function(i) {
-    ES_i <- ES_ls[[i]]
-
-    ES_perm_i <- ES_perm[i, , drop = TRUE]
-
-    out_i <- .Rcpp_extractPermInfo(ES_i, ES_perm_i) # returns list
-    class(out_i) <- "data.table"
-
-    return(out_i)
-  })
-
-  out <- rbindlist(out)
-
-  return(out)
-}
-
-
 #' @title Generate ssGSEA Results Table for a Single Sample
 #'
 #' @inheritParams fast_ssgsea
@@ -924,7 +874,7 @@
 #'
 #' @author Tyler Sagendorf
 #'
-#' @importFrom data.table data.table := setorderv
+#' @importFrom data.table data.table := setorderv rbindlist
 #'
 #' @noRd
 .makeResultsTable <- function(alpha = 1,
@@ -948,9 +898,9 @@
     ES = ES_i,
     # Initialize vectors of 0's. These 3 vectors will be updated using the
     # results from each batch of permutations.
-    n_same_sign = rep(0L, length(ES_i)),
-    n_as_extreme = rep(0L, length(ES_i)),
-    sum_ES_perm = rep(0, length(ES_i)),
+    n_same_sign = rep.int(0L, length(ES_i)),
+    n_as_extreme = rep.int(0L, length(ES_i)),
+    sum_ES_perm = rep.int(0, length(ES_i)),
     row_order = seq_along(ES_i),
     stringsAsFactors = FALSE
   )
@@ -999,11 +949,13 @@
         theta_w_d_i = theta_w_d_i
       )
 
-      perm_dt <- .extractPermInfo(
+      perm_dt <- .Rcpp_extractPermInfo(
         ES_ls = ES_ls,
         ES_perm = ES_perm
       )
 
+      perm_dt <- rbindlist(perm_dt)
+
       # Update summary vectors
       tab_i[, `:=`(
         n_same_sign = n_same_sign + perm_dt[["n_same_sign_b"]],