Skip to content

Commit 569913b

Browse files
Update .Rcpp_extractPermInfo
Rather than using lapply to call .Rcpp_extractPermInfo on a single vector of true ES and another vector of permutation ES, everything is now done in C++. This reduces overhead due to passing objects from R to C++ and back. The function .sparseIncidence was also modified slightly, though any difference in runtime due to those changes is negligible.
1 parent aa878b7 commit 569913b

18 files changed

+155
-111
lines changed

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
Package: fast.ssgsea
22
Type: Package
33
Title: Fast Single-Sample Gene Set Enrichment Analysis (ssGSEA)
4-
Version: 0.1.0.9016
5-
Date: 2025-09-04
4+
Version: 0.1.0.9017
5+
Date: 2025-09-14
66
Authors@R:
77
person(given = "Tyler", family = "Sagendorf",
88
email = "tyler.sagendorf@pnnl.gov",

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ importFrom(data.table,chmatch)
1111
importFrom(data.table,data.table)
1212
importFrom(data.table,frank)
1313
importFrom(data.table,rbindlist)
14+
importFrom(data.table,setattr)
1415
importFrom(data.table,setorderv)
1516
importFrom(dqrng,dqsample.int)
1617
importFrom(dqrng,dqset.seed)

R/RcppExports.R

Lines changed: 47 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,31 @@
1616
#' @noRd
1717
NULL
1818

19+
#' @title Extract Information About Permutation Enrichment Scores
20+
#'
21+
#' @param x sorted vector of true enrichment scores. Missing values not
22+
#' allowed.
23+
#' @param y vector of permutation enrichment scores (not necessarily sorted).
24+
#'
25+
#' @returns A named list with 3 components, each vectors with the same length
26+
#' as x:
27+
#'
28+
#' \describe{
29+
#' \item{"n_same_sign_b"}{integer vector; the number of permutation ES in
30+
#' \code{y} with the same sign as the corresponding ES in \code{x}.}
31+
#' \item{"n_as_extreme_b"}{integer vector; the number of permutation ES in
32+
#' \code{y} that were at least as extreme as the corresponding ES in
33+
#' \code{x}. At most \code{NSameSign.b}.}
34+
#' \item{"sum_ES_perm_b"}{numeric vector; the absolute value of the sum of
35+
#' the permutation ES in \code{y} that have the same sign as the
36+
#' corresponding ES in \code{x}.}
37+
#' }
38+
#'
39+
#' @author Tyler Sagendorf
40+
#'
41+
#' @noRd
42+
NULL
43+
1944
#' @title Fast Vector Indexing
2045
#'
2146
#' @param x a numeric or integer vecctor.
@@ -149,33 +174,38 @@ NULL
149174
.Call(`_fast_ssgsea_Rcpp_calcESPermCore`, alpha, Y_perm, R_perm, sumRanks_i, A_perm, theta_m_i, theta_w_i)
150175
}
151176

152-
#' @title Extract Information About Permutation Enrichment Scores
177+
#' @title Extract Information from a Permutation Enrichment Score Matrix
153178
#'
154-
#' @param x sorted vector of true enrichment scores. Missing values not
155-
#' allowed.
156-
#' @param y vector of permutation enrichment scores (not necessarily sorted).
157-
#' All values may be missing.
179+
#' @description Extract information from a matrix of permutation enrichment
180+
#' scores run as a single batch.
158181
#'
159-
#' @returns A named list with 3 components, each vectors with length
160-
#' `length(x)`:
182+
#' @param ES_ls list of sorted true enrichment scores grouped by gene set size.
183+
#' @param ES_perm matrix of permutation ES. The number of rows is equal to the
184+
#' length of \code{ES_ls}, while the number of columns is at most the total
185+
#' number of permutations: more likely, it is a fraction of the total number
186+
#' of permutations. See the \code{batch_size} parameter of
187+
#' \code{\link{fast_ssgsea}} for more details.
188+
#'
189+
#' @returns A list of \code{data.table} objects, each with 3 columns:
161190
#'
162191
#' \describe{
163-
#' \item{"n_same_sign_b"}{integer vector; the number of permutation ES in
164-
#' \code{y} with the same sign as the corresponding ES in \code{x}.}
165-
#' \item{"n_as_extreme_b"}{integer vector; the number of permutation ES in
166-
#' \code{y} that were at least as extreme as the corresponding ES in
167-
#' \code{x}. At most \code{NSameSign.b}.}
168-
#' \item{"sum_ES_perm_b"}{numeric vector; the absolute value of the sum of
169-
#' the permutation ES in \code{y} that have the same sign as the
170-
#' corresponding ES in \code{x}.}
192+
#' \item{"n_same_sign_b"}{integer; the number of permutation ES in each
193+
#' row of \code{ES_perm} with the same sign as the corresponding ES in
194+
#' \code{ES}.}
195+
#' \item{"n_as_extreme_b"}{integer; the number of permutation ES in
196+
#' each row of \code{ES_perm} that were at least as extreme as the
197+
#' corresponding ES in \code{ES}. At most \code{"n_same_sign_b"}.}
198+
#' \item{"sum_ES_perm_b"}{integer; the sum of the absolute values of the
199+
#' permutation ES that have the same sign as the corresponding ES in
200+
#' \code{ES}.}
171201
#' }
172202
#'
173203
#' @author Tyler Sagendorf
174204
#'
175205
#' @noRd
176206
#'
177-
.Rcpp_extractPermInfo <- function(x, y) {
178-
.Call(`_fast_ssgsea_Rcpp_extractPermInfo`, x, y)
207+
.Rcpp_extractPermInfo <- function(ES_ls, ES_perm) {
208+
.Call(`_fast_ssgsea_Rcpp_extractPermInfo`, ES_ls, ES_perm)
179209
}
180210

181211
#' @title Generate a dense incidence matrix for permutation testing

R/utils.R

Lines changed: 20 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@
142142
#'
143143
#' @author Tyler Sagendorf
144144
#'
145-
#' @importFrom data.table data.table := chmatch
145+
#' @importFrom data.table data.table := chmatch setattr
146146
#' @importFrom Matrix sparseMatrix
147147
#'
148148
#' @noRd
@@ -170,7 +170,7 @@
170170
stringsAsFactors = TRUE
171171
)
172172

173-
dt[, sets := rep(
173+
dt[, sets := rep.int(
174174
names(gene_sets),
175175
lengths(gene_sets)
176176
)]
@@ -181,10 +181,10 @@
181181
# Strip information about direction of change. This may reduce the number of
182182
# levels if an element is both "up" and "down": "gene;u" and "gene;d" become
183183
# "gene".
184-
levels(dt$elements) <- sub(";[ud]{1}$", "", levels(dt$elements))
184+
setattr(dt$elements, "levels", sub(";[ud]{1}$", "", levels(dt[["elements"]])))
185185

186186
# Do not chain with previous line, since the number of levels may change.
187-
unique_elements <- levels(dt$elements)
187+
unique_elements <- levels(dt[["elements"]])
188188

189189
# Convert to characters to use chmatch()
190190
dt[, elements := as.character.factor(elements)]
@@ -202,9 +202,9 @@
202202
dt[, i := chmatch(elements, unique_elements, nomatch = 0L)]
203203

204204
# Remove elements not in the background
205-
dt <- subset(dt, subset = i != 0L)
205+
dt <- dt[i != 0L]
206206

207-
unique_sets <- unique(dt[["sets"]])
207+
unique_sets <- unique.default(dt[["sets"]])
208208

209209
# Column indices for sparse matrix
210210
dt[, j := chmatch(sets, unique_sets)]
@@ -213,7 +213,7 @@
213213
dims <- lengths(dim_names)
214214

215215
# Keep genes expected to be "up"
216-
dt_u <- subset(dt, subset = direction_down == FALSE)
216+
dt_u <- dt[direction_down == FALSE]
217217

218218
# Incidence matrix where a 1 indicates that the element is in the set. If x
219219
# is a directional database, then A will only contain elements that are
@@ -231,12 +231,12 @@
231231
# In the unlikely event where an element appears multiple times in the same
232232
# set, some values of A will be > 1. Replace all values with 1. Could also
233233
# use the use.last.ij parameter in sparseMatrix(), but this is faster.
234-
attr(A, which = "x") <- rep(1, length(attr(A, which = "x")))
234+
attr(A, which = "x") <- rep.int(1, length(attr(A, which = "x")))
235235

236236
A_d <- NULL # default
237237

238238
if (nrow(dt_u) < nrow(dt)) {
239-
dt_d <- subset(dt, subset = direction_down == TRUE)
239+
dt_d <- dt[direction_down == TRUE]
240240

241241
# Incidence matrix where a 1 indicates that a feature is expected to be
242242
# down in the set.
@@ -250,7 +250,7 @@
250250
use.last.ij = FALSE
251251
)
252252

253-
attr(A_d, which = "x") <- rep(1, length(attr(A_d, which = "x")))
253+
attr(A_d, which = "x") <- rep.int(1, length(attr(A_d, which = "x")))
254254

255255
# The Hadamard product A * A.d should be a matrix of zeros
256256
if (length(attr(A * A_d, which = "x"))) {
@@ -617,11 +617,11 @@
617617
n_batches <- ceiling(nperm / batch_size)
618618

619619
batch_sizes <- c(
620-
rep(batch_size, n_batches - 1L),
620+
rep.int(batch_size, n_batches - 1L),
621621
nperm - batch_size * (n_batches - 1L)
622622
)
623623

624-
batch_id <- rep(seq_len(n_batches), batch_sizes)
624+
batch_id <- rep.int(seq_len(n_batches), batch_sizes)
625625

626626
# Seeds for permutations
627627
set.seed(seed)
@@ -684,7 +684,7 @@
684684
#' \describe{
685685
#' \item{"rep_idx"}{a vector with length \eqn{\geq} \code{ncol(A_perm)} that
686686
#' maps each row of \code{A_perm} to the corresponding entry of \code{m_i}.
687-
#' This is used by \code{.extractPermInfo}.}
687+
#' This is used by \code{.makeResultsTable}.}
688688
#'
689689
#' \item{"A_perm"}{dense incidence matrix where the number of rows is the
690690
#' number of unique gene set sizes and the number of columns is the size of
@@ -811,56 +811,6 @@
811811
}
812812

813813

814-
#' @title Extract Information from a Permutation Enrichment Score Matrix
815-
#'
816-
#' @description Extract information from a matrix of permutation enrichment
817-
#' scores run as a single batch.
818-
#'
819-
#' @param ES_ls list of enrichment scores grouped by gene set size.
820-
#' @param ES_perm lis of permutation ES. The length of the list is equal to the
821-
#' length of \code{ES}, while the length of each vector is at most the total
822-
#' number of permutations: more likely, it is a fraction of the total number
823-
#' of permutations. See the \code{batch_size} parameter of
824-
#' \code{\link{fast_ssgsea}} for more details.
825-
#'
826-
#' @returns A \code{data.table} with 3 columns:
827-
#'
828-
#' \describe{
829-
#' \item{"n_same_sign_b"}{integer; the number of permutation ES in each
830-
#' row of \code{ES_perm} with the same sign as the corresponding ES in
831-
#' \code{ES}.}
832-
#' \item{"n_as_extreme_b"}{integer; the number of permutation ES in
833-
#' each row of \code{ES_perm} that were at least as extreme as the
834-
#' corresponding ES in \code{ES}. At most \code{"n_same_sign_b"}.}
835-
#' \item{"sum_ES_perm_b"}{integer; the sum of the absolute values of the
836-
#' permutation ES that have the same sign as the corresponding ES in
837-
#' \code{ES}.}
838-
#' }
839-
#'
840-
#' @author Tyler Sagendorf
841-
#'
842-
#' @importFrom data.table data.table := setorderv rbindlist
843-
#'
844-
#' @noRd
845-
.extractPermInfo <- function(ES_ls,
846-
ES_perm) {
847-
out <- lapply(seq_along(ES_ls), function(i) {
848-
ES_i <- ES_ls[[i]]
849-
850-
ES_perm_i <- ES_perm[i, , drop = TRUE]
851-
852-
out_i <- .Rcpp_extractPermInfo(ES_i, ES_perm_i) # returns list
853-
class(out_i) <- "data.table"
854-
855-
return(out_i)
856-
})
857-
858-
out <- rbindlist(out)
859-
860-
return(out)
861-
}
862-
863-
864814
#' @title Generate ssGSEA Results Table for a Single Sample
865815
#'
866816
#' @inheritParams fast_ssgsea
@@ -924,7 +874,7 @@
924874
#'
925875
#' @author Tyler Sagendorf
926876
#'
927-
#' @importFrom data.table data.table := setorderv
877+
#' @importFrom data.table data.table := setorderv rbindlist
928878
#'
929879
#' @noRd
930880
.makeResultsTable <- function(alpha = 1,
@@ -948,9 +898,9 @@
948898
ES = ES_i,
949899
# Initialize vectors of 0's. These 3 vectors will be updated using the
950900
# results from each batch of permutations.
951-
n_same_sign = rep(0L, length(ES_i)),
952-
n_as_extreme = rep(0L, length(ES_i)),
953-
sum_ES_perm = rep(0, length(ES_i)),
901+
n_same_sign = rep.int(0L, length(ES_i)),
902+
n_as_extreme = rep.int(0L, length(ES_i)),
903+
sum_ES_perm = rep.int(0, length(ES_i)),
954904
row_order = seq_along(ES_i),
955905
stringsAsFactors = FALSE
956906
)
@@ -999,11 +949,13 @@
999949
theta_w_d_i = theta_w_d_i
1000950
)
1001951

1002-
perm_dt <- .extractPermInfo(
952+
perm_dt <- .Rcpp_extractPermInfo(
1003953
ES_ls = ES_ls,
1004954
ES_perm = ES_perm
1005955
)
1006956

957+
perm_dt <- rbindlist(perm_dt)
958+
1007959
# Update summary vectors
1008960
tab_i[, `:=`(
1009961
n_same_sign = n_same_sign + perm_dt[["n_same_sign_b"]],

man/figures/README-figure-1.png

-1.26 KB
Loading

man/figures/README-figure-2.png

461 Bytes
Loading
-3 Bytes
Binary file not shown.
14 Bytes
Binary file not shown.
8 Bytes
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)