Skip to content

Commit 7364a80

Browse files
Merge pull request #9 from pnnl/overhaul
2 parents ecc38eb + f16409b commit 7364a80

29 files changed

+1313
-2313
lines changed

DESCRIPTION

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
Package: fast.ssgsea
22
Type: Package
33
Title: Fast Single-Sample Gene Set Enrichment Analysis (ssGSEA)
4-
Version: 0.1.0.9026
5-
Date: 2026-02-15
4+
Version: 0.1.0.9027
5+
Date: 2026-02-24
66
Authors@R:
77
person(given = "Tyler", family = "Sagendorf",
88
email = "tyler.sagendorf@pnnl.gov",
@@ -15,17 +15,14 @@ License: file LICENSE
1515
Encoding: UTF-8
1616
LinkingTo:
1717
Rcpp,
18-
RcppArmadillo,
1918
dqrng
2019
Depends:
2120
R (>= 4.0.0)
2221
Imports:
2322
collapse,
2423
data.table,
2524
dqrng,
26-
Matrix,
2725
Rcpp (>= 1.1.0),
28-
RcppArmadillo,
2926
stats
3027
RoxygenNote: 7.3.3
3128
Suggests:

NAMESPACE

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,27 @@
33
export(fast_ssgsea)
44
export(read_gmt)
55
exportPattern("^[[:alpha:]]+")
6-
import(Matrix)
7-
import(RcppArmadillo)
86
import(dqrng)
97
importFrom(Rcpp,evalCpp)
8+
importFrom(collapse,"%!iin%")
9+
importFrom(collapse,alloc)
1010
importFrom(collapse,allv)
11+
importFrom(collapse,any_duplicated)
1112
importFrom(collapse,anyv)
13+
importFrom(collapse,fmatch)
14+
importFrom(collapse,fmax)
1215
importFrom(collapse,fsubset)
1316
importFrom(collapse,funique)
1417
importFrom(collapse,groupid)
18+
importFrom(collapse,radixorderv)
1519
importFrom(collapse,vec)
1620
importFrom(collapse,vlengths)
1721
importFrom(collapse,vtypes)
1822
importFrom(collapse,whichNA)
1923
importFrom(collapse,whichv)
2024
importFrom(data.table,":=")
21-
importFrom(data.table,chmatch)
2225
importFrom(data.table,data.table)
2326
importFrom(data.table,frank)
24-
importFrom(data.table,rbindlist)
2527
importFrom(data.table,setDF)
2628
importFrom(data.table,setorderv)
2729
importFrom(stats,p.adjust)

NEWS.md

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,25 @@
11
# fast.ssgsea (development version)
22

3-
- Fixed Windows installation error ([#1](https://github.com/pnnl/fast.ssgsea/issues/1)).
3+
## BREAKING CHANGES
4+
- Renamed the `X` parameter of `fast_ssgsea` to `stats`. Parameter `stats` accepts a named numeric vector, rather than a numeric matrix with dimension names. This also led to the removal of the `adjust.globally` parameter.
5+
- Removed the `batch.size` parameter from `fast_ssgsea`.
6+
7+
## ENHANCEMENTS
48
- Greatly reduced runtime of permutation tests, especially when testing directional gene sets.
5-
- Added `read_gmt` function, which reads a list of gene sets from a Gene Matrix Transposed (GMT) file.
6-
- Added `alternative` parameter to `fast_ssgsea` to perform one-sided hypothesis tests.
7-
- Added `max_size` parameter to `fast_ssgsea` to limit the maximum size of sets that will be tested.
9+
- Increased default `nperm` to 100,000 in `fast_ssgsea` and increased the permutation limit from 1 million to 2 billion, which is close to `.Machine$integer.max`.
10+
- Greatly reduced memory usage and slightly improved runtime by removing the need for incidence matrices. This also led to the removal of the _Matrix_ and _RcppArmadillo_ packages from Imports.
11+
- Added parameter `alternative` to `fast_ssgsea` to perform one-sided hypothesis tests.
12+
- Added parameter `max_size` to `fast_ssgsea` to limit the maximum size of sets that will be tested.
13+
- Added function `read_gmt`, which reads a named list of gene sets from a Gene Matrix Transposed (GMT) file.
14+
15+
## BUGFIXES
16+
- Fixed Windows installation error ([#1](https://github.com/pnnl/fast.ssgsea/issues/1)).
17+
- Directional gene sets are now allowed to consist entirely of up-regulated or down-regulated genes.
18+
- Permutation enrichment scores for down-regulated genes are now calculated from genes that do not overlap with those selected for the up-regulated permutation enrichment scores.
19+
20+
## MISC
21+
- Added the _collapse_ package to Imports.
822
- Updated runtime data and figures in simulation/.
9-
- Increased permutation limit from 1 million to 2 billion.
1023

1124

1225
# fast.ssgsea 0.1.0

R/RcppExports.R

Lines changed: 51 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -1,75 +1,27 @@
11
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
22
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
33

4-
#' @title Dense-Sparse Matrix Multiplication
5-
#'
6-
#' @description Multiply a dense matrix by a sparse matrix.
7-
#'
8-
#' @param X dense matrix.
9-
#' @param Y sparse matrix.
10-
#'
11-
#' @returns The product of \code{X} and \code{Y}: a dense matrix.
12-
#'
13-
#' @references Sanderson, C., & Curtin, R. (2016). Armadillo: A template-based
14-
#' C++ library for linear algebra. The Journal of Open Source Software, 1(2),
15-
#' 26. \url{https://doi.org/10.21105/joss.00026}
16-
#'
17-
#' @noRd
18-
NULL
19-
20-
#' @title Core of the .calcES R function
21-
#'
22-
#' @param Y absolute values of the matrix \code{t(X)} raised to the power of
23-
#' \code{alpha}. Missing values are then imputed with 0.
24-
#' @param R matrix of ranks of the values in each row of \code{X}. Missing
25-
#' values in \code{X} are assigned a rank of \code{NA}, which are then
26-
#' imputed with 0.
27-
#' @param sumRanks integer vector; the sums of the ranks in each sample. Equal
28-
#' to \code{rowSums(R)}.
29-
#' @param A sparse incidence matrix with single genes as rows and gene sets as
30-
#' columns. A value of 1 indicates that the gene is an element of the set,
31-
#' while a value of 0 indicates otherwise.
32-
#' @param M matrix with samples as rows and gene sets as columns, where each
33-
#' entry is the number of genes with nonmissing values in each set.
34-
#' @param W matrix with the same dimensions as \code{M} where each entry is the
35-
#' number of genes with nonmissing values \emph{not} in each set.
36-
#'
37-
#' @returns A matrix of real-valued enrichment scores with samples as rows and
38-
#' gene sets as columns. May contain missing values if the corresponding
39-
#' entry of \code{M} is less than 2.
40-
#'
41-
#' @author Tyler Sagendorf
42-
#'
43-
#' @references Sanderson, C., & Curtin, R. (2016). Armadillo: A template-based
44-
#' C++ library for linear algebra. The Journal of Open Source Software, 1(2),
45-
#' 26. \url{https://doi.org/10.21105/joss.00026}
46-
#'
47-
#' Sanderson, C., & Curtin, R. (2019). Practical Sparse Matrices in C++ with
48-
#' Hybrid Storage and Template-Based Expression Optimisation. Mathematical
49-
#' and Computational Applications, 24(3), 70. \url{
50-
#' https://doi.org/10.3390/mca24030070}
51-
#'
52-
#' @noRd
53-
NULL
54-
554
#' @title Get the Index of the First Positive ES for Every Unique Gene Set Size
565
#'
57-
#' @description Perform binary searches within sections of a numeric vector to
58-
#' determine the index of the first positive ES.
6+
#' @description Perform binary searches to determine the index of the first
7+
#' positive ES for each unique gene set size.
598
#'
9+
#' @param pES_pos_idx pointer to the integer vector that will hold the results.
6010
#' @param n_sizes integer; the number of unique gene set sizes.
61-
#' @param ES numeric vector of enrichment scores, sorted in ascending order by
62-
#' gene set size and then by the values of the ES.
63-
#' @param ES_start integer vector with length equal to \code{n_sizes}. Stores
64-
#' the index of the first ES for every unique gene set size.
65-
#' @param ES_end integer vector with length equal to \code{n_sizes}. Each
66-
#' element is 1 more than the index of the last ES for every unique gene set
67-
#' size.
68-
#'
69-
#' @returns An integer vector containing the index of the first positive ES for
70-
#' every unique gene set size. Each element is greater than or equal to the
71-
#' corresponding element of \code{ES_start}. If there are no positive ES for
72-
#' a particular set size, the index will be the corresponding element of
11+
#' @param pES pointer to the numeric vector of enrichment scores, sorted in
12+
#' ascending order by gene set size and then by the values of the ES.
13+
#' @param pES_start pointer to the integer vector with length equal to
14+
#' \code{n_sizes}. The vector stores the index of the first ES for every
15+
#' unique gene set size.
16+
#' @param pES_end pointer to the integer vector with length equal to
17+
#' \code{n_sizes}. Each element is 1 more than the index of the last ES for
18+
#' every unique gene set size.
19+
#'
20+
#' @returns Nothing. The vector pointed to by \code{pES_pos_idx} is modified in
21+
#' place. \code{ES_pos_idx} will contain the index of the first positive ES
22+
#' for every unique gene set size. Each element is greater than or equal to
23+
#' the corresponding element of \code{ES_start}. If there are no positive ES
24+
#' for a particular set size, the index will be the corresponding element of
7325
#' \code{ES_end}.
7426
#'
7527
#' @author Tyler Sagendorf
@@ -79,30 +31,32 @@ NULL
7931

8032
#' @title Update n_same_sign and sum_ES_perm vectors
8133
#'
82-
#' @description Map the result vectors with length equal to the number of
83-
#' unique gene set sizes to the result vectors with length equal to the total
84-
#' number of gene sets.
34+
#' @description Map the vectors with length equal to the number of unique gene
35+
#' set sizes to the vectors with length equal to the total number of gene
36+
#' sets.
8537
#'
86-
#' @param n_same_sign integer vector of zeros with length equal to the number
87-
#' of enrichment scores. Stores the number of permutation ES with the same
88-
#' sign as each true ES.
89-
#' @param sum_ES_perm numeric vector of zeros with length equal to the number
90-
#' of enrichment scores. Stores the absolute sum of the permutation ES with
91-
#' the same sign as each true ES.
38+
#' @param pn_same_sign pointer to an integer vector of zeros with length equal
39+
#' to the number of enrichment scores. Stores the number of permutation ES
40+
#' with the same sign as each true ES.
41+
#' @param psum_ES_perm pointer to a numeric vector of zeros with length equal to
42+
#' the number of enrichment scores. Stores the absolute sum of the
43+
#' permutation ES with the same sign as each true ES.
9244
#' @param n_sizes integer; number of unique gene set sizes.
9345
#' @param nperm integer; total number of permutations.
94-
#' @param ES_start integer vector with length equal to \code{n_sizes}. Stores
95-
#' the index of the first ES for every unique gene set size.
96-
#' @param ES_end integer vector with length equal to \code{n_sizes}. Each
97-
#' element is 1 more than the index of the last ES for every unique gene set
46+
#' @param pES_start pointer to an integer vector with length equal to
47+
#' \code{n_sizes}. Stores the index of the first ES for every unique gene set
9848
#' size.
99-
#' @param ES_pos_idx integer vector; the output of \code{get_ES_pos_idx()}.
100-
#' @param n_perm_neg integer vector; contains the number of negative
101-
#' permutation ES for every unique gene set size.
102-
#' @param sum_perm_pos numeric vector; the sum of the positive permutation ES
103-
#' for every unique gene set size.
104-
#' @param sum_perm_neg numeric vector; the absolute sum of the negative
105-
#' permutation ES for every unique gene set size.
49+
#' @param pES_end pointer to an integer vector with length equal to
50+
#' \code{n_sizes}. Each element is 1 more than the index of the last ES for
51+
#' every unique gene set size.
52+
#' @param pES_pos_idx pointer to an integer vector; the output of
53+
#' \code{get_ES_pos_idx()}.
54+
#' @param pn_perm_neg pointer to an integer vector; contains the number of
55+
#' negative permutation ES for every unique gene set size.
56+
#' @param psum_perm_pos pointer to a numeric vector; contains the sum of the
57+
#' positive permutation ES for every unique gene set size.
58+
#' @param psum_perm_neg pointer to a numeric vector; contains the absolute sum
59+
#' of the negative permutation ES for every unique gene set size.
10660
#'
10761
#' @returns Nothing. The vectors \code{n_same_sign} and \code{sum_ES_perm} are
10862
#' modified in place.
@@ -126,7 +80,6 @@ NULL
12680
#' @param seed integer or \code{NULL}; seed to obtain reproducible results from
12781
#' permutation tests.
12882
#' @param nperm integer; total number of permutations.
129-
#' @param batch_size integer; the number of permutations run as a single batch.
13083
#' @param ES numeric vector of enrichment scores, sorted in ascending order by
13184
#' gene set size and then by the values of the ES.
13285
#' @param ES_end integer vector with length equal to \code{n_sizes}. Each
@@ -135,7 +88,7 @@ NULL
13588
#' @param y the absolute values of the gene level statistics raised to some
13689
#' non-negative power \code{alpha}.
13790
#' @param r the ranks of the gene-level statistics.
138-
#' @param max_set_size integer; the size of the largest gene set.
91+
#' @param max_size integer; the size of the largest gene set.
13992
#' @param sum_ranks the sum of the ranks of all gene-level statistics (sum of
14093
#' the vector \code{r}).
14194
#' @param L2_m integer vector of unique gene set sizes, sorted in ascending
@@ -160,19 +113,19 @@ NULL
160113
#' sets and update vectors needed to calculate NES and p-values.
161114
#'
162115
#' @inheritParams calc_ES_perm
163-
#' @param L3_m integer vector of the unique number of up-regulated genes found
164-
#' in the directional gene sets, sorted in ascending order.
165-
#' @param L3_w integer vector; the differences between the total number of
166-
#' genes and the elements of \code{L3_m} (number of genes not up-regulated in
167-
#' the set).
116+
#' @param L3_m_up integer vector of the unique number of up-regulated genes
117+
#' found in the directional gene sets, sorted in ascending order.
118+
#' @param L3_w_up integer vector; the differences between the total number of
119+
#' genes and the elements of \code{L3_m_up} (number of genes not up-regulated
120+
#' in the set).
168121
#' @param L3_m_down integer vector of the unique number of down-regulated genes
169122
#' found in the directional gene sets, sorted in ascending order.
170123
#' @param L3_w_down integer vector; the differences between the total number of
171124
#' genes and the elements of \code{L3_m_down} (number of genes not
172125
#' down-regulated in the set).
173-
#' @param map_L3_to_L2 a 1-based integer vector that maps the unique number of
126+
#' @param map_L3_to_L2_up a 1-based integer vector that maps each unique number of
174127
#' up-regulated genes to the unique pairs of up- and down-regulated genes.
175-
#' @param map_L3_to_L2_down a 1-based integer vector that maps the unique
128+
#' @param map_L3_to_L2_down a 1-based integer vector that maps each unique
176129
#' number of down-regulated genes to the unique pairs of up- and
177130
#' down-regulated genes.
178131
#'
@@ -184,23 +137,11 @@ NULL
184137
#' @noRd
185138
NULL
186139

187-
.Cpp_unsafe_sparseMatrix <- function(i, j, dims, dimnames) {
188-
.Call(`_fast_ssgsea_unsafe_sparseMatrix`, i, j, dims, dimnames)
189-
}
190-
191-
.Cpp_matmult_sparse <- function(X, Y) {
192-
.Call(`_fast_ssgsea_matmult_sparse`, X, Y)
193-
}
194-
195-
.Cpp_calcES <- function(min_size, Y, R, sumRanks, A, M, W) {
196-
.Call(`_fast_ssgsea_calcES`, min_size, Y, R, sumRanks, A, M, W)
197-
}
198-
199-
.Cpp_calc_ES_perm <- function(n_same_sign, n_as_extreme, sum_ES_perm, seed, nperm, batch_size, ES, ES_end, y, r, max_set_size, sum_ranks, L2_m, L2_w) {
200-
invisible(.Call(`_fast_ssgsea_calc_ES_perm`, n_same_sign, n_as_extreme, sum_ES_perm, seed, nperm, batch_size, ES, ES_end, y, r, max_set_size, sum_ranks, L2_m, L2_w))
140+
.Cpp_calc_ES_perm <- function(n_same_sign, n_as_extreme, sum_ES_perm, seed, nperm, ES, ES_end, y, r, max_size, sum_ranks, L2_m, L2_w) {
141+
invisible(.Call(`_fast_ssgsea_calc_ES_perm`, n_same_sign, n_as_extreme, sum_ES_perm, seed, nperm, ES, ES_end, y, r, max_size, sum_ranks, L2_m, L2_w))
201142
}
202143

203-
.Cpp_calc_ES_perm_dir <- function(n_same_sign, n_as_extreme, sum_ES_perm, seed, nperm, batch_size, ES, ES_end, y, r, max_set_size, sum_ranks, L3_m, L3_w, L3_m_down, L3_w_down, map_L3_to_L2, map_L3_to_L2_down) {
204-
invisible(.Call(`_fast_ssgsea_calc_ES_perm_dir`, n_same_sign, n_as_extreme, sum_ES_perm, seed, nperm, batch_size, ES, ES_end, y, r, max_set_size, sum_ranks, L3_m, L3_w, L3_m_down, L3_w_down, map_L3_to_L2, map_L3_to_L2_down))
144+
.Cpp_calc_ES_perm_dir <- function(n_same_sign, n_as_extreme, sum_ES_perm, seed, nperm, ES, ES_end, y, r, max_size, sum_ranks, L3_m_up, L3_w_up, L3_m_down, L3_w_down, map_L3_to_L2_up, map_L3_to_L2_down) {
145+
invisible(.Call(`_fast_ssgsea_calc_ES_perm_dir`, n_same_sign, n_as_extreme, sum_ES_perm, seed, nperm, ES, ES_end, y, r, max_size, sum_ranks, L3_m_up, L3_w_up, L3_m_down, L3_w_down, map_L3_to_L2_up, map_L3_to_L2_down))
205146
}
206147

R/fast.ssgsea-package.R

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
#' @import dqrng
2-
#' @import Matrix
3-
#' @import RcppArmadillo
42
#' @importFrom Rcpp evalCpp
53
#'
64
#' @useDynLib fast.ssgsea, .registration=TRUE

0 commit comments

Comments
 (0)