diff --git a/DESCRIPTION b/DESCRIPTION index 410a79b..551fec2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: leapR Title: Layered enrichment analysis of pathways R -Version: 0.99.8 +Version: 0.99.9 Authors@R: c( person("Sara", "Gosline", email = "sara.gosline@pnnl.gov", role = c('aut',"cre"), comment = c(ORCID = "0000-0002-6534-4774")), person("Jason", "McDermott", email = "jason.mcdermott@pnnl.gov", role = "aut"), @@ -17,7 +17,8 @@ biocViews: Proteomics, Pathways, GeneExpression, - Transcriptomics + Transcriptomics, + Software Imports: stats, gplots, @@ -30,7 +31,8 @@ Imports: stringr, tidyr, SummarizedExperiment, - BiocStyle + BiocStyle, + BiocFileCache Suggests: knitr, rmarkdown, @@ -38,3 +40,4 @@ Suggests: VignetteBuilder: knitr License: MIT + file LICENSE Config/testthat/edition: 3 +URL: https://pnnl.github.io/leapR/ diff --git a/R/calcTTest.R b/R/calcTTest.R index 095c35c..ca0df02 100644 --- a/R/calcTTest.R +++ b/R/calcTTest.R @@ -18,11 +18,15 @@ #' @examples #' #' library(leapR) +#' library(BiocFileCache) +#' +#' path <- tools::R_user_dir("leapR", which = "cache") +#' bfc <- BiocFileCache(path, ask = FALSE) +#' #' url <- "https://api.figshare.com/v2/file/download/56536214" -#' tdata <- download.file(url,method='libcurl',destfile='transData.rda') -#' load('transData.rda') -#' p <- file.remove("transData.rda") -#' +#' tc <- bfcadd(bfc, "tdat", fpath = url) +#' load(tc) +#' #' # read in the pathways #' data("ncipid") #' diff --git a/R/cluster_enrichment.R b/R/cluster_enrichment.R index d1f3120..7e76adf 100644 --- a/R/cluster_enrichment.R +++ b/R/cluster_enrichment.R @@ -19,12 +19,14 @@ #' @export #' @examples #' library(leapR) -#' -#' # read in the example transcriptomic data +#' library(BiocFileCache) +#' +#' path <- tools::R_user_dir("leapR", which = "cache") +#' bfc <- BiocFileCache(path, ask = FALSE) +#' #' url <- "https://api.figshare.com/v2/file/download/56536214" -#' tdata <- 
download.file(url,method='libcurl',destfile='transData.rda') -#' load('transData.rda') -#' p <- file.remove("transData.rda") +#' tc <- bfcadd(bfc, "tdat", fpath = url) +#' load(tc) #' #' # read in the pathways #' data("ncipid") diff --git a/R/combine_omics.R b/R/combine_omics.R index 101d595..78dbbcc 100644 --- a/R/combine_omics.R +++ b/R/combine_omics.R @@ -18,23 +18,21 @@ #' #' @examples #' library(leapR) -#' url <- 'https://api.figshare.com/v2/file/download/56536217' -#' -#' pdata <- download.file(url,method='libcurl',destfile='protData.rda') -#' load('protData.rda') -#' p <- file.remove("protData.rda") -#' +#' library(BiocFileCache) +#' path <- tools::R_user_dir("leapR", which = "cache") +#' bfc <- BiocFileCache(path, ask = FALSE) +#' +#' url <- "https://api.figshare.com/v2/file/download/56536217" +#' pc <- bfcadd(bfc, "pdat", fpath = url) +#' load(pc) +#' #' url <- "https://api.figshare.com/v2/file/download/56536214" -#' tdata <- download.file(url,method='libcurl',destfile='transData.rda') -#' load('transData.rda') -#' p <- file.remove("transData.rda") -#' -#' url <- 'https://api.figshare.com/v2/file/download/56536211' -#' phdata<-download.file(url,method='libcurl',destfile = 'phosData.rda') -#' #phosphodata<-read.csv("phdata",check.names=FALSE,row.names=1) -#' load('phosData.rda') -#' p <- file.remove('phosData.rda')# read in the example protein data -#' +#' tc <- bfcadd(bfc, "tdat", fpath = url) +#' load(tc) +#' +#' url <- "https://api.figshare.com/v2/file/download/56536211" +#' phc <- bfcadd(bfc, "phdat", fpath = url) +#' load(phc) #' #' # merge the three datasets by rows and add prefix tags for #' # different omics types diff --git a/R/enrichment_in_groups.R b/R/enrichment_in_groups.R index b82cc45..4d92940 100644 --- a/R/enrichment_in_groups.R +++ b/R/enrichment_in_groups.R @@ -18,6 +18,7 @@ #' to log your data before calling. NOTE: if you do not call `suppressWarnings` then #' the KS test will warn you about ties. 
#' @param minsize minimum size of set +#' @param log_transformed Set to TRUE if data is already log-transformed #' @param mapping_column column name of mapping identifiers #' @param abundance_column columns mapping abundance, either in the `assay` #' matrix or `rowData` @@ -114,7 +115,7 @@ enrichment_in_groups <- function(geneset, names(backvals) <- backlist#[-group_ind] in_back <- length(backvals) - outgroup_mean = mean(backvals[-group_ind], na.rm = T) + outgroup_mean = mean(backvals[-group_ind], na.rm = TRUE) in_path <- length(in_group) #how many left after na.rm if ((in_path > minsize) & (any(!is.na(in_path))) & diff --git a/R/leapR-package.R b/R/leapR-package.R index ad09996..f82de0d 100644 --- a/R/leapR-package.R +++ b/R/leapR-package.R @@ -143,14 +143,14 @@ #' \cr #' @examples #' library(leapR) -#' -#' # read in the example abundance data -#' # read in the example transcriptomic data -#' tdata <- download.file("https://api.figshare.com/v2/file/download/56536214", -#' method='libcurl',destfile='transData.rda') -#' load('transData.rda') -#' p <- file.remove("transData.rda") -#' +#' library(BiocFileCache) +#' +#' path <- tools::R_user_dir("leapR", which = "cache") +#' bfc <- BiocFileCache(path, ask = FALSE) +#' +#' url <- "https://api.figshare.com/v2/file/download/56536214" +#' tc <- bfcadd(bfc, "tdat", fpath = url) +#' load(tc) #' # read in the pathways #' data("ncipid") #' diff --git a/R/leapR.R b/R/leapR.R index 05318c4..4b53700 100644 --- a/R/leapR.R +++ b/R/leapR.R @@ -140,13 +140,14 @@ #' \cr #' @examples #' library(leapR) -#' -#' # read in the example abundance data -#' # read in the example transcriptomic data -#' tdata <- download.file("https://api.figshare.com/v2/file/download/56536214", -#' method='libcurl',destfile='transData.rda') -#' load('transData.rda') -#' p <- file.remove("transData.rda") +#' library(BiocFileCache) +#' +#' path <- tools::R_user_dir("leapR", which = "cache") +#' bfc <- BiocFileCache(path, ask = FALSE) +#' +#' url <- 
"https://api.figshare.com/v2/file/download/56536214" +#' tc <- bfcadd(bfc, "tdat", fpath = url) +#' load(tc) #' #' # read in the pathways #' data("ncipid") diff --git a/leapR.Rproj b/leapR.Rproj new file mode 100644 index 0000000..eaa6b81 --- /dev/null +++ b/leapR.Rproj @@ -0,0 +1,18 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace diff --git a/man/calcTTest.Rd b/man/calcTTest.Rd index 94a5a27..64a0eb5 100644 --- a/man/calcTTest.Rd +++ b/man/calcTTest.Rd @@ -27,11 +27,15 @@ append results to ExpressionSet with two extra columns: `pvalue` and \examples{ library(leapR) + library(BiocFileCache) + + path <- tools::R_user_dir("leapR", which = "cache") + bfc <- BiocFileCache(path, ask = FALSE) + url <- "https://api.figshare.com/v2/file/download/56536214" - tdata <- download.file(url,method='libcurl',destfile='transData.rda') - load('transData.rda') - p <- file.remove("transData.rda") - + tc <- bfcadd(bfc, "tdat", fpath = url) + load(tc) + # read in the pathways data("ncipid") diff --git a/man/cluster_enrichment.Rd b/man/cluster_enrichment.Rd index c9428ef..1d4b487 100644 --- a/man/cluster_enrichment.Rd +++ b/man/cluster_enrichment.Rd @@ -36,12 +36,14 @@ a list of results matrices in the order of the input clusters. 
} \examples{ library(leapR) - - # read in the example transcriptomic data + library(BiocFileCache) + + path <- tools::R_user_dir("leapR", which = "cache") + bfc <- BiocFileCache(path, ask = FALSE) + url <- "https://api.figshare.com/v2/file/download/56536214" - tdata <- download.file(url,method='libcurl',destfile='transData.rda') - load('transData.rda') - p <- file.remove("transData.rda") + tc <- bfcadd(bfc, "tdat", fpath = url) + load(tc) # read in the pathways data("ncipid") diff --git a/man/combine_omics.Rd b/man/combine_omics.Rd index 74f0e50..d359663 100644 --- a/man/combine_omics.Rd +++ b/man/combine_omics.Rd @@ -31,23 +31,21 @@ adds prefix tags to the ids. } \examples{ library(leapR) - url <- 'https://api.figshare.com/v2/file/download/56536217' - - pdata <- download.file(url,method='libcurl',destfile='protData.rda') - load('protData.rda') - p <- file.remove("protData.rda") - + library(BiocFileCache) + path <- tools::R_user_dir("leapR", which = "cache") + bfc <- BiocFileCache(path, ask = FALSE) + + url <- "https://api.figshare.com/v2/file/download/56536217" + pc <- bfcadd(bfc, "pdat", fpath = url) + load(pc) + url <- "https://api.figshare.com/v2/file/download/56536214" - tdata <- download.file(url,method='libcurl',destfile='transData.rda') - load('transData.rda') - p <- file.remove("transData.rda") - - url <- 'https://api.figshare.com/v2/file/download/56536211' - phdata<-download.file(url,method='libcurl',destfile = 'phosData.rda') - #phosphodata<-read.csv("phdata",check.names=FALSE,row.names=1) - load('phosData.rda') - p <- file.remove('phosData.rda')# read in the example protein data - + tc <- bfcadd(bfc, "tdat", fpath = url) + load(tc) + + url <- "https://api.figshare.com/v2/file/download/56536211" + phc <- bfcadd(bfc, "phdat", fpath = url) + load(phc) # merge the three datasets by rows and add prefix tags for # different omics types diff --git a/man/enrichment_in_groups.Rd b/man/enrichment_in_groups.Rd index 6392c4c..c839d02 100644 --- 
a/man/enrichment_in_groups.Rd +++ b/man/enrichment_in_groups.Rd @@ -12,6 +12,7 @@ enrichment_in_groups( method = "fishers", minsize = 5, mapping_column = NULL, + log_transformed = FALSE, abundance_column = NULL, randomize = FALSE, silence_try_errors = TRUE @@ -35,6 +36,8 @@ the KS test will warn you about ties.} \item{mapping_column}{column name of mapping identifiers} +\item{log_transformed}{Set to TRUE if data is already log-transformed} + \item{abundance_column}{columns mapping abundance, either in the `assay` matrix or `rowData`} diff --git a/man/leapR-package.Rd b/man/leapR-package.Rd index 41c7aa5..821a41b 100644 --- a/man/leapR-package.Rd +++ b/man/leapR-package.Rd @@ -6,6 +6,13 @@ \title{leapR: Layered enrichment analysis of pathways R} \description{ leapR is a package that identifies pathways that are enriched across diverse 'omics experiments. It leverages any tabular expression data (proteomics, transcriptomics) using the `SummarizedExperiment` object. It works with any pathway in the .gct file format. +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://pnnl.github.io/leapR/} +} + } \author{ \strong{Maintainer}: Sara Gosline \email{sara.gosline@pnnl.gov} (\href{https://orcid.org/0000-0002-6534-4774}{ORCID}) @@ -13,6 +20,7 @@ leapR is a package that identifies pathways that are enriched across diverse 'om Authors: \itemize{ \item Jason McDermott \email{jason.mcdermott@pnnl.gov} + \item Jeremy Jacobson \email{jeremy.jacobson@pnnl.gov} } Other contributors: diff --git a/man/leapR.Rd b/man/leapR.Rd index a448e22..3bd6fb2 100644 --- a/man/leapR.Rd +++ b/man/leapR.Rd @@ -151,13 +151,14 @@ pathway. 
} \examples{ library(leapR) - - # read in the example abundance data - # read in the example transcriptomic data - tdata <- download.file("https://api.figshare.com/v2/file/download/56536214", - method='libcurl',destfile='transData.rda') - load('transData.rda') - p <- file.remove("transData.rda") + library(BiocFileCache) + + path <- tools::R_user_dir("leapR", which = "cache") + bfc <- BiocFileCache(path, ask = FALSE) + + url <- "https://api.figshare.com/v2/file/download/56536214" + tc <- bfcadd(bfc, "tdat", fpath = url) + load(tc) # read in the pathways data("ncipid") diff --git a/vignettes/examples.Rmd b/vignettes/examples.Rmd index 1dbcd3a..4570d83 100644 --- a/vignettes/examples.Rmd +++ b/vignettes/examples.Rmd @@ -31,6 +31,7 @@ library(ggplot2) library(dplyr) library(tibble) library(stringr) +library(BiocFileCache) ``` # Example data @@ -44,22 +45,23 @@ as example. This data can be loaded as follows: ```{r omicsdata, message=FALSE, warning=FALSE} -url <- "https://api.figshare.com/v2/file/download/56536217" -pdata <- download.file(url, method = "libcurl", destfile = "protData.rda") -# as.matrix() -load("protData.rda") -p <- file.remove("protData.rda") +# Currently using BiocFileCache, though I'm not sure it helps +path <- tools::R_user_dir("leapR", which = "cache") +bfc <- BiocFileCache(path, ask = FALSE) + +url <- "https://api.figshare.com/v2/file/download/56536217" +pc <- bfcadd(bfc, "pdat", fpath = url) +load(pc) url <- "https://api.figshare.com/v2/file/download/56536214" -tdata <- download.file(url, method = "libcurl", destfile = "transData.rda") -load("transData.rda") -p <- file.remove("transData.rda") +tc <- bfcadd(bfc, "tdat", fpath = url) +load(tc) url <- "https://api.figshare.com/v2/file/download/56536211" -phdata <- download.file(url, method = "libcurl", destfile = "phosData.rda") -load("phosData.rda") -p <- file.remove("phosData.rda") +phc <- bfcadd(bfc, "phdat", fpath = url) +load(phc) + ``` We also have local data we can load diff --git 
a/vignettes/leapR.Rmd b/vignettes/leapR.Rmd index 45d987c..35e0f6f 100644 --- a/vignettes/leapR.Rmd +++ b/vignettes/leapR.Rmd @@ -44,6 +44,7 @@ library(ggplot2) library(dplyr) library(tibble) library(stringr) +library(BiocFileCache) ``` # Introduction @@ -149,22 +150,21 @@ as example. This data can be loaded as follows: ```{r omicsdata, message=FALSE, warning=FALSE, echo = FALSE} -url <- "https://api.figshare.com/v2/file/download/56536217" -pdata <- download.file(url, method = "libcurl", destfile = "protData.rda") -# as.matrix() -load("protData.rda") +# Currently using BiocFileCache, though I'm not sure it helps +path <-tools::R_user_dir("leapR", which = "cache") +bfc <- BiocFileCache(path, ask = FALSE) -p <- file.remove("protData.rda") +url <- "https://api.figshare.com/v2/file/download/56536217" +pc <- bfcadd(bfc, "pdat", fpath = url) +load(pc) url <- "https://api.figshare.com/v2/file/download/56536214" -tdata <- download.file(url, method = "libcurl", destfile = "transData.rda") -load("transData.rda") -p <- file.remove("transData.rda") +tc <- bfcadd(bfc, "tdat", fpath = url) +load(tc) url <- "https://api.figshare.com/v2/file/download/56536211" -phdata <- download.file(url, method = "libcurl", destfile = "phosData.rda") -load("phosData.rda") -p <- file.remove("phosData.rda") +phc <- bfcadd(bfc, 'phdat', fpath =url) +load(phc) ``` ```{r show_pset_example} diff --git a/vignettes/order-enrichment.Rmd b/vignettes/order-enrichment.Rmd index 336e161..a2e3bad 100644 --- a/vignettes/order-enrichment.Rmd +++ b/vignettes/order-enrichment.Rmd @@ -31,17 +31,26 @@ library(ggplot2) library(dplyr) library(tibble) library(stringr) +library(BiocFileCache) ``` # Load our test proteomics dataset ```{r load data} + + +# Currently using BiocFileCache, though I'm not sure it helps +path <- tools::R_user_dir("leapR", which = "cache") +bfc <- BiocFileCache(path, ask = FALSE) + url <- "https://api.figshare.com/v2/file/download/56536217" -pdata <- download.file(url, method = "libcurl", 
destfile = "protData.rda") +pd <- bfcadd(bfc, 'protdat', url) +load(pd) +#pdata <- download.file(url, method = "libcurl", destfile = "protData.rda") # as.matrix() -load("protData.rda") +#load("protData.rda") -p <- file.remove("protData.rda") +#p <- file.remove("protData.rda") data(shortlist) data(longlist) @@ -69,7 +78,9 @@ cor.res <- do.call(rbind,lapply(1:length(shortlist), function (i) { primary_columns = shortlist[i] ) - colnames(protdata.enrichment.ks) <- paste('ks',colnames(protdata.enrichment.ks),sep='.') + colnames(protdata.enrichment.ks) <- paste('ks', + colnames(protdata.enrichment.ks), + sep = '.') protdata.enrichment.cs <- leapR::leapR( @@ -79,7 +90,9 @@ cor.res <- do.call(rbind,lapply(1:length(shortlist), function (i) { assay_name = "proteomics", primary_columns = shortlist[i] ) - colnames(protdata.enrichment.cs) <- paste('chisq',colnames(protdata.enrichment.cs),sep='.') + colnames(protdata.enrichment.cs) <- paste('chisq', + colnames(protdata.enrichment.cs), + sep = '.') protdata.enrichment.zt <- leapR::leapR( geneset = ncipid, "enrichment_in_order", @@ -89,7 +102,9 @@ cor.res <- do.call(rbind,lapply(1:length(shortlist), function (i) { primary_columns = shortlist[i] ) - colnames(protdata.enrichment.zt) <- paste('ztest',colnames(protdata.enrichment.zt),sep='.') + colnames(protdata.enrichment.zt) <- paste('ztest', + colnames(protdata.enrichment.zt), + sep = '.') paths <- rownames(protdata.enrichment.ks) allvals <- cbind(protdata.enrichment.cs[paths,],