Skip to content

Commit ac57693

Browse files
committed
Merge branch 'release/v0.0.3'
2 parents 621ff1b + 180125b commit ac57693

38 files changed

+1277
-1167
lines changed

.gitignore

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
.Rproj.user
2+
.Rhistory
3+
.RData
4+
vignettes/figure
5+
*.Rproj
6+
vignettes/R_cache
7+
vignettes/R_figure
8+
old_scripts

DESCRIPTION

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,39 @@
11
Package: scone
2-
Version: 0.0.1
2+
Version: 0.0.3
33
Title: Single Cell Overview of Normalized Expression data
4-
Description: SCONE.
4+
Description: scone is a package to compare and rank the performance of different normalization schemes in real single-cell RNA-seq datasets.
55
Authors@R: c(person("Michael", "Cole", email = "mbeloc@gmail.com",
66
role = c("aut", "cre", "cph")),
77
person("Davide", "Risso", email = "risso.davide@gmail.com",
88
role = c("aut")))
99
Author: Michael Cole [aut, cre, cph] and Davide Risso [aut]
1010
Maintainer: Michael Cole <mbeloc@gmail.com>
11-
Imports: BiocParallel, DESeq, EDASeq, MASS, RUVSeq, aroma.light, class,
12-
diptest, edgeR, fpc, gplots, limma, matrixStats, mixtools, scde
13-
Date: 02-06-2016
11+
Date: 2016-02-14
1412
License: Artistic-2.0
13+
Depends:
14+
R (>= 3.1)
15+
Imports:
16+
BiocParallel,
17+
clusterCells,
18+
DESeq,
19+
EDASeq,
20+
MASS,
21+
RUVSeq,
22+
aroma.light,
23+
class,
24+
diptest,
25+
edgeR,
26+
fpc,
27+
gplots,
28+
limma,
29+
matrixStats,
30+
mixtools,
31+
scde
32+
Suggests:
33+
knitr,
34+
rmarkdown,
35+
testthat
36+
VignetteBuilder: knitr
1537
LazyLoad: yes
38+
BugReports: https://github.com/epurdom/clusterExperiment/issues
1639
RoxygenNote: 5.0.1

NAMESPACE

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,11 @@ export(FQ_FN_POS)
77
export(TMM_FN)
88
export(UQ_FN)
99
export(UQ_FN_POS)
10+
export(biplot_colored)
11+
export(estimate_ziber)
1012
export(estimate_zinb)
1113
export(factor_sample_filter)
14+
export(impute_ziber_simp)
1215
export(impute_zinb)
1316
export(lm_adjust)
1417
export(make_design)
@@ -23,9 +26,11 @@ importFrom(MASS,glm.nb)
2326
importFrom(RUVSeq,RUVg)
2427
importFrom(aroma.light,normalizeQuantileRank.matrix)
2528
importFrom(class,knn)
29+
importFrom(clusterCells,subsampleClustering)
2630
importFrom(diptest,dip.test)
2731
importFrom(edgeR,calcNormFactors)
2832
importFrom(fpc,pamk)
33+
importFrom(grDevices,colorRampPalette)
2934
importFrom(limma,lmFit)
3035
importFrom(matrixStats,rowMedians)
3136
importFrom(mixtools,normalmixEM)

R/SCONE_DEFAULTS.R

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,6 @@ FQ_FN_POS = function(ei){
6565
quant_mat = NULL
6666
# Re-ordered Data Matrix
6767
x_mat = NULL
68-
print("Sorting Matrix...")
6968
# For each sample:
7069
for (i in 1:dim(ei)[2]){
7170
# Sort data and replace zeroes with NA
@@ -79,8 +78,6 @@ FQ_FN_POS = function(ei){
7978

8079
# Vector form of quantile index matrix
8180
quant_out = as.numeric(as.vector(quant_mat))
82-
print("Complete.")
83-
print("Spline Interpolation...")
8481
# Interpolation Matrix (Values of all quantiles)
8582
inter_mat = rep(0,length(quant_out))
8683
ob_counts = rep(0,length(quant_out)) # Number of observations for averaging
@@ -95,8 +92,7 @@ FQ_FN_POS = function(ei){
9592
inter[is.na(inter)] = 0
9693
inter_mat = inter_mat + inter
9794
}
98-
print("Complete.")
99-
95+
10096
# Average over the interpolated values from all samples
10197
inter_mean = inter_mat/ob_counts
10298

@@ -106,7 +102,9 @@ FQ_FN_POS = function(ei){
106102
for (i in 1:dim(ei)[2]){
107103
eo[,i] = rev(eo[,i])[order(order(ei[,i]))]
108104
}
109-
print("Finished!")
105+
106+
rownames(eo) = rownames(ei)
107+
colnames(eo) = colnames(ei)
110108
return(eo)
111109
}
112110

R/biplot.R

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
## When porting the package to S4, we can make this a biplot method
2+
3+
#' Another implementation of the biplot function
4+
#'
5+
#' Function to plot a biplot with no point labels and with points color-coded
6+
#' according to a certain quantitative variable, for instance the rank of the
7+
#' normalization performance or the expression of a certain gene.
8+
#'
9+
#' This function implements the biplot only for \code{\link[stats]{prcomp}}
10+
#' objects. Eventually, we will turn this into an S4 method.
11+
#'
12+
#' @param x the result of a call to \code{\link[stats]{prcomp}}.
13+
#' @param y the rank value that should be used to color the points.
14+
#' @param choices which principal components to plot. Only 2D plots are
15+
#' possible for now. Default to first two PCs.
16+
#' @param expand numeric value used to adjust the spread of the arrows relative
17+
#' to the points.
18+
#' @param ... passed to plot.
19+
#'
20+
#' @importFrom grDevices colorRampPalette
21+
#' @export
22+
#'
23+
#' @examples
24+
#' mat <- matrix(rnorm(1000), ncol=10)
25+
#' colnames(mat) <- paste("X", 1:ncol(mat), sep="")
26+
#'
27+
#' pc <- prcomp(mat)
28+
#'
29+
#' biplot_colored(pc, rank(pc$x[,1]))
30+
#'
31+
biplot_colored <- function(x, y, choices=1:2, expand=1, ...) {
32+
33+
lam <- x$sdev[choices]
34+
n <- NROW(x$x)
35+
lam <- lam * sqrt(n)
36+
37+
xx <- t(t(x$x[, choices])/lam)
38+
yy <- t(t(x$rotation[, choices]) * lam)
39+
40+
ratio <- max(range(yy)/range(xx))/expand
41+
42+
cols <- rev(colorRampPalette(c("black","navyblue","mediumblue","dodgerblue3","aquamarine4","green4","yellowgreen","yellow"))(length(y)))[y]
43+
plot(xx, pch=19, col=cols, ...)
44+
45+
labs <- rownames(yy)
46+
47+
text(yy/ratio, labels=labs, col=2)
48+
arrows(0, 0, yy[, 1] * 0.8/ratio, yy[, 2] * 0.8/ratio, col = 2, length = 0.1)
49+
}

R/data.R

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#' Positive and negative control genes
2+
#'
3+
#' Sets of positive and negative control genes, useful for input in
4+
#' \code{\link{scone}}.
5+
#'
6+
#' These gene sets can be used as negative or positive controls, either for RUV
7+
#' normalization or for evaluation and ranking of the normalization strategies.
8+
#'
9+
#' @details The datasets are in the form of \code{data.frame}, with at least one
10+
#' column containing the gene symbols and optionally a second column
11+
#' containing additional information (such as cortical layer or cell cycle
12+
#' phase).
13+
#'
14+
#' @details Note that the gene symbols follow the mouse conventions (i.e.,
15+
#' capitalized) or the human conventions (i.e, all upper-case), based on the
16+
#' original pubblication. One can use the \code{\link[base]{toupper}},
17+
#' \code{\link[base]{tolower}}, and \code{\link[tools]{toTitleCase}} functions
18+
#' to convert the symbols.
19+
#'
20+
#' @details The genes in \code{cortical_markers} are from Figure 3 of Molyneaux
21+
#' et al. (2007). The genes in \code{housekeeping} are from Eisenberg and
22+
#' Levanon (2003) and in \code{housekeeping_revised} are from Eisenberg and
23+
#' Levanon (2013). The genes in \code{cellcycle_genes} are adapted from
24+
#' Kowalczyk et al. (2015).
25+
#'
26+
#' @references Molyneaux, B.J., Arlotta, P., Menezes, J.R. and Macklis, J.D..
27+
#' Neuronal subtype specification in the cerebral cortex. Nature Reviews
28+
#' Neuroscience, 2007, 8(6):427-437.
29+
#' @references Eisenberg E, Levanon EY. Human housekeeping genes are compact.
30+
#' Trends in Genetics, 2003, 19(7):362-5.
31+
#' @references Eisenberg E, Levanon EY. Human housekeeping genes, revisited.
32+
#' Trends in Genetics, 2013, 29(10):569-74.
33+
#' @references Kowalczyk, M.S., Tirosh, I., Heckl, D., Rao, T.N., Dixit, A.,
34+
#' Haas, B.J., Schneider, R.K., Wagers, A.J., Ebert, B.L. and Regev, A.
35+
#' Single-cell RNA-seq reveals changes in cell cycle and differentiation
36+
#' programs upon aging of hematopoietic stem cells. Genome research, 2015.
37+
#'
38+
#' @name control_genes
39+
#'
40+
#' @docType data
41+
#' @aliases cortical_markers housekeeping housekeeping_revised cellcycle_genes
42+
#'
43+
#' @examples
44+
#' data(housekeeping)
45+
#' data(housekeeping_revised)
46+
#' data(cellcycle_genes)
47+
#' data(cortical_markers)
48+
NULL

R/helper.R

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
11
#' Internal Function
2-
#'
2+
#'
33
#' This function is used internally in scone to parse the variables used to generate the design matrices.
4-
#'
4+
#'
55
#' @param pars character. A vector of parameters corresponding to a row of params.
66
#' @param bio factor. The biological factor of interest.
77
#' @param batch factor. The known batch effects.
88
#' @param ruv_factors list. A list containing the factors of unwanted variation.
99
#' @param qc matrix. The principal components of the QC metrics.
10-
#'
10+
#'
1111
#' @return A list with the variables to be passed to make_design.
1212
parse_row <- function(pars, bio, batch, ruv_factors, qc) {
1313
sc_name <- paste(pars[1:2], collapse="_")
14-
14+
1515
W <- out_bio <- out_batch <- NULL
16-
16+
1717
if(pars[3]!="no_uv") {
1818
parsed <- strsplit(as.character(pars[3]), "=")[[1]]
1919
if(grepl("ruv", parsed[1])) {
@@ -22,34 +22,34 @@ parse_row <- function(pars, bio, batch, ruv_factors, qc) {
2222
W <- qc[,1:as.numeric(parsed[2])]
2323
}
2424
}
25-
25+
2626
if(pars[4]=="bio") {
2727
out_bio <- bio
2828
}
29-
29+
3030
if(pars[5]=="batch") {
3131
out_batch <- batch
3232
}
33-
33+
3434
return(list(sc_name=sc_name, W=W, bio=out_bio, batch=out_batch))
3535
}
3636

3737
#' Function to make a design matrix
38-
#'
38+
#'
3939
#' This function is useful to create a design matrix, when the covariates are two (possibly nested) factors
4040
#' and one or more continuous variables.
41-
#'
41+
#'
4242
#' @details If nested=TRUE a nested design is used, i.e., the batch variable is assumed to be nested within
4343
#' the bio variable. Here, nested means that each batch is made of observations from only one level of bio,
4444
#' while each level of bio may contain multiple batches.
45-
#'
45+
#'
4646
#' @export
47-
#'
47+
#'
4848
#' @param bio factor. The biological factor of interest.
4949
#' @param batch factor. The known batch effects.
5050
#' @param W numeric. Either a vector or matrix containing one or more continuous covariates (e.g. RUV factors).
5151
#' @param nested logical. Whether or not to consider a nested design (see details).
52-
#'
52+
#'
5353
#' @return The design matrix.
5454
make_design <- function(bio, batch, W, nested=FALSE) {
5555
if(nested & (is.null(bio) | is.null(batch))) {
@@ -65,7 +65,7 @@ make_design <- function(bio, batch, W, nested=FALSE) {
6565
stop("batch must be a factor.")
6666
}
6767
}
68-
68+
6969
f <- "~ 1"
7070
if(!is.null(bio)) {
7171
f <- paste(f, "bio", sep="+")
@@ -76,12 +76,12 @@ make_design <- function(bio, batch, W, nested=FALSE) {
7676
if(!is.null(W)) {
7777
f <- paste(f, "W", sep="+")
7878
}
79-
79+
8080
if(is.null(bio) & is.null(batch) & is.null(W)) {
8181
return(NULL)
8282
} else if (!is.null(bio) & !is.null(batch) & nested) {
8383
n_vec <- tapply(batch, bio, function(x) nlevels(droplevels(x)))
84-
84+
8585
mat = matrix(0,nrow = sum(n_vec),ncol = sum(n_vec - 1))
8686
xi = 1
8787
yi = 1
@@ -96,25 +96,25 @@ make_design <- function(bio, batch, W, nested=FALSE) {
9696
xi = xi + 1
9797
}
9898
}
99-
100-
return(model.matrix(as.formula(f), contrasts=list(bio=contr.sum, batch=mat)))
99+
100+
return(model.matrix(as.formula(f), contrasts=list(bio=contr.sum, batch=mat)))
101101
} else {
102-
return(model.matrix(as.formula(f)))
102+
return(model.matrix(as.formula(f)))
103103
}
104104
}
105105

106106
#' Function to perform linear batch effect correction
107-
#'
107+
#'
108108
#' Given a matrix with log expression values and a design matrix, this function fits a linear model
109109
#' and removes the effects of the batch factor as well as of the linear variables encoded in W.
110-
#'
110+
#'
111111
#' @details The function assumes that the columns of the design matrix corresponding to the variable
112112
#' for which expression needs to be adjusted, start with either the word "batch" or the letter "W" (case sensitive).
113113
#' Any other covariate (including the intercept) is kept.
114-
#'
114+
#'
115115
#' @importFrom limma lmFit
116116
#' @export
117-
#'
117+
#'
118118
#' @param log_expr matrix. The log gene expression (genes in row, samples in columns).
119119
#' @param design_mat matrix. The design matrix (usually the result of make_design).
120120
#' @param batch factor. A factor with the batch information.
@@ -125,13 +125,13 @@ lm_adjust <- function(log_expr, design_mat, batch=NULL, weights=NULL) {
125125

126126
uvind <- grep("^W", colnames(design_mat))
127127
bind <- grep("^batch", colnames(design_mat))
128-
128+
129129
if(length(uvind)) {
130130
uv_term <- t(design_mat[,uvind] %*% t(lm_object$coefficients[,uvind]))
131131
} else {
132132
uv_term <- 0
133133
}
134-
134+
135135
if(length(bind)) {
136136
if(is.character(attr(design_mat,"contrasts")$batch)) {
137137
contr <- get(attr(design_mat,"contrasts")$batch)(nlevels(batch))
@@ -142,6 +142,6 @@ lm_adjust <- function(log_expr, design_mat, batch=NULL, weights=NULL) {
142142
} else {
143143
batch_term <- 0
144144
}
145-
146-
log_norm <- log_expr - batch_term - uv_term
145+
146+
return(log_expr - batch_term - uv_term)
147147
}

0 commit comments

Comments
 (0)