Skip to content

Commit c7b195b

Browse files
committed
Merge branch 'release/0.0.6'
2 parents ee405e9 + 1ca23e3 commit c7b195b

26 files changed

+901
-408
lines changed

.Rbuildignore

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,12 @@ README[.]md
2929
^[.]devel
3030
^[.]test
3131
^[.]check
32+
.Rhistory
33+
R/.Rhistory
3234

3335
#----------------------------
3436
# Temp scripts
3537
#----------------------------
36-
^old_scripts/*
38+
^old_scripts/*
39+
^.*\.Rproj$
40+
^\.Rproj\.user$

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ vignettes/figure
55
*.Rproj
66
vignettes/R_cache
77
vignettes/R_figure
8-
old_scripts
8+
old_scripts

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ language: r
1414
cache: packages
1515

1616
# R versions to be tested on
17-
r:
17+
r:
1818
- bioc-release
1919
- bioc-devel
2020

DESCRIPTION

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,44 @@
11
Package: scone
2-
Version: 0.0.5
2+
Version: 0.0.6
33
Title: Single Cell Overview of Normalized Expression data
44
Description: scone is a package to compare and rank the performance of different normalization schemes in real single-cell RNA-seq datasets.
55
Authors@R: c(person("Michael", "Cole", email = "mbeloc@gmail.com",
66
role = c("aut", "cre", "cph")),
77
person("Davide", "Risso", email = "risso.davide@gmail.com",
8-
role = c("aut")))
9-
Author: Michael Cole [aut, cre, cph], Davide Risso [aut]
8+
role = c("aut", "cph")))
9+
Author: Michael Cole [aut, cre, cph], Davide Risso [aut, cph]
1010
Maintainer: Michael Cole <mbeloc@gmail.com>
11-
Date: 2016-05-12
11+
Date: 2016-07-22
1212
License: Artistic-2.0
1313
Depends:
1414
R (>= 3.3)
1515
Imports:
16+
aroma.light,
1617
BiocParallel,
18+
class,
1719
cluster,
1820
DESeq,
19-
EDASeq,
20-
MASS,
21-
RUVSeq,
22-
aroma.light,
23-
class,
2421
diptest,
22+
EDASeq,
2523
edgeR,
2624
fpc,
2725
gplots,
26+
grDevices,
2827
limma,
28+
MASS,
2929
matrixStats,
3030
mixtools,
31-
grDevices
31+
grDevices,
32+
boot,
33+
shiny,
34+
miniUI,
35+
rhdf5,
36+
RUVSeq
3237
Suggests:
3338
knitr,
3439
rmarkdown,
3540
testthat
3641
VignetteBuilder: knitr
3742
LazyLoad: yes
38-
BugReports: https://github.com/epurdom/clusterExperiment/issues
43+
BugReports: https://github.com/YosefLab/scone/issues
3944
RoxygenNote: 5.0.1

NAMESPACE

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,13 @@ export(TMM_FN)
99
export(UQ_FN)
1010
export(UQ_FN_POS)
1111
export(biplot_colored)
12+
export(biplot_interactive)
1213
export(estimate_ziber)
1314
export(estimate_zinb)
1415
export(factor_sample_filter)
15-
export(impute_ziber_simp)
16+
export(get_normalized)
17+
export(impute_expectation)
18+
export(impute_null)
1619
export(impute_zinb)
1720
export(lm_adjust)
1821
export(make_design)
@@ -26,6 +29,8 @@ importFrom(EDASeq,betweenLaneNormalization)
2629
importFrom(MASS,glm.nb)
2730
importFrom(RUVSeq,RUVg)
2831
importFrom(aroma.light,normalizeQuantileRank.matrix)
32+
importFrom(boot,inv.logit)
33+
importFrom(boot,logit)
2934
importFrom(class,knn)
3035
importFrom(cluster,silhouette)
3136
importFrom(diptest,dip.test)
@@ -43,7 +48,23 @@ importFrom(limma,lmFit)
4348
importFrom(matrixStats,colIQRs)
4449
importFrom(matrixStats,colMedians)
4550
importFrom(matrixStats,rowMedians)
51+
importFrom(miniUI,gadgetTitleBar)
52+
importFrom(miniUI,miniContentPanel)
53+
importFrom(miniUI,miniPage)
4654
importFrom(mixtools,normalmixEM)
55+
importFrom(rhdf5,h5createFile)
56+
importFrom(rhdf5,h5ls)
57+
importFrom(rhdf5,h5read)
58+
importFrom(rhdf5,h5write)
59+
importFrom(rhdf5,h5write.default)
60+
importFrom(shiny,brushedPoints)
61+
importFrom(shiny,observeEvent)
62+
importFrom(shiny,plotOutput)
63+
importFrom(shiny,renderPlot)
64+
importFrom(shiny,renderText)
65+
importFrom(shiny,runGadget)
66+
importFrom(shiny,stopApp)
67+
importFrom(shiny,verbatimTextOutput)
4768
importFrom(stats,approx)
4869
importFrom(stats,as.formula)
4970
importFrom(stats,binomial)

R/biplot_interactive.R

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#' Interactive biplot
2+
#'
3+
#' This is a wrapper around \code{\link{biplot_colored}}, which creates a shiny
4+
#' gadget to allow the user to select specific points in the graph.
5+
#'
6+
#' @details Since this is based on the shiny gadget feature, it will not work in
7+
#' static documents, such as vignettes or markdown / knitr documents.
8+
#' See \code{biplot_colored} for more details on the internals.
9+
#'
10+
#' @param data a data.frame containing the data to be plotted.
11+
#' @param scores a numeric vector used to color the points.
12+
#'
13+
#' @importFrom miniUI gadgetTitleBar miniContentPanel miniPage gadgetTitleBar
14+
#' @importFrom shiny plotOutput renderPlot observeEvent brushedPoints runGadget verbatimTextOutput stopApp renderText
15+
#'
16+
#' @export
17+
#'
18+
#' @examples
19+
#' \dontrun{
20+
#' mat <- matrix(rnorm(1000), ncol=10)
21+
#' colnames(mat) <- paste("X", 1:ncol(mat), sep="")
22+
#'
23+
#' biplot_interactive(mat, mat[,1])
24+
#' }
25+
biplot_interactive <- function(data, scores, ...) {
26+
27+
data <- as.data.frame(data)
28+
scores <- as.numeric(scores)
29+
30+
ui <- miniPage(
31+
gadgetTitleBar("Drag to select points"),
32+
miniContentPanel(
33+
# The brush="brush" argument means we can listen for
34+
# brush events on the plot using input$brush.
35+
plotOutput("plot1", height = "80%", brush = "plot_brush"),
36+
verbatimTextOutput("info")
37+
)
38+
)
39+
40+
server <- function(input, output, session) {
41+
42+
# Compute PCA
43+
pc_obj <- prcomp(data, center = TRUE, scale = FALSE)
44+
bp_obj <- biplot_colored(pc_obj, y = scores)
45+
46+
# Render the plot
47+
output$plot1 <- renderPlot({
48+
# Biplot
49+
biplot_colored(pc_obj, y = scores, ...)
50+
})
51+
52+
data_out <- cbind(data, bp_obj)
53+
54+
output$info <- renderText({
55+
xy_range_str <- function(e) {
56+
if(is.null(e)) return("NULL\n")
57+
idx <- which(bp_obj[,1] >= e$xmin & bp_obj[,1] <= e$xmax &
58+
bp_obj[,2] >= e$ymin & bp_obj[,2] <= e$ymax)
59+
paste0(rownames(data)[idx], collapse = "\n")
60+
}
61+
xy_range_str(input$plot_brush)
62+
})
63+
64+
# Handle the Done button being pressed.
65+
observeEvent(input$done, {
66+
# Return the brushed points. See ?shiny::brushedPoints.
67+
stopApp(brushedPoints(data_out, input$plot_brush, xvar="PC1", yvar="PC2"))
68+
})
69+
}
70+
71+
runGadget(ui, server)
72+
}

R/sample_filtering.R

Lines changed: 55 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,53 @@
11
#' Fit Logistic Regression Model of FNR against set of positive control (ubiquitously expressed) genes
22
#'
3-
#' @details logit(Probability of False Negative) ~ a + b*(mean log10p1 expression) .
3+
#' @details logit(Probability of False Negative) ~ a + b*(median log-expression) .
44
#'
55
#' @param expr matrix The data matrix in transcript-proportional units (genes in rows, cells in columns).
66
#' @param pos_controls A logical vector indexing positive control genes that will be used to compute false-negative rate characteristics.
7+
#' User must provide at least 2 positive control genes.
78
#' @param fn_tresh Inclusive threshold for negative detection. Default 0.01.
9+
#' fn_tresh must be non-negative.
810
#'
9-
#' @return A list of logistic regression coefficients corresponding to glm fits in each sample. If a fit did not converge, the result reported is NA.
11+
#' @return A matrix of logistic regression coefficients corresponding to glm fits in each sample (a and b in columns 1 and 2 respectively). If the a & b fit does not converge, b is set to zero and only a is estimated.
12+
#'
13+
#' @importFrom boot logit
14+
#' @importFrom matrixStats rowMedians
1015
#'
1116
simple_FNR_params = function(expr, pos_controls, fn_tresh = 0.01){
1217

13-
# Mean log10p1 expression
14-
mu_obs = rowMeans(log10(expr[pos_controls,]+1))
15-
16-
# Drop-outs
17-
drop_outs = 0 + (expr[pos_controls,] <= fn_tresh)
18+
stopifnot(!any(is.na(pos_controls)))
19+
20+
if (sum(pos_controls) < 2){
21+
stop("User must provide at least 2 positive control genes")
22+
}
23+
24+
if (fn_tresh < 0){
25+
stop("fn_tresh must be non-negative")
26+
}
27+
28+
pos_expr = expr[pos_controls,] # Selecting positive-control genes
29+
is_drop = pos_expr <= fn_tresh # Identify false negatives
30+
pos_expr[is_drop] = NA # Set false negatives to NA
31+
drop_outs = 0 + is_drop # Numeric drop-out state
32+
drop_rate = colMeans(drop_outs) # Total drop-out rate per sample
33+
34+
# Median log-expression in positive observations
35+
mu_obs = log(rowMedians(pos_expr,na.rm = TRUE))
36+
if(any(is.na(mu_obs))){
37+
stop("Median log-expression in positive observations NA for some positive control gene/s")
38+
}
1839

1940
# Logistic Regression Model of FNR
20-
ref.glms = list()
21-
for (si in 1:dim(drop_outs)[2]){
41+
logistic_coef = matrix(0,ncol(drop_outs),2)
42+
for (si in seq_len(ncol(drop_outs))){
2243
fit = suppressWarnings(glm(cbind(drop_outs[,si],1 - drop_outs[,si]) ~ mu_obs,family=binomial(logit)))
2344
if(fit$converged){
24-
ref.glms[[si]] = fit$coefficients
45+
logistic_coef[si,] = fit$coefficients
2546
} else {
26-
ref.glms[[si]] = NA
47+
logistic_coef[si,1] = logit(drop_rate[si])
2748
}
2849
}
29-
return(ref.glms)
50+
return(logistic_coef)
3051
}
3152

3253
#' metric-based sample filtering: function to filter single-cell RNA-Seq libraries.
@@ -51,7 +72,7 @@ simple_FNR_params = function(expr, pos_controls, fn_tresh = 0.01){
5172
#' If NULL, filtered_fnr will be returned NA.
5273
#' @param scale. logical. Will expression be scaled by total expression for FNR computation? Default = FALSE
5374
#' @param glen Gene lengths for gene-length normalization (normalized data used in FNR computation).
54-
#' @param AUC_range An array of two values, representing range over which FNR AUC will be computed (log10(expr_units + 1)). Default c(0,6)
75+
#' @param AUC_range An array of two values, representing range over which FNR AUC will be computed (log(expr_units)). Default c(0,15)
5576
#' @param zcut A numeric value determining threshold Z-score for sd, mad, and mixture sub-criteria. Default 1.
5677
#' If NULL, only hard threshold sub-criteria will be applied.
5778
#' @param mixture A logical value determining whether mixture modeling sub-criterion will be applied per primary criterion (metric).
@@ -61,11 +82,11 @@ simple_FNR_params = function(expr, pos_controls, fn_tresh = 0.01){
6182
#' @param hard_nreads numeric. Hard (lower bound on) nreads threshold. Default 25000.
6283
#' @param hard_ralign numeric. Hard (lower bound on) ralign threshold. Default 15.
6384
#' @param hard_breadth numeric. Hard (lower bound on) breadth threshold. Default 0.2.
64-
#' @param hard_fnr numeric. Hard (upper bound on) fnr threshold. Default 3.
65-
#' @param suff_nreads numeric. If not null, serves as an upper bound on nreads threshold.
66-
#' @param suff_ralign numeric. If not null, serves as an upper bound on ralign threshold. Default 65.
67-
#' @param suff_breadth numeric. If not null, serves as an upper bound on breadth threshold. Default 0.8.
68-
#' @param suff_fnr numeric. If not null, serves as an lower bound on fnr threshold.
85+
#' @param hard_auc numeric. Hard (upper bound on) fnr auc threshold. Default 10.
86+
#' @param suff_nreads numeric. If not null, serves as an overriding upper bound on nreads threshold.
87+
#' @param suff_ralign numeric. If not null, serves as an overriding upper bound on ralign threshold.
88+
#' @param suff_breadth numeric. If not null, serves as an overriding upper bound on breadth threshold.
89+
#' @param suff_auc numeric. If not null, serves as an overriding lower bound on fnr auc threshold.
6990
#' @param plot logical. Should a plot be produced?
7091
#' @param hist_breaks hist() breaks argument. Ignored if `plot=FALSE`.
7192
#'
@@ -79,15 +100,16 @@ simple_FNR_params = function(expr, pos_controls, fn_tresh = 0.01){
79100
#'
80101
#'@importFrom mixtools normalmixEM
81102
#'@importFrom diptest dip.test
103+
#'@importFrom boot inv.logit
82104
#'@export
83105
#'
84106
#'
85107
metric_sample_filter = function(expr, nreads = colSums(expr), ralign = NULL,
86108
gene_filter = NULL, pos_controls = NULL,scale. = FALSE,glen = NULL,
87-
AUC_range = c(0,6), zcut = 1,
109+
AUC_range = c(0,15), zcut = 1,
88110
mixture = TRUE, dip_thresh = 0.05,
89-
hard_nreads = 25000, hard_ralign = 15, hard_breadth = 0.2, hard_fnr = 3,
90-
suff_nreads = NULL, suff_ralign = 65, suff_breadth = 0.8, suff_fnr = NULL,
111+
hard_nreads = 25000, hard_ralign = 15, hard_breadth = 0.2, hard_auc = 10,
112+
suff_nreads = NULL, suff_ralign = NULL, suff_breadth = NULL, suff_auc = NULL,
91113
plot = FALSE, hist_breaks = 10){
92114

93115
criterion_count = 0
@@ -210,17 +232,17 @@ metric_sample_filter = function(expr, nreads = colSums(expr), ralign = NULL,
210232
}
211233

212234
# Compute FNR AUC
213-
ref.glms = simple_FNR_params(expr = nexpr, pos_controls = pos_controls)
235+
logistic_coef = simple_FNR_params(expr = nexpr, pos_controls = pos_controls)
214236
AUC = NULL
215237
for (si in 1:dim(expr)[2]){
216-
if(!any(is.na(ref.glms[[si]]))){
217-
AUC[si] = log(exp(ref.glms[[si]][1] + ref.glms[[si]][2] * AUC_range[2]) + 1)/ref.glms[[si]][2] - log(exp(ref.glms[[si]][1] + ref.glms[[si]][2] * AUC_range[1]) + 1)/ref.glms[[si]][2]
238+
if(logistic_coef[si,2] != 0){
239+
AUC[si] = log(exp(logistic_coef[si,1] + logistic_coef[si,2] * AUC_range[2]) + 1)/logistic_coef[si,2] - log(exp(logistic_coef[si,1] + logistic_coef[si,2] * AUC_range[1]) + 1)/logistic_coef[si,2]
218240
} else {
219-
stop("glm fit did not converge")
241+
AUC[si] = inv.logit(logistic_coef[si,1])*(AUC_range[2] - AUC_range[1])
220242
}
221243
}
222244

223-
AUC_CUTOFF = hard_fnr
245+
AUC_CUTOFF = hard_auc
224246

225247
if (!is.null(zcut)){
226248

@@ -239,8 +261,8 @@ metric_sample_filter = function(expr, nreads = colSums(expr), ralign = NULL,
239261
}
240262
}
241263

242-
if(!is.null(suff_fnr)){
243-
AUC_CUTOFF = max(AUC_CUTOFF,suff_fnr)
264+
if(!is.null(suff_auc)){
265+
AUC_CUTOFF = max(AUC_CUTOFF,suff_auc)
244266
}
245267
}
246268
filtered_fnr = AUC > AUC_CUTOFF
@@ -254,7 +276,8 @@ metric_sample_filter = function(expr, nreads = colSums(expr), ralign = NULL,
254276

255277
is_bad = rep(FALSE,dim(expr)[2])
256278

257-
par(mfcol = c(criterion_count,2))
279+
op <- par(mfcol = c(criterion_count,2))
280+
on.exit(par(op))
258281

259282
if(!is.null(nreads)){
260283
is_bad = filtered_nreads
@@ -304,7 +327,7 @@ metric_sample_filter = function(expr, nreads = colSums(expr), ralign = NULL,
304327
}else{
305328
hist(AUC, main = paste0("auc: Thresh = ",signif(AUC_CUTOFF,3)," , Rm = ",sum(filtered_fnr)," , Tot_Rm = ",sum(is_bad)), xlab = "FNR AUC", breaks = hist_breaks)
306329
}
307-
abline(v = hard_fnr, col = "yellow", lty = 1)
330+
abline(v = hard_auc, col = "yellow", lty = 1)
308331
abline(v = AUC_CUTOFF, col = "blue", lty = 2)
309332
}
310333

@@ -320,13 +343,6 @@ metric_sample_filter = function(expr, nreads = colSums(expr), ralign = NULL,
320343
if(!is.null(pos_controls)){
321344
hist(AUC[!is_bad], main = paste0("auc: Tot = ",sum(!is_bad)), xlab = "FNR AUC", breaks = hist_breaks)
322345
}
323-
324-
# v = rbind(filtered_nreads,filtered_ralign,filtered_breadth,filtered_fnr)
325-
# rownames(v) = c("nreads","ralign","breadth","fnr")
326-
# v = na.omit(v)
327-
# m = v %*% t(v)
328-
#
329-
# barplot(m, beside = TRUE, legend.text = TRUE)
330346
}
331347

332348
return(list(filtered_nreads = filtered_nreads,
@@ -417,8 +433,9 @@ factor_sample_filter = function(expr, qual, gene_filter = NULL, max_exp_pcs = 5,
417433
num_qual_pcs = which(csum > min_qual_variance)[1]
418434

419435
if(plot){
436+
op <- par(mfrow = c(2,1))
437+
on.exit(par(op))
420438
for (i in 1:num_qual_pcs){
421-
par(mfrow = c(2,1))
422439
hist(qpc$x[,i],breaks = hist_breaks, main = paste0("Distribution of Quality PC ",i), xlab = paste0("Qual PC",i))
423440
barplot(abs(qpc$rotation[,i]),col = c("red","green")[1 + (qpc$rotation[,i] > 0)], cex.names = .25,horiz = T, las=1, main = "Loadings")
424441
}

0 commit comments

Comments
 (0)