YosefLab
diff --git a/‎.gitignore‎
Lines changed: 8 additions & 0 deletions b/‎.gitignore‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎DESCRIPTION‎
Lines changed: 28 additions & 5 deletions b/‎DESCRIPTION‎
Lines changed: 28 additions & 5 deletions
diff --git a/‎NAMESPACE‎
Lines changed: 5 additions & 0 deletions b/‎NAMESPACE‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎R/SCONE_DEFAULTS.R‎
Lines changed: 4 additions & 6 deletions b/‎R/SCONE_DEFAULTS.R‎
Lines changed: 4 additions & 6 deletions
diff --git a/‎R/biplot.R‎
Lines changed: 49 additions & 0 deletions b/‎R/biplot.R‎
Lines changed: 49 additions & 0 deletions
diff --git a/‎R/data.R‎
Lines changed: 48 additions & 0 deletions b/‎R/data.R‎
Lines changed: 48 additions & 0 deletions
diff --git a/‎R/helper.R‎
Lines changed: 27 additions & 27 deletions b/‎R/helper.R‎
Lines changed: 27 additions & 27 deletions
@@ -0,0 +1,8 @@
+.Rproj.user
+.Rhistory
+.RData
+vignettes/figure
+*.Rproj
+vignettes/R_cache
+vignettes/R_figure
+old_scripts
@@ -1,16 +1,39 @@
 Package: scone
-Version: 0.0.1
+Version: 0.0.3
 Title: Single Cell Overview of Normalized Expression data
-Description: SCONE.
+Description: scone is a package to compare and rank the performance of different normalization schemes in real single-cell RNA-seq datasets.
 Authors@R: c(person("Michael", "Cole", email = "mbeloc@gmail.com",
 	          role = c("aut", "cre", "cph")),
 	     person("Davide", "Risso", email = "risso.davide@gmail.com",
 	          role = c("aut")))
 Author: Michael Cole [aut, cre, cph] and Davide Risso [aut]
 Maintainer: Michael Cole <mbeloc@gmail.com>
-Imports: BiocParallel, DESeq, EDASeq, MASS, RUVSeq, aroma.light, class,
-	 diptest, edgeR, fpc, gplots, limma, matrixStats, mixtools, scde
-Date: 02-06-2016
+Date: 2016-02-14
 License: Artistic-2.0
+Depends:
+  R (>= 3.1)
+Imports:
+  BiocParallel,
+  clusterCells,
+  DESeq,
+  EDASeq,
+  MASS,
+  RUVSeq,
+  aroma.light,
+  class,
+  diptest,
+  edgeR,
+  fpc,
+  gplots,
+  limma,
+  matrixStats,
+  mixtools,
+  scde
+Suggests:
+  knitr,
+  rmarkdown,
+  testthat
+VignetteBuilder: knitr
 LazyLoad: yes
+BugReports: https://github.com/epurdom/clusterExperiment/issues
 RoxygenNote: 5.0.1
@@ -7,8 +7,11 @@ export(FQ_FN_POS)
 export(TMM_FN)
 export(UQ_FN)
 export(UQ_FN_POS)
+export(biplot_colored)
+export(estimate_ziber)
 export(estimate_zinb)
 export(factor_sample_filter)
+export(impute_ziber_simp)
 export(impute_zinb)
 export(lm_adjust)
 export(make_design)
@@ -23,9 +26,11 @@ importFrom(MASS,glm.nb)
 importFrom(RUVSeq,RUVg)
 importFrom(aroma.light,normalizeQuantileRank.matrix)
 importFrom(class,knn)
+importFrom(clusterCells,subsampleClustering)
 importFrom(diptest,dip.test)
 importFrom(edgeR,calcNormFactors)
 importFrom(fpc,pamk)
+importFrom(grDevices,colorRampPalette)
 importFrom(limma,lmFit)
 importFrom(matrixStats,rowMedians)
 importFrom(mixtools,normalmixEM)
 
@@ -65,7 +65,6 @@ FQ_FN_POS = function(ei){
   quant_mat = NULL
   # Re-ordered Data Matrix
   x_mat = NULL
-  print("Sorting Matrix...")
   # For each sample:
   for (i in 1:dim(ei)[2]){
     # Sort data and replace zeroes with NA
@@ -79,8 +78,6 @@ FQ_FN_POS = function(ei){
 
   # Vector form of quantile index matrix
   quant_out = as.numeric(as.vector(quant_mat))
-  print("Complete.")
-  print("Spline Interpolation...")
   # Interpolation Matrix (Values of all quantiles)
   inter_mat = rep(0,length(quant_out))
   ob_counts = rep(0,length(quant_out)) # Number of observations for averaging
@@ -95,8 +92,7 @@ FQ_FN_POS = function(ei){
     inter[is.na(inter)] = 0
     inter_mat = inter_mat + inter
   }
-  print("Complete.")
-  
+
   # Average over the interpolated values from all samples
   inter_mean = inter_mat/ob_counts
 
@@ -106,7 +102,9 @@ FQ_FN_POS = function(ei){
   for (i in 1:dim(ei)[2]){
     eo[,i] = rev(eo[,i])[order(order(ei[,i]))]
   }
-  print("Finished!")
+
+  rownames(eo) = rownames(ei)
+  colnames(eo) = colnames(ei)
   return(eo)
 }
 
 
@@ -0,0 +1,49 @@
+## When porting the package to S4, we can make this a biplot method
+
+#' Another implementation of the biplot function
+#'
+#' Function to plot a biplot with no point labels and with points color-coded
+#' according to a certain quantitative variable, for instance the rank of the
+#' normalization performance or the expression of a certain gene.
+#'
+#' This function implements the biplot only for \code{\link[stats]{prcomp}}
+#' objects. Eventually, we will turn this into an S4 method.
+#'
+#' @param x the result of a call to \code{\link[stats]{prcomp}}.
+#' @param y the rank value that should be used to color the points.
+#' @param choices which principal components to plot. Only 2D plots are
+#'   possible for now. Default to first two PCs.
+#' @param expand numeric value used to adjust the spread of the arrows relative
+#'   to the points.
+#' @param ... passed to plot.
+#'
+#' @importFrom grDevices colorRampPalette
+#' @export
+#'
+#' @examples
+#' mat <- matrix(rnorm(1000), ncol=10)
+#' colnames(mat) <- paste("X", 1:ncol(mat), sep="")
+#'
+#' pc <- prcomp(mat)
+#'
+#' biplot_colored(pc, rank(pc$x[,1]))
+#'
+biplot_colored <- function(x, y, choices=1:2, expand=1, ...) {
+
+  lam <- x$sdev[choices]
+  n <- NROW(x$x)
+  lam <- lam * sqrt(n)
+
+  xx <- t(t(x$x[, choices])/lam)
+  yy <- t(t(x$rotation[, choices]) * lam)
+
+  ratio <- max(range(yy)/range(xx))/expand
+
+  cols <- rev(colorRampPalette(c("black","navyblue","mediumblue","dodgerblue3","aquamarine4","green4","yellowgreen","yellow"))(length(y)))[y]
+  plot(xx, pch=19, col=cols, ...)
+
+  labs <- rownames(yy)
+
+  text(yy/ratio, labels=labs, col=2)
+  arrows(0, 0, yy[, 1] * 0.8/ratio, yy[, 2] * 0.8/ratio, col = 2, length = 0.1)
+}
@@ -0,0 +1,48 @@
+#' Positive and negative control genes
+#'
+#' Sets of positive and negative control genes, useful for input in
+#' \code{\link{scone}}.
+#'
+#' These gene sets can be used as negative or positive controls, either for RUV
+#' normalization or for evaluation and ranking of the normalization strategies.
+#'
+#' @details The datasets are in the form of \code{data.frame}, with at least one
+#'   column containing the gene symbols and optionally a second column
+#'   containing additional information (such as cortical layer or cell cycle
+#'   phase).
+#'
+#' @details Note that the gene symbols follow the mouse conventions (i.e.,
+#'   capitalized) or the human conventions (i.e, all upper-case), based on the
+#'   original pubblication. One can use the \code{\link[base]{toupper}},
+#'   \code{\link[base]{tolower}}, and \code{\link[tools]{toTitleCase}} functions
+#'   to convert the symbols.
+#'
+#' @details The genes in \code{cortical_markers} are from Figure 3 of Molyneaux
+#'   et al. (2007). The genes in \code{housekeeping} are from Eisenberg and
+#'   Levanon (2003) and in \code{housekeeping_revised} are from Eisenberg and
+#'   Levanon (2013). The genes in \code{cellcycle_genes} are adapted from
+#'   Kowalczyk et al. (2015).
+#'
+#' @references Molyneaux, B.J., Arlotta, P., Menezes, J.R. and Macklis, J.D..
+#'   Neuronal subtype specification in the cerebral cortex. Nature Reviews
+#'   Neuroscience, 2007, 8(6):427-437.
+#' @references Eisenberg E, Levanon EY. Human housekeeping genes are compact.
+#'   Trends in Genetics, 2003, 19(7):362-5.
+#' @references Eisenberg E, Levanon EY. Human housekeeping genes, revisited.
+#'   Trends in Genetics, 2013, 29(10):569-74.
+#' @references Kowalczyk, M.S., Tirosh, I., Heckl, D., Rao, T.N., Dixit, A.,
+#'   Haas, B.J., Schneider, R.K., Wagers, A.J., Ebert, B.L. and Regev, A.
+#'   Single-cell RNA-seq reveals changes in cell cycle and differentiation
+#'   programs upon aging of hematopoietic stem cells. Genome research, 2015.
+#'
+#' @name control_genes
+#'
+#' @docType data
+#' @aliases cortical_markers housekeeping housekeeping_revised cellcycle_genes
+#'
+#' @examples
+#' data(housekeeping)
+#' data(housekeeping_revised)
+#' data(cellcycle_genes)
+#' data(cortical_markers)
+NULL
@@ -1,19 +1,19 @@
 #' Internal Function
-#' 
+#'
 #' This function is used internally in scone to parse the variables used to generate the design matrices.
-#' 
+#'
 #' @param pars character. A vector of parameters corresponding to a row of params.
 #' @param bio factor. The biological factor of interest.
 #' @param batch factor. The known batch effects.
 #' @param ruv_factors list. A list containing the factors of unwanted variation.
 #' @param qc matrix. The principal components of the QC metrics.
-#' 
+#'
 #' @return A list with the variables to be passed to make_design.
 parse_row <- function(pars, bio, batch, ruv_factors, qc) {
   sc_name <- paste(pars[1:2], collapse="_")
-  
+
   W <- out_bio <- out_batch <- NULL
-  
+
   if(pars[3]!="no_uv") {
     parsed <- strsplit(as.character(pars[3]), "=")[[1]]
     if(grepl("ruv", parsed[1])) {
@@ -22,34 +22,34 @@ parse_row <- function(pars, bio, batch, ruv_factors, qc) {
       W <- qc[,1:as.numeric(parsed[2])]
     }
   }
-  
+
   if(pars[4]=="bio") {
     out_bio <- bio
   }
-  
+
   if(pars[5]=="batch") {
     out_batch <- batch
   }
-  
+
   return(list(sc_name=sc_name, W=W, bio=out_bio, batch=out_batch))
 }
 
 #' Function to make a design matrix
-#' 
+#'
 #' This function is useful to create a design matrix, when the covariates are two (possibly nested) factors
 #' and one or more continuous variables.
-#' 
+#'
 #' @details If nested=TRUE a nested design is used, i.e., the batch variable is assumed to be nested within
 #' the bio variable. Here, nested means that each batch is made of observations from only one level of bio,
 #' while each level of bio may contain multiple batches.
-#'  
+#'
 #' @export
-#'  
+#'
 #' @param bio factor. The biological factor of interest.
 #' @param batch factor. The known batch effects.
 #' @param W numeric. Either a vector or matrix containing one or more continuous covariates (e.g. RUV factors).
 #' @param nested logical. Whether or not to consider a nested design (see details).
-#' 
+#'
 #' @return The design matrix.
 make_design <- function(bio, batch, W, nested=FALSE) {
   if(nested & (is.null(bio) | is.null(batch))) {
@@ -65,7 +65,7 @@ make_design <- function(bio, batch, W, nested=FALSE) {
       stop("batch must be a factor.")
     }
   }
-  
+
   f <- "~ 1"
   if(!is.null(bio)) {
     f <- paste(f, "bio", sep="+")
@@ -76,12 +76,12 @@ make_design <- function(bio, batch, W, nested=FALSE) {
   if(!is.null(W)) {
     f <- paste(f, "W", sep="+")
   }
-  
+
   if(is.null(bio) & is.null(batch) & is.null(W)) {
     return(NULL)
   } else if (!is.null(bio) & !is.null(batch) & nested) {
     n_vec <- tapply(batch, bio, function(x) nlevels(droplevels(x)))
-    
+
     mat = matrix(0,nrow = sum(n_vec),ncol = sum(n_vec - 1))
     xi = 1
     yi = 1
@@ -96,25 +96,25 @@ make_design <- function(bio, batch, W, nested=FALSE) {
         xi = xi + 1
       }
     }
-    
-    return(model.matrix(as.formula(f), contrasts=list(bio=contr.sum, batch=mat)))    
+
+    return(model.matrix(as.formula(f), contrasts=list(bio=contr.sum, batch=mat)))
   } else {
-    return(model.matrix(as.formula(f)))    
+    return(model.matrix(as.formula(f)))
   }
 }
 
 #' Function to perform linear batch effect correction
-#' 
+#'
 #' Given a matrix with log expression values and a design matrix, this function fits a linear model
 #' and removes the effects of the batch factor as well as of the linear variables encoded in W.
-#' 
+#'
 #' @details The function assumes that the columns of the design matrix corresponding to the variable
 #' for which expression needs to be adjusted, start with either the word "batch" or the letter "W" (case sensitive).
 #' Any other covariate (including the intercept) is kept.
-#'  
+#'
 #' @importFrom limma lmFit
 #' @export
-#' 
+#'
 #' @param log_expr matrix. The log gene expression (genes in row, samples in columns).
 #' @param design_mat matrix. The design matrix (usually the result of make_design).
 #' @param batch factor. A factor with the batch information.
@@ -125,13 +125,13 @@ lm_adjust <- function(log_expr, design_mat, batch=NULL, weights=NULL) {
 
   uvind <- grep("^W", colnames(design_mat))
   bind <- grep("^batch", colnames(design_mat))
-  
+
   if(length(uvind)) {
     uv_term <- t(design_mat[,uvind] %*% t(lm_object$coefficients[,uvind]))
   } else {
     uv_term <- 0
   }
-  
+
   if(length(bind)) {
     if(is.character(attr(design_mat,"contrasts")$batch)) {
       contr <- get(attr(design_mat,"contrasts")$batch)(nlevels(batch))
@@ -142,6 +142,6 @@ lm_adjust <- function(log_expr, design_mat, batch=NULL, weights=NULL) {
   } else {
     batch_term <- 0
   }
-  
-  log_norm <- log_expr - batch_term  - uv_term
+
+  return(log_expr - batch_term  - uv_term)
 }