@@ -701,8 +701,6 @@ FitMeanVar.default <- function(
701701 verbose = FALSE ,
702702 ...
703703) {
704-
705- set.seed(random.seed )
706704 rs <- rowSums(x = object )
707705
708706 if (is.character(x = min.cutoff )) {
@@ -727,51 +725,86 @@ FitMeanVar.default <- function(
727725 total.counts = rs
728726 )
729727
730- df $ log_mean <- log1p(x = df $ mean )
728+ # run on a dataframe containing mean, variance, total.counts
729+ df <- FitMeanVar(
730+ object = df ,
731+ loess.span = loess.span ,
732+ weight.mean = weight.mean ,
733+ bins = bins ,
734+ sample_per_bin = sample_per_bin ,
735+ random.seed = random.seed ,
736+ verbose = verbose ,
737+ ...
738+ )
739+
740+ df $ rank [df $ total.counts < count.thresh ] <- NA
741+ vf <- head(
742+ x = order(df $ rank , decreasing = FALSE ),
743+ n = nfeatures
744+ )
745+ df $ variable <- FALSE
746+ df $ variable [vf ] <- TRUE
747+ return (df )
748+ }
749+
750+ # ' @rdname FitMeanVar
751+ # ' @importFrom sparseMatrixStats rowVars
752+ # ' @importFrom stats loess predict
753+ # ' @export
754+ # ' @concept preprocessing
755+ # ' @method FitMeanVar data.frame
756+ FitMeanVar.data.frame <- function (
757+ object ,
758+ loess.span = 0.1 ,
759+ weight.mean = 0.5 ,
760+ bins = 1000 ,
761+ sample_per_bin = 50 ,
762+ random.seed = 1234 ,
763+ verbose = FALSE ,
764+ ...
765+ ) {
766+ if (! all(c(" mean" , " variance" ) %in% colnames(x = object ))) {
767+ stop(" Mean and variance information must be stored in the input dataframe" )
768+ }
769+ set.seed(random.seed )
770+ object $ log_mean <- log1p(x = object $ mean )
731771 breaks <- seq(
732- min(df $ log_mean , na.rm = TRUE ),
733- max(df $ log_mean , na.rm = TRUE ),
772+ min(object $ log_mean , na.rm = TRUE ),
773+ max(object $ log_mean , na.rm = TRUE ),
734774 length.out = bins + 1
735775 )
736- df $ bin <- findInterval(
737- x = df $ log_mean ,
776+ object $ bin <- findInterval(
777+ x = object $ log_mean ,
738778 vec = breaks ,
739779 rightmost.closed = TRUE
740780 )
741781 sampled_df <- do.call(
742782 what = rbind ,
743- args = lapply(X = split(df , df $ bin ), FUN = function (subset ) {
783+ args = lapply(X = split(object , object $ bin ), FUN = function (subset ) {
744784 if (nrow(subset ) > sample_per_bin ) {
745785 subset <- subset [sample(
746786 x = nrow(x = subset ),
747787 size = sample_per_bin ,
748788 replace = FALSE ), ]
749789 }
750790 return (subset )
751- }
791+ }
752792 )
753793 )
754794 loess_fit <- loess(
755795 formula = log1p(x = variance ) ~ log_mean ,
756796 data = sampled_df ,
757797 span = loess.span
758798 )
759- df $ variance.expected <- expm1(
760- x = predict(object = loess_fit , newdata = df $ log_mean )
799+ object $ variance.expected <- expm1(
800+ x = predict(object = loess_fit , newdata = object $ log_mean )
761801 )
762- df $ variance.residual <- df $ variance - df $ variance.expected
802+ object $ variance.residual <- object $ variance - object $ variance.expected
763803
764- df $ residual.rank <- rank(x = - df $ variance.residual , ties.method = " average" )
765- df $ mean.rank <- rank(x = - df $ mean , ties.method = " average" )
766- df $ rank <- (weight.mean * df $ mean.rank ) + ((1 - weight.mean ) * df $ residual.rank )
767- df $ rank [df $ total.counts < count.thresh ] <- NA
768- vf <- head(
769- x = order(df $ rank , decreasing = FALSE ),
770- n = nfeatures
771- )
772- df $ variable <- FALSE
773- df $ variable [vf ] <- TRUE
774- return (df )
804+ object $ residual.rank <- rank(x = - object $ variance.residual , ties.method = " average" )
805+ object $ mean.rank <- rank(x = - object $ mean , ties.method = " average" )
806+ object $ rank <- (weight.mean * object $ mean.rank ) + ((1 - weight.mean ) * object $ residual.rank )
807+ return (object )
775808}
776809
777810# ' @param assay Name of assay to use
0 commit comments