Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions R/apply_test.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,17 @@ apply.test <- function(sim.location, group, type='default',

for (g in names(all)){

# skip metadata entry if present
if (g == "meta") next

# reconstruct abundance table
feat.sim <- all[[g]]$features
markers <- all[[g]]$markers
test.idx <- all[[g]]$idx
log.n0 <- all[[g]]$log.n0
conf.l <- all[[g]]$conf

# Build full confounder matrix for this group
if (!is.null(meta.all) & !is.null(conf.l)){
conf.mat <- cbind(meta.all, conf.l)
colnames(conf.mat)[ncol(conf.mat)] <- 'conf'
Expand Down Expand Up @@ -155,7 +159,7 @@ check.testing.parameters <- function(sim.location, group, type,
'ZIBSeq-sqrt',
'ANCOM',
'ANCOM_old',
'ANCOMBC',
'ANCOMBC', 'ANCOMBC2',
"lm", "lm_inter",
"corncob",
"limma",
Expand Down Expand Up @@ -327,7 +331,7 @@ check.testing.parameters <- function(sim.location, group, type,
}

# check if rep_no in the additional arguments
if (length(additional.arguments > 0)) {
if (length(additional.arguments) > 0) {
if ('rep_no' %in% names(additional.arguments)){
if (length(subset) > 1){
warning("Parameter 'rep_no' will be ignored, since more",
Expand All @@ -336,7 +340,7 @@ check.testing.parameters <- function(sim.location, group, type,
} else {
for (a in names(idx.all)){
idx.all[[a]] <- idx.all[[a]][[
paste0('rep', additional.arguments$rep.no), , drop=FALSE]]
paste0('rep', additional.arguments$rep_no), , drop=FALSE]]
}
}
}
Expand All @@ -355,7 +359,7 @@ check.testing.parameters <- function(sim.location, group, type,

all[[g]]$markers <- complete.group$marker_idx
if (!is.null(conf) & any(conf!='conf')){
all[[meta]] <- h5read(sim.location, 'original_data/metadata')
all[["meta"]] <- h5read(sim.location, 'original_data/metadata')
}
}
return(all)
Expand Down
2 changes: 1 addition & 1 deletion R/create_test_idx.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ save.idx <- function(ab, rep, subs, reps, class.balance, sim.file,
sample(neg.samples, s*class.balance,
replace = TRUE)) }
else {
sub.label <- c(sample(pos.samples, ceiling(s*(1-class.balance))),
sub.label <- c(sample(pos.samples, s-ceiling(s*class.balance)),
sample(neg.samples, ceiling(s*class.balance))) }
stopifnot(length(sub.label) >= s)
sub.label <- sub.label[seq(s)]
Expand Down
4 changes: 3 additions & 1 deletion R/eval_test.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,9 @@ eval.test <- function(sim.location, group, res.mat, adjust='BH', alpha = 0.05) {

# named vector of p.vals for test rep x
p.val <- res[,x]
p.val <- p.adjust(p.val, method=adjust)
if (!adjust %in% c("PASS", "pass")) {
p.val <- p.adjust(p.val, method=adjust)
}
# auc
auroc <- roc(predictor = -log10(p.val + 1e-50), response = marker,
levels = c(0, 1), direction = '<')
Expand Down
14 changes: 8 additions & 6 deletions R/helper_clean.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ check.simulation.location <- function(sim.out){

# check input data
#' @keywords internal
check.original.data <- function(feat, meta, sim.type, sim.method){
check.original.data <- function(feat, meta, sim.type, sim.method, factorize.metadata = TRUE){

# check that feat and meta make sense
# check the class of the features
Expand Down Expand Up @@ -108,7 +108,7 @@ check.original.data <- function(feat, meta, sim.type, sim.method){
}

# validate original data
data.list <- validate.original.data(original.data, sim.method)
data.list <- validate.original.data(original.data, sim.method, factorize.metadata)
feat <- data.list$feat
meta <- data.list$meta
# remove repeated measurements/individuals with single samples
Expand Down Expand Up @@ -172,7 +172,7 @@ check.original.data <- function(feat, meta, sim.type, sim.method){

# validate original data
#' @keywords internal
validate.original.data <- function(d.list, sim.method){
validate.original.data <- function(d.list, sim.method, factorize.metadata = TRUE){

feat.final <- list()
meta.final <- list()
Expand Down Expand Up @@ -278,7 +278,9 @@ validate.original.data <- function(d.list, sim.method){
meta.final[[j]] <- meta.final[[j]][,colnames.final]
}
meta.all <- do.call(rbind, meta.final)
meta.all <- clean.meta.data(meta.all)
if (factorize.metadata) {
meta.all <- clean.meta.data(meta.all)
}
feat.all <- feat.all[,rownames(meta.all)]
rownames(feat.all) <- paste0('bact_otu_', seq_len(nrow(feat.all)))
return(list(feat=feat.all, meta=meta.all))
Expand Down Expand Up @@ -350,7 +352,7 @@ clean.meta.data <- function(df.meta){
}
}

# check simulation parameters
# check filtering parameters
#' @keywords internal
check.filtering.parameters <- function(filt.params){

Expand Down Expand Up @@ -567,7 +569,7 @@ check.common.simulation.parameters <- function(sim.params, meta, sim.method){
stop("Parameter 'class.balance' should be numeric")
} else if (class.balance > 0.85){
stop("Parameter 'class.balance' seems too high (", class.balance, ")")
} else if (class.balance < 0.45){
} else if (class.balance < 0.15){
stop("Parameter 'class.balance' seems too low (", class.balance, ")")
}
}
Expand Down
85 changes: 77 additions & 8 deletions R/helper_resampling.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ simulate.resampling <- function(feat, meta, sim.out, sim.params){
balanced <- TRUE
}

# optional fixed-label mode via 'fixed.labels=TRUE' in sim.params
fixed.labels <- isTRUE(sim.params$fixed.labels)
label.col <- if ('label.col' %in% names(sim.params)) sim.params$label.col else NULL
case.value <- if ('case.value' %in% names(sim.params)) sim.params$case.value else NULL
control.value <- if ('control.value' %in% names(sim.params)) sim.params$control.value else NULL

if (conf == 'None'){
conf <- NULL
Expand Down Expand Up @@ -107,6 +112,56 @@ simulate.resampling <- function(feat, meta, sim.out, sim.params){
}
}

## prepare a fixed label vector once if requested
if (fixed.labels){
stopifnot(!is.null(label.col))
stopifnot(label.col %in% colnames(meta))

raw.label <- meta[, label.col]
names(raw.label) <- rownames(meta)

keep <- !is.na(raw.label)
raw.label <- raw.label[keep]

if (!is.null(prob)){
common.samples <- intersect(names(prob), names(raw.label))
raw.label <- raw.label[common.samples]
prob <- prob[common.samples]
if (!is.null(conf) && exists("conf.label")){
conf.label <- conf.label[common.samples]
}
}

## recode to -1 / +1
if (!is.null(case.value) && !is.null(control.value)){
label.fixed <- ifelse(raw.label == case.value, 1,
ifelse(raw.label == control.value, -1, NA))
} else {
u <- unique(raw.label)
u <- u[!is.na(u)]
stopifnot(length(u) == 2)
label.fixed <- ifelse(raw.label == u[1], -1, 1)
warning("fixed.labels=TRUE but case/control values not supplied; ",
"mapped first observed label to -1 and second to +1.")
}

keep2 <- !is.na(label.fixed)
label.fixed <- label.fixed[keep2]
names(label.fixed) <- names(raw.label)[keep2]

stopifnot(all(label.fixed %in% c(-1, 1)))
num.sample <- length(label.fixed)

## consistency check
observed.balance <- mean(label.fixed == 1)
if (!is.null(class.balance) && abs(observed.balance - class.balance) > 1e-3){
message("++ fixed.labels=TRUE: ignoring sim.params$class.balance = ",
class.balance,
" and using observed balance = ",
round(observed.balance, 4))
}
}

# actual implantation
pb <- progress_bar$new(total = length(ab.scale)*length(prev.scale)*repeats)
for (a in seq_along(ab.scale)){
Expand Down Expand Up @@ -145,14 +200,28 @@ simulate.resampling <- function(feat, meta, sim.out, sim.params){
}

# generate label
label <- rep(-1, num.sample)
s <- sample(num.sample,
size=round(num.sample*class.balance),
prob=prob+1e-20)
names(label) <- names(prob)
label[s] <- +1 # switch random half of the samples to positive class

feat.tmp <- feat[,names(label)]
if (isTRUE(sim.params$fixed.labels)) {
label <- meta[[sim.params$label.col]]
names(label) <- rownames(meta)

# recode to -1 / +1
label <- ifelse(label == sim.params$case.value, 1, -1)

# keep only samples with non-missing labels
keep <- !is.na(label)
label <- label[keep]
feat.tmp <- feat[, names(label), drop = FALSE]
num.sample <- length(label)
} else {
label <- rep(-1, num.sample)
s <- sample(num.sample,
size=round(num.sample*class.balance),
prob=prob+1e-20)
names(label) <- names(prob)
label[s] <- +1 # switch random half of the samples to positive class

feat.tmp <- feat[,names(label)]
}

# sample markers
marker.idx <- sample(el.feat.names,
Expand Down
43 changes: 41 additions & 2 deletions R/helper_tests.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ run.test <- function(data, label, test, conf){
p.val <- test.ANCOM(data=data, label=label, conf=conf)
} else if (test == 'ANCOMBC'){
p.val <- test.ANCOMBC(data=data, label=label, conf=conf)
} else if (test == 'ANCOMBC2'){
p.val <- test.ANCOMBC2(data=data, label=label, conf=conf)
} else if (test=='corncob'){
p.val <- test.corncob(data=data, label=label, conf=conf)
} else if (test=='limma'){
Expand Down Expand Up @@ -513,11 +515,48 @@ test.ANCOMBC <- function(data, label, conf){
x.phylo <- phyloseq::phyloseq(
otu_table = phyloseq::otu_table(data, taxa_are_rows = TRUE),
sample_data = phyloseq::sample_data(s.data))
temp <- ANCOMBC::ancombc(phyloseq = x.phylo, formula = f.form,
temp <- ANCOMBC::ancombc(data = x.phylo, formula = f.form,
p_adj_method = 'fdr', lib_cut = 100)
p.val <- rep(1, nrow(data))
names(p.val) <- rownames(data)
p.val[rownames(temp$res$q_val)] <- temp$res$p_val$label
p.val[temp$res$p_val$taxon] <- temp$res$p_val$label
return(p.val)
}

#' @keywords internal
test.ANCOMBC2 <- function(data, label, conf){
#devtools::load_all("/projects/arumugam/people/mjq180/git/ANCOMBC")
test.package('ANCOMBC')
test.package("phyloseq")
# deal with nonunique samples from biased idx generation
names(label) <- paste0(names(label), '_', seq_along(label))
colnames(data) <- paste0(colnames(data), '_', seq_along(label))
label <- label + 1
label <- label/2
if (is.null(conf)){
s.data <- data.frame(label=label, dummy=2)
f.form <- 'label'
} else {
s.data <- cbind(data.frame(label=label, dummy=2), conf)
f.form <- paste0('label+', paste(colnames(conf), collapse = '+'))
}
x.phylo <- phyloseq::phyloseq(
otu_table = phyloseq::otu_table(data, taxa_are_rows = TRUE),
sample_data = phyloseq::sample_data(s.data))
temp <- ancombc2(data = x.phylo,
fix_formula = f.form,
rand_formula = NULL,
p_adj_method = "fdr",
prv_cut = 0.1,
lib_cut = 1000,
group = "label",
struc_zero = TRUE,
neg_lb = TRUE,
alpha = 0.05
)
p.val <- rep(1, nrow(data))
names(p.val) <- rownames(data)
p.val[temp$res$taxon] <- temp$res$p_label
return(p.val)
}

Expand Down
8 changes: 7 additions & 1 deletion R/simulate_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,13 @@ create.data.simulation <- function(feat, meta, sim.location,

# check data
message("+ Start checking data")
res <- check.original.data(feat, meta, sim.type, sim.method)
factorize.metadata <- TRUE
# Only skip factorizing if explicitly requested to skip it!
if (!is.null(sim.params[["factorize.metadata"]]) && (sim.params[["factorize.metadata"]] == FALSE)) {
factorize.metadata <- FALSE
}
message("++ Factorize metadata columns: ", factorize.metadata)
res <- check.original.data(feat, meta, sim.type, sim.method, factorize.metadata = factorize.metadata)
message("+ Finished checking data")
message("")
feat.original <- res$feat
Expand Down