-
Notifications
You must be signed in to change notification settings - Fork 146
Description
Description of feature
The current dada2 summary tables provide information about the output of denoising for forward and reverse reads, but do not specify what is the number of denoised paired reads that is input into the merge stage, so it is currently not possible to know exactly how many reads are lost at the merge stage.
In order to be able to access this information, the merge settings needs to be changed to returnRejects = TRUE, so that all merge input reads are listed in mergers_1.rds.
The merge information can then be parsed with something like this, by importing mergers_1.rds (located in work/) and overall_summary.tsv
merge_stats <- tibble(
sample = names(mergers),
merge_input = sapply(mergers, function(x) sum(x$abundance)), # total abundance (accepted + rejected)
merged_accepted = sapply(mergers, function(x) sum(x$abundance[x$accept])), # abundance of accepted merges
merged_rejected = sapply(mergers, function(x) sum(x$abundance[!x$accept])) # abundance of rejected merges
) %>%
mutate(sample = gsub("_1.filt.fastq.gz", "", sample))
new_stats <- full_join(stats, merge_stats, by = "sample") %>%
relocate(merged_accepted, .after=merged) %>%
relocate(merge_input, .before=merged)
Ideally if similar code was integrated in the pipeline itself then the final overall_summary.tsv file would already have the merge_input information, without the need to retrieve mergers_1.rds.
In the meantime, below is the config file needed to change returnRejects to TRUE in the pipeline.... many thanks!
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Config file for making changes to dada2 pipeline defaults
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
DADA2_DENOISING
change <returnRejects = FALSE> to `<returnRejects = TRUE>
----------------------------------------------------------------------------------------
*/
process {
withName: DADA2_DENOISING {
ext.prefix = { meta.region ? "region-${meta.region}_run-${meta.run}" : "${meta.run}" }
ext.quality_type = { "${params.quality_type}" }
ext.args = {
def base = [
'selfConsist = FALSE',
'priors = character(0)',
'DETECT_SINGLETONS = FALSE',
'GAPLESS = TRUE',
'GAP_PENALTY = -8',
'GREEDY = TRUE',
'KDIST_CUTOFF = 0.42',
'MATCH = 5',
'MAX_CLUST = 0',
'MAX_CONSIST = 10'
]
base << (params.iontorrent ?
'BAND_SIZE = 32, HOMOPOLYMER_GAP_PENALTY = -1' :
'BAND_SIZE = 16, HOMOPOLYMER_GAP_PENALTY = NULL')
base << (params.sample_inference == "pseudo" ?
'pool = "pseudo"' :
(params.sample_inference == "pooled" ? 'pool = TRUE' : 'pool = FALSE'))
return base.join(', ')
}
ext.args2 = {
def args = [
'homo_gap = NULL',
'endsfree = TRUE',
'vec = FALSE',
'propagateCol = character(0)',
'trimOverhang = FALSE'
]
if (params.mergepairs_strategy == "consensus") {
args << "returnRejects = TRUE, match = ${params.mergepairs_consensus_match}, mismatch = ${params.mergepairs_consensus_mismatch}, minOverlap = ${params.mergepairs_consensus_minoverlap}, maxMismatch = ${params.mergepairs_consensus_maxmismatch}"
} else {
def justConcat = (params.mergepairs_strategy == 'concatenate') ? 'TRUE' : 'FALSE'
args << "justConcatenate = ${justConcat}, returnRejects = TRUE, match = 1, mismatch = -64, gap = -64, minOverlap = 12, maxMismatch = 0"
}
return args.join(', ')
}
ext.quantile = { "${params.mergepairs_consensus_percentile_cutoff}" }
publishDir = [
[
path: "${params.outdir}/dada2/args",
mode: 'copy',
pattern: "*.args.txt"
],
[
path: "${params.outdir}/dada2/log",
mode: 'copy',
pattern: "*.log"
]
]
}
}