-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathnextflow.config
More file actions
285 lines (230 loc) · 12 KB
/
nextflow.config
File metadata and controls
285 lines (230 loc) · 12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
// Nextflow plugins required by this workflow
plugins {
id 'nf-schema@2.5.1' // pinned nf-schema plugin version
}
//***************************************** Global parameters *******************************************//
params {
// Suffix appended to the names of workflow output files
assay_suffix = "_GLAmpSeq"
// Optional prefix for output file names; empty string disables prefixing
output_prefix = ""
// Mandatory parameters
target_region = "16S" // "16S", "18S", "ITS"
raw_R1_suffix = "${params.assay_suffix}_R1_raw.fastq.gz"
raw_R2_suffix = "${params.assay_suffix}_R2_raw.fastq.gz"
trim_primers = "TRUE" // "TRUE" or "FALSE"
// -------- Required only if --accession is false ---------------//
// A 4-column (single-end) or 5-column (paired-end) input csv file with the following headers ( sample_id, forward, [reverse,] paired, groups)
input_file = null
// Cutadapt parameters
min_cutadapt_len = 130 // presumably cutadapt's minimum retained read length — verify in workflow scripts
primers_linked = "TRUE" // "TRUE" or "FALSE"
discard_untrimmed = "TRUE" // "TRUE" or "FALSE"
anchored_primers = "TRUE" // "TRUE" or "FALSE"
F_primer = null // forward primer sequence
R_primer = null // reverse primer sequence
// Dada2 parameters
left_trunc = 0 // forward-read truncation length (0 presumably disables truncation)
right_trunc = 0 // reverse-read truncation length (0 presumably disables truncation)
left_maxEE = 1 // max expected errors, forward reads
right_maxEE = 1 // max expected errors, reverse reads
concatenate_reads_only = "FALSE" // "TRUE" or "FALSE"
// If using conda environments specify their locations so new ones won't be created
// Specify the paths to existing conda environments (/path/to/envs/cutadapt)
// leave as is if you want to create a new conda environment
conda_cutadapt = null // /path/to/envs/cutadapt
conda_diversity = null // /path/to/envs/R_diversity
conda_dp_tools = null // /path/to/envs/dp_tools
conda_fastqc = null // /path/to/envs/fastqc
conda_multiqc = null // /path/to/envs/multiqc
conda_R = null // /path/to/envs/R
conda_zip = null // /path/to/envs/zip
conda_wget = null // /path/to/envs/wget
// Mandatory parameters if using GLDS or OSD accession as input
accession = null
publishDir_mode = "link" // "link", "copy"
// Suffixes
primer_trimmed_R1_suffix = "${params.assay_suffix}_R1_trimmed.fastq.gz"
primer_trimmed_R2_suffix = "${params.assay_suffix}_R2_trimmed.fastq.gz"
filtered_R1_suffix = "${params.assay_suffix}_R1_filtered.fastq.gz"
filtered_R2_suffix = "${params.assay_suffix}_R2_filtered.fastq.gz"
// Directories
outdir = "${launchDir}" // defaults to the directory the workflow was launched from
raw_reads_dir = "${params.outdir}/Raw_Sequence_Data/"
metadata_dir = "${params.outdir}/Metadata/"
genelab_dir = "${params.outdir}/GeneLab/"
fastqc_out_dir = "${params.outdir}/workflow_output/FastQC_Outputs/"
trimmed_reads_dir = "${params.outdir}/workflow_output/Trimmed_Sequence_Data/"
filtered_reads_dir = "${params.outdir}/workflow_output/Filtered_Sequence_Data/"
final_outputs_dir = "${params.outdir}/workflow_output/Final_Outputs/"
// Multiqc
multiqc_config = "${projectDir}/config/multiqc.config"
// -------- Differential abundance parameters ----- //
diff_abund_method = "all" // ["all", "ancombc1", "ancombc2", or "deseq2"] - it runs all three by default
group = "groups" // column in input csv file to be compared
samples_column = "sample_id" // column in input csv file containing sample names
remove_struc_zeros = false // should structural zeros be removed when running ANCOMBC?
// Should rare features and samples be discarded. Values are true or false. If set to true then set the cutoffs below
remove_rare = false
prevalence_cutoff = 0 // a fraction between 0 and 1 representing the minimum prevalence a taxon needs to be retained
library_cutoff = 0 // Samples with library sizes less than this number will be excluded in the analysis
// Minimum desired sample rarefaction depth for diversity analysis
rarefaction_depth = 500
errorStrategy = "terminate" // default error strategy applied in the process scope below
debug = false // set to true if you'd like to see the values of your set parameters
}
// Setting the default container engine as singularity
params.containerEngine = "singularity"
// Conda shouldn't be used by default except when using conda-based profiles
// (the conda and mamba profiles below flip this to true)
params.use_conda = false
/*******************************************************************************************************
*************************************** Workflow Profiles **********************************************
********************************************************************************************************/
profiles {
// Submit tasks through the SLURM scheduler
slurm {
process.executor = 'slurm'
}
// Provision software via conda environments
conda {
conda.enabled = true
params.use_conda = true
conda.channels = 'conda-forge,bioconda'
conda.cacheDir = 'conda/' // location of conda environments
conda.createTimeout = '2h'
}
// Same as the conda profile but resolves environments with mamba
mamba {
conda.enabled = true
conda.useMamba = true
conda.channels = 'conda-forge,bioconda'
params.use_conda = true
conda.cacheDir = 'conda/' // location of conda environments
conda.createTimeout = '2h'
}
// Run containers with Singularity
singularity {
singularity.enabled = true
singularity.autoMounts = true
/* Uncomment the line below if you'd like to set the cache directory here,
as setting it here takes precedence over setting the nextflow variable
NXF_SINGULARITY_CACHEDIR=singularity/ in your run script
*/
//singularity.cacheDir = "singularity/" // location of singularity images
params.containerEngine = "singularity"
}
// Run containers with Docker as the invoking user (so outputs aren't root-owned)
docker {
docker.enabled = true
docker.runOptions = '-u $(id -u):$(id -g)'
params.containerEngine = "docker"
}
}
// Maximum number of jobs the executor will submit/run in parallel
executor.queueSize = 20
/******************************************************************************************************************
***************** Tune process specific resources (cpu, container, memory etc.) ***********************************
*******************************************************************************************************************/
process {
//******************* Default process settings ************************//
// Use the configured strategy; fall back to "ignore" only when params.errorStrategy
// is null/empty (it defaults to "terminate" in the params block above)
errorStrategy = { params.errorStrategy ?: "ignore" }
maxRetries = 2
cpus = 2
memory = "5 GB"
cache = 'lenient'
//debug = true // uncomment to see what is being emitted to the standard output
//************************* Accession runsheet and input file retrieval **************************************//
// For each tool: use a pre-built conda env if its params.conda_* path is set, otherwise
// build one from the bundled yaml (conda profiles only); container is used otherwise
withName: GET_RUNSHEET {
conda = { params.conda_dp_tools ?: "${projectDir}/envs/dp_tools.yaml" }
container = "quay.io/nasa_genelab/dp_tools:1.3.8"
publishDir = [path: params.genelab_dir, mode: params.publishDir_mode]
}
//********************************** Raw read staging ********************************************//
withName: COPY_READS {
maxRetries = 3
errorStrategy = 'retry' // retry transient staging failures instead of stopping the run
publishDir = [path: params.raw_reads_dir, mode: params.publishDir_mode]
}
withName: 'COPY_REMOTE_READS|DOWNLOAD_DATABASE' {
conda = { params.conda_wget ?: "${projectDir}/envs/wget.yaml" }
container = "quay.io/nasa_genelab/wget:1.21.4"
maxRetries = 3
maxForks = 5 // cap concurrent downloads
errorStrategy = 'retry'
publishDir = [path: params.raw_reads_dir, mode: params.publishDir_mode]
}
//********************************** Read quality control and assessment ********************************************//
withLabel: fastqc {
conda = { params.conda_fastqc ?: "${projectDir}/envs/fastqc.yaml" }
container = "quay.io/biocontainers/fastqc:0.12.1--hdfd78af_0"
}
withLabel: zip {
conda = { params.conda_zip ?: "${projectDir}/envs/zip.yaml" }
container = "quay.io/nasa_genelab/zip:3.0"
}
withName: RAW_FASTQC {
publishDir = [path: params.raw_reads_dir, mode: params.publishDir_mode]
}
withName: "RAW_MULTIQC|FILTERED_MULTIQC" {
conda = { params.conda_multiqc ?: "${projectDir}/envs/multiqc.yaml" }
container = "quay.io/biocontainers/multiqc:1.27.1--pyhdfd78af_0"
}
withName: "ZIP_MULTIQC_RAW|ZIP_MULTIQC_FILTERED" {
publishDir = [path: params.fastqc_out_dir, mode: params.publishDir_mode]
}
withName: "CUTADAPT|COMBINE_CUTADAPT_LOGS_AND_SUMMARIZE" {
conda = { params.conda_cutadapt ?: "${projectDir}/envs/cutadapt.yaml" }
container = "quay.io/biocontainers/cutadapt:5.0--py39hbcbf7aa_0"
memory = "10 GB"
publishDir = [path: params.trimmed_reads_dir, mode: params.publishDir_mode]
}
withName: FILTERED_FASTQC {
publishDir = [path: params.filtered_reads_dir, mode: params.publishDir_mode]
}
//********************************** ASV table creation ********************************************//
withName: "RUN_R_TRIM|RUN_R_NOTRIM" {
conda = { params.conda_R ?: "${projectDir}/envs/R.yaml" }
container = "quay.io/nasa_genelab/r-dada-decipher-biomformat:1.1"
memory = "20 GB"
cpus = 10
// Publish filtered reads and final outputs to separate directories; saveAs strips
// the leading subdirectory so files land directly in the target directory
publishDir = [[path: params.filtered_reads_dir, pattern: "Filtered_Sequence_Data/*",
mode: params.publishDir_mode, saveAs: { fn -> fn.substring(fn.lastIndexOf('/')+1) } ],
[path: params.final_outputs_dir, pattern: "final_outputs/*.{tsv,biom,fasta}",
mode: params.publishDir_mode, saveAs: { fn -> fn.substring(fn.lastIndexOf('/')+1) } ]]
}
withName: ZIP_BIOM {
publishDir = [path: params.final_outputs_dir, mode: params.publishDir_mode]
}
//********************************** Diversity and differential abundance testing ********************************************//
withLabel: visualization {
conda = { params.conda_diversity ?: "${projectDir}/envs/diversity.yaml" }
container = "quay.io/nasa_genelab/r-diversity:1.1"
cpus = 5
memory = "10 GB"
publishDir = [path: params.final_outputs_dir, mode: params.publishDir_mode]
}
withName: SOFTWARE_VERSIONS {
publishDir = [path: params.metadata_dir, mode: params.publishDir_mode]
}
}
/*****************************************************************************
********************** Workflow Resource Usage Capturing *********************
******************************************************************************/
// Adapted from : https://github.com/nf-core/rnaseq/blob/master/nextflow.config
// Run timestamp so each run's resource-usage reports get unique file names
def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
// HTML timeline of task execution
timeline {
enabled = true
file = "${params.outdir}/Resource_Usage/execution_timeline_${trace_timestamp}.html"
}
// HTML execution report (resource usage summary)
report {
enabled = true
file = "${params.outdir}/Resource_Usage/execution_report_${trace_timestamp}.html"
}
// Tab-separated per-task execution trace
trace {
enabled = true
file = "${params.outdir}/Resource_Usage/execution_trace_${trace_timestamp}.txt"
}
/******************************************************************************
**************************** Workflow Metadata ********************************
*******************************************************************************/
manifest {
author = 'Olabiyi Aderemi Obayomi, Mike D. Lee'
homePage = 'https://github.com/nasa/GeneLab_Data_Processing/blob/master/Amplicon/'
description = 'Amplicon Illumina workflow for pipeline document GL-DPPD-7104-C'
mainScript = 'main.nf' // workflow entry point
defaultBranch = 'main'
nextflowVersion = '>=24.04.4' // minimum Nextflow version required to run
version = '1.0.8'
}