-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnextflow.config
More file actions
445 lines (388 loc) · 18.2 KB
/
nextflow.config
File metadata and controls
445 lines (388 loc) · 18.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
somatem Nextflow config file
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Default config options for all compute environments
----------------------------------------------------------------------------------------
*/
/* -----------------------------
additions to nf-core config
---------------------------
*/
// Enable DSL2 syntax (set explicitly here rather than relying on the runtime default)
nextflow.enable.dsl = 2
// nf-core enabling code: pipeline manifest
manifest {
    // NOTE(review): a second `manifest { ... }` block near the end of this file assigns
    // every key below again with different values ('nf-core/somatem', 'main.nf',
    // '1.0.0dev', an nf-core homePage, ...). Presumably the later assignments win when
    // the config is resolved, which would make the values here ineffective — confirm
    // which set is intended and keep only one.
    name            = 'somatem'
    version         = '1.0.0'
    description     = 'A best practices pipeline for long-read metagenomic analysis'
    homePage        = 'https://github.com/treangenlab/somatem'
    mainScript      = 'workflows/main.nf'
    nextflowVersion = '>=25.10.2'
}
plugins {
    // nf-schema, pinned to an exact version (noted as the latest version on 15/May/26)
    id 'nf-schema@2.7.2'
}
// NOTE: Default Settings: will be overridden by profiles ----------------
// ---------------------------------
// default nf-core config follows
// ---------------------------------
// Global default params, used in configs
params {
    // NOTE: assignment order matters in this block — several params interpolate
    // params defined earlier (e.g. db_base_dir, unified_db_base_dir, lemur_db, outdir).

    // TODO nf-core: Specify your pipeline's command line flags

    // ------------------------------------------------------------------
    // Input options
    // ------------------------------------------------------------------
    // Bundled default file; should resolve from the installed pipeline root.
    input = "${projectDir}/assets/samplesheet.csv"
    // Database roots: the SOMATEM_* environment variables are used when set,
    // otherwise the defaults on the right of `?:` apply.
    db_base_dir = System.getenv('SOMATEM_DB_DIR') ?: "${System.getenv('HOME')}/somatem_databases"
    unified_db_base_dir = System.getenv('SOMATEM_UNIFIED_DB_DIR') ?: "${params.db_base_dir}/unified/refseq03032025"

    // ------------------------------------------------------------------
    // Metadata defaults
    // (actual values read in from metadata_template.yaml or .._params.yml override these)
    // ------------------------------------------------------------------
    data_type = "metagenomic" // options: 16S, isolate, metagenomic
    analysis_type = "taxonomic-profiling" // options: taxonomic-profiling, assembly, isolate-analysis, genome-dynamics, pathogen-detection
    sample_status = "cross-section" // alternative: longitudinal
    sequencing_technology = "nanopore" // options: nanopore, pacbio

    // ------------------------------------------------------------------
    // References
    // ------------------------------------------------------------------
    genome = null
    igenomes_base = 's3://ngi-igenomes/igenomes/'
    igenomes_ignore = true

    // ------------------------------------------------------------------
    // Quality filtering
    // ------------------------------------------------------------------
    maxlength = 30000
    minlen = 250
    minq = 10
    // Path to a reference .fa file of contaminants to remove, chopper
    // (not used currently: somatem.nf)
    contam_ref = null

    // ------------------------------------------------------------------
    // Host sequence removal (hostile)
    // ------------------------------------------------------------------
    // Masked index used to remove host reads
    // (https://github.com/bede/hostile?tab=readme-ov-file#indexes)
    hostile_index = 'human-t2t-hla-argos985-mycob140.mmi'
    hostile_db = "${params.db_base_dir}/hostile"
    // Whether hostile is run or not is computed further down in the `run_hostile` param.

    // ------------------------------------------------------------------
    // Taxonomic profiling
    // ------------------------------------------------------------------
    // EMU defaults
    emu_db = "${params.db_base_dir}/emu_db/emu_db_mar2026/"
    sample_size = null
    seqtype = params.sequencing_technology == "nanopore" ? "map-ont" : "map-pb"
    min_abundance = 0.0001
    minimap_max_alignments = 50
    minibatch_size = 500000000
    keep_read_assignments = true // gms_16S: changes from default
    keep_files = false
    output_unclassified = true // gms_16S: changes from default

    // LEMUR-MAGNET
    lemur_db = "${params.db_base_dir}/lemur_221_db/"
    lemur_db_zenodo_id = 10802546
    lemur_taxonomy = "${params.lemur_db}/taxonomy.tsv"
    rank = "species"

    // Ensemble species detection (unified DBs for Ganon, Kraken2 and Sylph)
    ganon_db = "${params.unified_db_base_dir}/ganon2_abvf_030325/" // Ganon2 database
    kraken2_db = "${params.unified_db_base_dir}/k2_abfv_030325/" // Kraken2 database
    kraken2_save_output_fastqs = false
    kraken2_save_reads_assignment = true
    sylph_db = "${params.unified_db_base_dir}/sylph_abf_030325/database.syldb" // Sylph database

    // Sylph: standalone DBs for different taxonomic groups
    // Placeholder; will use a dir/ when auto-downloading.
    sylph_prok_db_file = "${params.db_base_dir}/gtdb-r220-c200-dbv1.syldb"
    // TODO: need to make a conditional in the sylph calling subworkflow to use the proper variable for Prok?/ Viral/ Fungal DBs
    // TODO: will be downloaded with automatic scripts soon
    sylph_prok_db = "${params.db_base_dir}/sylph_prok_db/"
    sylph_vir_db = "${params.db_base_dir}/sylph_vir_db/"

    // ------------------------------------------------------------------
    // MAG/assembly workflow
    // ------------------------------------------------------------------
    // checkm2
    checkm2_db = "${params.db_base_dir}/checkm2_db/"
    checkm2_db_zenodo_id = 14897628
    checkm2_completeness_threshold = 80

    // bakta
    bakta_db = "${params.db_base_dir}/bakta_db/"

    // singlem
    singlem_db = "${params.db_base_dir}/singlem_db/"

    // flye: sequencing data type for Flye (take from params.sequencing_technology later)
    flye_mode = "nano-hq"

    // ------------------------------------------------------------------
    // Isolate/hybrid assembly workflow
    // ------------------------------------------------------------------
    assembler = "autocycler"
    hybrid_assembly = false
    autocycler_subsamples = 4
    autocycler_min_read_depth = 25
    autocycler_seed = 0
    autocycler_subsample_depth = null
    autocycler_genome_size = null
    autocycler_assemblers = "flye"
    autocycler_flye_mode = params.flye_mode
    autocycler_flye_args = ''
    run_polypolish = true
    polypolish_args = ''
    polypolish_filter_args = ''
    run_pypolca = true
    pypolca_args = '--careful'
    shortread_mapper = 'bwa'
    min_shortread_depth_for_pypolca = 25
    run_btyper3 = false
    btyper3_args = ''

    // semibin
    // Sample environment for SemiBin2's built-in model.
    // options: human_gut/dog_gut/ocean/soil/cat_gut/human_oral/mouse_gut/pig_gut/built_environment/wastewater/chicken_caecum/global
    sample_environment = "human_gut"
    // Random seed for SemiBin2 reproducibility (keep same across runs; default of 42 chosen on a whim).
    semibin2_random_seed = 42

    // Optional iterative MAG binning
    mag_iterative_binning_enabled = true
    mag_binning_iterations = 5
    mag_iterative_consensus_tool = "dastool"
    mag_iterative_seed_step = 1
    pigeon_selection_score_mode = "pigeon_loss"
    pigeon_selection_min_delta_score = 0.005
    pigeon_selection_patience = 2
    pigeon_residual_rebinning = false
    pigeon_residual_ksize = 21
    pigeon_residual_min_unexplained_fraction = 0.50
    pigeon_residual_min_contig_len = 1500
    pigeon_residual_min_contigs = 10
    metabat2_seed = 42
    vamb_seed = 42
    vamb_minfasta = 200000
    dastool_search_engine = "diamond"

    // Pigeon parameters (unitigs, contigs, bins comparison)
    pigeon_ksize = 17
    pigeon_scaled = 1000
    pigeon_seed = 42
    pigeon_top_bins = 20
    pigeon_reads_subsample = 0
    pigeon_skip_db = false

    // Other tools
    seqscreen_db = "${params.db_base_dir}/SeqScreenDB_23.4/"
    seqscreen_mode = "fast"

    // ------------------------------------------------------------------
    // Computed parameters (evaluated when the config is loaded)
    // ------------------------------------------------------------------
    // Host sequence removal (hostile):
    // conditional HOSTILE_FETCH and _CLEAN based on data_type and sample_environment.
    // True when sample_environment contains "human" and data_type is not "16S".
    run_hostile = params.sample_environment =~ /human/ && params.data_type != "16S"
    // TODO: future update: enable hostile db change between human default and mouse based on params.sample_environment
    // Then change the matcher to =~ /human|mouse/ to pick up both!
    // hostile_index = params.sample_environment == "human" ? "human_genome_index" : "mouse_genome_index"

    // ------------------------------------------------------------------
    // Boilerplate options
    // ------------------------------------------------------------------
    max_cpus = null // auto-detected by conf/base.config when null
    max_memory_gb = null // auto-detected by conf/base.config when null
    local_memory_reserve_gb = 4 // RAM kept free for the OS during local runs
    outdir = "results/" // need to turn on publishDir in modules.config
    outputDir = params.outdir // alias for outdir, used in workflow outputs (future-proof, nf-core style)
    output_dir = params.outdir // alias for outdir, used in process level publish (Austin)
    publish_dir_mode = 'copy'
    monochrome_logs = false
    version = false
    pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/'
    // Timestamp taken when the config is loaded; interpolated into report file names.
    trace_report_suffix = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss')

    // ------------------------------------------------------------------
    // Config options
    // ------------------------------------------------------------------
    config_profile_name = null
    config_profile_description = null
    custom_config_version = 'master'
    custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
    config_profile_contact = null
    config_profile_url = null
    validate_params = true
}
// Shared cache defaults for all conda/micromamba-enabled runs. Profiles still
// decide whether conda is enabled; this only controls where environments live.
conda {
    // Changed from the default `work/conda/` location: NXF_CONDA_CACHEDIR is used
    // when set, otherwise environments are cached under $HOME/.nextflow/conda.
    cacheDir = System.getenv('NXF_CONDA_CACHEDIR') ?: "${env('HOME')}/.nextflow/conda"
    createTimeout = 2.h
    channels = ['conda-forge', 'bioconda']
}
// Load base.config by default for all pipelines.
// Per the comments on params.max_cpus / params.max_memory_gb, base.config is expected to
// auto-detect those limits when they are left null.
includeConfig 'conf/base.config'
// -------------------------
// Profiles
// -------------------------
profiles {

    // Default profile for local execution using micromamba
    standard {
        conda {
            enabled = true
            useMicromamba = true
            createTimeout = 2.h
            channels = ['conda-forge', 'bioconda'] // channel precedence is important!
        }
    }

    // Verbose troubleshooting: dump hashes, echo the host name before each task,
    // keep work files and turn process-name validation warnings back on.
    debug {
        dumpHashes = true
        cleanup = false
        process.beforeScript = 'echo $HOSTNAME'
        nextflow.enable.configProcessNamesValidation = true
    }

    // ---- Software-environment profiles: each enables one engine and disables the others ----

    conda {
        conda.enabled = true
        conda.channels = ['conda-forge', 'bioconda']
        conda.createTimeout = 2.h
        apptainer.enabled = false
        charliecloud.enabled = false
        docker.enabled = false
        podman.enabled = false
        shifter.enabled = false
        singularity.enabled = false
    }

    mamba {
        conda.enabled = true
        conda.useMamba = true
        conda.createTimeout = 2.h
        apptainer.enabled = false
        charliecloud.enabled = false
        docker.enabled = false
        podman.enabled = false
        shifter.enabled = false
        singularity.enabled = false
    }

    docker {
        docker.enabled = true
        docker.runOptions = '-u $(id -u):$(id -g)'
        apptainer.enabled = false
        charliecloud.enabled = false
        conda.enabled = false
        podman.enabled = false
        shifter.enabled = false
        singularity.enabled = false
    }

    // Only overrides the docker run options (adds --platform=linux/amd64)
    arm {
        docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64'
    }

    singularity {
        singularity.enabled = true
        singularity.autoMounts = true
        apptainer.enabled = false
        charliecloud.enabled = false
        conda.enabled = false
        docker.enabled = false
        podman.enabled = false
        shifter.enabled = false
    }

    podman {
        podman.enabled = true
        apptainer.enabled = false
        charliecloud.enabled = false
        conda.enabled = false
        docker.enabled = false
        shifter.enabled = false
        singularity.enabled = false
    }

    shifter {
        shifter.enabled = true
        apptainer.enabled = false
        charliecloud.enabled = false
        conda.enabled = false
        docker.enabled = false
        podman.enabled = false
        singularity.enabled = false
    }

    charliecloud {
        charliecloud.enabled = true
        apptainer.enabled = false
        conda.enabled = false
        docker.enabled = false
        podman.enabled = false
        shifter.enabled = false
        singularity.enabled = false
    }

    apptainer {
        apptainer.enabled = true
        apptainer.autoMounts = true
        charliecloud.enabled = false
        conda.enabled = false
        docker.enabled = false
        podman.enabled = false
        shifter.enabled = false
        singularity.enabled = false
    }

    // needs a tower.accessToken to use this without service rate limits
    // https://www.nextflow.io/docs/latest/wave.html#wave-page
    wave {
        wave.enabled = true
        wave.freeze = true // Generates lock files automatically
        wave.strategy = 'conda,container'
        apptainer.ociAutoPull = true
        singularity.ociAutoPull = true
    }

    // Adds GPU flags to the container run options of docker, apptainer and singularity
    gpu {
        docker.runOptions = '-u $(id -u):$(id -g) --gpus all'
        apptainer.runOptions = '--nv'
        singularity.runOptions = '--nv'
    }

    // Test profiles are defined in their own config files
    test {
        includeConfig 'conf/test.config'
    }
    test_full {
        includeConfig 'conf/test_full.config'
    }
}
// Load nf-core custom profiles from different institutions.
// "${params.custom_config_base}/nfcore_custom.config" is included when params.custom_config_base
// is set AND either the NXF_OFFLINE environment variable is not set or the base does not start
// with 'http' (i.e. it is a local path). In every other case /dev/null is included instead,
// which makes the include a no-op.
includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null"
// Pipeline-specific nf-core config for somatem (currently disabled: the includeConfig line below is commented out).
// TODO nf-core: Optionally, you can add a pipeline-specific nf-core config at https://github.com/nf-core/configs
// includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/pipeline/somatem.config" : "/dev/null"
// Default container registry for every supported engine, set independently of -profile.
// These values are not used unless the corresponding engine (Apptainer / Charliecloud /
// Docker / Podman / Singularity) is enabled.
// Point them at your own registry if you have a mirror of the containers.
apptainer.registry    = 'quay.io'
charliecloud.registry = 'quay.io'
docker.registry       = 'quay.io'
podman.registry       = 'quay.io'
singularity.registry  = 'quay.io'
// Load igenomes.config if required.
// params.igenomes_ignore defaults to true in the params block, so unless it is overridden
// conf/igenomes_ignored.config is the file that gets included.
includeConfig !params.igenomes_ignore ? 'conf/igenomes.config' : 'conf/igenomes_ignored.config'
// Environment variables exported so that local Python/R libraries do not conflict with
// those in the container.
// The JULIA depot path is fixed to `/usr/local/share/julia`, which needs to be used for
// packages in the container; see https://apeltzer.github.io/post/03-julia-lang-nextflow/
// for details. Once there is a common agreement on where to keep Julia packages, this is adjustable.
env {
    PYTHONNOUSERSITE = 1
    R_PROFILE_USER   = "/.Rprofile"
    R_ENVIRON_USER   = "/.Renviron"
    JULIA_DEPOT_PATH = "/usr/local/share/julia"
}
// Set bash options for every process script:
//   -C           No clobber - prevent output redirection from overwriting files.
//   -e           Exit if a tool returns a non-zero status/exit code.
//   -u           Treat unset variables and parameters as an error.
//   -o pipefail  Return the status of the last command to exit with a non-zero status,
//                or zero if all commands execute successfully.
process.shell = ["bash", "-C", "-e", "-u", "-o", "pipefail"]
// nf-schema validation block
validation {
    // Log colouring follows the pipeline-level monochrome_logs param
    monochromeLogs = params.monochrome_logs
    defaultIgnoreParams = ["genomes"]
}
// Disable process selector warnings by default.
// The `debug` profile in this file sets the same flag back to true to enable the warnings.
nextflow.enable.configProcessNamesValidation = false
// Execution reports: all four are enabled and written under
// ${params.outdir}/pipeline_info, with params.trace_report_suffix in the file name.
timeline {
    enabled = true
    file    = "${params.outdir}/pipeline_info/execution_timeline_${params.trace_report_suffix}.html"
}

report {
    enabled = true
    file    = "${params.outdir}/pipeline_info/execution_report_${params.trace_report_suffix}.html"
}

trace {
    enabled = true
    file    = "${params.outdir}/pipeline_info/execution_trace_${params.trace_report_suffix}.txt"
}

dag {
    enabled = true
    file    = "${params.outdir}/pipeline_info/pipeline_dag_${params.trace_report_suffix}.html"
}
manifest {
    // Pipeline metadata (name, contributors, entry script, required Nextflow version).
    // NOTE(review): an earlier `manifest { ... }` block near the top of this file sets
    // name, description, version, nextflowVersion, homePage and mainScript to different
    // values ('somatem', 'workflows/main.nf', '1.0.0', a treangenlab homePage, ...).
    // Presumably the assignments here take effect because they come later — confirm
    // which set is intended and remove the other.
    name = 'nf-core/somatem'
    contributors = [
        // TODO nf-core: Update the field with the details of the contributors to your pipeline. New with Nextflow version 24.10.0
        [
            name: 'Prashant Kalvapalle',
            affiliation: 'Rice University',
            email: '',
            github: 'ppreshant',
            contribution: [], // List of contribution types ('author', 'maintainer' or 'contributor')
            orcid: ''
        ],
        [
            name: 'Austin Marshall', // stray leading space removed from the name
            affiliation: 'Rice University',
            email: '',
            github: 'microbemarsh',
            contribution: [], // List of contribution types ('author', 'maintainer' or 'contributor')
            orcid: ''
        ],
    ]
    homePage = 'https://github.com/nf-core/somatem'
    description = """A long-read metagenomics pipeline with support for longitudinal analysis """
    mainScript = 'main.nf'
    defaultBranch = 'master'
    nextflowVersion = '>=25.10.2'
    version = '1.0.0dev'
    doi = ''
}
// Load modules.config for DSL2 module specific options.
// This is the last include visible in this file, so it is read after the params,
// profiles and scope settings above.
includeConfig 'conf/modules.config'