Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,11 @@ process {
]
}

// Forward the user-supplied Bowtie 1 alignment flags to the genome mapping step;
// falls back to an empty string when params.bowtie_align_args is unset or empty.
withName: 'SMRNASEQ:GENOME_QUANT:BOWTIE_MAP_GENOME' {
    ext.args = params.bowtie_align_args ?: ''
}

withName: 'CLEAN_FASTA' {
ext.args = "-c fastx '{gsub(/[^ATGCatgc]/, \"N\", \$seq); sub(/ .*/, \"\", \$name); print \">\"\$name\"\\n\"\$seq}'"
ext.prefix = {"${meta.id}_clean.fa"}
Expand Down
1 change: 1 addition & 0 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ See examples in: [the test-datasets repository of nf-core](https://github.com/nf

- `fasta`: the reference genome FASTA file
- `bowtie_index`: points to the folder containing the `bowtie` indices for the genome reference specified by `fasta`.
- `bowtie_align_args`: optional flags to pass to Bowtie 1 for the genome mapping, e.g. `--bowtie_align_args '-v 1 --best'`. Only a subset of Bowtie 1 flags is allowed, to keep the arguments compatible with the pipeline. If a disallowed flag or an invalid value is supplied, the pipeline exits with an error message.

> [!NOTE]
> If the FASTA file in `fasta` is not the same file used to generate the `bowtie` indices, the pipeline will fail.
Expand Down
2 changes: 1 addition & 1 deletion modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@
},
"seqcluster/collapse": {
"branch": "master",
"git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
"git_sha": "6d46786420b4d7bc88eba026eb389c0c5535d120",
"installed_by": ["modules"]
},
"seqkit/fq2fa": {
Expand Down
1 change: 1 addition & 0 deletions modules/nf-core/multiqc/environment.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions modules/nf-core/seqcluster/collapse/environment.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 2 additions & 3 deletions modules/nf-core/seqcluster/collapse/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ params {
genome = null
igenomes_base = 's3://ngi-igenomes/igenomes/'
igenomes_ignore = false

// Bowtie align options
bowtie_align_args = ''

// MultiQC options
multiqc_config = null
multiqc_title = null
Expand Down
8 changes: 8 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,14 @@
"fa_icon": "fas fa-book",
"help_text": "Point to the directory created by Bowtie 1 when indexing. Bowtie 1 indices consist of six files:\n\n```bash\ngenome.1.ebwt, genome.2.ebwt, genome.3.ebwt, genome.4.ebwt, genome.rev.1.ebwt, genome.rev.2.ebwt\n```\n"
},
"bowtie_align_args": {
"type": "string",
"default": "",
"description": "Additional arguments to pass to Bowtie alignment.",
"help_text": "These flags are passed to Bowtie 1 for the genome mapping, e.g. `--bowtie_align_args '-v 1 --best'`. Only an allowlisted subset of Bowtie 1 flags is accepted; the pipeline exits with an error if a disallowed flag or an invalid value is supplied.",
"fa_icon": "fas fa-terminal",
"hidden": true
},
"save_reference": {
"type": "boolean",
"description": "Save generated reference genome files to results.",
Expand Down
97 changes: 97 additions & 0 deletions subworkflows/local/utils_nfcore_smrnaseq_pipeline/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ workflow PIPELINE_INITIALISATION {
//
validateInputParameters()

//
// Validate the bowtie align arguments provided through params.bowtie_align_args
//
validateBowtieGenomeArgs()

//
// Create channel from input file provided through params.input
//
Expand Down Expand Up @@ -250,6 +255,98 @@ def validateInputSamplesheet(input) {
return [ metas[0], fastqs ]
}
//
// Validate the bowtie align arguments provided through params.bowtie_align_args
//
def validateBowtieGenomeArgs() {
    // Validates the free-text flags in `params.bowtie_align_args` before they are used.
    //
    // The string is split on whitespace and walked token by token. Every flag must be in
    // the allowlist below, value-taking flags must be followed by (or, for long options,
    // joined with '=' to) a non-negative integer, and a few cross-flag rules are enforced
    // at the end. Any violation aborts via `error` with a "[bowtie_align_args]" message.
    // Returns nothing; a missing, empty or whitespace-only value is accepted silently.
    if (!params.bowtie_align_args) return // unset / empty string is fine

    // Normalise once: toString() guards against a non-String value (e.g. a bare
    // `--bowtie_align_args` on the command line), and trimming *before* the emptiness
    // check stops a whitespace-only value from being reported as the disallowed flag ''.
    def rawArgs = params.bowtie_align_args.toString().trim()
    if (!rawArgs) return

    // Allowlist: flag -> expected value type
    //   null      = flag only, takes no value
    //   'int'     = requires a non-negative integer
    //   'int_0_3' = requires an integer between 0 and 3 (inclusive)
    def ALLOWED_GENOME_ARGS = [
        '-v'           : 'int_0_3',
        '-n'           : 'int_0_3',
        '--seedmms'    : 'int_0_3',
        '-e'           : 'int',
        '--maqerr'     : 'int',
        '-l'           : 'int',
        '--seedlen'    : 'int',
        '-k'           : 'int',
        '-m'           : 'int',
        '--maxbts'     : 'int',
        '--chunkmbs'   : 'int',
        '--seed'       : 'int',
        '--best'       : null,
        '--strata'     : null,
        '--nomaqround' : null,
        '--norc'       : null,
    ]

    def tokens = rawArgs.split(/\s+/)
    def i = 0
    def seen = [] as Set

    while (i < tokens.size()) {
        def token = tokens[i]

        // Handle --flag=value syntax by splitting on the first '='
        def flag = token
        def value = null
        if (token.contains('=')) {
            // Only long options support '='. With getopt-style parsing a short option
            // such as '-v=1' receives the literal argument '=1', so the value validated
            // here would not be the value the aligner actually sees.
            if (!token.startsWith('--')) {
                error "[bowtie_align_args] '${token}' is not valid: the '=' syntax is only supported for long options. " +
                    "Write short options as '<flag> <value>', e.g. '-v 1'."
            }
            def parts = token.split('=', 2)
            flag = parts[0]
            value = parts[1]
        }

        if (!ALLOWED_GENOME_ARGS.containsKey(flag)) {
            error "[bowtie_align_args] Argument '${flag}' is not allowed. " +
                "Only these flags are permitted: ${ALLOWED_GENOME_ARGS.keySet().sort().join(', ')}"
        }

        def expectedType = ALLOWED_GENOME_ARGS[flag]
        seen << flag

        if (expectedType != null) {
            // Expect a value: either inline (--flag=val) or the next token (--flag val)
            if (value == null) {
                i++
                if (i >= tokens.size()) {
                    error "[bowtie_align_args] Argument '${flag}' requires a value but none was provided."
                }
                value = tokens[i]
            }

            // Validate the value: digits only, so negative numbers and other flags are rejected
            if (!(value ==~ /\d+/)) {
                error "[bowtie_align_args] Argument '${flag}' requires an integer value, got '${value}'."
            }
            // Compare as BigInteger: toInteger() would throw NumberFormatException on a
            // very long digit string instead of producing the range error below.
            if (expectedType == 'int_0_3' && value.toBigInteger() > 3) {
                error "[bowtie_align_args] Argument '${flag}' must be between 0 and 3, got '${value}'."
            }
        } else if (value != null) {
            // Flag-only arg was given a value via = syntax
            error "[bowtie_align_args] Argument '${flag}' does not take a value, got '${value}'."
        }

        i++
    }

    // Cross-flag rules
    // '-v' cannot be combined with '-n' or '--seedmms'
    if (seen.contains('-v') && (seen.contains('-n') || seen.contains('--seedmms'))) {
        error "[bowtie_align_args] '-v' and '-n'/'--seedmms' are mutually exclusive."
    }
    // '--strata' is a dependency, not an exclusion: it is only accepted together with '--best'
    if (seen.contains('--strata') && !seen.contains('--best')) {
        error "[bowtie_align_args] '--strata' requires '--best' to also be specified."
    }
}
//
// Get attribute from genome config file e.g. fasta
//
def getGenomeAttribute(attribute) {
Expand Down
31 changes: 3 additions & 28 deletions tests/default.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -345,9 +345,6 @@
"multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt",
"multiqc/multiqc_data/fastqc_top_overrepresented_sequences_table.txt",
"multiqc/multiqc_data/llms-full.txt",
"multiqc/multiqc_data/mirtop_mean_read_count_plot.txt",
"multiqc/multiqc_data/mirtop_read_count_plot.txt",
"multiqc/multiqc_data/mirtop_unique_read_count_plot.txt",
"multiqc/multiqc_data/multiqc.log",
"multiqc/multiqc_data/multiqc.parquet",
"multiqc/multiqc_data/multiqc_citations.txt",
Expand All @@ -356,7 +353,6 @@
"multiqc/multiqc_data/multiqc_fastqc.txt",
"multiqc/multiqc_data/multiqc_fastqc_fastqc-1.txt",
"multiqc/multiqc_data/multiqc_general_stats.txt",
"multiqc/multiqc_data/multiqc_mirtop.txt",
"multiqc/multiqc_data/multiqc_mirtrace_complexity.txt",
"multiqc/multiqc_data/multiqc_mirtrace_contamination.txt",
"multiqc/multiqc_data/multiqc_mirtrace_length.txt",
Expand Down Expand Up @@ -401,12 +397,6 @@
"multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf",
"multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf",
"multiqc/multiqc_plots/pdf/fastqc_top_overrepresented_sequences_table.pdf",
"multiqc/multiqc_plots/pdf/mirtop_mean_read_count_plot-cnt.pdf",
"multiqc/multiqc_plots/pdf/mirtop_mean_read_count_plot-pct.pdf",
"multiqc/multiqc_plots/pdf/mirtop_read_count_plot-cnt.pdf",
"multiqc/multiqc_plots/pdf/mirtop_read_count_plot-pct.pdf",
"multiqc/multiqc_plots/pdf/mirtop_unique_read_count_plot-cnt.pdf",
"multiqc/multiqc_plots/pdf/mirtop_unique_read_count_plot-pct.pdf",
"multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf",
"multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf",
"multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf",
Expand Down Expand Up @@ -444,12 +434,6 @@
"multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png",
"multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png",
"multiqc/multiqc_plots/png/fastqc_top_overrepresented_sequences_table.png",
"multiqc/multiqc_plots/png/mirtop_mean_read_count_plot-cnt.png",
"multiqc/multiqc_plots/png/mirtop_mean_read_count_plot-pct.png",
"multiqc/multiqc_plots/png/mirtop_read_count_plot-cnt.png",
"multiqc/multiqc_plots/png/mirtop_read_count_plot-pct.png",
"multiqc/multiqc_plots/png/mirtop_unique_read_count_plot-cnt.png",
"multiqc/multiqc_plots/png/mirtop_unique_read_count_plot-pct.png",
"multiqc/multiqc_plots/png/samtools-stats-dp.png",
"multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png",
"multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png",
Expand Down Expand Up @@ -487,12 +471,6 @@
"multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg",
"multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg",
"multiqc/multiqc_plots/svg/fastqc_top_overrepresented_sequences_table.svg",
"multiqc/multiqc_plots/svg/mirtop_mean_read_count_plot-cnt.svg",
"multiqc/multiqc_plots/svg/mirtop_mean_read_count_plot-pct.svg",
"multiqc/multiqc_plots/svg/mirtop_read_count_plot-cnt.svg",
"multiqc/multiqc_plots/svg/mirtop_read_count_plot-pct.svg",
"multiqc/multiqc_plots/svg/mirtop_unique_read_count_plot-cnt.svg",
"multiqc/multiqc_plots/svg/mirtop_unique_read_count_plot-pct.svg",
"multiqc/multiqc_plots/svg/samtools-stats-dp.svg",
"multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg",
"multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg",
Expand Down Expand Up @@ -643,14 +621,11 @@
"fastqc_sequence_duplication_levels_plot.txt:md5,88b0babe77f623547e8af664791ef2d0",
"fastqc_sequence_length_distribution_plot.txt:md5,f1356c87ed134bdde935faf354b7a9a7",
"fastqc_top_overrepresented_sequences_table.txt:md5,4eb3d5bf770abb0a399bc5710876407b",
"mirtop_mean_read_count_plot.txt:md5,fa4d2c3fe9aac694d2d6d12bf2b844ba",
"mirtop_read_count_plot.txt:md5,835d6b70ae9f61a6e3dcbe369611943f",
"mirtop_unique_read_count_plot.txt:md5,4fc6b7e4f279b0e79a995279d3576682",
"multiqc_citations.txt:md5,ea6d63393b7f47815a949fc58ee0caf8",
"multiqc_citations.txt:md5,57db2426be011862828d18f767d25b57",
"multiqc_fastp.txt:md5,33a1689e5f0f5047da6b28440e121476",
"multiqc_fastqc.txt:md5,74d277d129db4aab9c98790a96a40557",
"multiqc_fastqc_fastqc-1.txt:md5,4c4f6b548ce4f8fdf829a9a086f1ee9f",
"multiqc_general_stats.txt:md5,9f5148b6e5e352814bbc6420efa32573",
"multiqc_general_stats.txt:md5,e20a5b517d415775d152f81cef233f4f",
"multiqc_mirtrace_complexity.txt:md5,2da5d49a2eebc988bf79ba2701982a9d",
"multiqc_mirtrace_contamination.txt:md5,a6f8d6c500f6f53c698d3432297b591d",
"multiqc_mirtrace_length.txt:md5,d740d8d43d3bcfa0bc785f6aa7481da1",
Expand All @@ -670,6 +645,6 @@
"nf-test": "0.9.3",
"nextflow": "25.04.6"
},
"timestamp": "2025-12-16T18:23:20.008558265"
"timestamp": "2026-04-29T22:01:55.005855067"
}
}
33 changes: 4 additions & 29 deletions tests/test_mirgenedb.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -332,9 +332,6 @@
"multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt",
"multiqc/multiqc_data/fastqc_top_overrepresented_sequences_table.txt",
"multiqc/multiqc_data/llms-full.txt",
"multiqc/multiqc_data/mirtop_mean_read_count_plot.txt",
"multiqc/multiqc_data/mirtop_read_count_plot.txt",
"multiqc/multiqc_data/mirtop_unique_read_count_plot.txt",
"multiqc/multiqc_data/multiqc.log",
"multiqc/multiqc_data/multiqc.parquet",
"multiqc/multiqc_data/multiqc_citations.txt",
Expand All @@ -343,7 +340,6 @@
"multiqc/multiqc_data/multiqc_fastqc.txt",
"multiqc/multiqc_data/multiqc_fastqc_fastqc-1.txt",
"multiqc/multiqc_data/multiqc_general_stats.txt",
"multiqc/multiqc_data/multiqc_mirtop.txt",
"multiqc/multiqc_data/multiqc_samtools_stats.txt",
"multiqc/multiqc_data/multiqc_software_versions.txt",
"multiqc/multiqc_data/multiqc_sources.txt",
Expand Down Expand Up @@ -384,12 +380,6 @@
"multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf",
"multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf",
"multiqc/multiqc_plots/pdf/fastqc_top_overrepresented_sequences_table.pdf",
"multiqc/multiqc_plots/pdf/mirtop_mean_read_count_plot-cnt.pdf",
"multiqc/multiqc_plots/pdf/mirtop_mean_read_count_plot-pct.pdf",
"multiqc/multiqc_plots/pdf/mirtop_read_count_plot-cnt.pdf",
"multiqc/multiqc_plots/pdf/mirtop_read_count_plot-pct.pdf",
"multiqc/multiqc_plots/pdf/mirtop_unique_read_count_plot-cnt.pdf",
"multiqc/multiqc_plots/pdf/mirtop_unique_read_count_plot-pct.pdf",
"multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf",
"multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf",
"multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf",
Expand Down Expand Up @@ -427,12 +417,6 @@
"multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png",
"multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png",
"multiqc/multiqc_plots/png/fastqc_top_overrepresented_sequences_table.png",
"multiqc/multiqc_plots/png/mirtop_mean_read_count_plot-cnt.png",
"multiqc/multiqc_plots/png/mirtop_mean_read_count_plot-pct.png",
"multiqc/multiqc_plots/png/mirtop_read_count_plot-cnt.png",
"multiqc/multiqc_plots/png/mirtop_read_count_plot-pct.png",
"multiqc/multiqc_plots/png/mirtop_unique_read_count_plot-cnt.png",
"multiqc/multiqc_plots/png/mirtop_unique_read_count_plot-pct.png",
"multiqc/multiqc_plots/png/samtools-stats-dp.png",
"multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png",
"multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png",
Expand Down Expand Up @@ -470,12 +454,6 @@
"multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg",
"multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg",
"multiqc/multiqc_plots/svg/fastqc_top_overrepresented_sequences_table.svg",
"multiqc/multiqc_plots/svg/mirtop_mean_read_count_plot-cnt.svg",
"multiqc/multiqc_plots/svg/mirtop_mean_read_count_plot-pct.svg",
"multiqc/multiqc_plots/svg/mirtop_read_count_plot-cnt.svg",
"multiqc/multiqc_plots/svg/mirtop_read_count_plot-pct.svg",
"multiqc/multiqc_plots/svg/mirtop_unique_read_count_plot-cnt.svg",
"multiqc/multiqc_plots/svg/mirtop_unique_read_count_plot-pct.svg",
"multiqc/multiqc_plots/svg/samtools-stats-dp.svg",
"multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg",
"multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg",
Expand Down Expand Up @@ -569,14 +547,11 @@
"fastqc_sequence_duplication_levels_plot.txt:md5,2b1cbdce195d2aedc4bff4c5e9b618d4",
"fastqc_sequence_length_distribution_plot.txt:md5,6bd21067e3ab28cd33ac107417e72c4c",
"fastqc_top_overrepresented_sequences_table.txt:md5,91b745155fb6c1652cdd8222fc96a9cc",
"mirtop_mean_read_count_plot.txt:md5,13fc23549fa91434e7cc87ea413022eb",
"mirtop_read_count_plot.txt:md5,2aa7167a13f4db1799736bc7150b7efd",
"mirtop_unique_read_count_plot.txt:md5,3bc374b16585d00654cde12fb7f5a363",
"multiqc_citations.txt:md5,ea6d63393b7f47815a949fc58ee0caf8",
"multiqc_citations.txt:md5,57db2426be011862828d18f767d25b57",
"multiqc_fastp.txt:md5,19649c8fcacf031058beb4574e7757f6",
"multiqc_fastqc.txt:md5,5f3d48b29b7909fd7f0c4da6f8745c08",
"multiqc_fastqc_fastqc-1.txt:md5,ca8352d25077b529780f4dfa79f343a6",
"multiqc_general_stats.txt:md5,0088299b96f9b7e15d23a91228d676d2",
"multiqc_general_stats.txt:md5,b4d1b4edcf34fcbadce157bf5b4fb7f0",
"multiqc_samtools_stats.txt:md5,805d665c8ecc0a099111166581f43c07",
"samtools-stats-dp.txt:md5,f263d0f3e065cbc1a7089887ac24938f",
"samtools_alignment_plot.txt:md5,f516084a276f0931165b93724d3735ad"
Expand Down Expand Up @@ -608,8 +583,8 @@
],
"meta": {
"nf-test": "0.9.3",
"nextflow": "25.04.6"
"nextflow": "25.04.2"
},
"timestamp": "2025-12-16T17:00:25.99408485"
"timestamp": "2026-04-30T09:13:45.941037731"
}
}
Loading
Loading