diff --git a/clowm/CHANGELOG.md b/clowm/CHANGELOG.md index b1db9ae..3321bb9 100644 --- a/clowm/CHANGELOG.md +++ b/clowm/CHANGELOG.md @@ -1,5 +1,12 @@ # SANS Changelog +## v1.0.9 [2026-01-12] + +### Changed + + - make workflow compatible with Nextflow 25.10 (strict syntax, workflow output definition) + - update Python container in workflow pre-step + ## v1.0.8 [2025-10-02] ### Fixed diff --git a/clowm/main.nf b/clowm/main.nf index 756f5b2..017e409 100644 --- a/clowm/main.nf +++ b/clowm/main.nf @@ -1,220 +1,233 @@ -fileEndingList = ["*.fa", "*.fa.gz", "*.fasta", "*.fasta.gz", "*.faa", "*.faa.gz", "*.fna", "*.fna.gz", "*.mpfa", "*.mpfa.gz", "*.ffn", "*.ffn.gz", "*.faa.gz", "*.fastq", "*.fastq.gz", "*.fq", "*.fq.gz"] - -inputChannel=Channel.fromPath(fileEndingList.collect { params.input + "/" + it },type : "file") +// -------------------------------------------------- +// SANS process +// -------------------------------------------------- +process SANS { + label 'highmemMedium' + container "ghcr.io/gi-bielefeld/sans:v1.0.8" + debug false + + input: + path inputFiles + path label + path label_colors + path fof + path blacklist + + output: + path 'sans_*', optional: true, emit: results + path 'sans.*', optional: true, emit: logs + + script: + """ + touch ${inputFiles} + echo "${inputFiles}" | tr " " "\\n" > genomeList.txt + + if [ ${params.pdf ? "1" : "0"} -eq 1 ] || [ ${params.svg ? 
"1" : "0"} -eq 1 ] || [ "${label.name}" != 'NO_FILE' ]; then + /usr/bin/Xvfb & + fi -params.label = "$projectDir/NO_FILE" -params.label_colors = "$projectDir/NO_FILE2" -params.file_of_files = "$projectDir/NO_FILE3" -params.blacklist = "$projectDir/NO_FILE4" + if [ \$( cat genomeList.txt | wc -l) -lt 2 ]; then + echo \"ERROR: The input is either empty or fewer than two files.\" > sans.err; + exit 0 + fi -process sans { - container "ghcr.io/gi-bielefeld/sans:v1.0.8" - publishDir params.outdir, mode: 'symlink' - debug false - label 'highmemMedium' - input: - path inputFiles - file label - file label_colors - file fof - file blacklist - - output: - path 'sans_splitnetwork.pdf', optional: true - path 'sans_splitnetwork.svg', optional: true - path 'sans_splitnetwork.nexus', optional: true - path 'sans_splitnetwork.tsv', optional: true - path 'sans_tree.pdf', optional: true - path 'sans_tree.svg', optional: true - path 'sans_tree.newick', optional: true - path 'sans_tree.tsv', optional: true - path 'sans_tree.tsv.bootstrap', optional: true - path 'sans_splitnetwork.tsv.bootstrap', optional: true - path 'sans_core.fasta', optional: true - path 'sans.log', optional: true - path 'sans.err', optional: true - path 'sans.stats', optional: true - - - script: - """ - touch $inputFiles - echo "$inputFiles" | tr " " "\n" > genomeList.txt - - if [ ${params.pdf ? "1" : "0"} -eq 1 ] || [ ${params.svg ? 
"1" : "0"} -eq 1 ] || [ label.name != 'NO_FILE' ]; then - /usr/bin/Xvfb & - fi - - if [ \$( cat genomeList.txt | wc -l) -lt 2 ]; then - echo \"ERROR: The input is either empty or fewer than two files.\" > sans.err; - exit 0 - fi - - - if [ ${params.bootstrapping} != null ]; then - if [ ${params.filter} == "none" ] || [ ${params.filter} == "default" ]; then - echo \"ERROR: For bootstrapping, you have to choose a filter criterion using --filter.\" > sans.err; + if [ ${params.bootstrapping} != null ]; then + if [ ${params.filter} == "none" ] || [ ${params.filter} == "default" ]; then + echo \"ERROR: For bootstrapping, you have to choose a filter criterion using --filter.\" > sans.err; + exit 0 + fi + fi + if [ ${params.consensus} != "none" ] && [ ${params.bootstrapping} == null ]; then + echo \"ERROR: Filter on bootstrap values (--consensus) can only be chosen in combination with bootstrapping (--bootstrapping).\" > sans.err; + exit 0 + fi + if [ ${params.support} != null ] && [ ${params.support} != "0" ] && [ ${params.bootstrapping} == null ]; then + echo \"ERROR: Bootstrap support filter (--support) can only be chosen in combination with bootstrapping (--bootstrapping).\" > sans.err; exit 0 fi - fi - if [ ${params.consensus} != "none" ] && [ ${params.bootstrapping} == null ]; then - echo \"ERROR: Filter on bootstrap values (--consensus) can only be chosen in combination with bootstrapping (--boostrapping).\" > sans.err; - exit 0 - fi - if [ ${params.support} != null ] && [ ${params.support} != "0" ] && [ ${params.bootstrapping} == null ]; then - echo \"ERROR: Bootstrap support filter (--support) can only be chosen in combination with bootstrapping (--boostrapping).\" > sans.err; - exit 0 - fi - - if [ ${params.pdf ? "1" : "0"} -eq 1 ] && [ ${params.svg ? "1" : "0"} -eq 0 ]; then - echo "NOTE: In some cases, the PDF might not be properly readable. Thus, the CloWM version generates the PDF output by first generating an SVG file that is then converted to PDF." 
>> sans.log - fi - - - SANS_PARAMS=\"\ - ${ params.consensus == "none" && params.filter == 'strict' ? "--output sans_tree.tsv" : '' } \ - ${ params.consensus == "none" && params.filter != 'strict' ? "--output sans_splitnetwork.tsv" : '' } \ - ${ params.consensus == "strict" ? "--output sans_tree.tsv" : '' } \ - ${ params.consensus != "none" && params.consensus != "strict" ? "--output sans_splitnetwork.tsv" : '' } \ - ${ params.consensus == "none" && params.filter == 'strict' ? "--newick sans_tree.newick" : '' } \ - ${ params.consensus == "none" && params.filter == '2-tree' ? "--newick sans_tree.newick" : '' } \ - ${ params.consensus == "none" && params.filter == '3-tree' ? "--newick sans_tree.newick" : '' } \ - ${ params.consensus == "strict" ? "--newick sans_tree.newick" : '' } \ - ${ params.consensus == "2-tree" ? "--newick sans_tree.newick" : '' } \ - ${ params.consensus == "3-tree" ? "--newick sans_tree.newick" : '' } \ - ${ params.consensus == "none" && params.filter == 'weakly' ? "--nexus sans_splitnetwork.nexus" : '' } \ - ${ params.consensus == "none" && params.filter == 'planar' ? "--nexus sans_splitnetwork.nexus" : '' } \ - ${ params.consensus == "none" && params.filter == '2-tree' ? "--nexus sans_splitnetwork.nexus" : '' } \ - ${ params.consensus == "none" && params.filter == '3-tree' ? "--nexus sans_splitnetwork.nexus" : '' } \ - ${ params.consensus == "none" && params.filter == 'default' ? "--nexus sans_splitnetwork.nexus" : '' } \ - ${ params.consensus == "weakly" ? "--nexus sans_splitnetwork.nexus" : '' } \ - ${ params.consensus == "planar" ? "--nexus sans_splitnetwork.nexus" : '' } \ - ${ params.consensus == "2-tree" ? "--nexus sans_splitnetwork.nexus" : '' } \ - ${ params.consensus == "3-tree" ? "--nexus sans_splitnetwork.nexus" : '' } \ - ${ params.qualify != null ? "--qualify ${ params.qualify }" : "" } \ - ${ fof.name != 'NO_FILE3' ? "--input $fof" : '--input genomeList.txt' } \ - ${ params.amino ? "--amino" : "" } \ - ${ params.translate ? 
"--code ${ params.code }" : "" } \ - ${ params.kmer != null ? "--kmer ${ params.kmer }" : "" } \ - ${ label.name != 'NO_FILE' && label_colors.name == 'NO_FILE2' ? "--label $label" : '' } \ - ${ label.name != 'NO_FILE' && label_colors.name != 'NO_FILE2' ? "--label $label $label_colors" : '' } \ - ${ params.top != null ? "--top ${ params.top }" : "" } \ - ${ params.pdf && params.consensus == "none" && params.filter == 'strict' ? "--svg sans_tree.svg" : "" } \ - ${ params.pdf && params.consensus == "none" && params.filter != 'strict' ? "--svg sans_splitnetwork.svg" : "" } \ - ${ params.pdf && params.consensus == "strict" ? "--svg sans_tree.svg" : "" } \ - ${ params.pdf && params.consensus != "none" && params.consensus != "strict" ? "--svg sans_splitnetwork.svg" : "" } \ - ${ params.svg && params.consensus == "none" && params.filter == 'strict' ? "--svg sans_tree.svg" : "" } \ - ${ params.svg && params.consensus == "none" && params.filter != 'strict' ? "--svg sans_splitnetwork.svg" : "" } \ - ${ params.svg && params.consensus == "strict" ? "--svg sans_tree.svg" : "" } \ - ${ params.svg && params.consensus != "none" && params.consensus != "strict" ? "--svg sans_splitnetwork.svg" : "" } \ - ${ params.filter == 'strict' ? "--filter strict" : '' } \ - ${ params.filter == 'weakly' ? "--filter weakly" : '' } \ - ${ params.filter == 'planar' ? "--filter planar" : '' } \ - ${ params.filter == '2-tree' ? "--filter 2-tree" : '' } \ - ${ params.filter == '3-tree' ? "--filter 3-tree" : '' } \ - ${ params.filter == 'default' ? "--filter weakly" : '' } \ - ${ params.bootstrapping != null ? "--bootstrapping ${ params.bootstrapping } ${ params.support }" : "" } \ - ${ params.consensus != "none" ? "--consensus ${ params.consensus }" : "" } \ - ${ params.iupac != 0 ? "--iupac ${ params.iupac }" : "" } \ - ${ params.norev ? "--norev" : "" } \ - ${ params.mean != "geom2" ? "--mean ${ params.mean }" : "" } \ - ${ params.core ? 
"--core sans_core.fasta" : "" } \ - ${ blacklist.name != 'NO_FILE4' ? "--blacklist $blacklist" : "" } \ - --verbose \ - --threads ${ task.cpus } \ - --stats sans.stats\" - - echo SANS \$SANS_PARAMS >> sans.log - - SANS-autoN.sh \$SANS_PARAMS 2>&1 | grep -v \"Fontconfig error\" | awk -F \"\r\" '{print \$NF}' >> sans.log - - if [ ${params.filter} == "default" ] && [ ${params.tree ? "1" : "0"} -eq 1 ]; then - - echo \"\" >> sans.log - - SANS_PARAMS=\"--splits sans_splitnetwork.tsv \ - --output sans_tree.tsv \ - --newick sans_tree.newick \ + + if [ ${params.pdf ? "1" : "0"} -eq 1 ] && [ ${params.svg ? "1" : "0"} -eq 0 ]; then + echo "NOTE: In some cases, the PDF might not be properly readable. Thus, the CloWM version generates the PDF output by first generating an SVG file that is then converted to PDF." >> sans.log + fi + + SANS_PARAMS=\"\ + ${ params.consensus == "none" && params.filter == 'strict' ? "--output sans_tree.tsv" : '' } \ + ${ params.consensus == "none" && params.filter != 'strict' ? "--output sans_splitnetwork.tsv" : '' } \ + ${ params.consensus == "strict" ? "--output sans_tree.tsv" : '' } \ + ${ params.consensus != "none" && params.consensus != "strict" ? "--output sans_splitnetwork.tsv" : '' } \ + ${ params.consensus == "none" && params.filter == 'strict' ? "--newick sans_tree.newick" : '' } \ + ${ params.consensus == "none" && params.filter == '2-tree' ? "--newick sans_tree.newick" : '' } \ + ${ params.consensus == "none" && params.filter == '3-tree' ? "--newick sans_tree.newick" : '' } \ + ${ params.consensus == "strict" ? "--newick sans_tree.newick" : '' } \ + ${ params.consensus == "2-tree" ? "--newick sans_tree.newick" : '' } \ + ${ params.consensus == "3-tree" ? "--newick sans_tree.newick" : '' } \ + ${ params.consensus == "none" && params.filter == 'weakly' ? "--nexus sans_splitnetwork.nexus" : '' } \ + ${ params.consensus == "none" && params.filter == 'planar' ? 
"--nexus sans_splitnetwork.nexus" : '' } \ + ${ params.consensus == "none" && params.filter == '2-tree' ? "--nexus sans_splitnetwork.nexus" : '' } \ + ${ params.consensus == "none" && params.filter == '3-tree' ? "--nexus sans_splitnetwork.nexus" : '' } \ + ${ params.consensus == "none" && params.filter == 'default' ? "--nexus sans_splitnetwork.nexus" : '' } \ + ${ params.consensus == "weakly" ? "--nexus sans_splitnetwork.nexus" : '' } \ + ${ params.consensus == "planar" ? "--nexus sans_splitnetwork.nexus" : '' } \ + ${ params.consensus == "2-tree" ? "--nexus sans_splitnetwork.nexus" : '' } \ + ${ params.consensus == "3-tree" ? "--nexus sans_splitnetwork.nexus" : '' } \ + ${ params.qualify != null ? "--qualify ${ params.qualify }" : "" } \ ${ fof.name != 'NO_FILE3' ? "--input $fof" : '--input genomeList.txt' } \ + ${ params.amino ? "--amino" : "" } \ + ${ params.translate ? "--code ${ params.code }" : "" } \ + ${ params.kmer != null ? "--kmer ${ params.kmer }" : "" } \ ${ label.name != 'NO_FILE' && label_colors.name == 'NO_FILE2' ? "--label $label" : '' } \ ${ label.name != 'NO_FILE' && label_colors.name != 'NO_FILE2' ? "--label $label $label_colors" : '' } \ ${ params.top != null ? "--top ${ params.top }" : "" } \ - ${ params.pdf ? "--svg sans_tree.svg" : "" } \ - ${ params.svg ? "--svg sans_tree.svg" : "" } \ - --filter strict + ${ params.pdf && params.consensus == "none" && params.filter == 'strict' ? "--svg sans_tree.svg" : "" } \ + ${ params.pdf && params.consensus == "none" && params.filter != 'strict' ? "--svg sans_splitnetwork.svg" : "" } \ + ${ params.pdf && params.consensus == "strict" ? "--svg sans_tree.svg" : "" } \ + ${ params.pdf && params.consensus != "none" && params.consensus != "strict" ? "--svg sans_splitnetwork.svg" : "" } \ + ${ params.svg && params.consensus == "none" && params.filter == 'strict' ? "--svg sans_tree.svg" : "" } \ + ${ params.svg && params.consensus == "none" && params.filter != 'strict' ? 
"--svg sans_splitnetwork.svg" : "" } \ + ${ params.svg && params.consensus == "strict" ? "--svg sans_tree.svg" : "" } \ + ${ params.svg && params.consensus != "none" && params.consensus != "strict" ? "--svg sans_splitnetwork.svg" : "" } \ + ${ params.filter == 'strict' ? "--filter strict" : '' } \ + ${ params.filter == 'weakly' ? "--filter weakly" : '' } \ + ${ params.filter == 'planar' ? "--filter planar" : '' } \ + ${ params.filter == '2-tree' ? "--filter 2-tree" : '' } \ + ${ params.filter == '3-tree' ? "--filter 3-tree" : '' } \ + ${ params.filter == 'default' ? "--filter weakly" : '' } \ + ${ params.bootstrapping != null ? "--bootstrapping ${ params.bootstrapping } ${ params.support }" : "" } \ + ${ params.consensus != "none" ? "--consensus ${ params.consensus }" : "" } \ + ${ params.iupac != 0 ? "--iupac ${ params.iupac }" : "" } \ + ${ params.norev ? "--norev" : "" } \ ${ params.mean != "geom2" ? "--mean ${ params.mean }" : "" } \ + ${ params.core ? "--core sans_core.fasta" : "" } \ + ${ blacklist.name != 'NO_FILE4' ? "--blacklist $blacklist" : "" } \ --verbose \ - --threads ${ task.cpus }\" - + --threads ${ task.cpus } \ + --stats sans.stats\" + echo SANS \$SANS_PARAMS >> sans.log - + SANS-autoN.sh \$SANS_PARAMS 2>&1 | grep -v \"Fontconfig error\" | awk -F \"\r\" '{print \$NF}' >> sans.log - fi + if [ ${params.filter} == "default" ] && [ ${params.tree ? "1" : "0"} -eq 1 ]; then - if [ ${params.pdf ? "1" : "0"} -eq 1 ] && [ ${params.svg ? "1" : "0"} -eq 0 ]; then - if [ -f "sans_tree.svg" ]; then cairosvg sans_tree.svg -o sans_tree.pdf; rm sans_tree.svg; fi - if [ -f "sans_splitnetwork.svg" ]; then cairosvg sans_splitnetwork.svg -o sans_splitnetwork.pdf; rm sans_splitnetwork.svg; fi - fi + echo \"\" >> sans.log - rm -f genomeList.txt - """ -} + SANS_PARAMS=\"--splits sans_splitnetwork.tsv \ + --output sans_tree.tsv \ + --newick sans_tree.newick \ + ${ fof.name != 'NO_FILE3' ? 
"--input $fof" : '--input genomeList.txt' } \ + ${ label.name != 'NO_FILE' && label_colors.name == 'NO_FILE2' ? "--label $label" : '' } \ + ${ label.name != 'NO_FILE' && label_colors.name != 'NO_FILE2' ? "--label $label $label_colors" : '' } \ + ${ params.top != null ? "--top ${ params.top }" : "" } \ + ${ params.pdf ? "--svg sans_tree.svg" : "" } \ + ${ params.svg ? "--svg sans_tree.svg" : "" } \ + --filter strict + ${ params.mean != "geom2" ? "--mean ${ params.mean }" : "" } \ + --verbose \ + --threads ${ task.cpus }\" -process unzip { - - container 'python:3.12' - input: - path zipgenomes - output: - path 'output/*' - - script: - """ - #!/usr/local/bin/python - - import zipfile - with zipfile.ZipFile("${zipgenomes }", 'r') as zip_ref: - zip_ref.extractall("output") - """ -} + echo SANS \$SANS_PARAMS >> sans.log + + SANS-autoN.sh \$SANS_PARAMS 2>&1 | grep -v \"Fontconfig error\" | awk -F \"\r\" '{print \$NF}' >> sans.log -process untargz { - - container 'python:3.12' - input: - path zipgenomes - output: - path 'output/*' - - script: - """ - #!/usr/local/bin/python - - import tarfile - file = tarfile.open("${zipgenomes }") - file.extractall("output") - file.close() - """ + fi + + if [ ${params.pdf ? "1" : "0"} -eq 1 ] && [ ${params.svg ? 
"1" : "0"} -eq 0 ]; then + if [ -f "sans_tree.svg" ]; then cairosvg sans_tree.svg -o sans_tree.pdf; rm sans_tree.svg; fi + if [ -f "sans_splitnetwork.svg" ]; then cairosvg sans_splitnetwork.svg -o sans_splitnetwork.pdf; rm sans_splitnetwork.svg; fi + fi + + rm -f genomeList.txt + """ } +// -------------------------------------------------- +// ZIP extractor +// -------------------------------------------------- +process UNZIP { + container 'python:3.14-slim' -workflow { - opt_label = file(params.label, checkIfExists:true) - opt_label_colors = file(params.label_colors, checkIfExists:true) - opt_fof = file(params.file_of_files, checkIfExists:true) - opt_blacklist = file(params.blacklist, checkIfExists:true) - if (params.input.endsWith(".zip")) { - unzip(params.input) - sans(unzip.output,opt_label,opt_label_colors,opt_fof,opt_blacklist) - } else if (params.input.endsWith(".tar.gz")) { - untargz(params.input) - sans(untargz.output,opt_label,opt_label_colors,opt_fof,opt_blacklist) - } else { - sans(inputChannel.ifEmpty(file("$projectDir/NO_FILE5")).collect(),opt_label,opt_label_colors,opt_fof,opt_blacklist) - } + input: + path zipgenomes + + output: + path 'output/*', emit: unzipped + + script: + """ + #!/usr/bin/env python + + import zipfile + with zipfile.ZipFile("${zipgenomes}") as z: + z.extractall("output") + """ } +// -------------------------------------------------- +// TAR extractor +// -------------------------------------------------- +process UNTAR { + container 'python:3.14-slim' + input: + path targenomes + output: + path 'output/*', emit: untarred + + script: + """ + #!/usr/bin/env python + + import tarfile + with tarfile.open("${targenomes}") as t: + t.extractall("output") + """ +} + +// -------------------------------------------------- +// Workflow definition +// -------------------------------------------------- +workflow { + main: + opt_label = params.label == null ? 
projectDir.resolve("NO_FILE") : file(params.label, checkIfExists:true) + opt_label_colors = params.label_colors == null ? projectDir.resolve("NO_FILE2") : file(params.label_colors, checkIfExists:true) + opt_fof = params.file_of_files == null ? projectDir.resolve("NO_FILE3") : file(params.file_of_files, checkIfExists:true) + opt_blacklist = params.blacklist == null ? projectDir.resolve("NO_FILE4") : file(params.blacklist, checkIfExists:true) + + def fileEndingList = [ + "*.fa","*.fa.gz","*.fasta","*.fasta.gz", + "*.faa","*.faa.gz","*.fna","*.fna.gz", + "*.mpfa","*.mpfa.gz","*.ffn","*.ffn.gz", + "*.fastq","*.fastq.gz","*.fq","*.fq.gz" + ] + + if (params.input.endsWith(".zip")) { + UNZIP(file(params.input, checkIfExists: true)) + SANS(UNZIP.out.unzipped, opt_label, opt_label_colors, opt_fof, opt_blacklist) + } + else if (params.input.endsWith(".tar.gz")) { + UNTAR(file(params.input, checkIfExists: true)) + SANS(UNTAR.out.untarred, opt_label, opt_label_colors, opt_fof, opt_blacklist) + } + else { + inputChannel = Channel + .fromPath(fileEndingList.collect { "${params.input}/${it}" }, type: "file") + .ifEmpty(projectDir.resolve("NO_FILE5")) + .collect() + SANS(inputChannel, opt_label, opt_label_colors, opt_fof, opt_blacklist) + } + + publish: + sans_results = SANS.out.results + sans_logs = SANS.out.logs +} + +output { + sans_results { + path '.' + } + sans_logs { + path '.' 
+ } +} diff --git a/nextflow.config b/nextflow.config index 243596c..16e19ff 100644 --- a/nextflow.config +++ b/nextflow.config @@ -23,8 +23,29 @@ params { blacklist = null } +outputDir = params.outdir + manifest { - homePage = "https://gitlab.ub.uni-bielefeld.de/gi/sans" + homePage = "https://github.com/gi-bielefeld/sans" doi = "10.1093/bioinformatics/btab444" mainScript = "clowm/main.nf" + license = "GNU General Public License v3.0" + name = "SANS" + nextflowVersion = ">=25.10" + contributors = [ + [ + name: "Roland Wittler", + affiliation: "Bielefeld University", + contribution: ["author", "contributor", "maintainer"], + orcid: "https://orcid.org/0000-0002-2249-9880", + github: "https://github.com/rwittler" + ], + [ + name: "Andreas Rempel", + affiliation: "Bielefeld University", + contribution: ["contributor"], + github: "https://github.com/andreas-rempel", + orcid: "https://orcid.org/0000-0003-0609-5621" + ] + ] }