Add multi-FASTA (fragmented genome) reference support to the ABACAS step.

Summary of what this patch does:
  * ci.yml: adds a `test_illumina_fragmented` job that runs the pipeline with
    the new `test_illumina_fragmented` profile.
  * conf/test_illumina_fragmented.config + nextflow.config: define and register
    that test profile.
  * subworkflows/local/abacas_multifasta.nf: new ABACAS_MULTI subworkflow. It
    splits the reference into one file per FASTA record, runs ABACAS for every
    (sample, record) pair and concatenates the per-record results into one
    `<id>.fa` per sample.
  * subworkflows/local/assembly_qc.nf: branches the reference on
    `countFasta() > 1`; single-record references go to ABACAS_SINGLE,
    multi-record references go to ABACAS_MULTI.
  * assembly_{spades,unicycler,minia}.nf + workflows/illumina.nf: thread the
    assembler name down to ASSEMBLY_QC (used for the ABACAS_MULTI output dir).
  * conf/modules_illumina.config: separate selectors for ABACAS_SINGLE and the
    nested ABACAS run; the nested one gets a per-record `ext.prefix`.
  * modules.json: adds `installed_by` to the two patched modules.

NOTE(review): indentation of context/removed lines was reconstructed; apply
with `git apply --ignore-whitespace` if context does not match exactly.

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a2e970aba..500ca1d47 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -319,3 +319,68 @@ jobs:
       - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ matrix.parameters }} | ${{ matrix.profile }}"
         run: |
           nextflow run ${GITHUB_WORKSPACE} -profile test_nanopore,${{ matrix.profile }} ${{ matrix.parameters }} --outdir ./results
+
+  test_illumina_fragmented:
+    name: "Test using multifasta reference (${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }})"
+    # Only run on push if this is the nf-core dev branch (merged PRs)
+    if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }}"
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        NXF_VER:
+          - "24.04.2"
+          - "latest-everything"
+        profile:
+          - "conda"
+          - "docker"
+          - "singularity"
+        test_name:
+          - "test_illumina_fragmented"
+        isMaster:
+          - ${{ github.base_ref == 'master' }}
+        # Exclude conda and singularity on dev
+        exclude:
+          - isMaster: false
+            profile: "conda"
+          - isMaster: false
+            profile: "singularity"
+    steps:
+      - name: Check out pipeline code
+        uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4
+
+      - name: Set up Nextflow
+        uses: nf-core/setup-nextflow@v2
+        with:
+          version: "${{ matrix.NXF_VER }}"
+
+      - name: Set up Apptainer
+        if: matrix.profile == 'singularity'
+        uses: eWaterCycle/setup-apptainer@main
+
+      - name: Set up Singularity
+        if: matrix.profile == 'singularity'
+        run: |
+          mkdir -p $NXF_SINGULARITY_CACHEDIR
+          mkdir -p $NXF_SINGULARITY_LIBRARYDIR
+
+      - name: Set up Miniconda
+        if: matrix.profile == 'conda'
+        uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3
+        with:
+          miniconda-version: "latest"
+          auto-update-conda: true
+          conda-solver: libmamba
+          channels: conda-forge,bioconda
+
+      - name: Set up Conda
+        if: matrix.profile == 'conda'
+        run: |
+          echo $(realpath $CONDA)/condabin >> $GITHUB_PATH
+          echo $(realpath python) >> $GITHUB_PATH
+
+      - name: Clean up Disk space
+        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+
+      - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}"
+        run: |
+          nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} --outdir ./results
diff --git a/conf/modules_illumina.config b/conf/modules_illumina.config
index 67f4a04b6..2719b9f4e 100644
--- a/conf/modules_illumina.config
+++ b/conf/modules_illumina.config
@@ -872,7 +872,17 @@ if (!params.skip_assembly) {
 
         if (!params.skip_abacas) {
             process {
-                withName: '.*:.*:ASSEMBLY_SPADES:.*:ABACAS' {
+                withName: '.*:.*:ASSEMBLY_SPADES:.*:ABACAS_SINGLE' {
+                    ext.args = '-m -p nucmer'
+                    publishDir = [
+                        path: { "${params.outdir}/assembly/spades/${params.spades_mode}/abacas" },
+                        mode: params.publish_dir_mode,
+                        saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+                    ]
+                }
+
+                withName: '.*:.*:ASSEMBLY_SPADES:.*:.*:ABACAS' {
+                    ext.prefix = { "${meta.id}.${fasta.baseName}" }
                     ext.args = '-m -p nucmer'
                     publishDir = [
                         path: { "${params.outdir}/assembly/spades/${params.spades_mode}/abacas" },
@@ -977,7 +987,17 @@ if (!params.skip_assembly) {
 
         if (!params.skip_abacas) {
             process {
-                withName: '.*:.*:ASSEMBLY_UNICYCLER:.*:ABACAS' {
+                withName: '.*:.*:ASSEMBLY_UNICYCLER:.*:ABACAS_SINGLE' {
+                    ext.args = '-m -p nucmer'
+                    publishDir = [
+                        path: { "${params.outdir}/assembly/unicycler/abacas" },
+                        mode: params.publish_dir_mode,
+                        saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+                    ]
+                }
+
+                withName: '.*:.*:ASSEMBLY_UNICYCLER:.*:.*:ABACAS' {
+                    ext.prefix = { "${meta.id}.${fasta.baseName}" }
                     ext.args = '-m -p nucmer'
                     publishDir = [
                         path: { "${params.outdir}/assembly/unicycler/abacas" },
@@ -1049,7 +1069,17 @@ if (!params.skip_assembly) {
 
         if (!params.skip_abacas) {
             process {
-                withName: '.*:.*:ASSEMBLY_MINIA:.*:ABACAS' {
+                withName: '.*:.*:ASSEMBLY_MINIA:.*:ABACAS_SINGLE' {
+                    ext.args = '-m -p nucmer'
+                    publishDir = [
+                        path: { "${params.outdir}/assembly/minia/abacas" },
+                        mode: params.publish_dir_mode,
+                        saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+                    ]
+                }
+
+                withName: '.*:.*:ASSEMBLY_MINIA:.*:.*:ABACAS' {
+                    ext.prefix = { "${meta.id}.${fasta.baseName}" }
                     ext.args = '-m -p nucmer'
                     publishDir = [
                         path: { "${params.outdir}/assembly/minia/abacas" },
diff --git a/conf/test_illumina_fragmented.config b/conf/test_illumina_fragmented.config
new file mode 100644
index 000000000..a84a91e13
--- /dev/null
+++ b/conf/test_illumina_fragmented.config
@@ -0,0 +1,41 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests with multifasta
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/viralrecon -profile test_illumina_fragmented,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+process {
+    resourceLimits = [
+        cpus: 4,
+        memory: '15.GB',
+        time: '2.h'
+    ]
+}
+
+params {
+    config_profile_name        = 'Test profile'
+    config_profile_description = 'Minimal test dataset to check pipeline function with multifasta'
+
+    // Input data to test SISPA/metagenomics analysis
+    input              = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_fragmented.csv'
+    platform           = 'illumina'
+    protocol           = 'metagenomic'
+
+    // Genome references
+    fasta              = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/crimea_congo/crimea_congo.fasta.gz'
+    gff                = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/crimea_congo/crimea_congo.gff.gz'
+    kraken2_db         = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/kraken2/kraken2_hs22.tar.gz'
+
+    // Variant calling options
+    variant_caller     = 'bcftools'
+    skip_freyja        = true
+
+    // Assembly options
+    assemblers         = 'spades,unicycler,minia'
+}
diff --git a/modules.json b/modules.json
index 6c63430bf..67eb098bd 100644
--- a/modules.json
+++ b/modules.json
@@ -18,6 +18,7 @@
                     "artic/minion": {
                         "branch": "master",
                         "git_sha": "b44f728e80a1420b9f130061d91f7ff86673735a",
+                        "installed_by": ["modules"],
                         "patch": "modules/nf-core/artic/minion/artic-minion.diff"
                     },
                     "bandage/image": {
@@ -108,6 +109,7 @@
                     "cutadapt": {
                         "branch": "master",
                         "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
+                        "installed_by": ["modules"],
                         "patch": "modules/nf-core/cutadapt/cutadapt.diff"
                     },
                     "fastp": {
diff --git a/nextflow.config b/nextflow.config
index 888807a19..47948a5c9 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -240,13 +240,14 @@ profiles {
             ]
         }
     }
-    test               { includeConfig 'conf/test.config'               }
-    test_sispa         { includeConfig 'conf/test_sispa.config'         }
-    test_nanopore      { includeConfig 'conf/test_nanopore.config'      }
-    test_full          { includeConfig 'conf/test_full.config'          }
-    test_full_illumina { includeConfig 'conf/test_full.config'          }
-    test_full_nanopore { includeConfig 'conf/test_full_nanopore.config' }
-    test_full_sispa    { includeConfig 'conf/test_full_sispa.config'    }
+    test                     { includeConfig 'conf/test.config'                     }
+    test_sispa               { includeConfig 'conf/test_sispa.config'               }
+    test_nanopore            { includeConfig 'conf/test_nanopore.config'            }
+    test_full                { includeConfig 'conf/test_full.config'                }
+    test_full_illumina       { includeConfig 'conf/test_full.config'                }
+    test_full_nanopore       { includeConfig 'conf/test_full_nanopore.config'       }
+    test_full_sispa          { includeConfig 'conf/test_full_sispa.config'          }
+    test_illumina_fragmented { includeConfig 'conf/test_illumina_fragmented.config' }
 }
 
 // Load nf-core custom profiles from different Institutions
diff --git a/subworkflows/local/abacas_multifasta.nf b/subworkflows/local/abacas_multifasta.nf
new file mode 100644
index 000000000..d6167f464
--- /dev/null
+++ b/subworkflows/local/abacas_multifasta.nf
@@ -0,0 +1,63 @@
+//
+// Run ABACAS against every record of a multifasta reference and concatenate the results per sample
+//
+include { ABACAS } from '../../modules/nf-core/abacas/main'
+
+workflow ABACAS_MULTI {
+    take:
+    scaffold   // channel: [ val(meta), path(scaffold) ]
+    multifasta // channel: /path/to/genome.fasta
+    assembler  // string: assembler name
+
+    main:
+
+    ch_versions = Channel.empty()
+
+    //
+    // Split multifasta file into individual fasta files
+    //
+    multifasta
+        .splitFasta( by: 1, file: true )
+        .set { ch_fasta }
+
+    //
+    // Run abacas on each fasta file
+    //
+    ch_abacas = Channel.empty()
+
+    scaffold
+        .combine (ch_fasta)
+        .set { ch_scaffold_fasta }
+
+    ABACAS (
+        ch_scaffold_fasta.map { meta, scaffold, fasta -> tuple( meta, scaffold ) },
+        ch_scaffold_fasta.map { meta, scaffold, fasta -> fasta }
+    )
+    ch_abacas   = ABACAS.out.results
+    ch_versions = ch_versions.mix(ABACAS.out.versions)
+
+    //
+    // Concatenate abacas results
+    //
+
+    ch_abacas
+        .map { meta, files -> tuple(meta, files[3]) } // NOTE(review): positional pick from ABACAS results; confirm index 3 is the intended fasta
+        .multiMap{ meta, fasta ->
+            metadata: [meta.id, meta]
+            fasta: [meta.id, fasta]
+        }
+        .set { ch_abacas_split }
+
+    ch_abacas_split.fasta
+        .collectFile (storeDir: "${params.outdir}/assembly/${assembler}/abacas_multi") { id, fasta ->
+            ["${id}.fa",fasta]
+        }
+        .map { merged -> [merged.baseName, merged] } // baseName drops only '.fa', so ids containing dots still match meta.id
+        .join(ch_abacas_split.metadata.unique()) // one [id, meta] per sample; the split channel repeats it per reference record
+        .map { id, fasta, meta -> tuple(meta, fasta) }
+        .set { ch_abacas_results }
+
+    emit:
+    abacas_results = ch_abacas_results // channel: [ val(meta), path('*.abacas*') ]
+    versions       = ch_versions       // channel: [ versions.yml ]
+}
diff --git a/subworkflows/local/assembly_minia.nf b/subworkflows/local/assembly_minia.nf
index 31f1179dd..d8a910f57 100644
--- a/subworkflows/local/assembly_minia.nf
+++ b/subworkflows/local/assembly_minia.nf
@@ -14,6 +14,7 @@ workflow ASSEMBLY_MINIA {
     blast_db              // channel: /path/to/blast_db/
     blast_header          // channel: /path/to/blast_header.txt
     blast_filtered_header // channel: /path/to/blast_filtered_header.txt
+    minia                 // string : assembler used for assembly
 
     main:
 
@@ -45,7 +46,8 @@ workflow ASSEMBLY_MINIA {
         gff,
         blast_db,
         blast_header,
-        blast_filtered_header
+        blast_filtered_header,
+        minia
     )
     ch_versions = ch_versions.mix(ASSEMBLY_QC.out.versions)
 
diff --git a/subworkflows/local/assembly_qc.nf b/subworkflows/local/assembly_qc.nf
index 315c8cef2..db112488d 100644
--- a/subworkflows/local/assembly_qc.nf
+++ b/subworkflows/local/assembly_qc.nf
@@ -2,11 +2,12 @@
 // Downstream analysis for assembly scaffolds
 //
 
-include { FILTER_BLASTN } from '../../modules/local/filter_blastn'
-include { ABACAS        } from '../../modules/nf-core/abacas/main'
-include { BLAST_BLASTN  } from '../../modules/nf-core/blast/blastn/main'
-include { PLASMIDID     } from '../../modules/nf-core/plasmidid/main'
-include { QUAST         } from '../../modules/nf-core/quast/main'
+include { FILTER_BLASTN           } from '../../modules/local/filter_blastn'
+include { ABACAS as ABACAS_SINGLE } from '../../modules/nf-core/abacas/main'
+include { ABACAS_MULTI            } from '../../subworkflows/local/abacas_multifasta'
+include { BLAST_BLASTN            } from '../../modules/nf-core/blast/blastn/main'
+include { PLASMIDID               } from '../../modules/nf-core/plasmidid/main'
+include { QUAST                   } from '../../modules/nf-core/quast/main'
 
 workflow ASSEMBLY_QC {
     take:
@@ -16,6 +17,7 @@ workflow ASSEMBLY_QC {
     blast_db              // channel: /path/to/blast_db/
     blast_header          // channel: /path/to/blast_header.txt
     blast_filtered_header // channel: /path/to/blast_filtered_header.txt
+    assembler             // string : assembler used for assembly
 
     main:
 
@@ -64,17 +66,34 @@ workflow ASSEMBLY_QC {
         ch_versions = ch_versions.mix(QUAST.out.versions)
     }
 
     //
     // Contiguate assembly with ABACAS
     //
     ch_abacas_results = Channel.empty()
     if (!params.skip_abacas) {
-        ABACAS (
+        fasta
+            .branch { ref ->
+                multi: ref.countFasta() > 1 // more than one record: handled per record by ABACAS_MULTI
+                single: true                // fallback: everything else goes to ABACAS_SINGLE
+            }
+            .set { fasta_type }
+
+        ABACAS_SINGLE (
             scaffolds,
-            fasta
+            fasta_type.single
+        )
+
+        ABACAS_MULTI (
+            scaffolds,
+            fasta_type.multi,
+            assembler
+        )
+
+        ch_abacas_results = ABACAS_SINGLE.out.results.mix(ABACAS_MULTI.out.abacas_results)
+        ch_versions       = ch_versions.mix(
+            ABACAS_SINGLE.out.versions.first(),
+            ABACAS_MULTI.out.versions.first()
         )
-        ch_abacas_results = ABACAS.out.results
-        ch_versions = ch_versions.mix(ABACAS.out.versions.first())
     }
 
     //
diff --git a/subworkflows/local/assembly_spades.nf b/subworkflows/local/assembly_spades.nf
index ee11801da..ff8daca2f 100644
--- a/subworkflows/local/assembly_spades.nf
+++ b/subworkflows/local/assembly_spades.nf
@@ -19,6 +19,7 @@ workflow ASSEMBLY_SPADES {
     blast_db              // channel: /path/to/blast_db/
     blast_header          // channel: /path/to/blast_header.txt
     blast_filtered_header // channel: /path/to/blast_filtered_header.txt
+    spades                // string : assembler used for assembly
 
     main:
 
@@ -97,7 +98,8 @@ workflow ASSEMBLY_SPADES {
         gff,
         blast_db,
         blast_header,
-        blast_filtered_header
+        blast_filtered_header,
+        spades
     )
     ch_versions = ch_versions.mix(ASSEMBLY_QC.out.versions)
 
diff --git a/subworkflows/local/assembly_unicycler.nf b/subworkflows/local/assembly_unicycler.nf
index d6b0574d0..5f1364f7e 100644
--- a/subworkflows/local/assembly_unicycler.nf
+++ b/subworkflows/local/assembly_unicycler.nf
@@ -17,6 +17,7 @@ workflow ASSEMBLY_UNICYCLER {
     blast_db              // channel: /path/to/blast_db/
     blast_header          // channel: /path/to/blast_header.txt
     blast_filtered_header // channel: /path/to/blast_filtered_header.txt
+    unicycler             // string : assembler used for assembly
 
     main:
 
@@ -83,7 +84,8 @@ workflow ASSEMBLY_UNICYCLER {
         gff,
         blast_db,
         blast_header,
-        blast_filtered_header
+        blast_filtered_header,
+        unicycler
     )
     ch_versions = ch_versions.mix(ASSEMBLY_QC.out.versions)
 
diff --git a/workflows/illumina.nf b/workflows/illumina.nf
index e93e02d01..86f33834e 100644
--- a/workflows/illumina.nf
+++ b/workflows/illumina.nf
@@ -612,7 +612,8 @@ workflow ILLUMINA {
             ch_genome_gff ? PREPARE_GENOME.out.gff.map { [ [:], it ] } : [ [:], [] ],
             PREPARE_GENOME.out.blast_db,
             ch_blast_outfmt6_header,
-            ch_blast_filtered_outfmt6_header
+            ch_blast_filtered_outfmt6_header,
+            'spades'
         )
         ch_spades_quast_multiqc = ASSEMBLY_SPADES.out.quast_tsv
         ch_versions = ch_versions.mix(ASSEMBLY_SPADES.out.versions)
@@ -629,7 +630,8 @@ workflow ILLUMINA {
             ch_genome_gff ? PREPARE_GENOME.out.gff.map { [ [:], it ] } : [ [:], [] ],
             PREPARE_GENOME.out.blast_db,
             ch_blast_outfmt6_header,
-            ch_blast_filtered_outfmt6_header
+            ch_blast_filtered_outfmt6_header,
+            'unicycler'
         )
         ch_unicycler_quast_multiqc = ASSEMBLY_UNICYCLER.out.quast_tsv
         ch_versions = ch_versions.mix(ASSEMBLY_UNICYCLER.out.versions)
@@ -646,7 +648,8 @@ workflow ILLUMINA {
             ch_genome_gff ? PREPARE_GENOME.out.gff.map { [ [:], it ] } : [ [:], [] ],
             PREPARE_GENOME.out.blast_db,
             ch_blast_outfmt6_header,
-            ch_blast_filtered_outfmt6_header
+            ch_blast_filtered_outfmt6_header,
+            'minia'
         )
         ch_minia_quast_multiqc = ASSEMBLY_MINIA.out.quast_tsv
         ch_versions = ch_versions.mix(ASSEMBLY_MINIA.out.versions)