diff --git a/CHANGELOG.md b/CHANGELOG.md index 162d4499..8b3356d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#226](https://github.com/nf-core/seqinspector/pull/226) Add pipeline level stub tests - [#228](https://github.com/nf-core/seqinspector/pull/228) Update all modules/subworkflows - [#234](https://github.com/nf-core/seqinspector/pull/234) Add pipeline level PICARD tests +- [#236](https://github.com/nf-core/seqinspector/pull/236) Added bbmap/clumpify module for FASTQ deduplication and compression ### `Fixed` @@ -58,6 +59,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | multiqcsav | | 0.2.0 | | samtools | 1.22.1 | 1.23.1 | | toulligqc | | 2.8.4 | +| bbmap | | 39.18 | | tar | | 1.34 | ### `Deprecated` diff --git a/CITATIONS.md b/CITATIONS.md index e4800942..1b359bbd 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -10,6 +10,10 @@ ## Pipeline tools +- [BBMap](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/clumpify-guide/) + + > Bushnell B. BBTools: a collection of bioinformatics tools for processing short sequencing reads. https://jgi.doe.gov/data-and-tools/software-tools/bbtools/ + - [BWAMEM2](https://ieeexplore.ieee.org/abstract/document/8820962) > Vasimuddin Md, Misra S, Li H, Aluru S. Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems. In: 2019 IEEE International Parallel and Distributed Processing Symposium (IPDPS). IEEE; 2019:314-324. doi:10.1109/IPDPS.2019.00041 diff --git a/README.md b/README.md index 9649927a..535c4545 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,7 @@ If provided, nf-core/seqinspector can also parse statistics from an Illumina run | `Subsampling` | [`Seqtk`](https://github.com/lh3/seqtk) | Global subsampling of reads. Only performs subsampling if `--sample_size` parameter is given. 
| [RNA, DNA] | [N/A] | no | | `Lint FASTQs` | [`fq`](https://github.com/stjude-rust-labs/fq) | fq filters, generates, subsamples, and validates FASTQ files. [RNA, DNA, synthetic] | [N/A] | yes | | `Trimming` | [`Fastp`](https://github.com/OpenGene/fastp) | Trimming of reads. Only performs trimming if `--tools` parameter is given. | [RNA, DNA, synthetic] | [N/A] | no | +| `Compression` | [`BBMap Clumpify`](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/clumpify-guide/) | Clump (reorder) reads to produce smaller gzipped FASTQ files; duplicates are removed only if `dedupe` is passed via `ext.args`. Only runs if `bbmap_clumpify` is included in `--tools`. | [RNA, DNA] | [N/A] | no | | `Indexing, Mapping` | [`Bwamem2`](https://github.com/bwa-mem2/bwa-mem2) | Align reads to reference | [RNA, DNA] | [N/A] | yes | | `Indexing` | [`SAMtools`](http://github.com/samtools) | Index aligned BAM files, create FASTA index | [DNA] | [N/A] | yes | | `QC` | [`checkQC`](https://github.com/Molmed/checkQC) | Read QC | [RNA, DNA] | Illumina rundir | no | @@ -62,6 +63,7 @@ If provided, nf-core/seqinspector can also parse statistics from an Illumina run | Tool | Version | | ----------- | ------- | | bwamem2 | 2.3 | +| bbmap | 39.18 | | checkQC | 4.1.0 | | fq/lint | 0.12.0 | | fastp | 1.1.0 | @@ -130,6 +132,7 @@ We thank the following people for their extensive assistance in the development - [@ctuni](https://github.com/ctuni) - [@edmundmiller](https://github.com/edmundmiller) - [@EliottBo](https://github.com/EliottBo) +- [@erkutilaslan](https://github.com/erkutilaslan) - [@KarNair](https://github.com/KarNair) - [@kjellinjonas](https://github.com/kjellinjonas) - [@mahesh-panchal](https://github.com/mahesh-panchal) diff --git a/conf/modules.config b/conf/modules.config index 34d44547..255328a3 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -11,6 +11,10 @@ */ process { + withName: BBMAP_CLUMPIFY { + ext.args = '' + } + withName: CHECKQC { tag = { "${run_dir.simpleName}" } } diff --git a/docs/output.md b/docs/output.md index 
b4351442..92df0f04 100644 --- a/docs/output.md +++ b/docs/output.md @@ -18,6 +18,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and can generat - [Rundirparser](#rundirparser) - Parse rundir metadata from Illumina runs - [ToulligQC](#toulligqc) - Raw read QC for Oxford Nanopore runs - [SeqFu](#seqfu) - Statistics for FASTA or FASTQ files +- [BBMap Clumpify](#bbmap-clumpify) - Deduplication and compression of FASTQ files - [Seqtk](#seqtk) - Subsample a specific number of reads per sample - [FastQC](#fastqc) - Raw read QC - [Sequali](#sequali) - Sequence quality metrics for short and long reads @@ -119,6 +120,18 @@ This software is written in Python and developped by the GenomiqueENS core facil Includes functions to interleave and de-interleave FASTQ files, to rename sequences and to count and print statistics on sequence lengths. In this pipeline, the `seqfu stats` module is used to produce general quality metrics statistics. +### BBMap Clumpify + +
+Output files + +- `clumped/[sample_id]/` + - `*.clumped.fastq.gz`: Clumped (reordered) and recompressed FASTQ files. Duplicates are removed only if `dedupe` is passed to Clumpify via `ext.args`. + +
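+ +[BBMap Clumpify](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/clumpify-guide/) reorders reads so that reads with similar sequences are stored next to each other, which creates smaller, faster gzipped FASTQ files. Duplicate removal is optional: it only happens when `dedupe` is supplied through `ext.args` for `BBMAP_CLUMPIFY`, which is empty by default in `conf/modules.config`. This is particularly useful for reducing file sizes without altering the reads themselves. + ### Seqtk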
diff --git a/docs/usage.md b/docs/usage.md index d4d1bb57..a7499337 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -154,7 +154,7 @@ Be aware that the default tools will still be run. In order to ONLY run the sele --tools fastqscreen,rundirparser --tools_bundle null ``` -Currently the `tools` param can have the following values: fastqc, fastqscreen, picard_collecthsmetrics, picard_collectmultiplemetrics, rundirparser and seqfu_stats. +Currently the `tools` param can have the following values: bbmap_clumpify, fastqc, fastqscreen, picard_collecthsmetrics, picard_collectmultiplemetrics, rundirparser and seqfu_stats. #### Skip specific tools @@ -197,6 +197,7 @@ Requirements: Tools: +- bbmap_clumpify - checkQC - fastqc - fastqscreen @@ -204,6 +205,7 @@ Tools: - picard_collectmultiplemetrics - rundirparser - seqfu_stats +- sequali - toulligqc
diff --git a/modules.json b/modules.json index 18314076..9d5cad5b 100644 --- a/modules.json +++ b/modules.json @@ -5,6 +5,12 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "bbmap/clumpify": { + "branch": "master", + "git_sha": "f946047c97ed78d3cdcecdc64169c7f9faef99df", + "installed_by": ["modules"], + "patch": "modules/nf-core/bbmap/clumpify/bbmap-clumpify.diff" + }, "bwamem2/index": { "branch": "master", "git_sha": "6d46786420b4d7bc88eba026eb389c0c5535d120", diff --git a/modules/nf-core/bbmap/clumpify/bbmap-clumpify.diff b/modules/nf-core/bbmap/clumpify/bbmap-clumpify.diff new file mode 100644 index 00000000..6004581d --- /dev/null +++ b/modules/nf-core/bbmap/clumpify/bbmap-clumpify.diff @@ -0,0 +1,19 @@ +Changes in component 'nf-core/bbmap/clumpify' +'modules/nf-core/bbmap/clumpify/meta.yml' is unchanged +'modules/nf-core/bbmap/clumpify/environment.yml' is unchanged +Changes in 'bbmap/clumpify/main.nf': +--- modules/nf-core/bbmap/clumpify/main.nf ++++ modules/nf-core/bbmap/clumpify/main.nf +@@ -13,7 +13,7 @@ + + output: + tuple val(meta), path('*.fastq.gz'), emit: reads +- tuple val(meta), path('*.log') , emit: log ++ tuple val(meta), val("${task.process}"), val('bbmap'), path("*.log"), emit: log, topic: multiqc_files + tuple val("${task.process}"), val('bbmap'), eval('bbversion.sh | grep -v "Duplicate cpuset"'), emit: versions_bbmap, topic: versions + + when: + +'modules/nf-core/bbmap/clumpify/tests/main.nf.test' is unchanged +'modules/nf-core/bbmap/clumpify/tests/main.nf.test.snap' is unchanged +************************************************************ diff --git a/modules/nf-core/bbmap/clumpify/environment.yml b/modules/nf-core/bbmap/clumpify/environment.yml new file mode 100644 index 00000000..a33ddca8 --- /dev/null +++ b/modules/nf-core/bbmap/clumpify/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - 
conda-forge + - bioconda +dependencies: + - bioconda::bbmap=39.18 + - pigz=2.8 diff --git a/modules/nf-core/bbmap/clumpify/main.nf b/modules/nf-core/bbmap/clumpify/main.nf new file mode 100644 index 00000000..dc1b6e48 --- /dev/null +++ b/modules/nf-core/bbmap/clumpify/main.nf @@ -0,0 +1,44 @@ +process BBMAP_CLUMPIFY { + tag "$meta.id" + label 'process_single' + label 'process_high_memory' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/5a/5aae5977ff9de3e01ff962dc495bfa23f4304c676446b5fdf2de5c7edfa2dc4e/data' : + 'community.wave.seqera.io/library/bbmap_pigz:07416fe99b090fa9' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path('*.fastq.gz'), emit: reads + tuple val(meta), val("${task.process}"), val('bbmap'), path("*.log"), emit: log, topic: multiqc_files + tuple val("${task.process}"), val('bbmap'), eval('bbversion.sh | grep -v "Duplicate cpuset"'), emit: versions_bbmap, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def raw = meta.single_end ? "in=$reads" : "in1=${reads[0]} in2=${reads[1]}" + def clumped = meta.single_end ? "out=${prefix}.clumped.fastq.gz" : "out1=${prefix}_1.clumped.fastq.gz out2=${prefix}_2.clumped.fastq.gz" + """ + clumpify.sh \\ + $raw \\ + $clumped \\ + $args \\ + &> ${prefix}.clumpify.log + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def output_command = meta.single_end ? 
+ "echo '' | gzip > ${prefix}.clumped.fastq.gz" : + "echo '' | gzip > ${prefix}_1.clumped.fastq.gz ; echo '' | gzip > ${prefix}_2.clumped.fastq.gz" + """ + touch ${prefix}.clumpify.log + $output_command + """ +} diff --git a/modules/nf-core/bbmap/clumpify/meta.yml b/modules/nf-core/bbmap/clumpify/meta.yml new file mode 100644 index 00000000..478d4bc7 --- /dev/null +++ b/modules/nf-core/bbmap/clumpify/meta.yml @@ -0,0 +1,79 @@ +name: bbmap_clumpify +description: Create 30% Smaller, Faster Gzipped Fastq Files. And remove + duplicates +keywords: + - clumping fastqs + - smaller fastqs + - deduping + - fastq +tools: + - bbmap: + description: BBMap is a short read aligner, as well as various other + bioinformatic tools. + homepage: https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/clumpify-guide/ + documentation: https://www.biostars.org/p/225338/ + licence: + - "UC-LBL license (see package)" + identifier: biotools:bbmap +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + ontologies: [] +output: + reads: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fastq.gz": + type: file + description: The reordered/clumped (and if necessary deduped) fastq + reads + pattern: "*.clumped.fastq.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + log: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.log": + type: file + description: Clumpify log file + pattern: "*clumpify.log" + ontologies: [] + versions_bbmap: + - - "${task.process}": + type: string + description: The name of the process + - bbmap: + type: string + description: The name of the tool + - bbversion.sh | grep -v "Duplicate cpuset": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - "${task.process}": + type: string + description: The name of the process + - bbmap: + type: string + description: The name of the tool + - bbversion.sh | grep -v "Duplicate cpuset": + type: eval + description: The expression to obtain the version of the tool +authors: + - "@tamuanand" +maintainers: + - "@tamuanand" diff --git a/nextflow_schema.json b/nextflow_schema.json index bee1c0f8..5d2ca11a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -51,7 +51,7 @@ "tools": { "type": "string", "description": "Comma-separated string of tools to run", - "pattern": "^((checkqc|fastp|fastqc|fastqe|fastqscreen|fq_lint|kraken2|multiqcsav|picard_collecthsmetrics|picard_collectmultiplemetrics|rundirparser|seqfu_stats|sequali|toulligqc)?,?)*(?\n \n \n \"nf-core/seqinspector\"\n \n\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/seqinspector)\n[![GitHub Actions CI Status](https://github.com/nf-core/seqinspector/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/seqinspector/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/seqinspector/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/seqinspector/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/seqinspector/results)[![Cite with 
Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.18757486-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.18757486)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.4-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-4.0.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/4.0.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/seqinspector)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23seqinspector-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/seqinspector)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/seqinspector** is a bioinformatics pipeline that processes raw sequence data (FASTQ) to provide comprehensive quality 
control.\nIt can perform subsampling, quality assessment, duplication level analysis, and complexity evaluation on a per-sample basis, while also detecting adapter content, technical artifacts, and common biological contaminants.\nThe pipeline generates detailed MultiQC reports with flexible output options, ranging from individual sample reports to project-wide summaries, making it particularly useful for sequencing core facilities and research groups with access to sequencing instruments.\nIf provided, nf-core/seqinspector can also parse statistics from an Illumina run folder directory into the final MultiQC reports.\n\n### Compatibility between tools and data type\n\n\n\n| Tool Type | Tool Name | Tool Description | Compatibility with Data | Dependencies | Default tool |\n| -------------------------- | ------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- | ----------------------- | --------------------------------------------------------------------------------------- | ------------ |\n| `Subsampling` | [`Seqtk`](https://github.com/lh3/seqtk) | Global subsampling of reads. Only performs subsampling if `--sample_size` parameter is given. | [RNA, DNA] | [N/A] | no |\n| `Lint FASTQs` | [`fq`](https://github.com/stjude-rust-labs/fq) | fq filters, generates, subsamples, and validates FASTQ files. [RNA, DNA, synthetic] | [N/A] | yes |\n| `Trimming` | [`Fastp`](https://github.com/OpenGene/fastp) | Trimming of reads. Only performs trimming if `--tools` parameter is given. 
| [RNA, DNA, synthetic] | [N/A] | no |\n| `Indexing, Mapping` | [`Bwamem2`](https://github.com/bwa-mem2/bwa-mem2) | Align reads to reference | [RNA, DNA] | [N/A] | yes |\n| `Indexing` | [`SAMtools`](http://github.com/samtools) | Index aligned BAM files, create FASTA index | [DNA] | [N/A] | yes |\n| `QC` | [`checkQC`](https://github.com/Molmed/checkQC) | Read QC | [RNA, DNA] | Illumina rundir | no |\n| `QC` | [`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) | Read QC | [RNA, DNA] | [N/A] | yes |\n| `QC` | [`FASTQE`](https://fastqe.com/) | Read QC | [RNA, DNA] | [N/A] | yes |\n| `QC` | [`FastqScreen`](https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/) | Basic contamination detection | [RNA, DNA] | [N/A] | yes |\n| `QC` | [`SeqFu Stats`](https://github.com/telatin/seqfu2) | Sequence statistics | [RNA, DNA] | [N/A] | yes |\n| `QC` | [`Sequali`](https://sequali.readthedocs.io/en/latest/) | Read QC for long and short reads. | [RNA, DNA] | [N/A] | yes |\n| `Taxonomic Classification` | [`Kraken2`](https://ccb.jhu.edu/software/kraken2/) | Performs taxonomic classification and/or profiling | [RNA, DNA] | [N/A] | no |\n| `QC` | [`Picard collect multiple metrics`](https://broadinstitute.github.io/picard/picard-metric-definitions.html) | Collect multiple QC metrics | [RNA, DNA] | [Bwamem2, SAMtools, `--genome`] | yes |\n| `QC` | [`Picard_collecthsmetrics`](https://gatk.broadinstitute.org/hc/en-us/articles/360036856051-CollectHsMetrics-Picard) | Collect alignment QC metrics of hybrid-selection data. 
| [RNA, DNA] | [Bwamem2, SAMtools, `--fasta`, `--bait_intervals`, `--target_intervals` (`--ref_dict`)] | no |\n| `Reporting` | [`MultiQC`](http://multiqc.info/) | Present QC for raw reads | [RNA, DNA, synthetic] | [N/A] | yes |\n| `Reporting` | [`Krona`](https://hpc.nih.gov/apps/kronatools.html) | Plotting Kraken2 results | [RNA, DNA, synthetic] | [kraken2] | no |\n\n### Workflow diagram\n\n\n \n \n \"Fallback\n\n\n### Summary of tools and version used in the pipeline\n\n| Tool | Version |\n| ----------- | ------- |\n| bwamem2 | 2.3 |\n| checkQC | 4.1.0 |\n| fq/lint | 0.12.0 |\n| fastp | 1.1.0 |\n| fastqc | 0.12.1 |\n| fastqe | 0.5.2 |\n| fastqscreen | 0.16.0 |\n| kraken2 | 2.1.6 |\n| krona | 2.8.1 |\n| multiqc | 1.33 |\n| multiqcsav | 0.2.0 |\n| picard | 3.4.0 |\n| samtools | 1.23 |\n| seqfu | 1.22.3 |\n| seqtk | 1.4 |\n| sequali | 0.12.0 |\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/get_started/environment_setup/overview) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/get_started/run-your-first-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fastq_1,fastq_2,rundir,tags\nCONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX,lane1:project5:group2\n```\n\nEach row represents a fastq file (single-end with only `fastq_1`) or a pair of fastq files (paired end with `fastq_1` and `fastq_2`).\n`rundir` is the path to the runfolder.\n`tags` is a colon-separated list of tags that will be added to the MultiQC report for this `sample`.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/seqinspector \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/running/run-pipelines#using-parameter-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/seqinspector/usage) and the [parameter documentation](https://nf-co.re/seqinspector/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/seqinspector/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/seqinspector/output).\n\n## Credits\n\nnf-core/seqinspector was originally written by [@agrima2010](https://github.com/agrima2010), [@Aratz](https://github.com/Aratz), [@FranBonath](https://github.com/FranBonath), [@kedhammar](https://github.com/kedhammar), and 
[@MatthiasZepper](https://github.com/MatthiasZepper) from the Swedish [National Genomics Infrastructure](https://github.com/NationalGenomicsInfrastructure/) and [Clinical Genomics Stockholm](https://clinical.scilifelab.se/).\n\nMaintenance is now lead by Maxime U Garcia ([National Genomics Infrastructure](https://github.com/NationalGenomicsInfrastructure/))\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [@adamrtalbot](https://github.com/adamrtalbot)\n- [@alneberg](https://github.com/alneberg)\n- [@beatrizsavinhas](https://github.com/beatrizsavinhas)\n- [@ctuni](https://github.com/ctuni)\n- [@edmundmiller](https://github.com/edmundmiller)\n- [@EliottBo](https://github.com/EliottBo)\n- [@KarNair](https://github.com/KarNair)\n- [@kjellinjonas](https://github.com/kjellinjonas)\n- [@mahesh-panchal](https://github.com/mahesh-panchal)\n- [@matrulda](https://github.com/matrulda)\n- [@mirpedrol](https://github.com/mirpedrol)\n- [@nggvs](https://github.com/nggvs)\n- [@nkongenelly](https://github.com/nkongenelly)\n- [@Patricie34](https://github.com/Patricie34)\n- [@pontushojer](https://github.com/pontushojer)\n- [@ramprasadn](https://github.com/ramprasadn)\n- [@rannick](https://github.com/rannick)\n- [@TMAdams](https://github.com/TMAdams)\n- [@torigiffin](https://github.com/torigiffin)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](docs/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#seqinspector` channel](https://nfcore.slack.com/channels/seqinspector) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nYou can cite the seqinspector zenodo record for a specific version using the following [doi: 10.5281/zenodo.18757486](https://doi.org/10.5281/zenodo.18757486)\n\nAn extensive list of references for the tools used by the pipeline can be found in the 
[`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "description": "

\n \n \n \"nf-core/seqinspector\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/seqinspector)\n[![GitHub Actions CI Status](https://github.com/nf-core/seqinspector/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/seqinspector/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/seqinspector/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/seqinspector/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/seqinspector/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.18757486-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.18757486)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.4-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-4.0.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/4.0.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/seqinspector)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23seqinspector-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/seqinspector)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/seqinspector** is a bioinformatics pipeline that processes raw sequence data (FASTQ) to provide comprehensive quality control.\nIt can perform subsampling, quality assessment, duplication level analysis, and complexity evaluation on a per-sample basis, while also detecting adapter content, technical artifacts, and common biological contaminants.\nThe pipeline generates detailed MultiQC reports with flexible output options, ranging from individual sample reports to project-wide summaries, making it particularly useful for sequencing core facilities and research groups with access to sequencing instruments.\nIf provided, nf-core/seqinspector can also parse statistics from an Illumina run folder directory into the final MultiQC reports.\n\n### Compatibility between tools and data type\n\n\n\n| Tool Type | Tool Name | Tool Description | Compatibility with Data | Dependencies | Default tool |\n| -------------------------- | ------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- | ----------------------- | --------------------------------------------------------------------------------------- | ------------ |\n| `Subsampling` | [`Seqtk`](https://github.com/lh3/seqtk) | Global subsampling of reads. 
Only performs subsampling if `--sample_size` parameter is given. | [RNA, DNA] | [N/A] | no |\n| `Lint FASTQs` | [`fq`](https://github.com/stjude-rust-labs/fq) | fq filters, generates, subsamples, and validates FASTQ files. [RNA, DNA, synthetic] | [N/A] | yes |\n| `Trimming` | [`Fastp`](https://github.com/OpenGene/fastp) | Trimming of reads. Only performs trimming if `--tools` parameter is given. | [RNA, DNA, synthetic] | [N/A] | no |\n| `Indexing, Mapping` | [`Bwamem2`](https://github.com/bwa-mem2/bwa-mem2) | Align reads to reference | [RNA, DNA] | [N/A] | yes |\n| `Indexing` | [`SAMtools`](http://github.com/samtools) | Index aligned BAM files, create FASTA index | [DNA] | [N/A] | yes |\n| `QC` | [`checkQC`](https://github.com/Molmed/checkQC) | Read QC | [RNA, DNA] | Illumina rundir | no |\n| `QC` | [`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) | Read QC | [RNA, DNA] | [N/A] | yes |\n| `QC` | [`FASTQE`](https://fastqe.com/) | Read QC | [RNA, DNA] | [N/A] | yes |\n| `QC` | [`FastqScreen`](https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/) | Basic contamination detection | [RNA, DNA] | [N/A] | yes |\n| `QC` | [`SeqFu Stats`](https://github.com/telatin/seqfu2) | Sequence statistics | [RNA, DNA] | [N/A] | yes |\n| `QC` | [`Sequali`](https://sequali.readthedocs.io/en/latest/) | Read QC for long and short reads. | [RNA, DNA] | [N/A] | yes |\n| `Taxonomic Classification` | [`Kraken2`](https://ccb.jhu.edu/software/kraken2/) | Performs taxonomic classification and/or profiling | [RNA, DNA] | [N/A] | no |\n| `QC` | [`Picard collect multiple metrics`](https://broadinstitute.github.io/picard/picard-metric-definitions.html) | Collect multiple QC metrics | [RNA, DNA] | [Bwamem2, SAMtools, `--genome`] | yes |\n| `QC` | [`Picard_collecthsmetrics`](https://gatk.broadinstitute.org/hc/en-us/articles/360036856051-CollectHsMetrics-Picard) | Collect alignment QC metrics of hybrid-selection data. 
| [RNA, DNA] | [Bwamem2, SAMtools, `--fasta`, `--bait_intervals`, `--target_intervals` (`--ref_dict`)] | no |\n| `Reporting` | [`MultiQC`](http://multiqc.info/) | Present QC for raw reads | [RNA, DNA, synthetic] | [N/A] | yes |\n| `Reporting` | [`Krona`](https://hpc.nih.gov/apps/kronatools.html) | Plotting Kraken2 results | [RNA, DNA, synthetic] | [kraken2] | no |\n\n### Workflow diagram\n\n\n \n \n \"Fallback\n\n\n### Summary of tools and version used in the pipeline\n\n| Tool | Version |\n| ----------- | ------- |\n| bwamem2 | 2.3 |\n| checkQC | 4.1.0 |\n| fq/lint | 0.12.0 |\n| fastp | 1.1.0 |\n| fastqc | 0.12.1 |\n| fastqe | 0.5.2 |\n| fastqscreen | 0.16.0 |\n| kraken2 | 2.1.6 |\n| krona | 2.8.1 |\n| multiqc | 1.35 |\n| multiqcsav | 0.2.0 |\n| picard | 3.4.0 |\n| samtools | 1.23 |\n| seqfu | 1.22.3 |\n| seqtk | 1.4 |\n| sequali | 1.0.2 |\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/get_started/environment_setup/overview) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/get_started/run-your-first-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fastq_1,fastq_2,rundir,tags\nCONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX,lane1:project5:group2\n```\n\nEach row represents a fastq file (single-end with only `fastq_1`) or a pair of fastq files (paired end with `fastq_1` and `fastq_2`).\n`rundir` is the path to the runfolder.\n`tags` is a colon-separated list of tags that will be added to the MultiQC report for this `sample`.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/seqinspector \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/running/run-pipelines#using-parameter-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/seqinspector/usage) and the [parameter documentation](https://nf-co.re/seqinspector/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/seqinspector/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/seqinspector/output).\n\n## Credits\n\nnf-core/seqinspector was originally written by [@agrima2010](https://github.com/agrima2010), [@Aratz](https://github.com/Aratz), [@FranBonath](https://github.com/FranBonath), [@kedhammar](https://github.com/kedhammar), and 
[@MatthiasZepper](https://github.com/MatthiasZepper) from the Swedish [National Genomics Infrastructure](https://github.com/NationalGenomicsInfrastructure/) and [Clinical Genomics Stockholm](https://clinical.scilifelab.se/).\n\nMaintenance is now lead by Maxime U Garcia ([National Genomics Infrastructure](https://github.com/NationalGenomicsInfrastructure/))\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [@adamrtalbot](https://github.com/adamrtalbot)\n- [@alneberg](https://github.com/alneberg)\n- [@beatrizsavinhas](https://github.com/beatrizsavinhas)\n- [@ctuni](https://github.com/ctuni)\n- [@edmundmiller](https://github.com/edmundmiller)\n- [@EliottBo](https://github.com/EliottBo)\n- [@KarNair](https://github.com/KarNair)\n- [@kjellinjonas](https://github.com/kjellinjonas)\n- [@mahesh-panchal](https://github.com/mahesh-panchal)\n- [@matrulda](https://github.com/matrulda)\n- [@mirpedrol](https://github.com/mirpedrol)\n- [@nggvs](https://github.com/nggvs)\n- [@nkongenelly](https://github.com/nkongenelly)\n- [@Patricie34](https://github.com/Patricie34)\n- [@pontushojer](https://github.com/pontushojer)\n- [@ramprasadn](https://github.com/ramprasadn)\n- [@rannick](https://github.com/rannick)\n- [@TMAdams](https://github.com/TMAdams)\n- [@torigiffin](https://github.com/torigiffin)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](docs/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#seqinspector` channel](https://nfcore.slack.com/channels/seqinspector) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nYou can cite the seqinspector zenodo record for a specific version using the following [doi: 10.5281/zenodo.18757486](https://doi.org/10.5281/zenodo.18757486)\n\nAn extensive list of references for the tools used by the pipeline can be found in the 
[`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index 501b874f..96ace487 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -261,6 +261,7 @@ def genomeExistsError() { def toolCitationText() { def citation_text = [ "Tools used in the workflow included:", + "BBMap (Bushnell 2014),", "BWAMEM2 (Vasimuddin et al. 2019)", "FastQC (Andrews 2010),", "FastQ Screen (Wingett & Andrews 2018)", @@ -278,6 +279,7 @@ def toolCitationText() { def toolBibliographyText() { def reference_text = [ + "
  • Bushnell B. BBTools: a collection of bioinformatics tools for processing short sequencing reads. https://jgi.doe.gov/data-and-tools/software-tools/bbtools/.
  • ", "
  • Vasimuddin Md., Misra S., Li H, & Aluru S. (2019). Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems.
  • ", "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/.
  • ", "
  • Wingett SW., & Andrews S. FastQ Screen: A tool for multi-genome mapping and quality control. F1000Res. 2018 Aug 24 [revised 2018 Jan 1];7:1338. doi: 10.12688/f1000research.15931.2. eCollection
  • ", @@ -343,6 +345,7 @@ def defineToolsList(input_bundle, input_tools, input_skip) { // please update the docs/usage.md section about tools selection when adding new tools here! if ('all' in bundle_list) { + tools_list << 'bbmap_clumpify' tools_list << 'checkqc' tools_list << 'fastqc' tools_list << 'fastqe' diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.nf.test new file mode 100644 index 00000000..8940d32d --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.nf.test.snap new file mode 100644 index 00000000..859d1030 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git a/tests/.nftignore b/tests/.nftignore index d2ee22b1..b3c87f09 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -12,6 +12,7 @@ multiqc/{global_report,group_reports/*}/multiqc_plots/{svg,pdf,png}/*.{svg,pdf,p multiqc/{global_report,group_reports/*}/multiqc_report.html 
pipeline_info/*.{html,json,txt,yml} references/R64-1-1.dict +reports/bbmap/*/*.clumpify.log reports/fastp/*/*fastp.* reports/fastqc/*/*_fastqc.{html,zip} reports/fastqscreen/*/*_screen.html diff --git a/tests/tools_bbmap_clumpify.nf.test b/tests/tools_bbmap_clumpify.nf.test new file mode 100644 index 00000000..b662114d --- /dev/null +++ b/tests/tools_bbmap_clumpify.nf.test @@ -0,0 +1,32 @@ +def pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "MiSeq data test - bbmap_clumpify", + params: [ + input: pipelines_testdata_base_path + 'seqinspector/samplesheet/1.0/miseq.csv', + tools: 'bbmap_clumpify', + tools_bundle: null, + ], + ], + [ + name: "MiSeq data test - bbmap_clumpify - stub", + params: [ + input: pipelines_testdata_base_path + 'seqinspector/samplesheet/1.0/miseq.csv', + tools: 'bbmap_clumpify', + tools_bundle: null, + ], + stub: true, + ], + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/tools_bbmap_clumpify.nf.test.snap b/tests/tools_bbmap_clumpify.nf.test.snap new file mode 100644 index 00000000..f07d5685 --- /dev/null +++ b/tests/tools_bbmap_clumpify.nf.test.snap @@ -0,0 +1,118 @@ +{ + "MiSeq data test - bbmap_clumpify": { + "content": [ + 6, + { + "BBMAP_CLUMPIFY": { + "bbmap": "39.18" + } + }, + [ + "multiqc", + "multiqc/global_report", + "multiqc/global_report/multiqc_data", + "multiqc/global_report/multiqc_data/llms-full.txt", + "multiqc/global_report/multiqc_data/multiqc.log", + "multiqc/global_report/multiqc_data/multiqc.parquet", + "multiqc/global_report/multiqc_data/multiqc_citations.txt", + "multiqc/global_report/multiqc_data/multiqc_data.json", + "multiqc/global_report/multiqc_data/multiqc_software_versions.txt", + "multiqc/global_report/multiqc_data/multiqc_sources.txt", + 
"multiqc/global_report/multiqc_report.html", + "multiqc/group_reports", + "multiqc/group_reports/Bpacificus", + "multiqc/group_reports/Bpacificus/multiqc_data", + "multiqc/group_reports/Bpacificus/multiqc_data/llms-full.txt", + "multiqc/group_reports/Bpacificus/multiqc_data/multiqc.log", + "multiqc/group_reports/Bpacificus/multiqc_data/multiqc.parquet", + "multiqc/group_reports/Bpacificus/multiqc_data/multiqc_citations.txt", + "multiqc/group_reports/Bpacificus/multiqc_data/multiqc_data.json", + "multiqc/group_reports/Bpacificus/multiqc_data/multiqc_software_versions.txt", + "multiqc/group_reports/Bpacificus/multiqc_data/multiqc_sources.txt", + "multiqc/group_reports/Bpacificus/multiqc_report.html", + "multiqc/group_reports/K9H97", + "multiqc/group_reports/K9H97/multiqc_data", + "multiqc/group_reports/K9H97/multiqc_data/llms-full.txt", + "multiqc/group_reports/K9H97/multiqc_data/multiqc.log", + "multiqc/group_reports/K9H97/multiqc_data/multiqc.parquet", + "multiqc/group_reports/K9H97/multiqc_data/multiqc_citations.txt", + "multiqc/group_reports/K9H97/multiqc_data/multiqc_data.json", + "multiqc/group_reports/K9H97/multiqc_data/multiqc_software_versions.txt", + "multiqc/group_reports/K9H97/multiqc_data/multiqc_sources.txt", + "multiqc/group_reports/K9H97/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_seqinspector_software_mqc_versions.yml", + "reports", + "reports/bbmap", + "reports/bbmap/SAMPLE_PAIRED_END_1_01", + "reports/bbmap/SAMPLE_PAIRED_END_1_01/SAMPLE_PAIRED_END_1_01.clumpify.log", + "reports/bbmap/SAMPLE_PAIRED_END_2_02", + "reports/bbmap/SAMPLE_PAIRED_END_2_02/SAMPLE_PAIRED_END_2_02.clumpify.log", + "reports/bbmap/SAMPLE_SINGLE_END_03", + "reports/bbmap/SAMPLE_SINGLE_END_03/SAMPLE_SINGLE_END_03.clumpify.log" + ], + [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" + ], + "No BAM files", + "No 
warnings" + ], + "timestamp": "2026-06-12T13:59:57.270092654", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.3" + } + }, + "MiSeq data test - bbmap_clumpify - stub": { + "content": [ + 6, + { + "BBMAP_CLUMPIFY": { + "bbmap": "39.18" + } + }, + [ + "multiqc", + "multiqc/global_report", + "multiqc/global_report/multiqc_data", + "multiqc/global_report/multiqc_data/.stub", + "multiqc/global_report/multiqc_plots", + "multiqc/global_report/multiqc_plots/.stub", + "multiqc/global_report/multiqc_report.html", + "multiqc/group_reports", + "multiqc/group_reports/Bpacificus", + "multiqc/group_reports/Bpacificus/multiqc_data", + "multiqc/group_reports/Bpacificus/multiqc_data/.stub", + "multiqc/group_reports/Bpacificus/multiqc_plots", + "multiqc/group_reports/Bpacificus/multiqc_plots/.stub", + "multiqc/group_reports/Bpacificus/multiqc_report.html", + "multiqc/group_reports/K9H97", + "multiqc/group_reports/K9H97/multiqc_data", + "multiqc/group_reports/K9H97/multiqc_data/.stub", + "multiqc/group_reports/K9H97/multiqc_plots", + "multiqc/group_reports/K9H97/multiqc_plots/.stub", + "multiqc/group_reports/K9H97/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_seqinspector_software_mqc_versions.yml", + "reports", + "reports/bbmap", + "reports/bbmap/SAMPLE_PAIRED_END_1_01", + "reports/bbmap/SAMPLE_PAIRED_END_1_01/SAMPLE_PAIRED_END_1_01.clumpify.log", + "reports/bbmap/SAMPLE_PAIRED_END_2_02", + "reports/bbmap/SAMPLE_PAIRED_END_2_02/SAMPLE_PAIRED_END_2_02.clumpify.log", + "reports/bbmap/SAMPLE_SINGLE_END_03", + "reports/bbmap/SAMPLE_SINGLE_END_03/SAMPLE_SINGLE_END_03.clumpify.log" + ], + [ + "WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected." 
+ ] + ], + "timestamp": "2026-06-12T13:32:08.528487804", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.3" + } + } +} \ No newline at end of file diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index f093f7ad..69425e96 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -5,6 +5,7 @@ */ // modules +include { BBMAP_CLUMPIFY } from '../modules/nf-core/bbmap/clumpify' include { BWAMEM2_MEM } from '../modules/nf-core/bwamem2/mem' include { CHECKQC } from '../modules/nf-core/checkqc' include { FASTP } from '../modules/nf-core/fastp' @@ -173,6 +174,12 @@ workflow SEQINSPECTOR { // STEP 02: BASIC QC ON FASTQ FILES + // + // MODULE: Run BBMAP_CLUMPIFY + // + + BBMAP_CLUMPIFY(ch_samplesheet.filter { 'bbmap_clumpify' in tools }) + // // MODULE: SEQFU_STATS // @@ -200,6 +207,7 @@ workflow SEQINSPECTOR { SEQTK_SAMPLE(ch_samplesheet.map { meta, reads -> [meta, reads, sample_size] }.filter { sample_size }) ch_sample = sample_size ? SEQTK_SAMPLE.out.reads : ch_samplesheet + // STEP 04: MORE QC ON FASTQ FILES (CAN BE SUBSAMPLED) FASTQC(ch_sample.filter { 'fastqc' in tools })