diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 1843aa586..7aad6ace7 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -4,44 +4,23 @@ name: nf-core AWS full size tests # It runs the -profile 'test_full' on AWS batch on: - pull_request: - branches: - - main - - master workflow_dispatch: pull_request_review: types: [submitted] + release: + types: [published] jobs: run-platform: name: Run AWS full tests - # run only if the PR is approved by at least 2 reviewers and against the master branch or manually triggered - if: github.repository == 'nf-core/raredisease' && github.event.review.state == 'approved' && github.event.pull_request.base.ref == 'master' || github.event_name == 'workflow_dispatch' + # run only if a PR against the master/main branch is approved, a release is published, or the workflow is manually triggered + if: github.repository == 'nf-core/raredisease' && github.event.review.state == 'approved' && (github.event.pull_request.base.ref == 'master' || github.event.pull_request.base.ref == 'main') || github.event_name == 'workflow_dispatch' || github.event_name == 'release' runs-on: ubuntu-latest steps: - - name: Get PR reviews - uses: octokit/request-action@v2.x - if: github.event_name != 'workflow_dispatch' - id: check_approvals - continue-on-error: true - with: - route: GET /repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}/reviews?per_page=100 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: Check for approvals - if: ${{ failure() && github.event_name != 'workflow_dispatch' }} - run: | - echo "No review approvals found. At least 2 approvals are required to run this action automatically."
- exit 1 - - - name: Check for enough approvals (>=2) - id: test_variables - if: github.event_name != 'workflow_dispatch' + - name: Set revision variable + id: revision run: | - JSON_RESPONSE='${{ steps.check_approvals.outputs.data }}' - CURRENT_APPROVALS_COUNT=$(echo $JSON_RESPONSE | jq -c '[.[] | select(.state | contains("APPROVED")) ] | length') - test $CURRENT_APPROVALS_COUNT -ge 2 || exit 1 # At least 2 approvals are required + echo "revision=${{ (github.event_name == 'workflow_dispatch' || github.event_name == 'release') && github.sha || 'dev' }}" >> "$GITHUB_OUTPUT" - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 @@ -51,12 +30,12 @@ jobs: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} - revision: ${{ github.sha }} - workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/raredisease/work-${{ github.sha }} + revision: ${{ steps.revision.outputs.revision }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/raredisease/work-${{ steps.revision.outputs.revision }} parameters: | { "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/raredisease/results-${{ github.sha }}" + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/raredisease/results-${{ steps.revision.outputs.revision }}" } profiles: test_full diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d4998c68b..02aa9a5cd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -67,5 +67,6 @@ jobs: uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}" + continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }} run: | nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} -stub --outdir ./results diff --git a/.nf-core.yml 
b/.nf-core.yml index 147f28dc1..4e9a9e593 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -8,7 +8,7 @@ lint: - docs/images/nf-core-raredisease_logo_light.png - assets/nf-core-raredisease_logo_light.png modules_config: false -nf_core_version: 3.2.0 +nf_core_version: 3.2.1 repository_type: pipeline template: author: Clinical Genomics Stockholm @@ -18,4 +18,4 @@ template: name: raredisease org: nf-core outdir: . - version: 2.5.0dev + version: 2.6.0dev diff --git a/CHANGELOG.md b/CHANGELOG.md index 23e6a2fc5..6af0c33ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## 2.5.0dev - Cacofonix [XXXX-XX-XX] +## 2.6.0dev - Cacofonix [XXXX-XX-XX] ### `Added` @@ -52,6 +52,38 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | add_most_severe_consequence | 1.0 | 1.1 | | add_most_severe_pli | 1.0 | 1.1 | +## v2.5.0 - Fulliautomatix [2025-05-22] + +### `Added` + +- A new parameter `concatenate_snv_calls` to generate a concatenated VCF file containing unannotated nuclear & mitochondrial SNV calls [#699](https://github.com/nf-core/raredisease/pull/699) +- Functionality to check contamination in samples using VerifyBamID2 [#701](https://github.com/nf-core/raredisease/pull/701) +- New parameters `verifybamid_svd_bed`, `verifybamid_svd_mu`, and `verifybamid_svd_ud` to supply reference files for VerifyBamID2 [#701](https://github.com/nf-core/raredisease/pull/701) + +### `Changed` + +- Default to remove mitochondrial variants with FILTER status not equal to PASS [#697](https://github.com/nf-core/raredisease/pull/697) + +### `Fixed` + +- Sort the input files before vcf2cytosure is invoked [#697](https://github.com/nf-core/raredisease/pull/697) +- Use '--mitochondria-mode' by default when running Gatk4 FilterMutectCalls on mitochondrial 
variants [#697](https://github.com/nf-core/raredisease/pull/697) + +### Parameters + +| Old parameter | New parameter | +| ------------- | --------------------- | +| | concatenate_snv_calls | +| | verifybamid_svd_bed | +| | verifybamid_svd_mu | +| | verifybamid_svd_ud | + +### Tool updates + +| Tool | Old version | New version | +| ------------ | ----------- | ----------- | +| VerifyBamID2 | | 2.0.1 | + ## 2.4.0 - Vitalstatistix [2025-02-24] ### `Added` diff --git a/CITATIONS.md b/CITATIONS.md index 27a306282..bd5c97d4f 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -160,6 +160,10 @@ > Pedersen BS, Layer RM, Quinlan AR. Vcfanno: fast, flexible annotation of genetic variants. Genome Biol. 2016;17(1):118. doi:10.1186/s13059-016-0973-5 +- [VerifyBamID2](https://github.com/Griffan/VerifyBamID) + + > Zhang F, Flickinger M, Taliun SAG, Consortium IPG, Abecasis GR, Scott LJ, McCarroll SA, Pato CN, Boehnke M, & Kang HM. (2020). Ancestry-agnostic estimation of DNA sample contamination from sequence reads. Genome Research, 30(2), 185–194. https://doi.org/10.1101/gr.246934.118 + ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) diff --git a/README.md b/README.md index 05edf157f..524fd51e6 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,7 @@ On release, automated continuous integration tests run the pipeline on a full-si - [Qualimap](http://qualimap.conesalab.org/) - [Sentieon's WgsMetricsAlgo](https://support.sentieon.com/manual/usages/general/) - [TIDDIT's cov](https://github.com/J35P312/) +- [VerifyBamID2](https://github.com/Griffan/VerifyBamID) **2. 
Alignment:** diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 836a1d673..087a5b6ef 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,13 +1,10 @@ custom_logo: "nf-core-raredisease_logo_light.png" custom_logo_url: https://github.com/nf-core/raredisease/ custom_logo_title: "nf-core/raredisease" - -report_comment: >2 - - This report has been generated by the nf-core/raredisease analysis pipeline. For information about - how to interpret these results, please see the documentation. +report_comment: > + This report has been generated by the nf-core/raredisease + analysis pipeline. For information about how to interpret these results, please see the + documentation. report_section_order: "nf-core-raredisease-methods-description": @@ -28,6 +25,7 @@ run_modules: - mosdepth - ngsbits - peddy + - verifybamid - custom_content module_order: @@ -49,6 +47,8 @@ module_order: name: "ngsbits" - peddy: name: "Peddy" + - verifybamid: + name: "VerifyBamID2" extra_fn_clean_exts: - "_sorted_md" diff --git a/conf/modules/align.config b/conf/modules/align.config index 84f4a36a9..c605a1586 100644 --- a/conf/modules/align.config +++ b/conf/modules/align.config @@ -28,7 +28,7 @@ process{ withName: '.*ALIGN:SAMTOOLS_VIEW' { ext.args = { '--output-fmt cram --write-index' } - ext.prefix = { "${meta.id}_sorted_md" } + ext.prefix = { "${meta.id}_sort_md" } publishDir = [ path: { "${params.outdir}/alignment" }, mode: params.publish_dir_mode, diff --git a/conf/modules/call_snv.config b/conf/modules/call_snv.config index 1546df4bd..58ad7b337 100644 --- a/conf/modules/call_snv.config +++ b/conf/modules/call_snv.config @@ -22,4 +22,15 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } + + withName: '.*CALL_SNV:BCFTOOLS_CONCAT' { + ext.args = { "--write-index=tbi" } + ext.prefix = { "${meta.id}_mt_and_nuclear_snvs" } + publishDir = [ + enabled: params.concatenate_snv_calls, + path: { "${params.outdir}/call_snv/concatenated_calls" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } } diff --git a/conf/modules/call_snv_MT.config b/conf/modules/call_snv_MT.config index 22e6fcb08..f77d2f6b1 100644 --- a/conf/modules/call_snv_MT.config +++ b/conf/modules/call_snv_MT.config @@ -22,6 +22,7 @@ process { } withName: '.*CALL_SNV_MT:GATK4_FILTERMUTECTCALLS_MT' { + ext.args = '--mitochondria-mode' ext.prefix = { "${meta.id}_filtered" } } @@ -46,6 +47,7 @@ process { } withName: '.*CALL_SNV_MT_SHIFT:GATK4_FILTERMUTECTCALLS_MT' { + ext.args = '--mitochondria-mode' ext.prefix = { "${meta.id}_filtered_shifted" } } diff --git a/conf/modules/postprocess_MT_calls.config b/conf/modules/postprocess_MT_calls.config index 1b4c8357a..61a14131a 100644 --- a/conf/modules/postprocess_MT_calls.config +++ b/conf/modules/postprocess_MT_calls.config @@ -45,7 +45,7 @@ process { } withName: '.*POSTPROCESS_MT_CALLS:BCFTOOLS_ANNOTATE' { - ext.args = "-c CHROM,FROM,TO,FOUND_IN --output-type z" + ext.args = "-c CHROM,FROM,TO,FOUND_IN --output-type z --include FILTER='\"PASS\"'" ext.prefix = { "${meta.id}_mitochondria" } publishDir = [ path: { "${params.outdir}/call_snv/mitochondria" }, diff --git a/docs/images/raredisease_metromap_dark.pdf b/docs/images/raredisease_metromap_dark.pdf index 030e64e00..65eecd9d7 100644 Binary files a/docs/images/raredisease_metromap_dark.pdf and b/docs/images/raredisease_metromap_dark.pdf differ diff --git a/docs/images/raredisease_metromap_dark.png b/docs/images/raredisease_metromap_dark.png index 1d5faec28..e3fe3f334 100644 Binary files a/docs/images/raredisease_metromap_dark.png and b/docs/images/raredisease_metromap_dark.png differ diff --git 
a/docs/images/raredisease_metromap_dark.svg b/docs/images/raredisease_metromap_dark.svg index 02b513c1e..e0342a017 100644 --- a/docs/images/raredisease_metromap_dark.svg +++ b/docs/images/raredisease_metromap_dark.svg @@ -28,14 +28,14 @@ inkscape:deskcolor="#d1d1d1" inkscape:document-units="mm" showgrid="true" - inkscape:zoom="0.29297896" - inkscape:cx="1926.7595" - inkscape:cy="762.85341" - inkscape:window-width="1920" - inkscape:window-height="1052" - inkscape:window-x="1710" - inkscape:window-y="28" - inkscape:window-maximized="0" + inkscape:zoom="0.31720749" + inkscape:cx="1473.7987" + inkscape:cy="941.02444" + inkscape:window-width="1712" + inkscape:window-height="1040" + inkscape:window-x="0" + inkscape:window-y="39" + inkscape:window-maximized="1" inkscape:current-layer="layer1" showguides="true" inkscape:export-bgcolor="#272829ff">mantasentieon-bwasentieon-dedupmarkduplicatesbwamem2bwaalignment to mitochondriaalignment to mitochondriabwamem2/sentieon/bwabwamem2/sentieon/bwamarkduplicatesmarkduplicatesmutect2mutect2alignment to shifted mitochondriaalignment to shifted mitochondriasentieon-dnascopesentieon-dnascopesentieon-dnamodelapplysentieon-dnamodelapplydeepvariantdeepvariantglnexusglnexusbcftools - rohbcftools - rohstrangerstrangerupdupdvcfannovcfannocaddcaddvepvepgenmodgenmodcaddcaddvepvephmtnotehmtnotegenmodgenmodmanta + id="tspan10">manta + + x="-95.560486" + y="292.85658" + id="tspan11"> + tiddit + x="-95.560486" + y="299.03019" + id="tspan12">tiddit + + x="-95.560486" + y="305.20381" + id="tspan13"> + cnvnatorcnvnatorvepvepsvdb-querysvdb-querygermlinecnvcallergermlinecnvcallergenmodgenmodexpansionhunterexpansionhuntermultiqcpicardtools+mosdepthmultiqcpicardtools+mosdepthvcfannobambam vcf vcf vcf vcfeklipsedefault path alternative pathskippable pathsretroseq vcfvepsvdb-querymarkduplicatessentieon-bwasentieon-dedupvcfannobwa/bwamem2/bwamemefastqfastqfastqreferencesfastqbambam vcf vcf vcfreferences vcfeklipsedefault path alternative pathskippable 
pathsmantaretroseq + sodipodi:nodetypes="scsccccccccssscsssscsssscsscsccsscccccccccc" + style="stroke-width:1.26326" /> vcfvepsvdb-queryverifybamid diff --git a/docs/images/raredisease_metromap_light.pdf b/docs/images/raredisease_metromap_light.pdf index ac9e39182..cc41a1368 100644 Binary files a/docs/images/raredisease_metromap_light.pdf and b/docs/images/raredisease_metromap_light.pdf differ diff --git a/docs/images/raredisease_metromap_light.png b/docs/images/raredisease_metromap_light.png index b1d0c3c69..39a391aa8 100644 Binary files a/docs/images/raredisease_metromap_light.png and b/docs/images/raredisease_metromap_light.png differ diff --git a/docs/images/raredisease_metromap_light.svg b/docs/images/raredisease_metromap_light.svg index c761ed366..34918de31 100644 --- a/docs/images/raredisease_metromap_light.svg +++ b/docs/images/raredisease_metromap_light.svg @@ -28,13 +28,13 @@ inkscape:deskcolor="#d1d1d1" inkscape:document-units="mm" showgrid="true" - inkscape:zoom="0.31621395" - inkscape:cx="1563.8146" - inkscape:cy="1098.9395" - inkscape:window-width="1920" - inkscape:window-height="1052" - inkscape:window-x="1710" - inkscape:window-y="28" + inkscape:zoom="0.31001843" + inkscape:cx="1533.7798" + inkscape:cy="864.46473" + inkscape:window-width="1712" + inkscape:window-height="1025" + inkscape:window-x="0" + inkscape:window-y="39" inkscape:window-maximized="0" inkscape:current-layer="layer1" showguides="true" @@ -139,13 +139,880 @@ id="path26-7" />alignment to mitochondriabwamem2/sentieon/bwamarkduplicatesmutect2alignment to shifted mitochondriasentieon-dnascopesentieon-dnamodelapplydeepvariantglnexusbcftools - rohstrangerupdvcfannocaddvepgenmodcaddvephmtnotegenmodmanta + + +tiddit + + +cnvnatorvepsvdb-querygermlinecnvcallergenmodexpansionhuntermultiqcpicardtools+mosdepthvcfannobambam vcf vcf vcf vcfeklipseretroseq vcfmantadefault path alignment to mitochondriabwamem2/sentieon/bwamarkduplicatesmutect2alignment to shifted 
mitochondriasentieon-dnascopesentieon-dnamodelapplydeepvariantglnexusbcftools - rohstrangerupdvcfannocaddvepgenmodcaddvephmtnotegenmodmanta - + -tiddit - + -cnvnatorvepsvdb-querygermlinecnvcallergenmodexpansionhuntermultiqcpicardtools+mosdepthvcfanno vcf vcf vcf vcfeklipseretroseq vcfdefault path alternative pathskippable pathsmarkduplicatessentieon-bwasentieon-dedupbwa/bwamem2/bwamemefastqfastqalternative pathskippable pathssentieon-bwasentieon-dedupmarkduplicatesbwamem2bwafastqreferencesfastqvepsvdb-querybambam + d="m 20.967951,158.32865 h 9.871264 v 4.36026 c 0,0.23542 0.190999,0.42612 0.426108,0.42612 h 3.764247 v 2.1348 H 20.967951 Z m 14.061658,19.79627 H 20.967951 v -4.10529 H 35.02957 v 4.10529 z" + id="path6276-4-8" />referencesmantavepsvdb-queryverifybamid diff --git a/docs/output.md b/docs/output.md index 8096e4194..519de7ef6 100644 --- a/docs/output.md +++ b/docs/output.md @@ -32,6 +32,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Chromograph coverage](#chromograph-coverage) - [Sention WgsMetricsAlgo](#sention-wgsmetricsalgo) - [TIDDIT's cov and UCSC WigToBigWig](#tiddits-cov-and-ucsc-wigtobigwig) + - [VerifyBamID2](#verifybamid2) - [Reporting](#reporting) - [MultiQC](#multiqc) - [Variant calling - SNV](#variant-calling---snv) @@ -242,6 +243,18 @@ The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They m - `.bw`: +##### VerifyBamID2 + +[VerifyBamID2](https://github.com/Griffan/VerifyBamID) is used to analyse a bam file and generates a contamination report. The pipeline will only generate the following files when the parameters `verifybamid_svd_bed`, `verifybamid_svd_mu`, and `verifybamid_svd_ud` are provided. + +
+Output files + +- `{outputdir}/qc_bam/` + - `.selfSM`: + - `.Ancestry`: +
+ #### Reporting ##### MultiQC diff --git a/docs/usage.md b/docs/usage.md index fdc5ac365..94e57fcea 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -209,14 +209,18 @@ The mandatory and optional parameters for each category are tabulated below. ##### 2. QC stats from the alignment files -| Mandatory | Optional | -| ------------------------------------------------------------ | -------- | -| intervals_wgs1 | | -| intervals_y1 | | -| target_bed / (bait_intervals & target_intervals)2 | | +| Mandatory | Optional | +| ------------------------------------------------------------ | ------------------------------- | +| intervals_wgs1 | | +| intervals_y1 | | +| target_bed / (bait_intervals & target_intervals)2 | | +| | verifybamid_svd_bed3 | +| | verifybamid_svd_mu3 | +| | verifybamid_svd_ud3 | 1These files are Picard's style interval list files, comprising the entire genome or only the chromosome Y. A version of these files for GRCh37 and for GRCh38 is supplied in the pipeline assets. These files are not necessary if you are using Sentieon.
2 If a target_bed file is provided, bait_intervals and target_intervals can be generated by the pipeline.
+3 Used by VerifyBamID2 to check for contamination. These files can either be downloaded from [VerifyBamID2 repository](https://github.com/Griffan/VerifyBamID/tree/master/resource) or created using VerifyBamID2 with custom files as described [here](https://github.com/Griffan/VerifyBamID?tab=readme-ov-file#generating-your-own-resource-files). ##### 3. Repeat expansions diff --git a/modules.json b/modules.json index 68be77b7f..3eafb893b 100644 --- a/modules.json +++ b/modules.json @@ -522,6 +522,11 @@ "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] + }, + "verifybamid/verifybamid2": { + "branch": "master", + "git_sha": "33278f733f3452dc77bde5c1a45db3a5f9278ac0", + "installed_by": ["modules"] } } }, diff --git a/modules/nf-core/verifybamid/verifybamid2/environment.yml b/modules/nf-core/verifybamid/verifybamid2/environment.yml new file mode 100644 index 000000000..c3c4371ea --- /dev/null +++ b/modules/nf-core/verifybamid/verifybamid2/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::verifybamid2=2.0.1 diff --git a/modules/nf-core/verifybamid/verifybamid2/main.nf b/modules/nf-core/verifybamid/verifybamid2/main.nf new file mode 100644 index 000000000..9568fee95 --- /dev/null +++ b/modules/nf-core/verifybamid/verifybamid2/main.nf @@ -0,0 +1,73 @@ +process VERIFYBAMID_VERIFYBAMID2 { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/verifybamid2:2.0.1--hbb20b25_6' : + 'biocontainers/verifybamid2:2.0.1--h19d48f6_8' }" + + input: + tuple val(meta), path(bam), path(bai) + tuple path(svd_ud), path(svd_mu), path(svd_bed) + path refvcf + path references + + output: + tuple val(meta), path("*.log") , optional:true, emit: log + tuple val(meta), path("*.UD") , optional:true, emit: ud + tuple val(meta), path("*.bed") , optional:true, emit: bed + tuple val(meta), path("*.mu") , optional:true, emit: mu + tuple val(meta), path("*.selfSM") , optional:true, emit: self_sm + tuple val(meta), path("*.Ancestry") , optional:true, emit: ancestry + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def args_list = args.tokenize() + + def bam_file = bam ? "--BamFile ${bam}" : "" + + def svd_args = (svd_ud.baseName.equals(svd_mu.baseName) && svd_ud.baseName.equals(svd_bed.baseName)) ? + "--SVDPrefix ${svd_ud.baseName}" : "--UDPath ${svd_ud} --MeanPath ${svd_mu} --BedPath ${svd_bed}" + def refvcf_args = "${refvcf}".endsWith(".vcf") ? "--RefVCF ${refvcf}" : "" + + def reference_args = ("$references".endsWith('.fasta')) ? 
+ "--Reference ${references}" : '' + + """ + verifybamid2 \\ + --NumThread $task.cpus \\ + ${svd_args} \\ + ${bam_file} \\ + ${refvcf_args} \\ + ${reference_args} \\ + ${args_list.join(' ')} \\ + > ${prefix}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + verifybamid: \$(echo \$(verifybamid2 --help 2>&1 | sed -e '3p;d' | sed -e 's/ Version://')) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.log + touch ${prefix}.ud + touch ${prefix}.bed + touch ${prefix}.mu + touch ${prefix}.selfSM + touch ${prefix}.Ancestry + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + verifybamid: \$(echo \$(verifybamid2 --help 2>&1 | sed -e '3p;d' | sed -e 's/ Version://')) + END_VERSIONS + """ +} diff --git a/modules/nf-core/verifybamid/verifybamid2/meta.yml b/modules/nf-core/verifybamid/verifybamid2/meta.yml new file mode 100644 index 000000000..b511fc3cd --- /dev/null +++ b/modules/nf-core/verifybamid/verifybamid2/meta.yml @@ -0,0 +1,126 @@ +name: "VERIFYBAMID_VERIFYBAMID2" +description: Detecting and estimating inter-sample DNA contamination became a crucial + quality assessment step to ensure high quality sequence reads and reliable downstream + analysis. +keywords: + - contamination + - bam + - verifybamid + - DNA contamination estimation +tools: + - "verifybamid2": + description: "A robust tool for DNA contamination estimation from sequence reads + using ancestry-agnostic method." + homepage: "http://griffan.github.io/VerifyBamID" + documentation: "http://griffan.github.io/VerifyBamID" + tool_dev_url: "https://github.com/Griffan/VerifyBamID" + doi: "10.1101/gr.246934.118" + licence: ["MIT"] + identifier: biotools:verifybamid +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: BAI/CRAI/CSI index file + pattern: "*.{bai,crai,csi}" + - - svd_ud: + type: file + description: .UD matrix file from SVD result of genotype matrix + pattern: "*.UD" + - svd_mu: + type: file + description: .mu matrix file of genotype matrix + pattern: "*.mu" + - svd_bed: + type: file + description: .Bed file for markers used in this analysis,format(chr\tpos-1\tpos\trefAllele\taltAllele)[Required] + pattern: "*.bed" + - - refvcf: + type: file + description: Reference panel VCF with genotype information, for generation of + .UD .mu .bed files [Optional] + pattern: "*.vcf" + - - references: + type: file + description: reference file [Required] + pattern: "*.fasta" +output: + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: Detailed summary of the VerifyBamId2 results + pattern: "*.log" + - ud: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.UD": + type: file + description: .UD matrix file from customized reference vcf input + pattern: "*.UD" + - bed: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bed": + type: file + description: .Bed file from customized reference marker vcf input + pattern: "*.bed" + - mu: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.mu": + type: file + description: .mu matrix file of genotype matrix from customized reference vcf + input + pattern: "*.mu" + - self_sm: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.selfSM": + type: file + description: Shares the same format as legacy VB1 and the key information FREEMIX + indicates the estimated contamination level. + pattern: "*.selfSM" + - ancestry: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.Ancestry": + type: file + description: Ancestry information + pattern: "*.Ancestry" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@manifestoso" +maintainers: + - "@manifestoso" diff --git a/modules/nf-core/verifybamid/verifybamid2/tests/main.nf.test b/modules/nf-core/verifybamid/verifybamid2/tests/main.nf.test new file mode 100644 index 000000000..99acd6844 --- /dev/null +++ b/modules/nf-core/verifybamid/verifybamid2/tests/main.nf.test @@ -0,0 +1,180 @@ +nextflow_process { + + name "Test Process VERIFYBAMID_VERIFYBAMID2" + script "../main.nf" + process "VERIFYBAMID_VERIFYBAMID2" + + tag "modules" + tag "modules_nfcore" + tag "verifybamid" + tag "verifybamid/verifybamid2" + + test("Should run with SVD input") { + config "./nextflow.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam'), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai') ] + input[1] = [ file(params.modules_testdata_base_path + 'delete_me/verifybamid/test.genome.vcf.UD'), + file(params.modules_testdata_base_path + 'delete_me/verifybamid/test.genome.vcf.mu'), + file(params.modules_testdata_base_path + 'delete_me/verifybamid/test.genome.vcf.bed') ] + input[2] = [] + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta') + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } 
+ ) + } + } + + test("Should run with RefVCF input") { + config "./nextflow.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam'), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai') ] + input[1] = [ [], [], [] ] + input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf') + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta') + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log[0][1]).name, + process.out.bed, + process.out.mu, + process.out.self_sm, + process.out.ancestry, + process.out.versions, + file(process.out.ud[0][1]).name + ).match() } + ) + } + } + + test("Should run with panel input") { + config "./nextflow.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam'), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai') ] + input[1] = [ file(params.modules_testdata_base_path + 'delete_me/verifybamid/test.genome.vcf.UD'), + file(params.modules_testdata_base_path + 'delete_me/verifybamid/test.genome.vcf.mu'), + file(params.modules_testdata_base_path + 'delete_me/verifybamid/test.genome.vcf.bed') ] + input[2] = [] + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta') + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log[0][1]).name, + process.out.versions + ).match() } + ) + } + } + + test("Should run with SVD input - stub") { + config "./nextflow.config" + + options "-stub" + + when { + process { + """ + input[0] = [ [ 
id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam'), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai') ] + input[1] = [ file(params.modules_testdata_base_path + 'delete_me/verifybamid/test.genome.vcf.UD'), + file(params.modules_testdata_base_path + 'delete_me/verifybamid/test.genome.vcf.mu'), + file(params.modules_testdata_base_path + 'delete_me/verifybamid/test.genome.vcf.bed') ] + input[2] = [] + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta') + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("Should run with RefVCF input - stub") { + config "./nextflow.config" + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam'), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai') ] + input[1] = [ [], [], [] ] + input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf') + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta') + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("Should run with panel input - stub") { + config "./nextflow.config" + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam'), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai') ] + input[1] = [ file(params.modules_testdata_base_path + 
'delete_me/verifybamid/test.genome.vcf.UD'), + file(params.modules_testdata_base_path + 'delete_me/verifybamid/test.genome.vcf.mu'), + file(params.modules_testdata_base_path + 'delete_me/verifybamid/test.genome.vcf.bed') ] + input[2] = [] + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta') + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/verifybamid/verifybamid2/tests/main.nf.test.snap b/modules/nf-core/verifybamid/verifybamid2/tests/main.nf.test.snap new file mode 100644 index 000000000..9bc0853a4 --- /dev/null +++ b/modules/nf-core/verifybamid/verifybamid2/tests/main.nf.test.snap @@ -0,0 +1,481 @@ +{ + "Should run with panel input": { + "content": [ + "test.log", + [ + "versions.yml:md5,199f2e02b570926d6a249055c917c6b1" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-22T13:02:25.962766167" + }, + "Should run with panel input - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.mu:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test.selfSM:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Ancestry:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + "versions.yml:md5,199f2e02b570926d6a249055c917c6b1" + ], + "ancestry": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Ancestry:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bed": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "mu": [ + [ + { + "id": "test", + "single_end": false + }, + "test.mu:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "self_sm": [ + [ + { + "id": "test", + "single_end": false + }, + "test.selfSM:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "ud": [ + + ], + "versions": [ + "versions.yml:md5,199f2e02b570926d6a249055c917c6b1" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-05-05T16:46:04.375799563" + }, + "Should run with RefVCF input - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.mu:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test.selfSM:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Ancestry:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + "versions.yml:md5,199f2e02b570926d6a249055c917c6b1" + ], + "ancestry": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Ancestry:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bed": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "mu": [ + [ + { + "id": "test", + "single_end": false + }, + "test.mu:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "self_sm": [ + [ + { + "id": "test", + "single_end": false + }, + "test.selfSM:md5,d41d8cd98f00b204e9800998ecf8427e" 
+ ] + ], + "ud": [ + + ], + "versions": [ + "versions.yml:md5,199f2e02b570926d6a249055c917c6b1" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-05-05T16:45:57.687144464" + }, + "Should run with SVD input": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.log:md5,32a9811d143450f9f5b99c21b6ecff61" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "result.selfSM:md5,9a10860a7e79e393c773aa801ec9f84a" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "result.Ancestry:md5,fe39ad1aced7cbb1eb21a74332389ad2" + ] + ], + "6": [ + "versions.yml:md5,199f2e02b570926d6a249055c917c6b1" + ], + "ancestry": [ + [ + { + "id": "test", + "single_end": false + }, + "result.Ancestry:md5,fe39ad1aced7cbb1eb21a74332389ad2" + ] + ], + "bed": [ + + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.log:md5,32a9811d143450f9f5b99c21b6ecff61" + ] + ], + "mu": [ + + ], + "self_sm": [ + [ + { + "id": "test", + "single_end": false + }, + "result.selfSM:md5,9a10860a7e79e393c773aa801ec9f84a" + ] + ], + "ud": [ + + ], + "versions": [ + "versions.yml:md5,199f2e02b570926d6a249055c917c6b1" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-05T20:51:39.853534509" + }, + "Should run with RefVCF input": { + "content": [ + "test.log", + [ + [ + { + "id": "test", + "single_end": false + }, + "test.genome.vcf.bed:md5,47106c764019ee359b1936410e2efa1e" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.genome.vcf.mu:md5,0810db02af2a176fa5ddc0f404911b66" + ] + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,199f2e02b570926d6a249055c917c6b1" + ], + "test.genome.vcf.UD" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-20T13:43:20.984216864" + }, + "Should run with SVD input - stub": { + "content": [ + { + 
"0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.mu:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test.selfSM:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Ancestry:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + "versions.yml:md5,199f2e02b570926d6a249055c917c6b1" + ], + "ancestry": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Ancestry:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bed": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "mu": [ + [ + { + "id": "test", + "single_end": false + }, + "test.mu:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "self_sm": [ + [ + { + "id": "test", + "single_end": false + }, + "test.selfSM:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "ud": [ + + ], + "versions": [ + "versions.yml:md5,199f2e02b570926d6a249055c917c6b1" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-05-05T16:40:02.231564621" + } +} \ No newline at end of file diff --git a/modules/nf-core/verifybamid/verifybamid2/tests/nextflow.config b/modules/nf-core/verifybamid/verifybamid2/tests/nextflow.config new file mode 100644 index 000000000..85682eaa2 --- /dev/null +++ b/modules/nf-core/verifybamid/verifybamid2/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'VERIFYBAMID_VERIFYBAMID2' { + ext.args = '--DisableSanityCheck' + } +} diff --git a/nextflow.config b/nextflow.config index 
50079dcc0..8cb2794ae 100644 --- a/nextflow.config +++ b/nextflow.config @@ -24,6 +24,7 @@ params { analysis_type = 'wgs' bwa_as_fallback = false bait_padding = 100 + concatenate_snv_calls = false extract_alignments = false restrict_to_contigs = null run_mt_for_wes = false @@ -77,6 +78,9 @@ params { variant_consequences_sv = null vep_filters = null vep_filters_scout_fmt = null + verifybamid_svd_bed = null + verifybamid_svd_mu = null + verifybamid_svd_ud = null vcf2cytosure_blacklist = null vcfanno_extra_resources = null vcfanno_resources = null @@ -421,8 +425,8 @@ manifest { mainScript = 'main.nf' defaultBranch = 'master' nextflowVersion = '!>=24.04.4' - version = '2.5.0dev' + version = '2.6.0dev' doi = '10.5281/zenodo.7995798' } // Load DSL2 module options from config files, where each file contains the options for modules used in the eponymous subworkflow. @@ -466,7 +470,7 @@ includeConfig 'conf/modules/annotate_rhocallviz.config' // Nextflow plugins plugins { - id 'nf-schema@2.3.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-schema@2.2.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet } validation { diff --git a/nextflow_schema.json b/nextflow_schema.json index 6b4cf02f0..95c412a25 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -409,6 +409,27 @@ "help_text": "Optional file to rename sample ids in the vcf2cytosure vcf", "pattern": "^\\S+\\.csv$" }, + "verifybamid_svd_bed": { + "type": "string", + "exists": true, + "format": "file-path", + "description": "Path to a BED file containing markers used by verifybamid2.", + "fa_icon": "fas fa-file-csv" + }, + "verifybamid_svd_mu": { + "type": "string", + "exists": true, + "format": "file-path", + "description": "Path to mean matrix file of genotype matrix. 
Used by verifybamid2.", + "fa_icon": "fas fa-file-csv" + }, + "verifybamid_svd_ud": { + "type": "string", + "exists": true, + "format": "file-path", + "description": "Path to UD matrix file from SVD result of genotype matrix. Used by verifybamid2.", + "fa_icon": "fas fa-file-csv" + }, "vcf2cytosure_blacklist": { "type": "string", "exists": true, @@ -503,6 +524,11 @@ "help_text": "errorStrategy needs to be set to ignore for the bwamem2 process for the fallback to work. Turned off by default.", "fa_icon": "fas fa-toggle-on" }, + "concatenate_snv_calls": { + "type": "boolean", + "description": "Specifies whether to generate a concatenated VCF file containing both nuclear & mitochondrial snv calls", + "fa_icon": "fas fa-toggle-on" + }, "extract_alignments": { "type": "boolean", "default": "false", diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 00b1dff1d..9c368d9b4 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -22,8 +22,8 @@ "@id": "./", "@type": "Dataset", "creativeWorkStatus": "InProgress", - "datePublished": "2025-02-24T14:01:00+00:00", - "description": "

\n \n \n \"nf-core/raredisease\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/raredisease/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/raredisease/actions/workflows/ci.yml)\n\n[![GitHub Actions Linting Status](https://github.com/nf-core/raredisease/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/raredisease/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/raredisease/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7995798-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7995798)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n[![GitHub Actions Linting Status](https://github.com/nf-core/raredisease/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/raredisease/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/raredisease/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7995798-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7995798)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/raredisease)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23raredisease-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/raredisease)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n#### TOC\n\n- [Introduction](#introduction)\n- [Pipeline summary](#pipeline-summary)\n- [Usage](#usage)\n- [Pipeline output](#pipeline-output)\n- [Credits](#credits)\n- [Contributions and Support](#contributions-and-support)\n- [Citations](#citations)\n\n## Introduction\n\n**nf-core/raredisease** is a best-practice bioinformatic pipeline for calling and scoring variants from WGS/WES data from rare disease patients. This pipeline is heavily inspired by [MIP](https://github.com/Clinical-Genomics/MIP).\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. 
This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/raredisease/results).\n\n## Pipeline summary\n\n \n \n \"nf-core/raredisease\n \n\n**1. Metrics:**\n\n- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)\n- [Mosdepth](https://github.com/brentp/mosdepth)\n- [MultiQC](http://multiqc.info/)\n- [Picard's CollectMutipleMetrics, CollectHsMetrics, and CollectWgsMetrics](https://broadinstitute.github.io/picard/)\n- [Qualimap](http://qualimap.conesalab.org/)\n- [Sentieon's WgsMetricsAlgo](https://support.sentieon.com/manual/usages/general/)\n- [TIDDIT's cov](https://github.com/J35P312/)\n\n**2. Alignment:**\n\n- [Bwa-mem2](https://github.com/bwa-mem2/bwa-mem2)\n- [BWA-MEME](https://github.com/kaist-ina/BWA-MEME)\n- [BWA](https://github.com/lh3/bwa)\n- [Sentieon DNAseq](https://support.sentieon.com/manual/DNAseq_usage/dnaseq/)\n\n**3. Variant calling - SNV:**\n\n- [DeepVariant](https://github.com/google/deepvariant)\n- [Sentieon DNAscope](https://support.sentieon.com/manual/DNAscope_usage/dnascope/)\n\n**4. Variant calling - SV:**\n\n- [Manta](https://github.com/Illumina/manta)\n- [TIDDIT's sv](https://github.com/SciLifeLab/TIDDIT)\n- Copy number variant calling:\n - [CNVnator](https://github.com/abyzovlab/CNVnator)\n - [GATK GermlineCNVCaller](https://github.com/broadinstitute/gatk)\n\n**5. Annotation - SNV:**\n\n- [bcftools roh](https://samtools.github.io/bcftools/bcftools.html#roh)\n- [vcfanno](https://github.com/brentp/vcfanno)\n- [CADD](https://cadd.gs.washington.edu/)\n- [VEP](https://www.ensembl.org/info/docs/tools/vep/index.html)\n- [UPD](https://github.com/bjhall/upd)\n- [Chromograph](https://github.com/Clinical-Genomics/chromograph)\n\n**6. 
Annotation - SV:**\n\n- [SVDB query](https://github.com/J35P312/SVDB#Query)\n- [VEP](https://www.ensembl.org/info/docs/tools/vep/index.html)\n\n**7. Mitochondrial analysis:**\n\n- [Alignment and variant calling - GATK Mitochondrial short variant discovery pipeline ](https://gatk.broadinstitute.org/hc/en-us/articles/4403870837275-Mitochondrial-short-variant-discovery-SNVs-Indels-)\n- [eKLIPse](https://github.com/dooguypapua/eKLIPse/tree/master)\n- Annotation:\n - [HaploGrep2](https://github.com/seppinho/haplogrep-cmd)\n - [Hmtnote](https://github.com/robertopreste/HmtNote)\n - [vcfanno](https://github.com/brentp/vcfanno)\n - [CADD](https://cadd.gs.washington.edu/)\n - [VEP](https://www.ensembl.org/info/docs/tools/vep/index.html)\n\n**8. Variant calling - repeat expansions:**\n\n- [Expansion Hunter](https://github.com/Illumina/ExpansionHunter)\n- [Stranger](https://github.com/Clinical-Genomics/stranger)\n\n**9. Variant calling - mobile elements:**\n\n- [RetroSeq](https://github.com/tk2/RetroSeq)\n\n**10. Rank variants - SV and SNV:**\n\n- [GENMOD](https://github.com/Clinical-Genomics/genmod)\n\n**11. Variant evaluation:**\n\n- [RTG Tools](https://github.com/RealTimeGenomics/rtg-tools)\n\nNote that it is possible to include/exclude certain tools or steps.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,lane,fastq_1,fastq_2,sex,phenotype,paternal_id,maternal_id,case_id\nhugelymodelbat,1,reads_1.fastq.gz,reads_2.fastq.gz,1,2,,,justhusky\n```\n\nEach row represents a fastq file (single-end) or a pair of fastq files (paired end).\n\nSecond, ensure that you have defined the path to reference files and parameters required for the type of analysis that you want to perform. More information about this can be found [here](https://github.com/nf-core/raredisease/blob/dev/docs/usage.md).\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/raredisease \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/raredisease/usage) and the [parameter documentation](https://nf-co.re/raredisease/parameters).\n\n## Pipeline output\n\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/raredisease/output).\n\n## Credits\n\nnf-core/raredisease was written in a collaboration between the Clinical Genomics nodes in Sweden, with major contributions from [Ramprasad Neethiraj](https://github.com/ramprasadn), [Anders Jemt](https://github.com/jemten), [Lucia Pena Perez](https://github.com/Lucpen), and [Mei Wu](https://github.com/projectoriented) at Clinical Genomics Stockholm.\n\nAdditional contributors were [Sima Rahimi](https://github.com/sima-r), [Gwenna Breton](https://github.com/Gwennid) and [Emma V\u00e4sterviga](https://github.com/EmmaCAndersson) (Clinical Genomics Gothenburg); [Halfdan Rydbeck](https://github.com/hrydbeck) and [Lauri Mesilaakso](https://github.com/ljmesi) (Clinical Genomics Link\u00f6ping); [Subazini Thankaswamy Kosalai](https://github.com/sysbiocoder) (Clinical Genomics \u00d6rebro); [Annick Renevey](https://github.com/rannick), [Peter Pruisscher](https://github.com/peterpru) and [Eva Caceres](https://github.com/fevac) (Clinical Genomics Stockholm); [Ryan Kennedy](https://github.com/ryanjameskennedy) (Clinical Genomics Lund); [Anders Sune Pedersen](https://github.com/asp8200) (Danish National Genome Center) and [Lucas Taniguti](https://github.com/lmtani).\n\nWe thank the nf-core community for their extensive assistance in the development of this pipeline.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the 
[contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#raredisease` channel](https://nfcore.slack.com/channels/raredisease) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/raredisease for your analysis, please cite it using the following doi: [10.5281/zenodo.7995798](https://doi.org/10.5281/zenodo.7995798)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n\nYou can read more about MIP's use in healthcare in,\n\n> Stranneheim H, Lagerstedt-Robinson K, Magnusson M, et al. Integration of whole genome sequencing into a healthcare setting: high diagnostic rates across multiple clinical entities in 3219 rare disease patients. Genome Med. 2021;13(1):40. doi:10.1186/s13073-021-00855-5\n", + "datePublished": "2025-05-23T07:59:34+00:00", + "description": "

\n \n \n \"nf-core/raredisease\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/raredisease/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/raredisease/actions/workflows/ci.yml)\n\n[![GitHub Actions Linting Status](https://github.com/nf-core/raredisease/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/raredisease/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/raredisease/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7995798-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7995798)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n[![GitHub Actions Linting Status](https://github.com/nf-core/raredisease/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/raredisease/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/raredisease/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7995798-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7995798)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.4-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/raredisease)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23raredisease-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/raredisease)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n#### TOC\n\n- [Introduction](#introduction)\n- [Pipeline summary](#pipeline-summary)\n- [Usage](#usage)\n- [Pipeline output](#pipeline-output)\n- [Credits](#credits)\n- [Contributions and Support](#contributions-and-support)\n- [Citations](#citations)\n\n## Introduction\n\n**nf-core/raredisease** is a best-practice bioinformatic pipeline for calling and scoring variants from WGS/WES data from rare disease patients. This pipeline is heavily inspired by [MIP](https://github.com/Clinical-Genomics/MIP).\n\n> [!NOTE]\n> We do not support single-end data from Illumina, as some tools in the pipeline are not compatible with this type of data.\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. 
Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/raredisease/results).\n\n## Pipeline summary\n\n \n \n \"nf-core/raredisease\n \n\n**1. Metrics:**\n\n- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)\n- [Mosdepth](https://github.com/brentp/mosdepth)\n- [MultiQC](http://multiqc.info/)\n- [Picard's CollectMutipleMetrics, CollectHsMetrics, and CollectWgsMetrics](https://broadinstitute.github.io/picard/)\n- [Qualimap](http://qualimap.conesalab.org/)\n- [Sentieon's WgsMetricsAlgo](https://support.sentieon.com/manual/usages/general/)\n- [TIDDIT's cov](https://github.com/J35P312/)\n- [VerifyBamID2](https://github.com/Griffan/VerifyBamID)\n\n**2. Alignment:**\n\n- [Bwa-mem2](https://github.com/bwa-mem2/bwa-mem2)\n- [BWA-MEME](https://github.com/kaist-ina/BWA-MEME)\n- [BWA](https://github.com/lh3/bwa)\n- [Sentieon DNAseq](https://support.sentieon.com/manual/DNAseq_usage/dnaseq/)\n\n**3. Variant calling - SNV:**\n\n- [DeepVariant](https://github.com/google/deepvariant)\n- [Sentieon DNAscope](https://support.sentieon.com/manual/DNAscope_usage/dnascope/)\n\n**4. 
Variant calling - SV:**\n\n- [Manta](https://github.com/Illumina/manta)\n- [TIDDIT's sv](https://github.com/SciLifeLab/TIDDIT)\n- Copy number variant calling:\n - [CNVnator](https://github.com/abyzovlab/CNVnator)\n - [GATK GermlineCNVCaller](https://github.com/broadinstitute/gatk)\n\n**5. Annotation - SNV:**\n\n- [bcftools roh](https://samtools.github.io/bcftools/bcftools.html#roh)\n- [vcfanno](https://github.com/brentp/vcfanno)\n- [CADD](https://cadd.gs.washington.edu/)\n- [VEP](https://www.ensembl.org/info/docs/tools/vep/index.html)\n- [UPD](https://github.com/bjhall/upd)\n- [Chromograph](https://github.com/Clinical-Genomics/chromograph)\n\n**6. Annotation - SV:**\n\n- [SVDB query](https://github.com/J35P312/SVDB#Query)\n- [VEP](https://www.ensembl.org/info/docs/tools/vep/index.html)\n\n**7. Mitochondrial analysis:**\n\n- [Alignment and variant calling - GATK Mitochondrial short variant discovery pipeline ](https://gatk.broadinstitute.org/hc/en-us/articles/4403870837275-Mitochondrial-short-variant-discovery-SNVs-Indels-)\n- [eKLIPse](https://github.com/dooguypapua/eKLIPse/tree/master)\n- Annotation:\n - [HaploGrep2](https://github.com/seppinho/haplogrep-cmd)\n - [Hmtnote](https://github.com/robertopreste/HmtNote)\n - [vcfanno](https://github.com/brentp/vcfanno)\n - [CADD](https://cadd.gs.washington.edu/)\n - [VEP](https://www.ensembl.org/info/docs/tools/vep/index.html)\n\n**8. Variant calling - repeat expansions:**\n\n- [Expansion Hunter](https://github.com/Illumina/ExpansionHunter)\n- [Stranger](https://github.com/Clinical-Genomics/stranger)\n\n**9. Variant calling - mobile elements:**\n\n- [RetroSeq](https://github.com/tk2/RetroSeq)\n\n**10. Rank variants - SV and SNV:**\n\n- [GENMOD](https://github.com/Clinical-Genomics/genmod)\n\n**11. 
Variant evaluation:**\n\n- [RTG Tools](https://github.com/RealTimeGenomics/rtg-tools)\n\nNote that it is possible to include/exclude certain tools or steps.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,lane,fastq_1,fastq_2,sex,phenotype,paternal_id,maternal_id,case_id\nhugelymodelbat,1,reads_1.fastq.gz,reads_2.fastq.gz,1,2,,,justhusky\n```\n\nEach row represents a pair of fastq files (paired end).\n\nSecond, ensure that you have defined the path to reference files and parameters required for the type of analysis that you want to perform. More information about this can be found [here](https://github.com/nf-core/raredisease/blob/dev/docs/usage.md).\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/raredisease \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/raredisease/usage) and the [parameter documentation](https://nf-co.re/raredisease/parameters).\n\n## Pipeline output\n\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/raredisease/output).\n\n## Credits\n\nnf-core/raredisease was written in a collaboration between the Clinical Genomics nodes in Sweden, with major contributions from [Ramprasad Neethiraj](https://github.com/ramprasadn), [Anders Jemt](https://github.com/jemten), [Lucia Pena Perez](https://github.com/Lucpen), and [Mei Wu](https://github.com/projectoriented) at Clinical Genomics Stockholm.\n\nAdditional contributors were [Sima Rahimi](https://github.com/sima-r), [Gwenna Breton](https://github.com/Gwennid) and [Emma V\u00e4sterviga](https://github.com/EmmaCAndersson) (Clinical Genomics Gothenburg); [Halfdan Rydbeck](https://github.com/hrydbeck) and [Lauri Mesilaakso](https://github.com/ljmesi) (Clinical Genomics Link\u00f6ping); [Subazini Thankaswamy Kosalai](https://github.com/sysbiocoder) (Clinical Genomics \u00d6rebro); [Annick Renevey](https://github.com/rannick), [Peter Pruisscher](https://github.com/peterpru) and [Eva Caceres](https://github.com/fevac) (Clinical Genomics Stockholm); [Ryan Kennedy](https://github.com/ryanjameskennedy) (Clinical Genomics Lund); [Anders Sune Pedersen](https://github.com/asp8200) (Danish National Genome Center) and [Lucas Taniguti](https://github.com/lmtani).\n\nWe thank the nf-core community for their extensive assistance in the development of this pipeline.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the 
[contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#raredisease` channel](https://nfcore.slack.com/channels/raredisease) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/raredisease for your analysis, please cite it using the following doi: [10.5281/zenodo.7995798](https://doi.org/10.5281/zenodo.7995798)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n\nYou can read more about MIP's use in healthcare in,\n\n> Stranneheim H, Lagerstedt-Robinson K, Magnusson M, et al. Integration of whole genome sequencing into a healthcare setting: high diagnostic rates across multiple clinical entities in 3219 rare disease patients. Genome Med. 2021;13(1):40. 
doi:10.1186/s13073-021-00855-5\n", "hasPart": [ { "@id": "main.nf" @@ -105,7 +105,7 @@ }, "mentions": [ { - "@id": "#9e7e4930-a6b6-4031-b002-1fc1b6c9b007" + "@id": "#8dbdeae9-bfc7-44e8-8e1c-d8091dbbc24d" } ], "name": "nf-core/raredisease" @@ -133,9 +133,6 @@ "ComputationalWorkflow" ], "creator": [ - { - "@id": "https://orcid.org/0000-0003-1316-2845" - }, { "@id": "#25568561+projectoriented@users.noreply.github.com" }, @@ -143,17 +140,20 @@ "@id": "https://orcid.org/0000-0001-7313-3734" }, { - "@id": "https://orcid.org/0000-0002-4100-9963" + "@id": "https://orcid.org/0000-0002-5044-7754" + }, + { + "@id": "https://orcid.org/0000-0003-1316-2845" }, { "@id": "https://orcid.org/0000-0002-2219-0197" }, { - "@id": "https://orcid.org/0000-0002-5044-7754" + "@id": "https://orcid.org/0000-0002-4100-9963" } ], "dateCreated": "", - "dateModified": "2025-02-24T15:01:00Z", + "dateModified": "2025-05-23T09:59:34Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", "keywords": [ "nf-core", @@ -178,13 +178,13 @@ "@id": "https://orcid.org/0000-0001-7313-3734" }, { - "@id": "https://orcid.org/0000-0002-4100-9963" + "@id": "https://orcid.org/0000-0002-5044-7754" }, { "@id": "https://orcid.org/0000-0002-2219-0197" }, { - "@id": "https://orcid.org/0000-0002-5044-7754" + "@id": "https://orcid.org/0000-0002-4100-9963" } ], "name": [ @@ -201,7 +201,7 @@ "https://nf-co.re/raredisease/dev/" ], "version": [ - "2.5.0dev" + "2.6.0dev" ] }, { @@ -214,14 +214,14 @@ "url": { "@id": "https://www.nextflow.io/" }, - "version": "!>=24.04.2" + "version": "!>=24.04.4" }, { - "@id": "#9e7e4930-a6b6-4031-b002-1fc1b6c9b007", + "@id": "#8dbdeae9-bfc7-44e8-8e1c-d8091dbbc24d", "@type": "TestSuite", "instance": [ { - "@id": "#6339bb98-b912-406b-8650-18985795ca21" + "@id": "#a30ab90f-4c97-46ff-b053-16a2ec97f0b6" } ], "mainEntity": { @@ -230,7 +230,7 @@ "name": "Test suite for nf-core/raredisease" }, { - "@id": "#6339bb98-b912-406b-8650-18985795ca21", + "@id": 
"#a30ab90f-4c97-46ff-b053-16a2ec97f0b6", "@type": "TestInstance", "name": "GitHub Actions workflow for testing nf-core/raredisease", "resource": "repos/nf-core/raredisease/actions/workflows/ci.yml", @@ -368,12 +368,6 @@ "name": "nf-core", "url": "https://nf-co.re/" }, - { - "@id": "https://orcid.org/0000-0003-1316-2845", - "@type": "Person", - "email": "raysloks@gmail.com", - "name": "Emil Bertilsson" - }, { "@id": "#25568561+projectoriented@users.noreply.github.com", "@type": "Person", @@ -387,10 +381,16 @@ "name": "Ramprasad Neethiraj" }, { - "@id": "https://orcid.org/0000-0002-4100-9963", + "@id": "https://orcid.org/0000-0002-5044-7754", "@type": "Person", - "email": "Gwenna.breton@gu.se", - "name": "Gwenna Breton" + "email": "lucia.pena.perez@scilifelab.se", + "name": "Luc\u00eda Pe\u00f1a-P\u00e9rez" + }, + { + "@id": "https://orcid.org/0000-0003-1316-2845", + "@type": "Person", + "email": "raysloks@gmail.com", + "name": "Emil Bertilsson" }, { "@id": "https://orcid.org/0000-0002-2219-0197", @@ -399,10 +399,10 @@ "name": "Anders Jemt" }, { - "@id": "https://orcid.org/0000-0002-5044-7754", + "@id": "https://orcid.org/0000-0002-4100-9963", "@type": "Person", - "email": "lucia.pena.perez@scilifelab.se", - "name": "Luc\u00eda Pe\u00f1a-P\u00e9rez" + "email": "Gwenna.breton@gu.se", + "name": "Gwenna Breton" } ] } \ No newline at end of file diff --git a/subworkflows/local/call_snv.nf b/subworkflows/local/call_snv.nf index 33da6d418..eac3adfec 100644 --- a/subworkflows/local/call_snv.nf +++ b/subworkflows/local/call_snv.nf @@ -8,6 +8,7 @@ include { CALL_SNV_MT } from './call_snv_MT' include { CALL_SNV_MT as CALL_SNV_MT_SHIFT } from './call_snv_MT' include { POSTPROCESS_MT_CALLS } from './postprocess_MT_calls' include { GATK4_SELECTVARIANTS } from '../../modules/nf-core/gatk4/selectvariants/main' +include { BCFTOOLS_CONCAT } from '../../modules/nf-core/bcftools/concat' workflow CALL_SNV { take: @@ -45,6 +46,7 @@ workflow CALL_SNV { ch_deepvar_gtbi = Channel.empty() 
ch_mt_vcf = Channel.empty() ch_mt_tabix = Channel.empty() + ch_mt_vcf_tabix = Channel.empty() ch_mt_txt = Channel.empty() ch_sentieon_vcf = Channel.empty() ch_sentieon_tbi = Channel.empty() @@ -131,13 +133,22 @@ workflow CALL_SNV { ch_foundin_header, ch_genome_chrsizes ) - ch_mt_vcf = POSTPROCESS_MT_CALLS.out.vcf - ch_mt_tabix = POSTPROCESS_MT_CALLS.out.tbi - ch_mt_txt = CALL_SNV_MT.out.txt - ch_versions = ch_versions.mix(CALL_SNV_MT.out.versions) - ch_versions = ch_versions.mix(CALL_SNV_MT_SHIFT.out.versions) - ch_versions = ch_versions.mix(POSTPROCESS_MT_CALLS.out.versions) - ch_versions = ch_versions.mix(GATK4_SELECTVARIANTS.out.versions) + ch_mt_vcf = POSTPROCESS_MT_CALLS.out.vcf + ch_mt_tabix = POSTPROCESS_MT_CALLS.out.tbi + ch_mt_vcf_tabix = ch_mt_vcf.join(ch_mt_tabix, failOnMismatch:true, failOnDuplicate:true) + ch_mt_txt = CALL_SNV_MT.out.txt + ch_versions = ch_versions.mix(CALL_SNV_MT.out.versions) + ch_versions = ch_versions.mix(CALL_SNV_MT_SHIFT.out.versions) + ch_versions = ch_versions.mix(POSTPROCESS_MT_CALLS.out.versions) + ch_versions = ch_versions.mix(GATK4_SELECTVARIANTS.out.versions) + } + + if (params.concatenate_snv_calls) { + ch_concat_vcf_in = ch_genome_vcf_tabix.concat(ch_mt_vcf_tabix).groupTuple() + BCFTOOLS_CONCAT ( + ch_concat_vcf_in + ) + ch_versions = ch_versions.mix(BCFTOOLS_CONCAT.out.versions) } emit: diff --git a/subworkflows/local/call_snv_deepvariant/main.nf b/subworkflows/local/call_snv_deepvariant/main.nf index 9cfb63899..2623ab7b2 100644 --- a/subworkflows/local/call_snv_deepvariant/main.nf +++ b/subworkflows/local/call_snv_deepvariant/main.nf @@ -40,9 +40,9 @@ workflow CALL_SNV_DEEPVARIANT { DEEPVARIANT ( ch_deepvar_in, ch_genome_fasta, ch_genome_fai, [[],[]], ch_par_bed ) DEEPVARIANT.out.gvcf - .collect{it[1]} + .map{ it -> it[1]} + .toSortedList{a, b -> a.name <=> b.name} .toList() - .collect() .set { ch_file_list } ch_case_info diff --git a/subworkflows/local/generate_cytosure_files.nf 
b/subworkflows/local/generate_cytosure_files.nf index 14d74023a..569300db2 100644 --- a/subworkflows/local/generate_cytosure_files.nf +++ b/subworkflows/local/generate_cytosure_files.nf @@ -28,15 +28,16 @@ workflow GENERATE_CYTOSURE_FILES { ch_bam.combine(ch_vcf_tbi) .map { meta_sample, bam, meta_case, vcf, tbi -> - def new_meta = ['id':meta_sample.sample, 'sex':meta_sample.sex] - return [ new_meta, vcf, tbi ] + def id_meta = ['id':meta_sample.sample] + def sex_meta = ['sex':meta_sample.sex] + return [ id_meta, sex_meta, vcf, tbi ] } .join(ch_sample_id_map, remainder: true) .branch { it -> - id: it[3].equals(null) - return [it[0] + [custid:it[0].id], it[1], it[2]] - custid: !(it[3].equals(null)) - return [it[0] + [custid:it[3]], it[1], it[2]] + id: it[4].equals(null) + return [it[0] + [custid:it[0].id] + it[1], it[2], it[3]] + custid: !(it[4].equals(null)) + return [it[0] + [custid:it[4]] + it[1], it[2], it[3]] } .set { ch_for_mix } @@ -71,11 +72,18 @@ workflow GENERATE_CYTOSURE_FILES { Channel.empty() .mix(ch_for_mix.split, ch_for_mix.reheader) + .toSortedList { a, b -> a[0].id <=> b[0].id } + .flatMap() .set { ch_vcf2cytosure_in } + TIDDIT_COV_VCF2CYTOSURE.out.cov + .toSortedList { a, b -> a[0].id <=> b[0].id } + .flatMap() + .set { ch_cov2cytosure_in } + VCF2CYTOSURE ( ch_vcf2cytosure_in, - TIDDIT_COV_VCF2CYTOSURE.out.cov, + ch_cov2cytosure_in, [[:], []], [[:], []], ch_blacklist ) diff --git a/subworkflows/local/postprocess_MT_calls/main.nf b/subworkflows/local/postprocess_MT_calls/main.nf index 9978ca292..6cd32fec6 100644 --- a/subworkflows/local/postprocess_MT_calls/main.nf +++ b/subworkflows/local/postprocess_MT_calls/main.nf @@ -41,8 +41,9 @@ workflow POSTPROCESS_MT_CALLS { ch_vcfs = ch_mt_vcf .join(PICARD_LIFTOVERVCF.out.vcf_lifted, remainder: true) .map{ meta, vcf1, vcf2 -> - [meta, [vcf1, vcf2]] - } + [meta, [vcf1, vcf2]] + } + GATK4_MERGEVCFS_LIFT_UNLIFT_MT( ch_vcfs, ch_genome_dictionary) // Filtering Variants @@ -66,14 +67,14 @@ workflow POSTPROCESS_MT_CALLS { 
TABIX_TABIX_MT2(REMOVE_DUPLICATES_MT.out.vcf) REMOVE_DUPLICATES_MT.out.vcf - .collect{it[1]} - .ifEmpty([]) + .map{ it -> it[1]} + .toSortedList{a, b -> a.name <=> b.name} .toList() .set { file_list_vcf } TABIX_TABIX_MT2.out.tbi - .collect{it[1]} - .ifEmpty([]) + .map{ it -> it[1]} + .toSortedList{a, b -> a.name <=> b.name} .toList() .set { file_list_tbi } diff --git a/subworkflows/local/qc_bam.nf b/subworkflows/local/qc_bam.nf index 2b08eebb0..f4bb6ff51 100644 --- a/subworkflows/local/qc_bam.nf +++ b/subworkflows/local/qc_bam.nf @@ -14,6 +14,7 @@ include { PICARD_COLLECTWGSMETRICS as PICARD_COLLECTWGSMETRICS_Y } from '../.. include { SENTIEON_WGSMETRICS as SENTIEON_WGSMETRICS_WG } from '../../modules/nf-core/sentieon/wgsmetrics/main' include { SENTIEON_WGSMETRICS as SENTIEON_WGSMETRICS_Y } from '../../modules/nf-core/sentieon/wgsmetrics/main' include { NGSBITS_SAMPLEGENDER } from '../../modules/nf-core/ngsbits/samplegender/main' +include { VERIFYBAMID_VERIFYBAMID2 } from '../../modules/nf-core/verifybamid/verifybamid2/main' workflow QC_BAM { @@ -28,7 +29,10 @@ workflow QC_BAM { ch_chrom_sizes // channel: [mandatory] [ path(sizes) ] ch_intervals_wgs // channel: [mandatory] [ path(intervals) ] ch_intervals_y // channel: [mandatory] [ path(intervals) ] - ngsbits_samplegender_method // channel [val(method)] + ch_svd_bed // channel: [optional] [ path(bed) ] + ch_svd_mu // channel: [optional] [ path(meanpath) ] + ch_svd_ud // channel: [optional] [ path(ud) ] + ngsbits_samplegender_method // channel: [val(method)] main: ch_cov = Channel.empty() @@ -71,6 +75,9 @@ workflow QC_BAM { // Check sex NGSBITS_SAMPLEGENDER(ch_bam_bai, ch_genome_fasta, ch_genome_fai, ngsbits_samplegender_method) + // Check contamination + ch_svd_in = ch_svd_ud.combine(ch_svd_mu).combine(ch_svd_bed).collect() + VERIFYBAMID_VERIFYBAMID2(ch_bam_bai, ch_svd_in, [], ch_genome_fasta.map {it-> it[1]}) ch_versions = ch_versions.mix(CHROMOGRAPH_COV.out.versions.first()) ch_versions = 
ch_versions.mix(PICARD_COLLECTMULTIPLEMETRICS.out.versions.first()) @@ -79,6 +86,7 @@ workflow QC_BAM { ch_versions = ch_versions.mix(UCSC_WIGTOBIGWIG.out.versions.first()) ch_versions = ch_versions.mix(MOSDEPTH.out.versions.first()) ch_versions = ch_versions.mix(NGSBITS_SAMPLEGENDER.out.versions.first()) + ch_versions = ch_versions.mix(VERIFYBAMID_VERIFYBAMID2.out.versions.first()) emit: multiple_metrics = PICARD_COLLECTMULTIPLEMETRICS.out.metrics // channel: [ val(meta), path(metrics) ] @@ -88,7 +96,8 @@ workflow QC_BAM { bigwig = UCSC_WIGTOBIGWIG.out.bw // channel: [ val(meta), path(bw) ] d4 = MOSDEPTH.out.per_base_d4 // channel: [ val(meta), path(d4) ] global_dist = MOSDEPTH.out.global_txt // channel: [ val(meta), path(txt) ] - sex_check = NGSBITS_SAMPLEGENDER.out.tsv // channel: [val(meta), path(tsv) ] + sex_check = NGSBITS_SAMPLEGENDER.out.tsv // channel: [ val(meta), path(tsv) ] + self_sm = VERIFYBAMID_VERIFYBAMID2.out.self_sm // channel: [ val(meta), path(selfSM) ] cov = ch_cov // channel: [ val(meta), path(metrics) ] cov_y = ch_cov_y // channel: [ val(meta), path(metrics) ] versions = ch_versions // channel: [ path(versions.yml) ] diff --git a/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf b/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf index c089fe2a1..1fd4335d6 100644 --- a/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf @@ -451,6 +451,7 @@ def toolCitationText() { "Qualimap (Okonechnikov et al., 2016),", "TIDDIT (Eisfeldt et al., 2017),", "UCSC Bigwig and Bigbed (Kent et al., 2010),", + (params.verifybamid_svd_bed && params.verifybamid_svd_mu && params.verifybamid_svd_ud) ? "VerifyBamID2 (Zhang et al., 2020)," : "", "Mosdepth (Pedersen & Quinlan, 2018)," ] preprocessing_text = [ @@ -579,6 +580,7 @@ def toolBibliographyText() { "
  • Okonechnikov, K., Conesa, A., & García-Alcalde, F. (2016). Qualimap 2: Advanced multi-sample quality control for high-throughput sequencing data. Bioinformatics, 32(2), 292–294. https://doi.org/10.1093/bioinformatics/btv566
  • ", "
  • Eisfeldt, J., Vezzi, F., Olason, P., Nilsson, D., & Lindstrand, A. (2017). TIDDIT, an efficient and comprehensive structural variant caller for massive parallel sequencing data. F1000Research, 6, 664. https://doi.org/10.12688/f1000research.11168.2
  • ", "
  • Kent, W. J., Zweig, A. S., Barber, G., Hinrichs, A. S., & Karolchik, D. (2010). BigWig and BigBed: Enabling browsing of large distributed datasets. Bioinformatics, 26(17), 2204–2207. https://doi.org/10.1093/bioinformatics/btq351
  • ", + (params.verifybamid_svd_bed && params.verifybamid_svd_mu && params.verifybamid_svd_ud) ? "
  • Zhang, F., Flickinger, M., Taliun, S. A. G., Consortium, I. P. G., Abecasis, G. R., Scott, L. J., McCaroll, S. A., Pato, C. N., Boehnke, M., & Kang, H. M. (2020). Ancestry-agnostic estimation of DNA sample contamination from sequence reads. Genome Research, 30(2), 185–194. https://doi.org/10.1101/gr.246934.118
  • " : "", "
  • Pedersen, B. S., & Quinlan, A. R. (2018). Mosdepth: Quick coverage calculation for genomes and exomes. Bioinformatics, 34(5), 867–868. https://doi.org/10.1093/bioinformatics/btx699
  • " ] preprocessing_text = [ diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 0c7bf36e0..25fd4ab13 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -211,6 +211,12 @@ workflow RAREDISEASE { : Channel.empty() ch_sv_bedpedbs = params.svdb_query_bedpedbs ? Channel.fromPath(params.svdb_query_bedpedbs) : Channel.empty() + ch_svd_bed = params.verifybamid_svd_bed ? Channel.fromPath(params.verifybamid_svd_bed) + : Channel.empty() + ch_svd_mu = params.verifybamid_svd_mu ? Channel.fromPath(params.verifybamid_svd_mu) + : Channel.empty() + ch_svd_ud = params.verifybamid_svd_ud ? Channel.fromPath(params.verifybamid_svd_ud) + : Channel.empty() ch_target_bed = ch_references.target_bed ch_target_intervals = ch_references.target_intervals ch_variant_catalog = params.variant_catalog ? Channel.fromPath(params.variant_catalog).map { it -> [[id:it.simpleName],it]}.collect() @@ -391,6 +397,9 @@ workflow RAREDISEASE { ch_genome_chrsizes, ch_intervals_wgs, ch_intervals_y, + ch_svd_bed, + ch_svd_mu, + ch_svd_ud, Channel.value(params.ngsbits_samplegender_method) ) ch_versions = ch_versions.mix(QC_BAM.out.versions) @@ -912,6 +921,7 @@ workflow RAREDISEASE { ch_multiqc_files = ch_multiqc_files.mix(QC_BAM.out.qualimap_results.map{it[1]}.collect().ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(QC_BAM.out.global_dist.map{it[1]}.collect().ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(QC_BAM.out.cov.map{it[1]}.collect().ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(QC_BAM.out.self_sm.map{it[1]}.collect().ifEmpty([])) if (!(params.skip_tools && params.skip_tools.split(',').contains('peddy'))) { ch_multiqc_files = ch_multiqc_files.mix(PEDDY.out.ped.map{it[1]}.collect().ifEmpty([]))