nf-core · LouisLeNezet · Nov 10, 2025 · Nov 6, 2025 · Nov 6, 2025 · Nov 6, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [#181](https://github.com/nf-core/phaseimpute/pull/181) - Add nf-co2footprint plugin to the config file.
 - [#184](https://github.com/nf-core/phaseimpute/pull/184) - Add support `.csi` index for `.bam` files.
 - [#188](https://github.com/nf-core/phaseimpute/pull/188) - Add documentation for all subworkflows.
+- [#200](https://github.com/nf-core/phaseimpute/pull/200) - Add BEAGLE5 support for genotype imputation.
 
 ### `Changed`
 
@@ -45,6 +46,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 | `r-stitch` | 1.6.10      | 1.7.3       |
 | `shapeit5` | 1.0.0       | 5.1.1       |
 | `vcflib`   | 1.0.3       | 1.0.14      |
+| `beagle5`  |             | 5.2         |
 
 ## v1.0.0 - Black Labrador [2024-12-09]
 

diff --git a/README.md b/README.md
@@ -42,7 +42,7 @@ The whole pipeline consists of five main steps, each of which can be run separat
    - **Position Extraction** for targeted imputation sites.
 
 4. **Imputation (`--impute`)**: This is the primary step, where genotypes in the target dataset are imputed using the prepared reference panel. The main steps are:
-   - **Imputation** of the target dataset using tools like [Glimpse1](https://odelaneau.github.io/GLIMPSE/glimpse1/index.html), [Glimpse2](https://odelaneau.github.io/GLIMPSE/), [Stitch](https://github.com/rwdavies/stitch), or [Quilt](https://github.com/rwdavies/QUILT).
+   - **Imputation** of the target dataset using tools like [Glimpse1](https://odelaneau.github.io/GLIMPSE/glimpse1/index.html), [Glimpse2](https://odelaneau.github.io/GLIMPSE/), [Stitch](https://github.com/rwdavies/stitch), [Quilt](https://github.com/rwdavies/QUILT), or [Beagle5](https://faculty.washington.edu/browning/beagle/beagle.html).
    - **Ligation** of imputed chunks to produce a final VCF file per sample, with all chromosomes unified.
 
 5. **Validation (`--validate`)**: Assesses imputation accuracy by comparing the imputed dataset to a truth dataset. This step leverages the [Glimpse2](https://odelaneau.github.io/GLIMPSE/) concordance process to summarize differences between two VCF files.

diff --git a/conf/steps/imputation_beagle5.config b/conf/steps/imputation_beagle5.config
@@ -0,0 +1,54 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Config file for defining DSL2 per module options and publishing paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Available keys to override module options:
+        ext.args   = Additional arguments appended to command in module.
+        ext.args2  = Second set of arguments appended to command in module (multi-tool modules).
+        ext.args3  = Third set of arguments appended to command in module (multi-tool modules).
+        ext.prefix = File name prefix for output files.
+----------------------------------------------------------------------------------------
+*/
+
+process {
+    // Configuration for the BEAGLE5 imputation subworkflow
+
+    // Impute the variants with BEAGLE5
+    withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_BEAGLE5:.*' {
+        publishDir = [ enabled: false ]
+        tag = {"${meta.id} ${meta.chr}"}
+    }
+
+    withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_BEAGLE5:BEAGLE5_BEAGLE' {
+        ext.args = { "gp=true ap=true chrom=${meta.chr}" }
+        ext.prefix = { "${meta.id}.${meta.chr}.beagle5" }
+        publishDir = [ enabled: false ]
+    }
+
+    // Convert BCF to VCF if necessary
+    withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_BEAGLE5:BCFTOOLS_VIEW' {
+        ext.args = ["--output-type z", "--write-index=csi"].join(' ')
+        ext.prefix = { "${meta.id}.${meta.chr}.converted" }
+        publishDir = [ enabled: false ]
+    }
+
+    // Index the imputed VCF files
+    withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_BEAGLE5:BCFTOOLS_INDEX_BEAGLE' {
+        ext.args = ''
+        publishDir = [ enabled: false ]
+    }
+
+    // Concatenate the imputed chromosomes
+    withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_BEAGLE5:.*' {
+        publishDir = [
+            path: { "${params.outdir}/imputation/beagle5/concat" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_BEAGLE5:BCFTOOLS_CONCAT' {
+        ext.args = ["--output-type z", "--write-index=tbi"].join(' ')
+        ext.prefix = { "${meta.id}.beagle5" }
+    }
+}
diff --git a/conf/test_all.config b/conf/test_all.config
@@ -42,7 +42,7 @@ params {
     steps = "all"
 
     // Impute tools
-    tools = "glimpse1,glimpse2,stitch,quilt"
+    tools = "glimpse1,glimpse2,stitch,quilt,beagle5"
 }
 
 process {

diff --git a/conf/test_all_fullchr.config b/conf/test_all_fullchr.config
@@ -29,7 +29,7 @@ params {
 
     // Pipeline steps
     steps   = "all"
-    tools   = "glimpse1,glimpse2,quilt,stitch"
+    tools   = "glimpse1,glimpse2,quilt,stitch,beagle5"
     depth   = 1
 
     // Panelprep optional args

diff --git a/conf/test_beagle5.config b/conf/test_beagle5.config
@@ -0,0 +1,44 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests with BEAGLE5
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/phaseimpute -profile test_beagle5,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+process {
+    resourceLimits = [
+        cpus: 4,
+        memory: '8.GB',
+        time: '1.h'
+    ]
+}
+
+params {
+    config_profile_name        = 'Test profile for BEAGLE5'
+    config_profile_description = 'Minimal test dataset to check BEAGLE5 imputation function'
+
+    // Input data
+    input        = "${projectDir}/tests/csv/sample_vcf_snp.csv"
+    input_region = "${projectDir}/tests/csv/region.csv"
+    panel        = "${projectDir}/tests/csv/panel.csv"
+
+    // Map file
+    map          = "${projectDir}/tests/csv/map_plink.csv"
+
+    // Genome references
+    fasta = params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz"
+
+    // Pipeline steps
+    steps = 'impute'
+
+    // Imputation tools
+    tools = 'beagle5'
+
+    // Main options
+    outdir = 'results'
+}
diff --git a/conf/test_glimpse2.config b/conf/test_glimpse2.config
@@ -19,7 +19,7 @@ process {
 }
 
 params {
-    config_profile_name        = 'Test profile'
+    config_profile_name        = 'Test profile GLIMPSE2'
     config_profile_description = 'Minimal test dataset to check pipeline function with GLIMPSE2'
 
     // Input data

diff --git a/conf/test_validate.config b/conf/test_validate.config
@@ -30,7 +30,7 @@ params {
     // Genome references
     fasta   = params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz"
     posfile = "${projectDir}/tests/csv/posfile_vcf_index.csv"
-    map     = "${projectDir}/tests/csv/map.csv"
+    map     = "${projectDir}/tests/csv/map_glimpse.csv"
 
     // Pipeline steps
     steps   = "validate"

diff --git a/docs/output.md b/docs/output.md
@@ -111,7 +111,7 @@ The results from `--steps impute` will have the following directory structure:
 ```tree
 ├── batch
 ├── csv
-├── glimpse1/glimpse2/quilt/stitch
+├── glimpse1/glimpse2/quilt/stitch/beagle5
 │   ├── concat/
 │   └── samples/
 ├── stats

diff --git a/docs/usage.md b/docs/usage.md
@@ -307,6 +307,7 @@ For starting from the imputation steps, the required flags are:
 | `GLIMPSE2` | ✅               | ✅ ¹      | ✅                      | ✅        | ✅         | ❌          |
 | `QUILT`    | ✅               | ✅ ²      | ✅                      | ❌        | ✅         | ✅ ⁴        |
 | `STITCH`   | ✅               | ✅ ²      | ✅                      | ❌        | ❌         | ✅ ³        |
+| `BEAGLE5`  | ✅               | ✅ ¹      | ✅                      | ✅        | ❌         | ❌          |
 
 > ¹ Alignment files as well as variant calling format (i.e. BAM, CRAM, VCF or BCF)
 > ² Alignment files only (i.e. BAM or CRAM)
@@ -332,12 +333,14 @@ When the number of samples exceeds the batch size, the pipeline will split the s
 
 To summarize:
 
-- If you have Variant Calling Format (VCF) files, join them into a single file and choose either GLIMPSE1 or GLIMPSE2.
+- If you have Variant Calling Format (VCF) files, join them into a single file and choose either GLIMPSE1, GLIMPSE2 or BEAGLE5.
+  - GLIMPSE1 may induce batch effects, so all samples need to be imputed together.
+  - GLIMPSE2 should not do target-to-target imputation.
 - If you have alignment files (e.g., BAM or CRAM), all tools are available, and processing will occur in `batch_size`:
   - GLIMPSE1 and STITCH may induce batch effects, so all samples need to be imputed together.
   - GLIMPSE2 and QUILT can process samples in separate batches.
 
-## Imputation tools `--steps impute --tools [glimpse1, glimpse2, quilt, stitch]`
+## Imputation tools `--steps impute --tools [glimpse1, glimpse2, quilt, stitch, beagle5]`
 
 You can choose different software to perform the imputation. In the following sections, the typical commands for running the pipeline with each software are included. Multiple tools can be selected by separating them with a comma (eg. `--tools glimpse1,quilt`).
 
@@ -435,7 +438,10 @@ bcftools convert --haplegendsample ${vcf}
 
 ### GLIMPSE1
 
-[GLIMPSE1](https://github.com/odelaneau/GLIMPSE/tree/glimpse1) is a set of tools for phasing and imputation for low-coverage sequencing datasets. Recommended for many samples at >0.5x coverage and small reference panels. Glimpse1 works with alignment (i.e. BAM or CRAM) as well as variant (i.e. VCF or BCF) files as input. This is an example command to run this tool from the `--steps impute`:
+[GLIMPSE1](https://github.com/odelaneau/GLIMPSE/tree/glimpse1) is a set of tools for phasing and imputation for low-coverage sequencing datasets. Recommended for many samples at >0.5x coverage and small reference panels.
+Glimpse1 works with variant (i.e. VCF or BCF) files as input.
+Alignment files (i.e. BAM or CRAM) can also be used; in that case, the variants are first called with `bcftools mpileup` to produce a VCF file.
+This is an example command to run this tool from the `--steps impute`:
 
 ```bash
 nextflow run nf-core/phaseimpute \
@@ -477,6 +483,25 @@ nextflow run nf-core/phaseimpute \
 
 Make sure the CSV file with the input panel is the output from `--step panelprep` or has been previously prepared.
 
+### BEAGLE5
+
+[BEAGLE5](https://faculty.washington.edu/browning/beagle/beagle.html) is a software package for analyzing large-scale genetic
+data sets with hundreds of thousands of markers genotyped on thousands of samples.
+BEAGLE can phase genotype data and perform genotype imputation, but it only works on already-called genotypes.
+
+```bash
+nextflow run nf-core/phaseimpute \
+    --input samplesheet.csv \
+    --panel samplesheet_reference.csv \
+    --steps impute \
+    --tools beagle5 \
+    --outdir results \
+    --genome GRCh37 \
+    -profile docker
+```
+
+The CSV file provided in `--panel` must be prepared with `--steps panelprep` and must contain four columns [panel, chr, vcf, index].
+
 ## Start with validation `--steps validate`
 
 <img src="images/metro/Validate.png" alt="concordance_metro" width="600"/>
@@ -519,7 +544,7 @@ This mode runs all the previous steps. This requires several flags:
 - `--input input.csv`: The samplesheet containing the input sample files in `bam` or `cram` format.
 - `--depth`: The final depth of the input file [default: 1].
 - `--genome` or `--fasta`: The reference genome of the samples.
-- `--tools [glimpse1, glimpse2, quilt, stitch]`: A selection of one or more of the available imputation tools.
+- `--tools [glimpse1, glimpse2, quilt, stitch, beagle5]`: A selection of one or more of the available imputation tools.
 - `--panel panel.csv`: The samplesheet containing the reference panel files in `vcf.gz` format.
 - `--remove_samples`: (optional) A comma-separated list of samples to remove from the reference.
 - `--input_truth input_truth.csv`: The samplesheet containing the truth VCF files in `vcf` format.

diff --git a/modules.json b/modules.json
@@ -64,6 +64,11 @@
                         "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
                         "installed_by": ["modules"]
                     },
+                    "beagle5/beagle": {
+                        "branch": "master",
+                        "git_sha": "359cfb69d521fcb8b56313e9a6ca9d66036aa921",
+                        "installed_by": ["modules"]
+                    },
                     "bedtools/makewindows": {
                         "branch": "master",
                         "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",

diff --git a/modules/nf-core/beagle5/beagle/environment.yml b/modules/nf-core/beagle5/beagle/environment.yml
diff --git a/modules/nf-core/beagle5/beagle/main.nf b/modules/nf-core/beagle5/beagle/main.nf