nf-core · LouisLeNezet · Nov 14, 2025 · Nov 10, 2025 · Nov 10, 2025 · Nov 11, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [#184](https://github.com/nf-core/phaseimpute/pull/184) - Add support `.csi` index for `.bam` files.
 - [#188](https://github.com/nf-core/phaseimpute/pull/188) - Add documentation for all subworkflows.
 - [#210](https://github.com/nf-core/phaseimpute/pull/200) - Add BEAGLE5 support for genotype imputation.
+- [#211](https://github.com/nf-core/phaseimpute/pull/211) - Add MINIMAC4 support for genotype imputation.
 
 ### `Changed`
 
@@ -47,6 +48,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 | `shapeit5` | 1.0.0       | 5.1.1       |
 | `vcflib`   | 1.0.3       | 1.0.14      |
 | `beagle5`  |             | 5.2         |
+| `minimac4` |             | 4.1.6       |
+
+### New contributors
+
+[Gaspard Ichas](https://github.com/gichas)
 
 ## v1.0.0 - Black Labrador [2024-12-09]
 

diff --git a/README.md b/README.md
@@ -42,7 +42,7 @@ The whole pipeline consists of five main steps, each of which can be run separat
    - **Position Extraction** for targeted imputation sites.
 
 4. **Imputation (`--impute`)**: This is the primary step, where genotypes in the target dataset are imputed using the prepared reference panel. The main steps are:
-   - **Imputation** of the target dataset using tools like [Glimpse1](https://odelaneau.github.io/GLIMPSE/glimpse1/index.html), [Glimpse2](https://odelaneau.github.io/GLIMPSE/), [Stitch](https://github.com/rwdavies/stitch), or [Quilt](https://github.com/rwdavies/QUILT), or [Beagle5](https://faculty.washington.edu/browning/beagle/beagle.html).
+   - **Imputation** of the target dataset using tools like [Glimpse1](https://odelaneau.github.io/GLIMPSE/glimpse1/index.html), [Glimpse2](https://odelaneau.github.io/GLIMPSE/), [Stitch](https://github.com/rwdavies/stitch), [Quilt](https://github.com/rwdavies/QUILT), [Beagle5](https://faculty.washington.edu/browning/beagle/beagle.html) or [Minimac4](https://github.com/statgen/Minimac4).
    - **Ligation** of imputed chunks to produce a final VCF file per sample, with all chromosomes unified.
 
 5. **Validation (`--validate`)**: Assesses imputation accuracy by comparing the imputed dataset to a truth dataset. This step leverages the [Glimpse2](https://odelaneau.github.io/GLIMPSE/) concordance process to summarize differences between two VCF files.

diff --git a/conf/steps/imputation_minimac4.config b/conf/steps/imputation_minimac4.config
@@ -0,0 +1,50 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Config file for defining DSL2 per module options and publishing paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Available keys to override module options:
+        ext.args   = Additional arguments appended to command in module.
+        ext.args2  = Second set of arguments appended to command in module (multi-tool modules).
+        ext.args3  = Third set of arguments appended to command in module (multi-tool modules).
+        ext.prefix = File name prefix for output files.
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+process { // Per-process options for the VCF_IMPUTE_MINIMAC4 and CONCAT_MINIMAC4 subworkflows
+    withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_MINIMAC4:.*' { // regex: matches every process under VCF_IMPUTE_MINIMAC4
+        publishDir = [enabled: false] // outputs of these processes are not published
+        tag = { "${meta.id} ${meta.chr}" } // task label built from meta.id and meta.chr
+    }
+
+    withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_MINIMAC4:MINIMAC4_COMPRESSREF' {
+        ext.args   = '' // no additional command-line arguments
+        ext.prefix = { "${meta.id}.${meta.chr}.minimac4" } // output file name prefix: <id>.<chr>.minimac4
+        publishDir = [enabled: false] // same value as the wildcard selector above
+        tag = { "${meta.id} ${meta.chr}" } // same value as the wildcard selector above
+    }
+
+    withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_MINIMAC4:MINIMAC4_IMPUTE' {
+        ext.args   = { "--output-format vcf.gz" } // appended to the module command; requests vcf.gz output
+        ext.prefix = { "${meta.id}.${meta.chr}.minimac4" } // same prefix pattern as MINIMAC4_COMPRESSREF
+        tag = { "${meta.id} ${meta.chr}" } // no publishDir here; the wildcard selector above also matches this process
+    }
+
+    withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_MINIMAC4:BCFTOOLS_INDEX' {
+        ext.args   = '' // no additional command-line arguments
+        publishDir = [enabled: false] // same value as the wildcard selector above
+        tag = { "${meta.id} ${meta.chr}" } // same value as the wildcard selector above
+    }
+
+    withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_MINIMAC4:.*' { // regex: matches every process under CONCAT_MINIMAC4
+        publishDir = [
+            path: { "${params.outdir}/imputation/minimac4/concat" }, // publish destination under the pipeline output directory
+            mode: params.publish_dir_mode, // publish mode taken from the pipeline parameter
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename } // null for versions.yml so it is not published; other names kept as-is
+        ]
+    }
+
+    withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_MINIMAC4:BCFTOOLS_CONCAT' {
+        ext.args   = ["--ligate", "--output-type z", "--write-index=tbi"].join(' ') // the three options joined into one space-separated argument string
+        ext.prefix = { "${meta.id}.minimac4" } // output file name prefix: <id>.minimac4 (no chromosome component)
+    }
+}
diff --git a/conf/test_minimac4.config b/conf/test_minimac4.config
@@ -0,0 +1,46 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/phaseimpute -profile test_minimac4,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+process { // Resource settings applied to all processes of this test profile
+    resourceLimits = [ // upper bounds for per-task resource requests
+        cpus: 4,
+        memory: '4.GB',
+        time: '1.h'
+    ]
+}
+
+params { // Pipeline parameters for the MINIMAC4 test profile
+    config_profile_name        = 'Test profile'
+    config_profile_description = 'Minimal test dataset to check pipeline function with MINIMAC4'
+
+    // Input data
+    input  = "${projectDir}/tests/csv/sample_vcf_snp.csv" // samplesheet shipped in the repository's tests/csv directory
+
+    // Genome references
+    fasta  = params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz" // base path is defined outside this file
+    panel  = "${projectDir}/tests/csv/panel.csv"
+
+    // Region file
+    input_region = "${projectDir}/tests/csv/region.csv"
+
+    // Map file
+    map    = "${projectDir}/tests/csv/map_plink.csv"
+
+    // Position file
+    posfile = "${projectDir}/tests/csv/posfile.csv"
+
+    // Pipeline steps
+    steps  = "impute" // only the imputation step is selected
+
+    // Impute tools
+    tools  = "minimac4" // only MINIMAC4 is selected as imputation tool
+}
diff --git a/docs/usage.md b/docs/usage.md
@@ -200,6 +200,8 @@ The different tests profiles are:
 - `test_glimpse2`: A profile to evaluate the imputation step with the `glimpse2` tool.
 - `test_quilt`: A profile to evaluate the imputation step with the `quilt` tool.
 - `test_stitch`: A profile to evaluate the imputation step with the `stitch` tool.
+- `test_beagle5`: A profile to evaluate the imputation step with the `beagle5` tool.
+- `test_minimac4`: A profile to evaluate the imputation step with the `minimac4` tool.
 - `test_panelprep`: A profile to evaluate the panel preparation step.
 - `test_sim`: A profile to evaluate the simulation step.
 - `test_validate`: A profile to evaluate the validation step.
@@ -294,7 +296,7 @@ For starting from the imputation steps, the required flags are:
 - `--steps impute`
 - `--input input.csv`: The samplesheet containing the input sample files in `bam`, `cram` or `vcf`, `bcf` format.
 - `--genome` or `--fasta`: The reference genome of the samples.
-- `--tools [glimpse1, quilt, stitch]`: A selection of one or more of the available imputation tools. Each imputation tool has their own set of specific flags and input files. These required files are produced by `--steps panelprep` and used as input in:
+- `--tools [glimpse1, glimpse2, quilt, stitch, beagle5, minimac4]`: A selection of one or more of the available imputation tools. Each imputation tool has their own set of specific flags and input files. These required files are produced by `--steps panelprep` and used as input in:
   - `--chunks chunks.csv`: A samplesheet containing chunks per chromosome. These are produced by `--steps panelprep` using `GLIMPSE1`.
   - `--posfile posfile.csv`: A samplesheet containing a `.legend.gz` file with the list of positions to genotype per chromosome. These are required by tools ( QUILT/STITCH/GLIMPSE1). It can also contain the `hap.gz` files (required by QUILT). The posfile can be generated with `--steps panelprep`.
   - `--panel panel.csv`: A samplesheet containing the post-processed reference panel VCF (required by GLIMPSE1, GLIMPSE2). These files can be obtained with `--steps panelprep`.
@@ -308,6 +310,7 @@ For starting from the imputation steps, the required flags are:
 | `QUILT`    | ✅               | ✅ ²      | ✅                      | ❌        | ✅         | ✅ ⁴        |
 | `STITCH`   | ✅               | ✅ ²      | ✅                      | ❌        | ❌         | ✅ ³        |
 | `BEAGLE5`  | ✅               | ✅ ¹      | ✅                      | ✅        | ❌         | ❌          |
+| `MINIMAC4` | ✅               | ✅ ¹      | ✅                      | ✅        | ❌         | ❌          |
 
 > ¹ Alignment files as well as variant calling format (i.e. BAM, CRAM, VCF or BCF)
 > ² Alignment files only (i.e. BAM or CRAM)
@@ -333,14 +336,14 @@ When the number of samples exceeds the batch size, the pipeline will split the s
 
 To summarize:
 
-- If you have Variant Calling Format (VCF) files, join them into a single file and choose either GLIMPSE1, GLIMPSE2 or BEAGLE5.
+- If you have Variant Calling Format (VCF) files, join them into a single file and choose either GLIMPSE1, GLIMPSE2, BEAGLE5 or MINIMAC4.
   - GLIMPSE1 and STITCH may induce batch effects, so all samples need to be imputed together.
   - GLIMPSE2 should not do target-to-target imputation.
 - If you have alignment files (e.g., BAM or CRAM), all tools are available, and processing will occur in `batch_size`:
   - GLIMPSE1 and STITCH may induce batch effects, so all samples need to be imputed together.
   - GLIMPSE2 and QUILT can process samples in separate batches.
 
-## Imputation tools `--steps impute --tools [glimpse1, glimpse2, quilt, stitch, beagle5]`
+## Imputation tools `--steps impute --tools [glimpse1, glimpse2, quilt, stitch, beagle5, minimac4]`
 
 You can choose different software to perform the imputation. In the following sections, the typical commands for running the pipeline with each software are included. Multiple tools can be selected by separating them with a comma (eg. `--tools glimpse1,quilt`).
 
@@ -502,6 +505,26 @@ nextflow run nf-core/phaseimpute \
 
 The CSV file provided in `--panel` must be prepared with `--steps panelprep` and must contain four columns [panel, chr, vcf, index].
 
+### MINIMAC4
+
+[MINIMAC4](https://github.com/statgen/Minimac4) is a low-memory, computationally efficient implementation of the MaCH algorithm for genotype imputation. It is designed to work on phased haplotypes and can handle very large reference panels.
+
+```bash
+nextflow run nf-core/phaseimpute \
+    --input samplesheet.csv \
+    --panel samplesheet_reference.csv \
+    --steps impute \
+    --tools minimac4 \
+    --outdir results \
+    --genome GRCh37 \
+    -profile docker \
+    --posfile posfile.csv
+```
+
+The CSV file provided in `--panel` must be prepared with `--steps panelprep` and must contain four columns [panel, chr, vcf, index].
+
+MINIMAC4 works only with variant calling format files (VCF or BCF) as input.
+
 ## Start with validation `--steps validate`
 
 <img src="images/metro/Validate.png" alt="concordance_metro" width="600"/>

diff --git a/modules.json b/modules.json
@@ -119,6 +119,16 @@
                         "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
                         "installed_by": ["modules"]
                     },
+                    "minimac4/compressref": {
+                        "branch": "master",
+                        "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
+                        "installed_by": ["modules"]
+                    },
+                    "minimac4/impute": {
+                        "branch": "master",
+                        "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
+                        "installed_by": ["modules"]
+                    },
                     "multiqc": {
                         "branch": "master",
                         "git_sha": "e10b76ca0c66213581bec2833e30d31f239dec0b",

diff --git a/modules/nf-core/minimac4/compressref/environment.yml b/modules/nf-core/minimac4/compressref/environment.yml
diff --git a/modules/nf-core/minimac4/compressref/main.nf b/modules/nf-core/minimac4/compressref/main.nf
diff --git a/modules/nf-core/minimac4/compressref/meta.yml b/modules/nf-core/minimac4/compressref/meta.yml
diff --git a/modules/nf-core/minimac4/compressref/tests/main.nf.test b/modules/nf-core/minimac4/compressref/tests/main.nf.test