nf-core · TCLamnidis · Apr 24, 2026 · Jan 30, 2026 · Feb 13, 2026 · Feb 13, 2026
diff --git a/conf/test_microbial.config b/conf/test_microbial.config
@@ -44,6 +44,7 @@ params {
 
     // BAM filtering
     deduplication_tool                    = "dedup"
+    deduplication_skipregionsplit               = true
     run_bamfiltering                      = true
     bamfiltering_minreadlength            = 30
     bamfiltering_mappingquality           = 37

diff --git a/nextflow.config b/nextflow.config
@@ -199,6 +199,7 @@ params {
 
     // Deduplication options
     skip_deduplication                                               = false
+    deduplication_skipregionsplit                                    = false
     deduplication_tool                                               = 'markduplicates'
 
     // Qualimap

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -1002,6 +1002,12 @@
                     "description": "Specify to skip the removal of PCR duplicates.",
                     "fa_icon": "fas fa-forward"
                 },
+                "deduplication_skipregionsplit": {
+                    "type": "boolean",
+                    "description": "Specify to run deduplication without splitting bams by contig (splitting by contig is the default behavior).",
+                    "fa_icon": "fas fa-forward",
+                    "help_text": "Run deduplication steps bam-by-bam rather than contig-by-contig for each bam file. This reduces the total number of jobs submitted to a cluster, but increases the computational runtime. If you use a shared cluster with limited resources, running many low-resource jobs can slow down the overall runtime of eager due to scheduling constraints.\nAlso applicable for poor-quality reference genomes."
+                },
                 "deduplication_tool": {
                     "type": "string",
                     "default": "markduplicates",

diff --git a/subworkflows/local/deduplicate.nf b/subworkflows/local/deduplicate.nf
@@ -29,39 +29,49 @@ workflow DEDUPLICATE {
         addNewMetaFromAttributes( it, "id" , "reference" , false )
     }
 
-    // Create genomic regions file for splitting the bam before deduplication
-    BUILD_INTERVALS( fasta_fai )
-    ch_versions      = ch_versions.mix( BUILD_INTERVALS.out.versions.first() )
+    if ( params.deduplication_skipregionsplit ) {
 
-    // Prep regions for combining
-    ch_intervals_for_join = BUILD_INTERVALS.out.bed
-    .map {
-        // Replace meta with new meta that contains the meta.id value in the meta.reference attribute only
-        addNewMetaFromAttributes( it, "id" , "reference" , true )
-    }
+        // No splitting of .bam files by contig, deduplicate all in one
+        input_for_deduplication = ch_bam_bai
 
-    // Ensure input bam matches the regions file
-    ch_bam_for_split = ch_bam_bai
-        .map {
-            // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute
-            addNewMetaFromAttributes( it, "reference" , "reference" , false )
-        }
-        .combine(
-            by: 0,
-            ch_intervals_for_join
-        )
+    } else {
+
+        // Create genomic regions file for splitting the bam before deduplication
+        BUILD_INTERVALS( fasta_fai )
+        ch_versions      = ch_versions.mix( BUILD_INTERVALS.out.versions.first() )
+
+        // Prep regions for combining
+        ch_intervals_for_join = BUILD_INTERVALS.out.bed
         .map {
-            ignore_me, meta, bam, bai, regions ->
-            [ meta, bam, bai, regions ]
+            // Replace meta with new meta that contains the meta.id value in the meta.reference attribute only
+            addNewMetaFromAttributes( it, "id" , "reference" , true )
         }
 
-    //Split input bam by region
-    BAM_SPLIT_BY_REGION( ch_bam_for_split )
-    ch_versions   = ch_versions.mix( BAM_SPLIT_BY_REGION.out.versions )
+        // Ensure input bam matches the regions file
+        ch_bam_for_split = ch_bam_bai
+            .map {
+                // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute
+                addNewMetaFromAttributes( it, "reference" , "reference" , false )
+            }
+            .combine(
+                by: 0,
+                ch_intervals_for_join
+            )
+            .map {
+                ignore_me, meta, bam, bai, regions ->
+                [ meta, bam, bai, regions ]
+            }
+
+        //Split input bam by region
+        BAM_SPLIT_BY_REGION( ch_bam_for_split )
+        input_for_deduplication = BAM_SPLIT_BY_REGION.out.bam_bai
+        ch_versions   = ch_versions.mix( BAM_SPLIT_BY_REGION.out.versions )
+
+    }
 
     if ( params.deduplication_tool == 'markduplicates' ) {
 
-        ch_markduplicates_input = BAM_SPLIT_BY_REGION.out.bam_bai
+        ch_markduplicates_input = input_for_deduplication
             .map {
                 // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute
                 addNewMetaFromAttributes( it, "reference" , "reference" , false )
@@ -83,70 +93,83 @@ workflow DEDUPLICATE {
                 ch_markduplicates_input.fasta,
                 ch_markduplicates_input.fasta_fai
             )
-            ch_versions             = ch_versions.mix( PICARD_MARKDUPLICATES.out.versions.first() )
+            ch_versions     = ch_versions.mix( PICARD_MARKDUPLICATES.out.versions.first() )
 
-            ch_dedupped_region_bam  = PICARD_MARKDUPLICATES.out.bam
+            ch_dedupped_bam = PICARD_MARKDUPLICATES.out.bam
 
     } else if ( params.deduplication_tool == "dedup" ) {
-        ch_dedup_input = BAM_SPLIT_BY_REGION.out.bam_bai
+        ch_dedup_input = input_for_deduplication
             .map {
                 meta, bam, bai ->
                 [ meta, bam ]
             }
 
         DEDUP( ch_dedup_input )
-        ch_versions            = ch_versions.mix( DEDUP.out.versions.first() )
+        ch_versions     = ch_versions.mix( DEDUP.out.versions.first() )
 
-        ch_dedupped_region_bam = DEDUP.out.bam
+        ch_dedupped_bam = DEDUP.out.bam
     }
 
-    ch_input_for_samtools_merge = ch_dedupped_region_bam
-        .map {
-            meta, bam ->
-            meta2 = meta.clone().findAll{ it.key != 'genomic_region' }
-            [ meta2, bam ]
-        }
-        .groupTuple()
-        .map {
-            // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute
-            addNewMetaFromAttributes( it, "reference" , "reference" , false )
-        }
-        .combine(
-            by:0,
-            ch_refs
+    if ( params.deduplication_skipregionsplit ) {
+
+        // Bams were never split by region, so re-merging is bypassed
+        ch_input_for_samtools_sort_dedupped = ch_dedupped_bam
+
+    } else {
+
+        // Re-merging of bams-by-contig must take place after deduplication
+        ch_input_for_samtools_merge = ch_dedupped_bam
+            .map {
+                meta, bam ->
+                meta2 = meta.clone().findAll{ it.key != 'genomic_region' }
+                [ meta2, bam ]
+            }
+            .groupTuple()
+            .map {
+                // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute
+                addNewMetaFromAttributes( it, "reference" , "reference" , false )
+            }
+            .combine(
+                by:0,
+                ch_refs
+            )
+            .multiMap{
+                // bam here is a list of bams
+                ignore_me, meta, bam, meta2, fasta, fasta_fai ->
+                bam:        [ meta, bam ]
+                fasta:      [ meta2, fasta ]
+                fasta_fai:  [ meta2, fasta_fai ]
+            }
+
+        // Merge the bams for each region into one bam
+        SAMTOOLS_MERGE_DEDUPPED(
+            ch_input_for_samtools_merge.bam,
+            ch_input_for_samtools_merge.fasta,
+            ch_input_for_samtools_merge.fasta_fai
         )
-        .multiMap{
-            // bam here is a list of bams
-            ignore_me, meta, bam, meta2, fasta, fasta_fai ->
-            bam:        [ meta, bam ]
-            fasta:      [ meta2, fasta ]
-            fasta_fai:  [ meta2, fasta_fai ]
-        }
+        ch_versions                         = ch_versions.mix( SAMTOOLS_MERGE_DEDUPPED.out.versions )
 
-    // Merge the bams for each region into one bam
-    SAMTOOLS_MERGE_DEDUPPED(
-        ch_input_for_samtools_merge.bam,
-        ch_input_for_samtools_merge.fasta,
-        ch_input_for_samtools_merge.fasta_fai
-    )
-    ch_versions   = ch_versions.mix( SAMTOOLS_MERGE_DEDUPPED.out.versions )
+        ch_input_for_samtools_sort_dedupped = SAMTOOLS_MERGE_DEDUPPED.out.bam
+
+    }
 
 
     // Sort the merged bam and index
-    SAMTOOLS_SORT_DEDUPPED ( SAMTOOLS_MERGE_DEDUPPED.out.bam )
+    SAMTOOLS_SORT_DEDUPPED ( ch_input_for_samtools_sort_dedupped )
     ch_versions   = ch_versions.mix( SAMTOOLS_SORT_DEDUPPED.out.versions )
     ch_dedup_bam  = SAMTOOLS_SORT_DEDUPPED.out.bam
 
     SAMTOOLS_INDEX_DEDUPPED ( ch_dedup_bam )
     ch_versions   = ch_versions.mix( SAMTOOLS_INDEX_DEDUPPED.out.versions )
-    ch_dedup_bai  =  params.fasta_largeref ? SAMTOOLS_INDEX_DEDUPPED.out.csi : SAMTOOLS_INDEX_DEDUPPED.out.bai
+    ch_dedup_bai  = params.fasta_largeref ? SAMTOOLS_INDEX_DEDUPPED.out.csi : SAMTOOLS_INDEX_DEDUPPED.out.bai
 
     // Finally run flagstat on the dedupped bam
     ch_input_for_samtools_flagstat = ch_dedup_bam.join( ch_dedup_bai )
 
     SAMTOOLS_FLAGSTAT_DEDUPPED(
         ch_input_for_samtools_flagstat
     )
+
     ch_versions       = ch_versions.mix( SAMTOOLS_FLAGSTAT_DEDUPPED.out.versions )
     ch_multiqc_files  = ch_multiqc_files.mix( SAMTOOLS_FLAGSTAT_DEDUPPED.out.flagstat )
     ch_dedup_flagstat = SAMTOOLS_FLAGSTAT_DEDUPPED.out.flagstat

diff --git a/tests/test_microbial.nf.test.snap b/tests/test_microbial.nf.test.snap
@@ -636,9 +636,6 @@
                 "BEDTOOLS_COVERAGE_DEPTH": {
                     "bedtools": "2.31.1)"
                 },
-                "BUILD_INTERVALS": {
-                    "gawk": "5.1.0"
-                },
                 "BWA_ALN": {
                     "bwa": "0.7.18-r1243-dirty"
                 },
@@ -697,9 +694,6 @@
                 "SAMTOOLS_FLAGSTAT_MERGED_LIBRARIES": {
                     "samtools": 1.18
                 },
-                "SAMTOOLS_INDEX": {
-                    "samtools": 1.18
-                },
                 "SAMTOOLS_INDEX_DEDUPPED": {
                     "samtools": 1.18
                 },
@@ -712,9 +706,6 @@
                 "SAMTOOLS_LENGTH_FILTER_INDEX": {
                     "samtools": 1.18
                 },
-                "SAMTOOLS_MERGE_DEDUPPED": {
-                    "samtools": 1.18
-                },
                 "SAMTOOLS_MERGE_LIBRARIES": {
                     "samtools": 1.18
                 },
@@ -724,9 +715,6 @@
                 "SAMTOOLS_SORT_MERGED_LIBRARIES": {
                     "samtools": 1.18
                 },
-                "SAMTOOLS_VIEW": {
-                    "samtools": 1.18
-                },
                 "SAMTOOLS_VIEW_BAM_FILTERING": {
                     "samtools": 1.18
                 },
@@ -740,9 +728,9 @@
         ],
         "meta": {
             "nf-test": "0.9.3",
-            "nextflow": "25.04.8"
+            "nextflow": "25.10.3"
         },
-        "timestamp": "2025-11-07T11:08:47.368306"
+        "timestamp": "2026-02-13T09:52:11.74559929"
     },
     "authentication": {
         "content": [