From b6e364e1024a4a118f752df9fda9e656ac9745b1 Mon Sep 17 00:00:00 2001
From: Jose Soto <jsoto@broadinstitute.org>
Date: Wed, 25 Mar 2026 10:49:46 -0400
Subject: [PATCH 01/14] first pass at making the wdl more contig friendly

---
 .../Glimpse2LowPassImputation.changelog.md    |   5 +
 .../Glimpse2LowPassImputation.wdl             | 265 ++++++++++++------
 2 files changed, 183 insertions(+), 87 deletions(-)

diff --git a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.changelog.md b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.changelog.md
index 414b86c857..7fd6668410 100644
--- a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.changelog.md
+++ b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.changelog.md
@@ -1,3 +1,8 @@
+# 0.0.3
+2026-03-25 (Date of Last Commit)
+
+* Reorganize wdl to be able to run on contigs more easily.  Now the workflow is fully driven by the `contigs` input
+
 # 0.0.2
 2026-03-19 (Date of Last Commit)
 
diff --git a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
index 97b275d291..c38e67d583 100644
--- a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
+++ b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
@@ -2,15 +2,12 @@ version 1.0
 
 workflow Glimpse2LowPassImputation {
     input {
-        String pipeline_version = "0.0.2"
+        String pipeline_version = "0.0.3"
 
         # List of files, one per line
-        File reference_chunks
-        File sites_vcf
-        File sites_table
-        File sites_table_index
 
         Array[String] contigs
+        String reference_panel_prefix
 
         File? input_vcf
         File? input_vcf_index
@@ -32,7 +29,8 @@ workflow Glimpse2LowPassImputation {
         # batch size used when calling SplitIntoBatches to make variant calls from the crams
         Int calling_batch_size = 100
 
-        String docker = "us.gcr.io/broad-dsde-methods/glimpse:kachulis_ck_bam_reader_retry_cf5822c"
+        String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.6.0.0"
+        String glimpse_docker = "us.gcr.io/broad-dsde-methods/glimpse:kachulis_ck_bam_reader_retry_cf5822c"
         String docker_extract_num_sites_from_reference_chunk = "us.gcr.io/broad-dsde-methods/glimpse_extract_num_sites_from_reference_chunks:michaelgatzen_edc7f3a"
     }
 
@@ -45,114 +43,138 @@ workflow Glimpse2LowPassImputation {
 
     Int n_samples = select_first([CountSamples.nSamples, length(select_first([crams]))])
 
-    if (defined(crams)) {
-        if (length(select_first([crams])) > 1) {
-            call SplitIntoBatches {
-                input:
-                    batch_size = calling_batch_size,
-                    crams = select_first([crams]),
-                    cram_indices = select_first([cram_indices]),
-                    sample_ids = sample_ids
+    scatter(contig in contigs) {
+        File sites_vcf = reference_panel_prefix + "sites." + contig + ".vcf.gz"
+        File sites_vcf_index =reference_panel_prefix + "sites." + contig + ".vcf.gz.tbi"
+        File sites_table = reference_panel_prefix + "sites_table." + contig + ".vcf.gz"
+        File sites_table_index = reference_panel_prefix + "sites_table." + contig + ".vcf.gz.tbi"
+        File reference_chunks = reference_panel_prefix + "reference_chunks." + contig + ".txt"
+
+        if (defined(crams)) {
+            if (length(select_first([crams])) > 1) {
+                call SplitIntoBatches {
+                    input:
+                        batch_size = calling_batch_size,
+                        crams = select_first([crams]),
+                        cram_indices = select_first([cram_indices]),
+                        sample_ids = sample_ids
+                }
             }
-        }
-        Array[Array[String]] crams_batches = select_first([SplitIntoBatches.crams_batches, [select_first([crams])]])
-        Array[Array[String]] cram_indices_batches = select_first([SplitIntoBatches.cram_indices_batches, [select_first([cram_indices])]])
-        Array[Array[String]] sample_ids_batches = select_first([SplitIntoBatches.sample_ids_batches, [select_first([sample_ids])]])
-
-        scatter(i in range(length(crams_batches))) {
-            call BcftoolsMpileup {
-                input:
-                    crams = crams_batches[i],
-                    cram_indices = cram_indices_batches[i],
-                    sample_ids = sample_ids_batches[i],
-                    fasta = fasta,
-                    fasta_index = fasta_index,
-                    call_indels = call_indels,
-                    sites_vcf = sites_vcf,
+            Array[Array[String]] crams_batches = select_first([SplitIntoBatches.crams_batches, [select_first([crams])]])
+            Array[Array[String]] cram_indices_batches = select_first([SplitIntoBatches.cram_indices_batches, [select_first([cram_indices])]])
+            Array[Array[String]] sample_ids_batches = select_first([SplitIntoBatches.sample_ids_batches, [select_first([sample_ids])]])
+
+            scatter(i in range(length(crams_batches))) {
+                call BcftoolsMpileup {
+                    input:
+                        crams = crams_batches[i],
+                        cram_indices = cram_indices_batches[i],
+                        sample_ids = sample_ids_batches[i],
+                        fasta = fasta,
+                        fasta_index = fasta_index,
+                        call_indels = call_indels,
+                        sites_vcf = sites_vcf,
+                }
+
+                call BcftoolsCall {
+                    input:
+                        mpileup_bcf = BcftoolsMpileup.output_bcf,
+                        sites_table = sites_table,
+                        sites_table_index = sites_table_index,
+                }
+
+                call BcftoolsNorm {
+                    input:
+                        calls_bcf = BcftoolsCall.output_bcf,
+                }
             }
 
-            call BcftoolsCall {
-                input:
-                    mpileup_bcf = BcftoolsMpileup.output_bcf,
-                    sites_table = sites_table,
-                    sites_table_index = sites_table_index,
+            if (length(BcftoolsNorm.output_vcf) > 1) {
+                call BcftoolsMerge {
+                    input:
+                        vcfs = BcftoolsNorm.output_vcf,
+                        vcf_indices = BcftoolsNorm.output_vcf_index,
+                        output_basename = output_basename
+                }
             }
 
-            call BcftoolsNorm {
-                input:
-                    calls_bcf = BcftoolsCall.output_bcf,
-            }
+            File merged_vcf = select_first([BcftoolsMerge.merged_vcf, BcftoolsNorm.output_vcf[0]])
+            File merged_vcf_index = select_first([BcftoolsMerge.merged_vcf_index, BcftoolsNorm.output_vcf_index[0]])
+        }
+
+        ## this task is used to grab the reference chunk but does not affect memory usage of glimpsePhase.
+        ## still tbd which method makes the most sense cost wise
+        call ComputeShardsAndMemoryPerShard {
+            input:
+                reference_chunks_memory = reference_chunks,
+                contigs = contigs,
+                n_samples = n_samples
         }
 
-        if (length(BcftoolsNorm.output_vcf) > 1) {
-            call BcftoolsMerge {
+        scatter (reference_chunk_index in range(length(ComputeShardsAndMemoryPerShard.reference_chunk_file_paths))) {
+
+            call GlimpsePhase {
                 input:
-                    vcfs = BcftoolsNorm.output_vcf,
-                    vcf_indices = BcftoolsNorm.output_vcf_index,
-                    output_basename = output_basename
+                    reference_chunk = ComputeShardsAndMemoryPerShard.reference_chunk_file_paths[reference_chunk_index],
+                    input_vcf = select_first([merged_vcf,input_vcf]),
+                    input_vcf_index = select_first([merged_vcf_index,input_vcf_index]),
+                    impute_reference_only_variants = impute_reference_only_variants,
+                    n_burnin = n_burnin,
+                    n_main = n_main,
+                    effective_population_size = effective_population_size,
+                    call_indels = call_indels,
+                    sample_ids = sample_ids,
+                    fasta = fasta,
+                    fasta_index = fasta_index,
+                    docker = glimpse_docker
             }
         }
 
-        File merged_vcf = select_first([BcftoolsMerge.merged_vcf, BcftoolsNorm.output_vcf[0]])
-        File merged_vcf_index = select_first([BcftoolsMerge.merged_vcf_index, BcftoolsNorm.output_vcf_index[0]])
+        call GlimpseLigate {
+            input:
+                imputed_chunks = GlimpsePhase.imputed_vcf,
+                imputed_chunks_indices = GlimpsePhase.imputed_vcf_index,
+                output_basename = output_basename,
+                ref_dict = ref_dict,
+                docker = glimpse_docker
+        }
+        Array[File] contig_coverage_metrics = select_all(GlimpsePhase.coverage_metrics)
     }
 
-    ## this task is used to grab the reference chunk but does not affect memory usage of glimpsePhase.
-    ## still tbd which method makes the most sense cost wise
-    call ComputeShardsAndMemoryPerShard {
+    call GatherVcfsNoIndex {
         input:
-            reference_chunks_memory = reference_chunks,
-            contigs = contigs,
-            n_samples = n_samples
-    }
-
-    scatter (reference_chunk_index in range(length(ComputeShardsAndMemoryPerShard.reference_chunk_file_paths))) {
-
-        call GlimpsePhase {
-            input:
-                reference_chunk = ComputeShardsAndMemoryPerShard.reference_chunk_file_paths[reference_chunk_index],
-                input_vcf = select_first([merged_vcf,input_vcf]),
-                input_vcf_index = select_first([merged_vcf_index,input_vcf_index]),
-                impute_reference_only_variants = impute_reference_only_variants,
-                n_burnin = n_burnin,
-                n_main = n_main,
-                effective_population_size = effective_population_size,
-                call_indels = call_indels,
-                sample_ids = sample_ids,
-                fasta = fasta,
-                fasta_index = fasta_index,
-                docker = docker
-        }
+            input_vcfs = GlimpseLigate.imputed_vcf,
+            output_vcf_basename = output_basename + ".imputed",
+            gatk_docker = gatk_docker
     }
 
-    call GlimpseLigate {
+    call CreateVcfIndexAndMd5 {
         input:
-            imputed_chunks = GlimpsePhase.imputed_vcf,
-            imputed_chunks_indices = GlimpsePhase.imputed_vcf_index,
-            output_basename = output_basename,
-            ref_dict = ref_dict,
-            docker = docker
+            vcf_input = GatherVcfsNoIndex.output_vcf,
+            gatk_docker = gatk_docker,
+            preemptible = 0
     }
 
-    if (length(select_all(GlimpsePhase.coverage_metrics)) > 0) {
+    Array[File] genome_coverage_metrics = flatten(contig_coverage_metrics)
+    if (length(genome_coverage_metrics) > 0) {
         call CombineCoverageMetrics {
             input:
-                cov_metrics = select_all(GlimpsePhase.coverage_metrics),
+                cov_metrics = genome_coverage_metrics,
                 output_basename = output_basename
         }
     }
 
     call CollectQCMetrics {
         input:
-            imputed_vcf = GlimpseLigate.imputed_vcf,
+            imputed_vcf = GatherVcfsNoIndex.output_vcf,
             output_basename = output_basename
     }
 
 
     output {
-        File imputed_vcf = GlimpseLigate.imputed_vcf
-        File imputed_vcf_index = GlimpseLigate.imputed_vcf_index
-        File imputed_vcf_md5sum = GlimpseLigate.imputed_vcf_md5sum
+        File imputed_vcf = CreateVcfIndexAndMd5.output_vcf
+        File imputed_vcf_index = CreateVcfIndexAndMd5.output_vcf_index
+        File imputed_vcf_md5sum = CreateVcfIndexAndMd5.output_vcf_md5sum
 
         File qc_metrics = CollectQCMetrics.qc_metrics
         File? coverage_metrics = CombineCoverageMetrics.coverage_metrics
@@ -537,9 +559,6 @@ task GlimpseLigate {
         bcftools view -h --no-version ligated.vcf.gz > old_header.vcf
         java -jar /picard.jar UpdateVcfSequenceDictionary -I old_header.vcf --SD ~{ref_dict} -O new_header.vcf
         bcftools reheader -h new_header.vcf -o ~{output_basename}.imputed.vcf.gz ligated.vcf.gz
-        tabix ~{output_basename}.imputed.vcf.gz
-
-        md5sum ~{output_basename}.imputed.vcf.gz | awk '{ print $1 }' > ~{output_basename}.imputed.vcf.gz.md5sum
     >>>
 
     runtime {
@@ -553,8 +572,6 @@ task GlimpseLigate {
 
     output {
         File imputed_vcf = "~{output_basename}.imputed.vcf.gz"
-        File imputed_vcf_index = "~{output_basename}.imputed.vcf.gz.tbi"
-        File imputed_vcf_md5sum = "~{output_basename}.imputed.vcf.gz.md5sum"
     }
 }
 
@@ -678,3 +695,77 @@ task CombineCoverageMetrics
         File coverage_metrics="~{output_basename}.coverage_metrics.txt"
     }
 }
+
+task GatherVcfsNoIndex {
+    input {
+        Array[File] input_vcfs
+        String output_vcf_basename
+
+        String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.6.1.0"
+        Int cpu = 2
+        Int memory_mb = 10000
+        Int disk_size_gb = ceil(3*size(input_vcfs, "GiB")) + 10
+    }
+    Int command_mem = memory_mb - 1500
+    Int max_heap = memory_mb - 1000
+
+    command <<<
+        set -e -o pipefail
+
+        gatk --java-options "-Xms~{command_mem}m -Xmx~{max_heap}m" \
+        GatherVcfs \
+        -I ~{sep=' -I ' input_vcfs} \
+        --REORDER_INPUT_BY_FIRST_VARIANT \
+        -O ~{output_vcf_basename}.vcf.gz
+    >>>
+    runtime {
+        docker: gatk_docker
+        disks: "local-disk ${disk_size_gb} SSD"
+        memory: "${memory_mb} MiB"
+        cpu: cpu
+        maxRetries: 1
+        noAddress: true
+    }
+    output {
+        File output_vcf = "~{output_vcf_basename}.vcf.gz"
+    }
+}
+
+task CreateVcfIndexAndMd5 {
+    input {
+        File vcf_input
+
+        Int disk_size_gb = ceil(1.1*size(vcf_input, "GiB")) + 10
+        Int cpu = 1
+        Int memory_mb = 6000
+        String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.5.0.0"
+        Int preemptible = 3
+    }
+    Int command_mem = memory_mb - 1500
+    Int max_heap = memory_mb - 1000
+
+    String vcf_basename = basename(vcf_input)
+
+    command <<<
+        set -e -o pipefail
+
+        ln -sf ~{vcf_input} ~{vcf_basename}
+
+        bcftools index -t ~{vcf_basename}
+        md5sum ~{vcf_basename} | awk '{ print $1 }' > ~{vcf_basename}.md5sum
+    >>>
+    runtime {
+        docker: gatk_docker
+        disks: "local-disk ${disk_size_gb} SSD"
+        memory: "${memory_mb} MiB"
+        cpu: cpu
+        preemptible: preemptible
+        maxRetries: 1
+        noAddress: true
+    }
+    output {
+        File output_vcf = "~{vcf_basename}"
+        File output_vcf_index = "~{vcf_basename}.tbi"
+        File output_vcf_md5sum = "~{vcf_basename}.md5sum"
+    }
+}

From 75fe83c683f596b8e13db8f3d13f266a12e44c10 Mon Sep 17 00:00:00 2001
From: Jose Soto <jsoto@broadinstitute.org>
Date: Wed, 25 Mar 2026 10:58:24 -0400
Subject: [PATCH 02/14] drop contig filtering from ComputeShardsAndMemoryPerShard
 now that reference chunks are read per contig

---
 .../Glimpse2LowPassImputation.wdl                  | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
index c38e67d583..64553642da 100644
--- a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
+++ b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
@@ -107,7 +107,6 @@ workflow Glimpse2LowPassImputation {
         call ComputeShardsAndMemoryPerShard {
             input:
                 reference_chunks_memory = reference_chunks,
-                contigs = contigs,
                 n_samples = n_samples
         }
 
@@ -231,7 +230,6 @@ task SplitIntoBatches {
 task ComputeShardsAndMemoryPerShard {
     input {
         File reference_chunks_memory
-        Array[String] contigs
         Int n_samples
     }
 
@@ -243,17 +241,13 @@ task ComputeShardsAndMemoryPerShard {
 
         df = pd.read_csv('~{reference_chunks_memory}', sep='\t', header=None, names=['contig', 'reference_shard', 'base_gb', 'slope_per_sample_gb'])
 
-        # filter dataframe by contig list
-        chromosomes_to_filter = ["~{sep='", "' contigs}"]
-        filtered_df = df[df['contig'].isin(chromosomes_to_filter)]
-
         # write out reference shards to process
-        filtered_df['reference_shard'].to_csv('reference_shard_file_paths.tsv', sep='\t', index=False, header=None)
+        df['reference_shard'].to_csv('reference_shard_file_paths.tsv', sep='\t', index=False, header=None)
 
         # calculate memory usage and save to file
-        filtered_df['mem_gb'] = filtered_df['base_gb'] + filtered_df['slope_per_sample_gb'] * ~{n_samples}
-        filtered_df['mem_gb'] = filtered_df['mem_gb'].apply(lambda x: min(256, int(np.ceil(x))))  # cap at 256 GB
-        filtered_df['mem_gb'].to_csv('memory_per_chunk.tsv', sep='\t', index=False, header=None)
+        df['mem_gb'] = filtered_df['base_gb'] + filtered_df['slope_per_sample_gb'] * ~{n_samples}
+        df['mem_gb'] = filtered_df['mem_gb'].apply(lambda x: min(256, int(np.ceil(x))))  # cap at 256 GB
+        df['mem_gb'].to_csv('memory_per_chunk.tsv', sep='\t', index=False, header=None)
         EOF
     >>>
 

From acc1d35ddd92ca3a3cfdbe3c9ae50de72dabf48c Mon Sep 17 00:00:00 2001
From: Jose Soto <jsoto@broadinstitute.org>
Date: Wed, 25 Mar 2026 11:04:01 -0400
Subject: [PATCH 03/14] only need to run split batches once per submission

---
 .../Glimpse2LowPassImputation.wdl             | 21 +++++++++++--------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
index 64553642da..0bf8f26004 100644
--- a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
+++ b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
@@ -43,6 +43,18 @@ workflow Glimpse2LowPassImputation {
 
     Int n_samples = select_first([CountSamples.nSamples, length(select_first([crams]))])
 
+    if (defined(crams)) {
+        if (length(select_first([crams])) > 1) {
+            call SplitIntoBatches {
+                input:
+                    batch_size = calling_batch_size,
+                    crams = select_first([crams]),
+                    cram_indices = select_first([cram_indices]),
+                    sample_ids = sample_ids
+            }
+        }
+    }
+
     scatter(contig in contigs) {
         File sites_vcf = reference_panel_prefix + "sites." + contig + ".vcf.gz"
         File sites_vcf_index =reference_panel_prefix + "sites." + contig + ".vcf.gz.tbi"
@@ -51,15 +63,6 @@ workflow Glimpse2LowPassImputation {
         File reference_chunks = reference_panel_prefix + "reference_chunks." + contig + ".txt"
 
         if (defined(crams)) {
-            if (length(select_first([crams])) > 1) {
-                call SplitIntoBatches {
-                    input:
-                        batch_size = calling_batch_size,
-                        crams = select_first([crams]),
-                        cram_indices = select_first([cram_indices]),
-                        sample_ids = sample_ids
-                }
-            }
             Array[Array[String]] crams_batches = select_first([SplitIntoBatches.crams_batches, [select_first([crams])]])
             Array[Array[String]] cram_indices_batches = select_first([SplitIntoBatches.cram_indices_batches, [select_first([cram_indices])]])
             Array[Array[String]] sample_ids_batches = select_first([SplitIntoBatches.sample_ids_batches, [select_first([sample_ids])]])

From 28e8813199b5b75a8a70f21d36a6bd53f1c6daa4 Mon Sep 17 00:00:00 2001
From: Jose Soto <jsoto@broadinstitute.org>
Date: Wed, 25 Mar 2026 11:35:51 -0400
Subject: [PATCH 04/14] fix compute memory and shard task (leftover filtered_df references)

---
 .../glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
index 0bf8f26004..36debc1a79 100644
--- a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
+++ b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
@@ -248,8 +248,8 @@ task ComputeShardsAndMemoryPerShard {
         df['reference_shard'].to_csv('reference_shard_file_paths.tsv', sep='\t', index=False, header=None)
 
         # calculate memory usage and save to file
-        df['mem_gb'] = filtered_df['base_gb'] + filtered_df['slope_per_sample_gb'] * ~{n_samples}
-        df['mem_gb'] = filtered_df['mem_gb'].apply(lambda x: min(256, int(np.ceil(x))))  # cap at 256 GB
+        df['mem_gb'] = df['base_gb'] + df['slope_per_sample_gb'] * ~{n_samples}
+        df['mem_gb'] = df['mem_gb'].apply(lambda x: min(256, int(np.ceil(x))))  # cap at 256 GB
         df['mem_gb'].to_csv('memory_per_chunk.tsv', sep='\t', index=False, header=None)
         EOF
     >>>

From fd8917723383cbd9661fc64dd9220887a927ab64 Mon Sep 17 00:00:00 2001
From: Jose Soto <jsoto@broadinstitute.org>
Date: Thu, 26 Mar 2026 09:26:42 -0400
Subject: [PATCH 05/14] fix sites table suffix and increase memory for bcftools
 commands

---
 .../low_pass_imputation/Glimpse2LowPassImputation.wdl  | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
index 36debc1a79..97fefc4123 100644
--- a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
+++ b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
@@ -58,8 +58,8 @@ workflow Glimpse2LowPassImputation {
     scatter(contig in contigs) {
         File sites_vcf = reference_panel_prefix + "sites." + contig + ".vcf.gz"
         File sites_vcf_index =reference_panel_prefix + "sites." + contig + ".vcf.gz.tbi"
-        File sites_table = reference_panel_prefix + "sites_table." + contig + ".vcf.gz"
-        File sites_table_index = reference_panel_prefix + "sites_table." + contig + ".vcf.gz.tbi"
+        File sites_table = reference_panel_prefix + "sites_table." + contig + ".gz"
+        File sites_table_index = reference_panel_prefix + "sites_table." + contig + ".gz.tbi"
         File reference_chunks = reference_panel_prefix + "reference_chunks." + contig + ".txt"
 
         if (defined(crams)) {
@@ -276,7 +276,7 @@ task BcftoolsMpileup {
         File sites_vcf
 
         Int seed = 12345
-        Int mem_gb = 4
+        Int mem_gb = 6
         Int cpu = 1
         Int preemptible = 0
     }
@@ -318,7 +318,7 @@ task BcftoolsCall {
         File sites_table
         File sites_table_index
 
-        Int mem_gb = 4
+        Int mem_gb = 6
         Int cpu = 1
         Int preemptible = 3
     }
@@ -350,7 +350,7 @@ task BcftoolsNorm {
     input {
         File calls_bcf
 
-        Int mem_gb = 4
+        Int mem_gb = 6
         Int cpu = 1
         Int preemptible = 3
     }

From e0170d8d64b44ef28e6607414dae3fb60bd0d4c7 Mon Sep 17 00:00:00 2001
From: Jose Soto <jsoto@broadinstitute.org>
Date: Fri, 27 Mar 2026 10:52:14 -0400
Subject: [PATCH 06/14] remove unused optional inputs that are failing and add
 max retries to bcftools tasks

---
 .../Glimpse2LowPassImputation.wdl             | 34 ++++++++-----------
 1 file changed, 14 insertions(+), 20 deletions(-)

diff --git a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
index 97fefc4123..d5c19738a4 100644
--- a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
+++ b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
@@ -22,16 +22,12 @@ workflow Glimpse2LowPassImputation {
 
         Boolean impute_reference_only_variants = false
         Boolean call_indels = false
-        Int? n_burnin
-        Int? n_main
-        Int? effective_population_size
 
         # batch size used when calling SplitIntoBatches to make variant calls from the crams
         Int calling_batch_size = 100
 
         String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.6.0.0"
         String glimpse_docker = "us.gcr.io/broad-dsde-methods/glimpse:kachulis_ck_bam_reader_retry_cf5822c"
-        String docker_extract_num_sites_from_reference_chunk = "us.gcr.io/broad-dsde-methods/glimpse_extract_num_sites_from_reference_chunks:michaelgatzen_edc7f3a"
     }
 
     if (defined(input_vcf)) {
@@ -121,9 +117,6 @@ workflow Glimpse2LowPassImputation {
                     input_vcf = select_first([merged_vcf,input_vcf]),
                     input_vcf_index = select_first([merged_vcf_index,input_vcf_index]),
                     impute_reference_only_variants = impute_reference_only_variants,
-                    n_burnin = n_burnin,
-                    n_main = n_main,
-                    effective_population_size = effective_population_size,
                     call_indels = call_indels,
                     sample_ids = sample_ids,
                     fasta = fasta,
@@ -279,12 +272,11 @@ task BcftoolsMpileup {
         Int mem_gb = 6
         Int cpu = 1
         Int preemptible = 0
+        Int max_retries = 3
     }
 
     Int disk_size_gb = ceil(1.5*size(crams, "GiB") + size(fasta, "GiB") + size(sites_vcf, "GiB")) + 10
 
-    String out_basename = "batch"
-
     command <<<
         set -xeuo pipefail
 
@@ -304,6 +296,8 @@ task BcftoolsMpileup {
         memory: mem_gb + " GiB"
         cpu: cpu
         preemptible: preemptible
+        maxRetries: max_retries
+        maxRetries: max_retries
     }
 
     output {
@@ -321,12 +315,11 @@ task BcftoolsCall {
         Int mem_gb = 6
         Int cpu = 1
         Int preemptible = 3
+        Int max_retries = 3
     }
 
     Int disk_size_gb = ceil(3*size(mpileup_bcf, "GiB") + size(sites_table, "GiB")) + 10
 
-    String out_basename = "batch"
-
     command <<<
         set -xeuo pipefail
 
@@ -339,6 +332,7 @@ task BcftoolsCall {
         memory: mem_gb + " GiB"
         cpu: cpu
         preemptible: preemptible
+        maxRetries: max_retries
     }
 
     output {
@@ -353,18 +347,17 @@ task BcftoolsNorm {
         Int mem_gb = 6
         Int cpu = 1
         Int preemptible = 3
+        Int max_retries = 3
     }
 
     Int disk_size_gb = ceil(3*size(calls_bcf, "GiB")) + 10
 
-    String out_basename = "batch"
-
     command <<<
         set -xeuo pipefail
 
 
-        bcftools norm -m -both -Oz -o ~{out_basename}.vcf.gz ~{calls_bcf}
-        bcftools index -t ~{out_basename}.vcf.gz
+        bcftools norm -m -both -Oz -o normalized.vcf.gz ~{calls_bcf}
+        bcftools index -t normalized.vcf.gz
     >>>
 
     runtime {
@@ -373,11 +366,12 @@ task BcftoolsNorm {
         memory: mem_gb + " GiB"
         cpu: cpu
         preemptible: preemptible
+        maxRetries: max_retries
     }
 
     output {
-        File output_vcf = "~{out_basename}.vcf.gz"
-        File output_vcf_index = "~{out_basename}.vcf.gz.tbi"
+        File output_vcf = "normalized.vcf.gz"
+        File output_vcf_index = "normalized.vcf.gz.tbi"
     }
 }
 
@@ -385,9 +379,10 @@ task BcftoolsMerge {
     input {
         Array[File] vcfs
         Array[File] vcf_indices
-        Int mem_gb = 4
+        Int mem_gb = 6
         Int cpu = 1
         Int preemptible = 0
+        Int max_retries = 3
 
         String output_basename
     }
@@ -406,6 +401,7 @@ task BcftoolsMerge {
         memory: mem_gb + " GiB"
         cpu: cpu
         preemptible: preemptible
+        maxRetries: max_retries
     }
 
     output {
@@ -738,8 +734,6 @@ task CreateVcfIndexAndMd5 {
         String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.5.0.0"
         Int preemptible = 3
     }
-    Int command_mem = memory_mb - 1500
-    Int max_heap = memory_mb - 1000
 
     String vcf_basename = basename(vcf_input)
 

From 96151a6ea170c512fc4c8906bf391355869e820d Mon Sep 17 00:00:00 2001
From: Jose Soto <jsoto@broadinstitute.org>
Date: Sat, 28 Mar 2026 10:08:16 -0400
Subject: [PATCH 07/14] more memory for bcftools call task and try to fix
 optional variable passed down to nested scatters

---
 .../low_pass_imputation/Glimpse2LowPassImputation.wdl     | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
index d5c19738a4..765bb1ce14 100644
--- a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
+++ b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
@@ -58,6 +58,9 @@ workflow Glimpse2LowPassImputation {
         File sites_table_index = reference_panel_prefix + "sites_table." + contig + ".gz.tbi"
         File reference_chunks = reference_panel_prefix + "reference_chunks." + contig + ".txt"
 
+        File? input_vcf = input_vcf
+        File? input_vcf_index = input_vcf_index
+
         if (defined(crams)) {
             Array[Array[String]] crams_batches = select_first([SplitIntoBatches.crams_batches, [select_first([crams])]])
             Array[Array[String]] cram_indices_batches = select_first([SplitIntoBatches.cram_indices_batches, [select_first([cram_indices])]])
@@ -111,6 +114,9 @@ workflow Glimpse2LowPassImputation {
 
         scatter (reference_chunk_index in range(length(ComputeShardsAndMemoryPerShard.reference_chunk_file_paths))) {
 
+            File? input_vcf = input_vcf
+            File? input_vcf_index = input_vcf_index
+
             call GlimpsePhase {
                 input:
                     reference_chunk = ComputeShardsAndMemoryPerShard.reference_chunk_file_paths[reference_chunk_index],
@@ -312,7 +318,7 @@ task BcftoolsCall {
         File sites_table
         File sites_table_index
 
-        Int mem_gb = 6
+        Int mem_gb = 12
         Int cpu = 1
         Int preemptible = 3
         Int max_retries = 3

From e6b84a36b5397b39040a3fc13f6175a0625463a5 Mon Sep 17 00:00:00 2001
From: Jose Soto <jsoto@broadinstitute.org>
Date: Sat, 28 Mar 2026 10:11:23 -0400
Subject: [PATCH 08/14] this is dumb: give the scatter-local input_vcf copies unique names

---
 .../low_pass_imputation/Glimpse2LowPassImputation.wdl     | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
index 765bb1ce14..40375d63e6 100644
--- a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
+++ b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
@@ -58,8 +58,8 @@ workflow Glimpse2LowPassImputation {
         File sites_table_index = reference_panel_prefix + "sites_table." + contig + ".gz.tbi"
         File reference_chunks = reference_panel_prefix + "reference_chunks." + contig + ".txt"
 
-        File? input_vcf = input_vcf
-        File? input_vcf_index = input_vcf_index
+        File? input_vcf_scatter_1 = input_vcf
+        File? input_vcf_scatter_1_index = input_vcf_index
 
         if (defined(crams)) {
             Array[Array[String]] crams_batches = select_first([SplitIntoBatches.crams_batches, [select_first([crams])]])
@@ -114,8 +114,8 @@ workflow Glimpse2LowPassImputation {
 
         scatter (reference_chunk_index in range(length(ComputeShardsAndMemoryPerShard.reference_chunk_file_paths))) {
 
-            File? input_vcf = input_vcf
-            File? input_vcf_index = input_vcf_index
+            File? input_vcf_scatter_2 = input_vcf_scatter_1
+            File? input_vcf_scatter_2_index = input_vcf_scatter_1_index
 
             call GlimpsePhase {
                 input:

From fe66126f087ef7d3318795d3309a5992a5a1bc8f Mon Sep 17 00:00:00 2001
From: Jose Soto <jsoto@broadinstitute.org>
Date: Sat, 28 Mar 2026 10:12:15 -0400
Subject: [PATCH 09/14] use dumbest fix i can think of

---
 .../glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
index 40375d63e6..fe20266ac4 100644
--- a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
+++ b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
@@ -120,8 +120,8 @@ workflow Glimpse2LowPassImputation {
             call GlimpsePhase {
                 input:
                     reference_chunk = ComputeShardsAndMemoryPerShard.reference_chunk_file_paths[reference_chunk_index],
-                    input_vcf = select_first([merged_vcf,input_vcf]),
-                    input_vcf_index = select_first([merged_vcf_index,input_vcf_index]),
+                    input_vcf = select_first([merged_vcf,input_vcf_scatter_2]),
+                    input_vcf_index = select_first([merged_vcf_index,input_vcf_scatter_2_index]),
                     impute_reference_only_variants = impute_reference_only_variants,
                     call_indels = call_indels,
                     sample_ids = sample_ids,

From 7fde2e2268fec48530eeaebf09e5477a585d461e Mon Sep 17 00:00:00 2001
From: Jose Soto <jsoto@broadinstitute.org>
Date: Sun, 29 Mar 2026 01:39:43 -0400
Subject: [PATCH 10/14] figure out more optional inputs

---
 .../Glimpse2LowPassImputation.wdl                  | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
index fe20266ac4..7fdf70da4a 100644
--- a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
+++ b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
@@ -58,9 +58,6 @@ workflow Glimpse2LowPassImputation {
         File sites_table_index = reference_panel_prefix + "sites_table." + contig + ".gz.tbi"
         File reference_chunks = reference_panel_prefix + "reference_chunks." + contig + ".txt"
 
-        File? input_vcf_scatter_1 = input_vcf
-        File? input_vcf_scatter_1_index = input_vcf_index
-
         if (defined(crams)) {
             Array[Array[String]] crams_batches = select_first([SplitIntoBatches.crams_batches, [select_first([crams])]])
             Array[Array[String]] cram_indices_batches = select_first([SplitIntoBatches.cram_indices_batches, [select_first([cram_indices])]])
@@ -100,8 +97,8 @@ workflow Glimpse2LowPassImputation {
                 }
             }
 
-            File merged_vcf = select_first([BcftoolsMerge.merged_vcf, BcftoolsNorm.output_vcf[0]])
-            File merged_vcf_index = select_first([BcftoolsMerge.merged_vcf_index, BcftoolsNorm.output_vcf_index[0]])
+            File phase_input_vcf = select_first([BcftoolsMerge.merged_vcf, BcftoolsNorm.output_vcf[0], input_vcf])
+            File phase_input_vcf_index = select_first([BcftoolsMerge.merged_vcf_index, BcftoolsNorm.output_vcf_index[0],input_vcf_index])
         }
 
         ## this task is used to grab the reference chunk but does not affect memory usage of glimpsePhase.
@@ -114,14 +111,11 @@ workflow Glimpse2LowPassImputation {
 
         scatter (reference_chunk_index in range(length(ComputeShardsAndMemoryPerShard.reference_chunk_file_paths))) {
 
-            File? input_vcf_scatter_2 = input_vcf_scatter_1
-            File? input_vcf_scatter_2_index = input_vcf_scatter_1_index
-
             call GlimpsePhase {
                 input:
                     reference_chunk = ComputeShardsAndMemoryPerShard.reference_chunk_file_paths[reference_chunk_index],
-                    input_vcf = select_first([merged_vcf,input_vcf_scatter_2]),
-                    input_vcf_index = select_first([merged_vcf_index,input_vcf_scatter_2_index]),
+                    input_vcf = phase_input_vcf,
+                    input_vcf_index = phase_input_vcf_index,
                     impute_reference_only_variants = impute_reference_only_variants,
                     call_indels = call_indels,
                     sample_ids = sample_ids,

From e63bf8f062a9b3e5e6062bf385287cfda448c31b Mon Sep 17 00:00:00 2001
From: GitHub Action <action@github.com>
Date: Mon, 30 Mar 2026 13:38:02 +0000
Subject: [PATCH 11/14] Updated pipeline_versions.txt with all pipeline version
 information

---
 pipeline_versions.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipeline_versions.txt b/pipeline_versions.txt
index c0d8e61068..e9ccdc4382 100644
--- a/pipeline_versions.txt
+++ b/pipeline_versions.txt
@@ -4,7 +4,7 @@ BuildIndices	 5.1.0	2026-02-13
 CramToUnmappedBams	 1.1.3	2024-08-02 
 ExomeGermlineSingleSample	 3.2.7	2026-01-21 
 ExomeReprocessing	 3.3.7	2026-01-21 
-Glimpse2LowPassImputation	 0.0.2	2026-03-19 
+Glimpse2LowPassImputation	 0.0.3	2026-03-25 
 IlluminaGenotypingArray	 1.12.27	2026-01-21 
 Imputation	 1.1.23	2025-10-03 
 ImputationBeagle	 3.0.1	2026-02-23 

From ad8864619443e570f29770966602f4375ab0ee2d Mon Sep 17 00:00:00 2001
From: Jose Soto <jsoto@broadinstitute.org>
Date: Mon, 30 Mar 2026 10:48:42 -0400
Subject: [PATCH 12/14] update changelog

---
 .../low_pass_imputation/Glimpse2LowPassImputation.changelog.md   | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.changelog.md b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.changelog.md
index 7fd6668410..803cdc7954 100644
--- a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.changelog.md
+++ b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.changelog.md
@@ -2,6 +2,7 @@
 2026-03-25 (Date of Last Commit)
 
 * Reorganize wdl to be able to run on contigs more easily.  Now the workflow is fully driven by the `contigs` input
+* The wdl now expects all reference-related files to live under the same cloud base path
 
 # 0.0.2
 2026-03-19 (Date of Last Commit)

From 20151cb4335eef29a44716bfbe91f1dada535847 Mon Sep 17 00:00:00 2001
From: Jose Soto <jsoto@broadinstitute.org>
Date: Wed, 1 Apr 2026 10:27:46 -0400
Subject: [PATCH 13/14] pr feedback1

---
 .../low_pass_imputation/Glimpse2LowPassImputation.wdl        | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
index 7fdf70da4a..5173bb47ba 100644
--- a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
+++ b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
@@ -4,9 +4,9 @@ workflow Glimpse2LowPassImputation {
     input {
         String pipeline_version = "0.0.3"
 
-        # List of files, one per line
-
         Array[String] contigs
+
+        # Path to the directory that contains the sites VCF, sites table, and reference chunks files. Must end with a "/".
         String reference_panel_prefix
 
         File? input_vcf
@@ -297,7 +297,6 @@ task BcftoolsMpileup {
         cpu: cpu
         preemptible: preemptible
         maxRetries: max_retries
-        maxRetries: max_retries
     }
 
     output {

From bc32bc346e702ca03b24564cb5198503ff1640ac Mon Sep 17 00:00:00 2001
From: Jose Soto <jsoto@broadinstitute.org>
Date: Wed, 1 Apr 2026 10:29:02 -0400
Subject: [PATCH 14/14] add space after comma

---
 .../glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
index 5173bb47ba..5e99b82089 100644
--- a/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
+++ b/pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl
@@ -98,7 +98,7 @@ workflow Glimpse2LowPassImputation {
             }
 
             File phase_input_vcf = select_first([BcftoolsMerge.merged_vcf, BcftoolsNorm.output_vcf[0], input_vcf])
-            File phase_input_vcf_index = select_first([BcftoolsMerge.merged_vcf_index, BcftoolsNorm.output_vcf_index[0],input_vcf_index])
+            File phase_input_vcf_index = select_first([BcftoolsMerge.merged_vcf_index, BcftoolsNorm.output_vcf_index[0], input_vcf_index])
         }
 
         ## this task is used to grab the reference chunk but does not affect memory usage of glimpsePhase.