@@ -161,10 +161,12 @@ workflow Work {
161161 ####################################################################################################################################
162162 # but if custom sharding isn't requested, then per-chr sharding is already done, so no need to redo
163163 if (defined (ref_bundle .size_balanced_scatter_intervallists_locators )) {
164+ call ReorderBamHeader { input : input_bam = bam , fasta_fai = ref_bundle .fai }
165+
164166 call ShardWholeGenome .Split as CustomSplitBamForSmallVar {
165167 input :
166- bam = bam ,
167- bai = bai ,
168+ bam = ReorderBamHeader . output_bam ,
169+ bai = ReorderBamHeader . output_bai ,
168170 ref_dict = ref_bundle .dict ,
169171 ref_scatter_interval_list_ids = select_first ([ref_bundle .size_balanced_scatter_interval_ids ]),
170172 ref_scatter_interval_list_locator = select_first ([ref_bundle .size_balanced_scatter_intervallists_locators ])
@@ -280,3 +282,59 @@ workflow Work {
280282 call FF .FinalizeToFile as FinalizeClairGTbi { input : outdir = gcs_variants_out_dir , file = RunClair3 .clair_gtbi }
281283 }
282284}
285+
286+ task ReorderBamHeader {
287+ meta {
288+ description : "Reorder BAM header to match the order of contigs in the reference FASTA. This is for just three samples."
289+ }
290+ input {
291+ File input_bam
292+ File fasta_fai
293+ }
294+ output {
295+ File output_bam = "~{prefix }.resorted.bam"
296+ File output_bai = "~{prefix }.resorted.bam.bai"
297+ }
298+
299+ String prefix = basename (input_bam , ".bam" )
300+
301+ command <<<
302+ set -euo pipefail
303+
304+ # 1. Extract the current header
305+ samtools view -H ~{input_bam } > original_header.sam
306+
307+ # 2. Create the new @SQ lines from the .fai file
308+ # FAI format: [chr] [length] [offset] ...
309+ awk '{print "@SQ\tSN:"$1"\tLN:"$2}' ~{fasta_fai } > new_sq_lines.txt
310+
311+ # 3. Reconstruct the header:
312+ # - Keep the original @HD line (first line)
313+ # - Add the new @SQ lines
314+ # - Add all other original lines (@RG, @PG, @CO) except old @SQ and @HD
315+ head -n 1 original_header.sam | grep "^@HD" > final_header.sam
316+ cat new_sq_lines.txt >> final_header.sam
317+ grep -vE "^@HD|^@SQ" original_header.sam >> final_header.sam
318+
319+ # 4. Apply new header and sort the records to match the new order
320+ # We use samtools reheader to swap the metadata, then sort to fix record order.
321+ samtools reheader \
322+ final_header.sam \
323+ ~{input_bam } \
324+ | samtools sort \
325+ -@ 7 \
326+ -m 4G \
327+ -o ~{prefix }.resorted.bam \
328+ -
329+
330+ # 5. Index the final output
331+ samtools index -@3 ~{prefix }.resorted.bam
332+ >>>
333+
334+ runtime {
335+ cpu : 10
336+ memory : "40 GB"
337+ disks : "local-disk 375 LOCAL"
338+ docker : "us.gcr.io/broad-dsp-lrma/lr-gcloud-samtools:0.1.23"
339+ }
340+ }
0 commit comments