Skip to content

Commit 4f07969

Browse files
committed
just for three BAMs with scrambled @sq headers
1 parent 41e475f commit 4f07969

File tree

1 file changed

+60
-2
lines changed

1 file changed

+60
-2
lines changed

wdl/pipelines/TechAgnostic/VariantCalling/CallSmallVariants.wdl

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,10 +161,12 @@ workflow Work {
161161
####################################################################################################################################
162162
# but if custom sharding isn't requested, then per-chr sharding is already done, so no need to redo
163163
if (defined(ref_bundle.size_balanced_scatter_intervallists_locators)) {
164+
call ReorderBamHeader { input: input_bam = bam, fasta_fai = ref_bundle.fai }
165+
164166
call ShardWholeGenome.Split as CustomSplitBamForSmallVar {
165167
input:
166-
bam = bam,
167-
bai = bai,
168+
bam = ReorderBamHeader.output_bam,
169+
bai = ReorderBamHeader.output_bai,
168170
ref_dict = ref_bundle.dict,
169171
ref_scatter_interval_list_ids = select_first([ref_bundle.size_balanced_scatter_interval_ids]),
170172
ref_scatter_interval_list_locator = select_first([ref_bundle.size_balanced_scatter_intervallists_locators])
@@ -280,3 +282,59 @@ workflow Work {
280282
call FF.FinalizeToFile as FinalizeClairGTbi { input: outdir = gcs_variants_out_dir, file = RunClair3.clair_gtbi }
281283
}
282284
}
285+
286+
task ReorderBamHeader {
287+
meta {
288+
description: "Reorder BAM header to match the order of contigs in the reference FASTA. This is for just three samples."
289+
}
290+
input {
291+
File input_bam
292+
File fasta_fai
293+
}
294+
output {
295+
File output_bam = "~{prefix}.resorted.bam"
296+
File output_bai = "~{prefix}.resorted.bam.bai"
297+
}
298+
299+
String prefix = basename(input_bam, ".bam")
300+
301+
command <<<
302+
set -euo pipefail
303+
304+
# 1. Extract the current header
305+
samtools view -H ~{input_bam} > original_header.sam
306+
307+
# 2. Create the new @SQ lines from the .fai file
308+
# FAI format: [chr] [length] [offset] ...
309+
awk '{print "@SQ\tSN:"$1"\tLN:"$2}' ~{fasta_fai} > new_sq_lines.txt
310+
311+
# 3. Reconstruct the header:
312+
# - Keep the original @HD line (first line)
313+
# - Add the new @SQ lines
314+
# - Add all other original lines (@RG, @PG, @CO) except old @SQ and @HD
315+
head -n 1 original_header.sam | grep "^@HD" > final_header.sam
316+
cat new_sq_lines.txt >> final_header.sam
317+
grep -vE "^@HD|^@SQ" original_header.sam >> final_header.sam
318+
319+
# 4. Apply new header and sort the records to match the new order
320+
# We use samtools reheader to swap the metadata, then sort to fix record order.
321+
samtools reheader \
322+
final_header.sam \
323+
~{input_bam} \
324+
| samtools sort \
325+
-@ 7 \
326+
-m 4G \
327+
-o ~{prefix}.resorted.bam \
328+
-
329+
330+
# 5. Index the final output
331+
samtools index -@3 ~{prefix}.resorted.bam
332+
>>>
333+
334+
runtime {
335+
cpu: 10
336+
memory: "40 GB"
337+
disks: "local-disk 375 LOCAL"
338+
docker: "us.gcr.io/broad-dsp-lrma/lr-gcloud-samtools:0.1.23"
339+
}
340+
}

0 commit comments

Comments
 (0)