@@ -142,7 +142,7 @@ task SortCompressIndexVcf {
142142 RuntimeAttr ? runtime_attr_override
143143 }
144144
145- Int disk_size = 10 + 10 *ceil (2 *size (input_vcf , "GB" ))
145+ Int disk_size = 10 + 10 *ceil (3 *size (input_vcf , "GB" ))
146146
147147 String output_vcf = basename (input_vcf ) + ".gz"
148148
@@ -161,11 +161,13 @@ task SortCompressIndexVcf {
161161 tot_mem_mb = $(free -m | grep '^Mem' | awk '{print $2}' )
162162
163163 ################################
164-
165- # First we need to fix the integer values in the floating point INFO fields.
166- # Without this fix / hack, downstream GATK3 tools will fail (specifically GenotypeGVCFs)
167164
168- awk -f - "~{input_vcf}" > tmp.vcf << 'AWK_CODE'
165+ # Sort first because otherwise we'll end up with integers in the INFO fields again.
166+ bcftools sort -m $((tot_mem_mb - 2048 ))M -o tmp.vcf ~{input_vcf }
167+
168+ # Then we need to fix the integer values in the floating point INFO fields.
169+ # Without this fix / hack, downstream GATK3 tools will fail (specifically GenotypeGVCFs)
170+ awk -f - "tmp.vcf" > tmp2.vcf << 'AWK_CODE'
169171 BEGIN {
170172 FS = "\t" ; OFS = "\t"
171173
@@ -229,9 +231,12 @@ task SortCompressIndexVcf {
229231AWK_CODE
230232
231233 ################################
234+
235+ # Zip it:
236+ bgzip -c -l2 tmp2.vcf > ~{output_vcf }
232237
233- bcftools sort -m $(( tot_mem_mb - 2048 )) M -Oz2 -o ~{ output_vcf } tmp.vcf
234- bcftools index --threads ${np } --tbi ~{output_vcf }
238+ # Index the output:
239+ bcftools index --threads ${np } --tbi ~{output_vcf }
235240 >>>
236241
237242 output {
0 commit comments