@@ -6,8 +6,9 @@ import "Structs.wdl"
66
77workflow BAFFromGVCFs {
88 input {
9- Array [File ] gvcfs
9+ Array [File ? ] gvcfs
1010 Array [String ] samples
11+ Boolean ignore_missing_gvcfs
1112 File unpadded_intervals_file
1213 File dbsnp_vcf
1314 File ? dbsnp_vcf_index
@@ -26,17 +27,24 @@ workflow BAFFromGVCFs {
2627 }
2728
2829 Int num_of_original_intervals = length (read_lines (unpadded_intervals_file ))
29- Int num_gvcfs = length (gvcfs )
3030
3131 # Make a 2.5:1 interval number to samples in callset ratio interval list
3232 Int possible_merge_count = floor (num_of_original_intervals / num_gvcfs / 2.5 )
3333 Int merge_count = if possible_merge_count > 1 then possible_merge_count else 1
3434
3535 File dbsnp_vcf_index_ = if defined (dbsnp_vcf_index ) then select_first ([dbsnp_vcf_index ]) else dbsnp_vcf + ".idx"
3636
37- scatter (gvcf in gvcfs ) {
38- File gvcf_indexes = gvcf + ".tbi"
37+ scatter (i in range (length (samples ))) {
38+ if (defined (gvcfs [i ])) {
39+ String defined_samples_optional_ = samples [i ]
40+ File defined_gvcfs_optional_ = select_first ([gvcfs [i ]])
41+ File gvcf_indexes_optional_ = select_first ([gvcfs [i ]]) + ".tbi"
42+ }
3943 }
44+ Array [String ] defined_samples_ = select_all (defined_samples_optional_ )
45+ Array [File ] defined_gvcfs_ = select_all (defined_gvcfs_optional_ )
46+ Array [File ] gvcf_indexes_ = select_all (gvcf_indexes_optional_ )
47+ Int num_gvcfs = length (defined_gvcfs_ )
4048
4149 call DynamicallyCombineIntervals {
4250 input :
@@ -47,14 +55,14 @@ workflow BAFFromGVCFs {
4755
4856 Array [String ] unpadded_intervals = read_lines (DynamicallyCombineIntervals .output_intervals )
4957
50- Int disk_size_gb = 10 + ceil ((size (gvcfs , "GB" ) + size (gvcf_indexes , "GB" )) * 1.5 )
58+ Int disk_size_gb = 10 + ceil ((size (defined_gvcfs_ , "GB" ) + size (gvcf_indexes_ , "GB" )) * 1.5 )
5159
5260 scatter (idx in range (length (unpadded_intervals ))) {
5361 call ImportGVCFs {
5462 input :
55- sample_names = samples ,
56- input_gvcfs = gvcfs ,
57- input_gvcfs_indices = gvcf_indexes ,
63+ sample_names = defined_samples_ ,
64+ input_gvcfs = defined_gvcfs_ ,
65+ input_gvcfs_indices = gvcf_indexes_ ,
5866 interval = unpadded_intervals [idx ],
5967 workspace_dir_name = "genomicsdb" ,
6068 disk_size = disk_size_gb ,
@@ -82,7 +90,8 @@ workflow BAFFromGVCFs {
8290 call GenerateBAF {
8391 input :
8492 vcf = GenotypeGVCFs .output_vcf ,
85- vcf_index = GenotypeGVCFs .output_vcf_index ,
93+ samples = samples ,
94+ ignore_missing_vcf_samples = ignore_missing_gvcfs ,
8695 batch = batch ,
8796 shard = "~{idx }" ,
8897 sv_pipeline_docker = sv_pipeline_docker ,
@@ -92,8 +101,8 @@ workflow BAFFromGVCFs {
92101
93102 call MergeEvidenceFiles {
94103 input :
95- files = GenerateBAF .BAF ,
96- indexes = GenerateBAF .BAF_idx ,
104+ files = GenerateBAF .out ,
105+ indexes = GenerateBAF .out_index ,
97106 batch = batch ,
98107 evidence = "BAF" ,
99108 inclusion_bed = inclusion_bed ,
@@ -312,84 +321,50 @@ task ImportGVCFs {
312321task GenerateBAF {
313322 input {
314323 File vcf
315- File vcf_index
324+ File ? vcf_header
325+ Array [String ] samples
326+ Boolean ignore_missing_vcf_samples
316327 String batch
317328 String shard
318329 String sv_pipeline_docker
319330 RuntimeAttr ? runtime_attr_override
320331 }
321332
322- RuntimeAttr default_attr = object {
323- cpu_cores : 1 ,
324- mem_gb : 3.75 ,
325- disk_gb : 10 ,
326- boot_disk_gb : 10 ,
327- preemptible_tries : 3 ,
328- max_retries : 1
329- }
330- RuntimeAttr runtime_attr = select_first ([runtime_attr_override , default_attr ])
331-
332- output {
333- File BAF = "~{batch }.BAF.shard-~{shard }.txt.gz"
334- File BAF_idx = "~{batch }.BAF.shard-~{shard }.txt.gz.tbi"
335- }
336- command <<<
337-
338- set -euo pipefail
339- bcftools view -M2 -v snps ~{vcf } \
340- | python /opt/sv-pipeline/02_evidence_assessment/02d_baftest/scripts/Filegenerate/generate_baf.py --unfiltered \
341- | bgzip -c \
342- > ~{batch }.BAF.shard -~{shard }.txt.gz
343- tabix -f -s1 -b 2 -e 2 ~{batch }.BAF.shard -~{shard }.txt.gz
344-
345- >>>
346- runtime {
347- cpu : select_first ([runtime_attr .cpu_cores , default_attr .cpu_cores ])
348- memory : select_first ([runtime_attr .mem_gb , default_attr .mem_gb ]) + " GiB"
349- disks : "local-disk " + select_first ([runtime_attr .disk_gb , default_attr .disk_gb ]) + " HDD"
350- bootDiskSizeGb : select_first ([runtime_attr .boot_disk_gb , default_attr .boot_disk_gb ])
351- docker : sv_pipeline_docker
352- preemptible : select_first ([runtime_attr .preemptible_tries , default_attr .preemptible_tries ])
353- maxRetries : select_first ([runtime_attr .max_retries , default_attr .max_retries ])
354- }
355-
356- }
357-
358- task GatherBAF {
359- input {
360- String batch
361- Array [File ] BAF
362- String sv_base_mini_docker
363- RuntimeAttr ? runtime_attr_override
364- }
333+ String ignore_missing_vcf_samples_flag = if (ignore_missing_vcf_samples ) then "--ignore-missing-vcf-samples" else ""
365334
366335 RuntimeAttr default_attr = object {
367- cpu_cores : 1 ,
368- mem_gb : 3.75 ,
369- disk_gb : 100 ,
370- boot_disk_gb : 10 ,
371- preemptible_tries : 3 ,
372- max_retries : 1
373- }
336+ cpu_cores : 1 ,
337+ mem_gb : 3.75 ,
338+ disk_gb : 10 ,
339+ boot_disk_gb : 10 ,
340+ preemptible_tries : 3 ,
341+ max_retries : 1
342+ }
374343 RuntimeAttr runtime_attr = select_first ([runtime_attr_override , default_attr ])
375344
376345 output {
377- File out = "~{batch }.BAF .txt.gz"
378- File out_index = "~{batch }.BAF .txt.gz.tbi"
346+ File out = "~{batch }.~{ shard } .txt.gz"
347+ File out_index = "~{batch }.~{ shard } .txt.gz.tbi"
379348 }
380349 command <<<
381350
382351 set -euo pipefail
383- cat ~{sep =" " BAF } | bgzip -c > ~{batch }.BAF.txt.gz
384- tabix -f -s1 -b 2 -e 2 ~{batch }.BAF.txt.gz
385-
352+ python /opt/sv-pipeline/02_evidence_assessment/02d_baftest/scripts/Filegenerate/generate_baf.py \
353+ --unfiltered \
354+ --samples-list ~{write_lines (samples )} \
355+ ~{ignore_missing_vcf_samples_flag } \
356+ ~{if defined (vcf_header ) then "<(cat ~{vcf_header } ~{vcf })" else vcf } \
357+ | bgzip \
358+ > ~{batch }.~{shard }.txt.gz
359+
360+ tabix -s1 -b2 -e2 ~{batch }.~{shard }.txt.gz
386361 >>>
387362 runtime {
388363 cpu : select_first ([runtime_attr .cpu_cores , default_attr .cpu_cores ])
389364 memory : select_first ([runtime_attr .mem_gb , default_attr .mem_gb ]) + " GiB"
390365 disks : "local-disk " + select_first ([runtime_attr .disk_gb , default_attr .disk_gb ]) + " HDD"
391366 bootDiskSizeGb : select_first ([runtime_attr .boot_disk_gb , default_attr .boot_disk_gb ])
392- docker : sv_base_mini_docker
367+ docker : sv_pipeline_docker
393368 preemptible : select_first ([runtime_attr .preemptible_tries , default_attr .preemptible_tries ])
394369 maxRetries : select_first ([runtime_attr .max_retries , default_attr .max_retries ])
395370 }
@@ -436,7 +411,7 @@ task MergeEvidenceFiles {
436411
437412 mkdir tmp
438413 sort -m -k1 ,1V -k2 ,2n -T tmp data /*.txt | bgzip -c > ~{batch }.~{evidence }.txt.gz
439- tabix -f - s1 -b2 -e2 ~{batch }.~{evidence }.txt.gz
414+ tabix -s1 -b2 -e2 ~{batch }.~{evidence }.txt.gz
440415
441416 >>>
442417 runtime {
0 commit comments