Skip to content

Commit 6b5e983

Browse files
committed
Update to sawfish and sv_stats.
sawfish: - Remove sex from inputs. - Add `--verbose` to all fs operations. - Switch to long form arguments. - Clean up compressed/decompressed files at the end of each task. - Pass supporting_reads_json up to main workflow outputs. sv_stats: - Remove InversionBND filtered events. - Require that all events are FILTER="PASS" and have non-REF genotypes. - Parameterize minimum variant size and maximum "scar" size (for sequence swaps). - Filter "sequence swap" (len(REF) and len(ALT) both >10bp) complex variants from the INS and DEL counts, and put them in a separate SWAP category. These are variants where both the REF and ALT are longer than 1bp.
1 parent 27dc0c0 commit 6b5e983

9 files changed

Lines changed: 39 additions & 33 deletions

File tree

docs/family.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,8 +168,9 @@ The `Sample` struct contains sample specific data and metadata. The struct has t
168168
| Array\[String\] | stat_sv_DEL_count | Structural variant DEL count | (PASS variants) |
169169
| Array\[String\] | stat_sv_INS_count | Structural variant INS count | (PASS variants) |
170170
| Array\[String\] | stat_sv_INV_count | Structural variant INV count | (PASS variants) |
171-
| Array\[String\] | stat_sv_INVBND_count | Structural variant INVBND count | (PASS variants) |
172171
| Array\[String\] | stat_sv_BND_count | Structural variant BND count | (PASS variants) |
172+
| Array\[String\] | stat_sv_SWAP_count | Structural variant sequence swap (SWAP) count | (PASS variants) |
173+
| File | sv_supporting_reads | Supporting reads for structural variants (JSON) | |
173174
| Array\[File\] | bcftools_roh_out | ROH calling | `bcftools roh` |
174175
| Array\[File\] | bcftools_roh_bed | Generated from above, without filtering | |
175176
| File? | joint_sv_vcf | Joint-called structural variant VCF | |

docs/singleton.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,9 @@ flowchart TD
131131
| String | stat_sv_DEL_count | Structural variant DEL count | (PASS variants) |
132132
| String | stat_sv_INS_count | Structural variant INS count | (PASS variants) |
133133
| String | stat_sv_INV_count | Structural variant INV count | (PASS variants) |
134-
| String | stat_sv_INVBND_count | Structural variant INVBND count | (PASS variants) |
135134
| String | stat_sv_BND_count | Structural variant BND count | (PASS variants) |
135+
| String | stat_sv_SWAP_count | Structural variant sequence swap (SWAP) count | (PASS variants) |
136+
| File | sv_supporting_reads | Supporting reads for structural variants (JSON) | |
136137
| File | bcftools_roh_out | ROH calling | `bcftools roh` |
137138
| File | bcftools_roh_bed | Generated from above, without filtering | |
138139

wdl-ci.config.json

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -717,7 +717,7 @@
717717
},
718718
"sv_stats": {
719719
"key": "sv_stats",
720-
"digest": "i5iptmzk472kcck6varsvun7ip6pd4tf",
720+
"digest": "4qqgmft6nd55eqgtbkvmmpzikooro3fp",
721721
"tests": [
722722
{
723723
"inputs": {
@@ -755,8 +755,8 @@
755755
"compare_string"
756756
]
757757
},
758-
"stat_sv_INVBND_count": {
759-
"value": "4",
758+
"stat_sv_SWAP_count": {
759+
"value": "17",
760760
"test_tasks": [
761761
"compare_string"
762762
]
@@ -2032,7 +2032,7 @@
20322032
"tasks": {
20332033
"sawfish_discover": {
20342034
"key": "sawfish_discover",
2035-
"digest": "wbvv3v5qtbdrgvqvtqgeeamqmhgzsw4p",
2035+
"digest": "fhepxt5mh25uz4eg267h6al6nclhl7vl",
20362036
"tests": [
20372037
{
20382038
"inputs": {
@@ -2100,7 +2100,7 @@
21002100
},
21012101
"sawfish_call": {
21022102
"key": "sawfish_call",
2103-
"digest": "rfuh5khjsyiajhrxz4the5j4fv3kzzhb",
2103+
"digest": "kjha4llh7xjs55xbkowbvfikvfnwtct4",
21042104
"tests": [
21052105
{
21062106
"inputs": {

workflows/downstream/downstream.wdl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -219,12 +219,12 @@ workflow downstream {
219219
File indel_distribution_plot = bcftools_stats_roh_small_variants.indel_distribution_plot
220220

221221
# sv stats
222-
String stat_sv_DUP_count = sv_stats.stat_sv_DUP_count
223-
String stat_sv_DEL_count = sv_stats.stat_sv_DEL_count
224-
String stat_sv_INS_count = sv_stats.stat_sv_INS_count
225-
String stat_sv_INV_count = sv_stats.stat_sv_INV_count
226-
String stat_sv_INVBND_count = sv_stats.stat_sv_INVBND_count
227-
String stat_sv_BND_count = sv_stats.stat_sv_BND_count
222+
String stat_sv_DUP_count = sv_stats.stat_sv_DUP_count
223+
String stat_sv_DEL_count = sv_stats.stat_sv_DEL_count
224+
String stat_sv_INS_count = sv_stats.stat_sv_INS_count
225+
String stat_sv_INV_count = sv_stats.stat_sv_INV_count
226+
String stat_sv_BND_count = sv_stats.stat_sv_BND_count
227+
String stat_sv_SWAP_count = sv_stats.stat_sv_SWAP_count
228228

229229
# cpg_pileup outputs
230230
File? cpg_combined_bed = cpg_pileup.combined_bed

workflows/family.wdl

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ workflow humanwgs_family {
244244
'sv_DEL_count': downstream.stat_sv_DEL_count,
245245
'sv_INS_count': downstream.stat_sv_INS_count,
246246
'sv_INV_count': downstream.stat_sv_INV_count,
247-
'sv_INVBND_count': downstream.stat_sv_INVBND_count,
247+
'sv_SWAP_count': downstream.stat_sv_SWAP_count,
248248
'sv_BND_count': downstream.stat_sv_BND_count,
249249
'cnv_DUP_count': upstream.stat_cnv_DUP_count,
250250
'cnv_DEL_count': upstream.stat_cnv_DEL_count,
@@ -318,14 +318,15 @@ workflow humanwgs_family {
318318
# sv outputs
319319
Array[File] phased_sv_vcf = downstream.phased_sv_vcf
320320
Array[File] phased_sv_vcf_index = downstream.phased_sv_vcf_index
321+
File sv_supporting_reads = select_first([joint.sv_supporting_reads, upstream.sv_supporting_reads[0]])
321322

322323
# sv stats
323-
Array[String] stat_sv_DUP_count = downstream.stat_sv_DUP_count
324-
Array[String] stat_sv_DEL_count = downstream.stat_sv_DEL_count
325-
Array[String] stat_sv_INS_count = downstream.stat_sv_INS_count
326-
Array[String] stat_sv_INV_count = downstream.stat_sv_INV_count
327-
Array[String] stat_sv_INVBND_count = downstream.stat_sv_INVBND_count
328-
Array[String] stat_sv_BND_count = downstream.stat_sv_BND_count
324+
Array[String] stat_sv_DUP_count = downstream.stat_sv_DUP_count
325+
Array[String] stat_sv_DEL_count = downstream.stat_sv_DEL_count
326+
Array[String] stat_sv_INS_count = downstream.stat_sv_INS_count
327+
Array[String] stat_sv_INV_count = downstream.stat_sv_INV_count
328+
Array[String] stat_sv_SWAP_count = downstream.stat_sv_SWAP_count
329+
Array[String] stat_sv_BND_count = downstream.stat_sv_BND_count
329330

330331
# small variant outputs
331332
Array[File] phased_small_variant_vcf = downstream.phased_small_variant_vcf

workflows/joint/joint.wdl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ workflow joint {
5353
split_joint_small_variant_vcf_indices: {
5454
name: "Joint-call small variant VCF indices, split by sample"
5555
}
56+
sv_supporting_reads: {
57+
name: "Supporting reads JSON"
58+
}
5659
}
5760

5861
input {
@@ -135,5 +138,6 @@ workflow joint {
135138
Array[File] split_joint_structural_variant_vcf_indices = split_sawfish.split_vcf_indices
136139
Array[File] split_joint_small_variant_vcfs = split_glnexus.split_vcfs
137140
Array[File] split_joint_small_variant_vcf_indices = split_glnexus.split_vcf_indices
141+
File sv_supporting_reads = select_first([sawfish_call.supporting_reads])
138142
}
139143
}

workflows/singleton.wdl

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ workflow humanwgs_singleton {
188188
'sv_DEL_count': [downstream.stat_sv_DEL_count],
189189
'sv_INS_count': [downstream.stat_sv_INS_count],
190190
'sv_INV_count': [downstream.stat_sv_INV_count],
191-
'sv_INVBND_count': [downstream.stat_sv_INVBND_count],
191+
'sv_SWAP_count': [downstream.stat_sv_SWAP_count],
192192
'sv_BND_count': [downstream.stat_sv_BND_count],
193193
'cnv_DUP_count': [upstream.stat_cnv_DUP_count],
194194
'cnv_DEL_count': [upstream.stat_cnv_DEL_count],
@@ -261,14 +261,15 @@ workflow humanwgs_singleton {
261261
# sv outputs
262262
File phased_sv_vcf = downstream.phased_sv_vcf
263263
File phased_sv_vcf_index = downstream.phased_sv_vcf_index
264+
File sv_supporting_reads = select_first([upstream.sv_supporting_reads])
264265

265266
# sv stats
266-
String stat_sv_DUP_count = downstream.stat_sv_DUP_count
267-
String stat_sv_DEL_count = downstream.stat_sv_DEL_count
268-
String stat_sv_INS_count = downstream.stat_sv_INS_count
269-
String stat_sv_INV_count = downstream.stat_sv_INV_count
270-
String stat_sv_INVBND_count = downstream.stat_sv_INVBND_count
271-
String stat_sv_BND_count = downstream.stat_sv_BND_count
267+
String stat_sv_DUP_count = downstream.stat_sv_DUP_count
268+
String stat_sv_DEL_count = downstream.stat_sv_DEL_count
269+
String stat_sv_INS_count = downstream.stat_sv_INS_count
270+
String stat_sv_INV_count = downstream.stat_sv_INV_count
271+
String stat_sv_SWAP_count = downstream.stat_sv_SWAP_count
272+
String stat_sv_BND_count = downstream.stat_sv_BND_count
272273

273274
# small variant outputs
274275
File phased_small_variant_vcf = downstream.phased_small_variant_vcf

workflows/upstream/upstream.wdl

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -117,14 +117,11 @@ workflow upstream {
117117
118118
call Sawfish.sawfish_discover {
119119
input:
120-
sex = mosdepth.inferred_sex,
121120
aligned_bam = aligned_bam_data,
122121
aligned_bam_index = aligned_bam_index,
123122
ref_fasta = ref_map["fasta"], # !FileCoercion
124123
ref_index = ref_map["fasta_index"], # !FileCoercion
125124
out_prefix = "~{sample_id}.~{ref_map['name']}",
126-
expected_male_bed = ref_map["hificnv_expected_bed_male"], # !FileCoercion
127-
expected_female_bed = ref_map["hificnv_expected_bed_female"], # !FileCoercion
128125
runtime_attributes = default_runtime_attributes
129126
}
130127
@@ -199,8 +196,9 @@ workflow upstream {
199196
File discover_tar = sawfish_discover.discover_tar
200197

201198
# sawfish outputs for single sample
202-
File? sv_vcf = sawfish_call.vcf
203-
File? sv_vcf_index = sawfish_call.vcf_index
199+
File? sv_vcf = sawfish_call.vcf
200+
File? sv_vcf_index = sawfish_call.vcf_index
201+
File? sv_supporting_reads = sawfish_call.supporting_reads
204202

205203
# small variant outputs
206204
File small_variant_vcf = deepvariant.vcf

0 commit comments

Comments
 (0)