Skip to content

Commit ba508ac

Browse files
get male-only var IDs from ped file and GTs not log p-value (#254)
1 parent 9f5620c commit ba508ac

File tree

9 files changed

+77
-3
lines changed

9 files changed

+77
-3
lines changed

wdl/GATKSVPipelinePhase1.wdl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ workflow GATKSVPipelinePhase1 {
223223
RuntimeAttr? runtime_attr_exclude_outliers
224224
RuntimeAttr? runtime_attr_cat_outliers
225225
RuntimeAttr? runtime_attr_filter_samples
226+
RuntimeAttr? runtime_attr_get_male_only
226227

227228
############################################################
228229
## Module metrics parameters for GatherBatchEvidence, ClusterBatch, GenerateBatchMetrics, FilterBatch metrics

wdl/GenerateBatchMetrics.wdl

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ workflow GenerateBatchMetrics {
6565
RuntimeAttr? runtime_attr_merge_allo
6666
RuntimeAttr? runtime_attr_merge_baf
6767
RuntimeAttr? runtime_attr_merge_stats
68+
RuntimeAttr? runtime_attr_get_male_only
6869
}
6970

7071
Array[String] algorithms = ["depth", "melt", "delly", "wham", "manta"]
@@ -101,6 +102,15 @@ workflow GenerateBatchMetrics {
101102
String algorithm = algorithms[i]
102103
File vcf = select_first([vcfs[i]])
103104

105+
call GetMaleOnlyVariantIDs {
106+
input:
107+
vcf = vcf,
108+
female_samples = GetSampleLists.female_samples,
109+
male_samples = GetSampleLists.male_samples,
110+
sv_pipeline_docker = sv_pipeline_docker,
111+
runtime_attr_override = runtime_attr_get_male_only
112+
}
113+
104114
if (algorithm != "melt") {
105115
call rdt.RDTest as RDTest {
106116
input:
@@ -118,6 +128,7 @@ workflow GenerateBatchMetrics {
118128
samples = GetSampleLists.samples_file,
119129
male_samples = GetSampleLists.male_samples,
120130
female_samples = GetSampleLists.female_samples,
131+
male_only_variant_ids = GetMaleOnlyVariantIDs.male_only_variant_ids,
121132
sv_pipeline_docker = sv_pipeline_docker,
122133
sv_pipeline_rdtest_docker = sv_pipeline_rdtest_docker,
123134
linux_docker = linux_docker,
@@ -162,6 +173,7 @@ workflow GenerateBatchMetrics {
162173
samples = GetSampleLists.samples_file,
163174
male_samples = GetSampleLists.male_samples,
164175
female_samples = GetSampleLists.female_samples,
176+
male_only_variant_ids = GetMaleOnlyVariantIDs.male_only_variant_ids,
165177
run_common = true,
166178
common_cnv_size_cutoff = common_cnv_size_cutoff,
167179
sv_base_mini_docker = sv_base_mini_docker,
@@ -190,6 +202,7 @@ workflow GenerateBatchMetrics {
190202
samples = GetSampleLists.samples_file,
191203
male_samples = GetSampleLists.male_samples,
192204
female_samples = GetSampleLists.female_samples,
205+
male_only_variant_ids = GetMaleOnlyVariantIDs.male_only_variant_ids,
193206
common_cnv_size_cutoff = common_cnv_size_cutoff,
194207
sv_base_mini_docker = sv_base_mini_docker,
195208
linux_docker = linux_docker,
@@ -331,6 +344,46 @@ task GetSampleLists {
331344
}
332345
}
333346

347+
# Lists the IDs of variants that are carried by at least one male sample and by
# no female sample in the input VCF.
#
# Inputs:
#   vcf                   - VCF to scan.
#   female_samples        - sample list passed to `bcftools view -S` for the female subset.
#   male_samples          - sample list passed to `bcftools view -S` for the male subset.
#   sv_pipeline_docker    - docker image the command runs in (must provide bcftools and awk).
#   runtime_attr_override - optional overrides for the default runtime attributes below.
#
# Output:
#   male_only_variant_ids - text file, one variant ID per line.
task GetMaleOnlyVariantIDs {
  input {
    File vcf
    File female_samples
    File male_samples
    String sv_pipeline_docker
    RuntimeAttr? runtime_attr_override
  }

  RuntimeAttr default_attr = object {
    cpu_cores: 1,
    mem_gb: 3.75,
    disk_gb: 10,
    boot_disk_gb: 10,
    preemptible_tries: 3,
    max_retries: 1
  }
  RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])

  output {
    File male_only_variant_ids = "male_only_variant_ids.txt"
  }
  command <<<
    # Fail the task if any stage of a pipeline fails, instead of silently
    # emitting a truncated or empty ID list.
    set -euo pipefail

    # IDs of variants with allele count >= 1 within each sex-specific sample subset.
    bcftools view -S ~{male_samples} ~{vcf} | bcftools view --min-ac 1 | bcftools query -f '%ID\n' > variant_ids_in_males.txt
    bcftools view -S ~{female_samples} ~{vcf} | bcftools view --min-ac 1 | bcftools query -f '%ID\n' > variant_ids_in_females.txt

    # Set difference: male IDs that do not appear in the female list.
    # FILENAME==ARGV[1] is used rather than NR==FNR because NR==FNR also holds
    # for the second file when the first (female) file is empty, which would
    # swallow every male ID and produce an empty result.
    awk 'FILENAME==ARGV[1]{a[$0];next} !($0 in a)' variant_ids_in_females.txt variant_ids_in_males.txt > male_only_variant_ids.txt
  >>>
  runtime {
    cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores])
    memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB"
    disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD"
    bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb])
    docker: sv_pipeline_docker
    preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries])
    maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries])
  }
}
385+
386+
334387
task AggregateTests {
335388
input {
336389
File vcf

wdl/PETest.wdl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ workflow PETest {
1717
File ped_file
1818
File male_samples
1919
File female_samples
20+
File male_only_variant_ids
2021
File samples
2122
Int common_cnv_size_cutoff
2223

@@ -47,6 +48,7 @@ workflow PETest {
4748
samples = samples,
4849
male_samples = male_samples,
4950
female_samples = female_samples,
51+
male_only_variant_ids = male_only_variant_ids,
5052
allosome = false,
5153
ref_dict = ref_dict,
5254
common_cnv_size_cutoff = common_cnv_size_cutoff,
@@ -75,6 +77,7 @@ workflow PETest {
7577
samples = samples,
7678
male_samples = male_samples,
7779
female_samples = female_samples,
80+
male_only_variant_ids = male_only_variant_ids,
7881
allosome = true,
7982
ref_dict = ref_dict,
8083
common_cnv_size_cutoff = common_cnv_size_cutoff,

wdl/PETestChromosome.wdl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ workflow PETestChromosome {
1616
Int? suffix_len
1717
File male_samples
1818
File female_samples
19+
File male_only_variant_ids
1920
File samples
2021
Boolean allosome
2122
Int common_cnv_size_cutoff
@@ -77,10 +78,11 @@ workflow PETestChromosome {
7778
input:
7879
male_test = PETestMale.stats,
7980
female_test = PETestFemale.stats,
81+
male_only_ids_list = male_only_variant_ids,
8082
chrom = chrom,
8183
sv_pipeline_docker = sv_pipeline_docker,
8284
runtime_attr_override = runtime_attr_merge_allo,
83-
male_only_expr = "females.log_pval == 0"
85+
male_only_expr = "females.name.isin(male_only_ids)"
8486
}
8587
}
8688
if (!allosome) {

wdl/RDTest.wdl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ workflow RDTest {
1717
File ped_file
1818
File male_samples
1919
File female_samples
20+
File male_only_variant_ids
2021
File samples
2122
File ref_dict
2223

@@ -47,6 +48,7 @@ workflow RDTest {
4748
samples = samples,
4849
male_samples = male_samples,
4950
female_samples = female_samples,
51+
male_only_variant_ids = male_only_variant_ids,
5052
allosome = false,
5153
ref_dict = ref_dict,
5254
sv_pipeline_docker = sv_pipeline_docker,
@@ -74,6 +76,7 @@ workflow RDTest {
7476
samples = samples,
7577
male_samples = male_samples,
7678
female_samples = female_samples,
79+
male_only_variant_ids = male_only_variant_ids,
7780
allosome = true,
7881
ref_dict = ref_dict,
7982
sv_pipeline_docker = sv_pipeline_docker,

wdl/RDTestChromosome.wdl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ workflow RDTestChromosome {
1616
Int? suffix_len
1717
File male_samples
1818
File female_samples
19+
File male_only_variant_ids
1920
File samples
2021
Boolean allosome
2122
File ref_dict
@@ -79,10 +80,11 @@ workflow RDTestChromosome {
7980
input:
8081
male_test = RDTestMale.stats,
8182
female_test = RDTestFemale.stats,
83+
male_only_ids_list = male_only_variant_ids,
8284
chrom = chrom,
8385
sv_pipeline_docker = sv_pipeline_docker,
8486
runtime_attr_override = runtime_attr_merge_allo,
85-
male_only_expr = "females.P.astype(str) == 'No_samples_for_analysis'"
87+
male_only_expr = "females.CNVID.isin(male_only_ids)"
8688
}
8789
}
8890

wdl/SRTest.wdl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ workflow SRTest {
1616
String algorithm
1717
File male_samples
1818
File female_samples
19+
File male_only_variant_ids
1920
File samples
2021
Boolean run_common
2122
Int? common_cnv_size_cutoff # Required if run_common is true
@@ -49,6 +50,7 @@ workflow SRTest {
4950
samples = samples,
5051
male_samples = male_samples,
5152
female_samples = female_samples,
53+
male_only_variant_ids = male_only_variant_ids,
5254
allosome = false,
5355
run_common = run_common,
5456
common_cnv_size_cutoff = common_cnv_size_cutoff,
@@ -78,6 +80,7 @@ workflow SRTest {
7880
samples = samples,
7981
male_samples = male_samples,
8082
female_samples = female_samples,
83+
male_only_variant_ids = male_only_variant_ids,
8184
allosome = true,
8285
run_common = run_common,
8386
common_cnv_size_cutoff = common_cnv_size_cutoff,

wdl/SRTestChromosome.wdl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ workflow SRTestChromosome {
1515
Int? suffix_len
1616
File male_samples
1717
File female_samples
18+
File male_only_variant_ids
1819
File samples
1920
File ref_dict
2021
Boolean allosome
@@ -78,10 +79,11 @@ workflow SRTestChromosome {
7879
input:
7980
male_test = SRTestMale.stats,
8081
female_test = SRTestFemale.stats,
82+
male_only_ids_list = male_only_variant_ids,
8183
chrom = chrom,
8284
runtime_attr_override = runtime_attr_merge_allo,
8385
sv_pipeline_docker = sv_pipeline_docker,
84-
male_only_expr = "females.log_pval == 0"
86+
male_only_expr = "females.name.isin(male_only_ids)"
8587
}
8688
}
8789

wdl/TasksGenerateBatchMetrics.wdl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ task MergeAllosomes {
117117
input {
118118
File male_test
119119
File female_test
120+
File male_only_ids_list
120121
String chrom
121122
String male_only_expr
122123
String sv_pipeline_docker
@@ -143,6 +144,10 @@ task MergeAllosomes {
143144
import pandas as pd
144145
males = pd.read_table("~{male_test}")
145146
females = pd.read_table("~{female_test}")
147+
male_only_ids = set()
148+
with open("~{male_only_ids_list}", 'r') as male_only_file:
149+
for line in male_only_file:
150+
male_only_ids.add(line.strip())
146151
if "~{chrom}" == 'Y' or "~{chrom}" == 'chrY':
147152
males.to_csv("~{basename(male_test)}.merged.csv", sep='\t', index=False, na_rep='NA')
148153
else:

0 commit comments

Comments
 (0)