Skip to content

Commit 98e8ada

Browse files
committed
add shapeit4 pipeline for hidive experiment
1 parent 0480200 commit 98e8ada

1 file changed

Lines changed: 179 additions & 0 deletions

File tree

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
version 1.0
2+
3+
workflow Shapeit4Hidive {
    # Phases a joint VCF with SHAPEIT4, one region per scatter shard, and then
    # ligates the per-region BCFs into a single bgzipped, tabix-indexed VCF.

    input {
        # Joint VCF to phase, and its tabix index.
        File joint_vcf
        File joint_vcf_tbi
        # Two-column TSV loaded with read_map(): chromosome name -> path of the
        # genetic map handed to shapeit4 via --map.
        File genetic_mapping_tsv_for_shapeit4
        # One region per line; each line becomes one Shapeit4 shard (--region).
        File regionlist
        # One chromosome name per line; line j names the chromosome of region j.
        File chromosomelist
        # CPU count and memory (GiB) requested for every Shapeit4 shard.
        Int shapeit4_num_threads
        Int shapeit4_memory
        # Appended verbatim to the shapeit4 command line.
        String shapeit4_extra_args
        # Leading part of every output file name.
        String output_prefix
    }

    Map[String, String] genetic_mapping_dict = read_map(genetic_mapping_tsv_for_shapeit4)
    Array[String] region_list = read_lines(regionlist)
    Array[String] chromosome_list = read_lines(chromosomelist)

    # The two lists are indexed in lockstep, so chromosome_list must have an
    # entry for every index of region_list, and every chromosome named there
    # must be a key of genetic_mapping_dict.
    scatter (j in range(length(region_list))) {
        String chromosome = chromosome_list[j]

        call Shapeit4 { input:
            vcf_input = joint_vcf,
            vcf_index = joint_vcf_tbi,
            mappingfile = genetic_mapping_dict[chromosome],
            region = region_list[j],
            prefix = output_prefix + "." + chromosome + ".shard-" + j + ".phased",
            num_threads = shapeit4_num_threads,
            memory = shapeit4_memory,
            extra_args = shapeit4_extra_args
        }
    }

    # No indices are passed here, so LigateVcfs indexes the shard BCFs itself.
    # The suffix already starts with a dot; the previous extra "." separator
    # produced names such as "<prefix>..phased.ligated.vcf.gz".
    call LigateVcfs { input:
        vcfs = Shapeit4.phased_bcf,
        prefix = output_prefix + ".phased.ligated"
    }

    output {
        File phased_vcf_gz = LigateVcfs.ligated_vcf_gz
        File phased_vcf_gz_tbi = LigateVcfs.ligated_vcf_gz_tbi
    }
}
50+
51+
# Bundle of optional per-task resource overrides. The tasks in this file read
# each field through select_first(), falling back to their own default when a
# field is unset.
struct RuntimeAttr {
    # Memory in GiB (rendered as "<mem_gb> GiB" in the runtime section).
    Float? mem_gb
    # Value used for the `cpu` runtime attribute.
    Int? cpu_cores
    # Size of the "local-disk ... SSD" data disk.
    Int? disk_gb
    # Value used for `bootDiskSizeGb`.
    Int? boot_disk_gb
    # Value used for `preemptible`.
    Int? preemptible_tries
    # Value used for `maxRetries`.
    Int? max_retries
    # Container image used for the `docker` runtime attribute.
    String? docker
}
60+
61+
# Pair of a shard count and a mapping preset name.
# NOTE(review): nothing in the visible part of this file references this
# struct; confirm whether another WDL imports it before removing it.
struct DataTypeParameters {
    Int num_shards
    String map_preset
}
65+
66+
67+
task LigateVcfs {
    # Ligates a set of phased chunks into one bgzipped VCF with SHAPEIT5's
    # `ligate_static` and writes a tabix (.tbi) index next to it.

    input {
        # Chunks to ligate, passed to the tool as a file-of-file-names.
        Array[File] vcfs
        # Optional pre-built indices; when absent, every chunk is indexed here.
        Array[File]? vcf_idxs
        # Output name without the ".vcf.gz" extension.
        String prefix

        RuntimeAttr? runtime_attr_override
    }

    # Twice the (rounded-up) combined input size, plus 1 GB of slack.
    Int scratch_gb = 1 + 2 * ceil(size(vcfs, "GB"))

    # Task defaults; individual fields are used whenever the override leaves
    # them unset.
    RuntimeAttr fallback_attr = object {
        docker: "us.gcr.io/broad-dsp-lrma/lr-gcloud-samtools:0.1.2",
        cpu_cores: 2,
        mem_gb: 8,
        disk_gb: scratch_gb,
        boot_disk_gb: 10,
        preemptible_tries: 2,
        max_retries: 1
    }
    RuntimeAttr chosen_attr = select_first([runtime_attr_override, fallback_attr])

    # NOTE(review): the ligate binary is downloaded on every run and is not
    # checksum-verified; consider baking it into the container image.
    command <<<
        set -euxo pipefail
        if ! ~{defined(vcf_idxs)}; then
        for ff in ~{sep=' ' vcfs}; do bcftools index $ff; done
        fi

        wget https://github.com/odelaneau/shapeit5/releases/download/v5.1.1/ligate_static
        chmod +x ligate_static

        ./ligate_static --input ~{write_lines(vcfs)} --output ~{prefix}.vcf.gz
        bcftools index -t ~{prefix}.vcf.gz
    >>>

    output {
        File ligated_vcf_gz = "~{prefix}.vcf.gz"
        File ligated_vcf_gz_tbi = "~{prefix}.vcf.gz.tbi"
    }

    runtime {
        docker: select_first([chosen_attr.docker, fallback_attr.docker])
        cpu: select_first([chosen_attr.cpu_cores, fallback_attr.cpu_cores])
        memory: select_first([chosen_attr.mem_gb, fallback_attr.mem_gb]) + " GiB"
        disks: "local-disk " + select_first([chosen_attr.disk_gb, fallback_attr.disk_gb]) + " SSD"
        bootDiskSizeGb: select_first([chosen_attr.boot_disk_gb, fallback_attr.boot_disk_gb])
        preemptible: select_first([chosen_attr.preemptible_tries, fallback_attr.preemptible_tries])
        maxRetries: select_first([chosen_attr.max_retries, fallback_attr.max_retries])
    }
}
118+
119+
task Shapeit4 {
    # Phases one region of a VCF with shapeit4.2 (run with --sequencing) and
    # emits the result as "<prefix>.bcf".

    input {
        # VCF to phase.
        File vcf_input
        # Index of vcf_input. Not referenced in the command; presumably declared
        # so that it is localized next to the VCF - confirm.
        File vcf_index
        # Genetic map passed to --map.
        File mappingfile
        # Region passed to --region.
        String region
        # Output name without the ".bcf" extension.
        String prefix
        # Used for --thread and as the default CPU count.
        Int num_threads
        # Default memory request, in GiB.
        Int memory
        # Appended verbatim (unquoted) to the shapeit4 command line.
        String extra_args

        RuntimeAttr? runtime_attr_override
        String zones = "us-central1-a us-central1-b us-central1-c us-central1-f"
    }

    # The whole input VCF is localized for every shard, so the data disk must
    # grow with it. Keep the historical 100 GB floor and only exceed it when
    # the inputs plus ~20 GB of headroom would not fit.
    Int input_size_gb = ceil(size(vcf_input, "GiB") + size(vcf_index, "GiB") + size(mappingfile, "GiB"))
    Int disk_size = if input_size_gb + 20 > 100 then input_size_gb + 20 else 100

    command <<<
        # Fail fast and trace commands, matching LigateVcfs.
        set -euxo pipefail

        # Optional resource monitoring, currently disabled:
        # export MONITOR_MOUNT_POINT="/cromwell_root/"
        # bash /opt/vm_local_monitoring_script.sh &> resources.log &
        # job_id=$(ps -aux | grep -F 'vm_local_monitoring_script.sh' | head -1 | awk '{print $2}')

        shapeit4.2 --input ~{vcf_input} \
            --map ~{mappingfile} \
            --region ~{region} \
            --sequencing \
            --output ~{prefix}.bcf \
            --thread ~{num_threads} \
            ~{extra_args}

        # if ps -p "${job_id}" > /dev/null; then kill "${job_id}"; fi
    >>>

    output {
        # File resouce_monitor_log = "resources.log"
        File phased_bcf = "~{prefix}.bcf"
    }

    #########################
    # Task defaults; individual fields are used whenever the override leaves
    # them unset. Preemption and retries are off by default.
    RuntimeAttr default_attr = object {
        cpu_cores: num_threads,
        mem_gb: memory,
        disk_gb: disk_size,
        boot_disk_gb: 100,
        preemptible_tries: 0,
        max_retries: 0,
        docker: "hangsuunc/shapeit4:v1"
    }
    RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])
    runtime {
        cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores])
        memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB"
        disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD"
        zones: zones
        bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb])
        preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries])
        maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries])
        docker: select_first([runtime_attr.docker, default_attr.docker])
    }
}

0 commit comments

Comments
 (0)