Skip to content

Commit 1100f53

Browse files
committed
Workflow to rescue short reads for use with other downstream applications
1 parent dc117f3 commit 1100f53

File tree

3 files changed

+157
-2
lines changed

3 files changed

+157
-2
lines changed

.dockstore.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,6 @@ workflows:
3030
- name: SubsetAndAlignBam
3131
subclass: wdl
3232
primaryDescriptorPath: /wdl/SubsetAndAlignBam.wdl
33+
- name: HidiveRescue
34+
subclass: wdl
35+
primaryDescriptorPath: /wdl/HidiveRescue.wdl

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

wdl/HidiveRescue.wdl

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
version 1.0
2+
3+
# HidiveRescue
#
# Pipeline, as wired below:
#   1. PrepareLocus (only when `locus` is supplied) turns the locus string
#      into a BED file.
#   2. Fetch runs `hidive fetch` on the long-read BAM for the chosen loci
#      and emits a FASTA.
#   3. Rescue runs `hidive rescue` with that FASTA and the short-read CRAM,
#      emitting a gzip-compressed FASTA.
#
# Inputs:
#   long_reads_bam / long_reads_bai   long-read alignments and index. The BAM
#                                     is handed to Fetch, whose `bam` input is
#                                     declared as String. The index is not
#                                     referenced by any call in this workflow.
#   short_reads_cram / short_reads_crai  short-read alignments and index.
#   locus                             optional locus string; takes precedence
#                                     over `loci` when both are given.
#   loci                              optional BED file, used when `locus` is
#                                     absent. One of `locus`/`loci` must be set
#                                     or select_first() fails.
#   sample_name                       used as the output file prefix.
#   ref_fa_with_alt / ref_fai_with_alt / ref_cache_tar_gz
#                                     reference FASTA, its index, and a tarball
#                                     unpacked by Rescue for CRAM decoding.
#   padding                           value passed to `hidive fetch -p`.
#
# Output:
#   rescued_fasta_gz                  gzip-compressed FASTA from Rescue.
workflow HidiveRescue {
    input {
        File long_reads_bam
        File long_reads_bai

        File short_reads_cram
        File short_reads_crai

        String? locus
        File? loci

        String sample_name

        File ref_fa_with_alt
        File ref_fai_with_alt
        File ref_cache_tar_gz

        Int padding = 500
    }

    # A locus string, when present, is converted to BED first.
    if (defined(locus)) {
        call PrepareLocus {
            input:
                locus = select_first([locus])
        }
    }

    # Prefer the BED derived from `locus`; otherwise use the supplied `loci`.
    File bed = select_first([PrepareLocus.bed, loci])

    call Fetch {
        input:
            bam     = long_reads_bam,
            loci    = bed,
            padding = padding,
            prefix  = sample_name
    }

    call Rescue {
        input:
            long_reads_fasta = Fetch.fasta,
            short_reads_cram = short_reads_cram,
            short_reads_crai = short_reads_crai,
            ref_fa_with_alt  = ref_fa_with_alt,
            ref_fai_with_alt = ref_fai_with_alt,
            ref_cache_tar_gz = ref_cache_tar_gz,
            prefix           = sample_name
    }

    output {
        File rescued_fasta_gz = Rescue.fasta_gz
    }
}
49+
50+
# PrepareLocus
#
# Converts a single locus string of the form
#     contig:start-end[|name[|...]]
# into a one-line, tab-separated BED file (locus.bed).
#
# Example:
#     chr6:29,940,532-29,947,870|HLA-A
#  -> chr6<TAB>29940532<TAB>29947870<TAB>HLA-A
#
# Input:
#   locus   locus string; commas (thousands separators) are stripped.
# Output:
#   bed     the generated locus.bed.
task PrepareLocus {
    input {
        String locus
    }

    command <<<
        set -euxo pipefail

        # Steps, in order:
        #   1. drop commas;
        #   2. turn the LAST ':' before the first '|' into a tab (greedy match,
        #      so contig names that themselves contain ':' stay intact);
        #   3. turn the LAST '-' before the first '|' into a tab, which leaves
        #      hyphens inside the name field (e.g. HLA-A) untouched;
        #   4. turn every literal '|' into a tab.
        # A bare '|' is used on purpose: in GNU sed basic regex an escaped
        # pipe means alternation and matches the empty string at every
        # position, which would insert a tab between all characters.
        echo "~{locus}" | \
            sed -e 's/,//g' \
                -e 's/^\([^|]*\):/\1\t/' \
                -e 's/^\([^|]*\)-/\1\t/' \
                -e 's/|/\t/g' > locus.bed
    >>>

    output {
        File bed = "locus.bed"
    }

    runtime {
        docker: "us.gcr.io/broad-dsp-lrma/lr-hidive:0.1.101"
        memory: "1 GB"
        cpu: 1
        disks: "local-disk 1 SSD"
    }
}
76+
77+
# Fetch
#
# Runs `hidive fetch` against a BAM for the requested loci and writes the
# result to <prefix>.fa.
#
# Inputs:
#   bam           BAM location, declared as String so it is passed to hidive
#                 as-is rather than localized as a File.
#                 NOTE(review): presumably hidive reads remote paths directly - confirm.
#   locus         optional locus string passed to `-l`; wins over `loci`.
#   loci          optional BED file passed to `-l` when `locus` is absent.
#                 One of `locus`/`loci` must be set or select_first() fails.
#   padding       value passed to `-p`.
#   prefix        output file prefix (default "out").
#   disk_size_gb  local disk size in GB (default 2).
#   num_cpus      CPU count requested from the backend (default 4).
# Output:
#   fasta         <prefix>.fa
task Fetch {
    input {
        String bam
        String? locus
        File? loci
        Int padding

        String prefix = "out"

        Int disk_size_gb = 2
        Int num_cpus = 4
    }

    command <<<
        set -euxo pipefail

        # The placeholders are quoted: a locus string may carry a '|name'
        # suffix, which an unquoted expansion would hand to the shell as a
        # pipeline operator, and paths or prefixes may contain spaces.
        hidive fetch -l "~{select_first([locus, loci])}" -p ~{padding} "~{bam}" > "~{prefix}.fa"
    >>>

    output {
        File fasta = "~{prefix}.fa"
    }

    runtime {
        docker: "us.gcr.io/broad-dsp-lrma/lr-hidive:0.1.107"
        memory: "2 GB"
        cpu: num_cpus
        disks: "local-disk ~{disk_size_gb} SSD"
    }
}
107+
108+
# Rescue
#
# Runs `hidive rescue` with a long-read FASTA and a short-read CRAM, and
# gzip-compresses its stdout into <prefix>.fa.gz.
#
# Inputs:
#   long_reads_fasta   FASTA passed via `-f`.
#   short_reads_cram   CRAM passed as the positional argument.
#   short_reads_crai   CRAM index; localized with the CRAM but not named on
#                      the command line.
#   ref_fa_with_alt    reference FASTA, renamed and passed via `-r`.
#   ref_fai_with_alt   reference index, renamed to sit beside the FASTA.
#   ref_cache_tar_gz   tarball unpacked in the working directory; REF_PATH and
#                      REF_CACHE below point at ref/cache inside it.
#   prefix             output file prefix (default "out").
#   num_cpus           CPU count; memory is requested at 2 GB per CPU.
# Output:
#   fasta_gz           <prefix>.fa.gz
task Rescue {
    input {
        File long_reads_fasta
        File short_reads_cram
        File short_reads_crai

        File ref_fa_with_alt
        File ref_fai_with_alt
        File ref_cache_tar_gz

        String prefix = "out"

        Int num_cpus = 16
    }

    # Disk: twice the rounded-up total input size, plus 1 GB of headroom.
    Float input_size_gb = size([long_reads_fasta, short_reads_cram, short_reads_crai, ref_fa_with_alt, ref_fai_with_alt, ref_cache_tar_gz], "GB")
    Int disk_size_gb = 1 + 2*ceil(input_size_gb)

    # Memory scales with the CPU request.
    Int memory_gb = 2*num_cpus

    command <<<
        set -euxo pipefail

        # Give the reference files fixed names in the working directory.
        mv ~{ref_fa_with_alt} Homo_sapiens_assembly38.fasta
        mv ~{ref_fai_with_alt} Homo_sapiens_assembly38.fasta.fai
        mv ~{ref_cache_tar_gz} Homo_sapiens_assembly38.ref_cache.tar.gz

        # tar output is discarded; a non-zero exit still aborts via `set -e`.
        tar xzf Homo_sapiens_assembly38.ref_cache.tar.gz >/dev/null 2>&1

        # Point CRAM reference lookup at the unpacked cache, with the EBI
        # MD5 service as the second REF_PATH entry.
        export REF_PATH="$(pwd)/ref/cache/%2s/%2s/%s:http://www.ebi.ac.uk/ena/cram/md5/%s"
        export REF_CACHE="$(pwd)/ref/cache/%2s/%2s/%s"

        hidive rescue -r Homo_sapiens_assembly38.fasta -f ~{long_reads_fasta} ~{short_reads_cram} | gzip > ~{prefix}.fa.gz
    >>>

    output {
        File fasta_gz = "~{prefix}.fa.gz"
    }

    runtime {
        # NOTE(review): Fetch pins lr-hidive:0.1.107 while this task pins
        # 0.1.101 - confirm the older image provides `hidive rescue`.
        docker: "us.gcr.io/broad-dsp-lrma/lr-hidive:0.1.101"
        memory: "~{memory_gb} GB"
        cpu: num_cpus
        disks: "local-disk ~{disk_size_gb} SSD"
        maxRetries: 2
    }
}

0 commit comments

Comments
 (0)