Skip to content

Commit aad3b3e

Browse files
Add files via upload
1 parent c15398b commit aad3b3e

File tree

1 file changed

+116
-0
lines changed

1 file changed

+116
-0
lines changed

wdl/BamMetrics.wdl

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
version 1.0
2+
3+
workflow BamMetrics {
    meta {
        description: "Minimal workflow to compute coverage and read-length metrics from an aligned BAM."
    }

    input {
        File bam
        File bai
        File ref_fasta
    }

    # Run NanoPlot on the alignments; yields the raw stats file plus a
    # name -> value map parsed from it.
    call NanoPlotFromBam { input: bam = bam, bai = bai }

    # Sum of all reference sequence lengths; used as the coverage denominator.
    call ComputeGenomeLength { input: fasta = ref_fasta }

    # Local aliases so the output section reads as a flat list of lookups.
    Map[String, Float] nanoplot_metrics = NanoPlotFromBam.stats_map
    Float genome_length = ComputeGenomeLength.length

    output {
        # Coverage = aligned bases / total reference length.
        Float aligned_coverage = nanoplot_metrics["number_of_bases_aligned"] / genome_length

        # Read-length distribution.
        Float read_length_mean = nanoplot_metrics["mean_read_length"]
        Float read_length_median = nanoplot_metrics["median_read_length"]
        Float read_length_stdev = nanoplot_metrics["read_length_stdev"]
        Float read_length_n50 = nanoplot_metrics["n50"]

        # Alignment yield.
        Float aligned_num_reads = nanoplot_metrics["number_of_reads"]
        Float aligned_num_bases = nanoplot_metrics["number_of_bases_aligned"]
        Float aligned_fraction_bases = nanoplot_metrics["fraction_bases_aligned"]

        # Alignment identity.
        Float average_identity = nanoplot_metrics["average_identity"]
        Float median_identity = nanoplot_metrics["median_identity"]

        # Unparsed NanoPlot statistics file, passed through as-is.
        File nanoplot_stats = NanoPlotFromBam.stats
    }
}
43+
44+
# Computes the total length of a reference FASTA by summing the LN: values of
# the @SQ records that `samtools dict` emits for it.
#
# Inputs:
#   fasta  - reference sequence file.
# Outputs:
#   length - sum of all sequence lengths, as a Float.
# The task fails if no @SQ record is produced.
task ComputeGenomeLength {
    input {
        File fasta
    }

    Int disk_gb = 2 * ceil(size(fasta, "GB"))

    command <<<
        set -euxo pipefail

        # One awk pass replaces the grep/awk/sed/awk chain:
        #  * LN: is located by tag rather than assumed to be the third column;
        #  * the total is written with printf "%.0f" because some awk builds
        #    print large integers (above 2^31) in %.6g form, e.g. 3.1e+09,
        #    which would silently lose precision for human-sized genomes;
        #  * an input with no @SQ records is still an error, as it was when
        #    grep found no match under pipefail.
        samtools dict "~{fasta}" \
            | awk -F'\t' '
                $1 == "@SQ" {
                    for (i = 2; i <= NF; i++) {
                        if ($i ~ /^LN:/) {
                            total += substr($i, 4)
                            n++
                        }
                    }
                }
                END {
                    if (n == 0) {
                        print "no @SQ records with an LN: tag were found" > "/dev/stderr"
                        exit 1
                    }
                    printf "%.0f\n", total
                }
            ' \
            > genome_length.txt
    >>>

    output {
        Float length = read_float("genome_length.txt")
    }

    runtime {
        cpu: 1
        memory: "1 GiB"
        disks: "local-disk " + disk_gb + " HDD"
        docker: "us.gcr.io/broad-dsp-lrma/lr-utils:0.1.8"
    }
}
73+
74+
# Runs NanoPlot on an aligned BAM and exposes its summary statistics.
#
# Inputs:
#   bam - aligned reads.
#   bai - index for `bam`.
# Outputs:
#   stats     - the NanoStats.txt file written by NanoPlot.
#   stats_map - metric name -> numeric value, parsed from NanoStats.txt.
task NanoPlotFromBam {
    input {
        File bam
        File bai
    }

    # Twice the BAM size plus 10 GB of headroom.
    Int disk_gb = 2 * ceil(size(bam, "GB")) + 10

    command <<<
        set -euxo pipefail

        # Refresh the index timestamp (presumably so the index is not seen as
        # older than the BAM after localization). Quoted to match the BAM
        # argument below, so a path containing spaces does not split.
        touch "~{bai}"

        # Prefer nproc, which reports the CPUs usable by this process; fall
        # back to counting /proc/cpuinfo entries if nproc is not installed.
        num_core=$(nproc 2>/dev/null || grep -c '^processor' /proc/cpuinfo)

        NanoPlot -t "${num_core}" \
            -c orangered \
            --N50 \
            --tsv_stats \
            --no_supplementary \
            --verbose \
            --bam "~{bam}"

        # Reduce NanoStats.txt to two tab-separated columns for read_map():
        # drop the header and the 'highest'/'longest' rows, turn the first
        # " >" into "_" and remove the first ":" on each line, then keep only
        # the first two whitespace-separated fields.
        grep -v -e '^Metrics' -e '^highest' -e '^longest' NanoStats.txt \
            | sed -e 's/ >/_/' -e 's/://' \
            | awk '{ print $1 "\t" $2 }' \
            > map.txt
    >>>

    output {
        File stats = "NanoStats.txt"
        Map[String, Float] stats_map = read_map("map.txt")
    }

    runtime {
        cpu: 8
        memory: "24 GiB"
        disks: "local-disk " + disk_gb + " LOCAL"
        docker: "us.gcr.io/broad-dsp-lrma/lr-nanoplot:1.40.0-1"
    }
}

0 commit comments

Comments
 (0)