11version 1.0
22
# Runs Nextclade on a single genome FASTA and exposes the clade call and the
# amino-acid substitution/deletion lists as String outputs.
#
# Inputs:
#   genome_fasta                 assembled genome to classify
#   root_sequence                optional reference sequence (FASTA)
#   auspice_reference_tree_json  optional reference tree (Auspice JSON)
#   qc_config_json               optional Nextclade QC configuration
#   gene_annotations_json        optional gene map
#   pcr_primers_csv              optional PCR primer table
#   docker                       container image; its "name:tag" is also used to
#                                build the URL the default reference data is
#                                downloaded from
# Any optional input left unset falls back to the SARS-CoV-2 file downloaded
# inside the command.
task nextclade_one_sample {
    meta {
        description: "Nextclade classification of one sample. Leaving optional inputs unspecified will use SARS-CoV-2 defaults."
    }
    input {
        File   genome_fasta
        File?  root_sequence
        File?  auspice_reference_tree_json
        File?  qc_config_json
        File?  gene_annotations_json
        File?  pcr_primers_csv
        String docker = "nextstrain/nextclade:1.2.3"
    }
    String basename = basename(genome_fasta, ".fasta")
    command {
        set -e
        apt-get update
        apt-get -y install python3

        # Turn "org/repo:tag" into "org/repo/tag" so it can be used as a path on
        # raw.githubusercontent.com. Shell assignments must not have spaces
        # around "=".
        URI=$(echo "~{docker}" | sed 's|:|/|g')
        NEXTCLADE_VERSION="$(nextclade --version)"
        echo $NEXTCLADE_VERSION > VERSION

        # grab reference data for SARS-CoV-2
        curl "https://raw.githubusercontent.com/$URI/data/sars-cov-2/reference.fasta" > reference.fasta
        curl "https://raw.githubusercontent.com/$URI/data/sars-cov-2/genemap.gff" > genemap.gff
        curl "https://raw.githubusercontent.com/$URI/data/sars-cov-2/tree.json" > tree.json
        curl "https://raw.githubusercontent.com/$URI/data/sars-cov-2/qc.json" > qc.json
        curl "https://raw.githubusercontent.com/$URI/data/sars-cov-2/primers.csv" > primers.csv

        # Each optional input falls back to the file downloaded above.
        nextclade \
            --input-fasta "~{genome_fasta}" \
            --input-root-seq "~{default='reference.fasta' root_sequence}" \
            --input-tree "~{default='tree.json' auspice_reference_tree_json}" \
            --input-qc-config "~{default='qc.json' qc_config_json}" \
            --input-gene-map "~{default='genemap.gff' gene_annotations_json}" \
            --input-pcr-primers "~{default='primers.csv' pcr_primers_csv}" \
            --output-json "~{basename}".nextclade.json \
            --output-tsv "~{basename}".nextclade.tsv \
            --output-tree "~{basename}".nextclade.auspice.json
        cp "~{basename}".nextclade.tsv input.tsv
        python3 << CODE
        # transpose table: one "column_name<TAB>value" line per TSV column
        import codecs
        with codecs.open('input.tsv', 'r', encoding='utf-8') as inf:
            with codecs.open('transposed.tsv', 'w', encoding='utf-8') as outf:
                # Strip only the line terminator: a bare rstrip() would also eat
                # trailing tabs, shortening rows whose last fields are empty and
                # making zip() silently drop those columns.
                for c in zip(*(l.rstrip('\r\n').split('\t') for l in inf)):
                    outf.write('\t'.join(c)+'\n')
        CODE
        # Pull the value (second field) of each transposed row of interest.
        grep ^clade transposed.tsv | cut -f 2 | grep -v clade > NEXTCLADE_CLADE
        grep ^aaSubstitutions transposed.tsv | cut -f 2 | grep -v aaSubstitutions > NEXTCLADE_AASUBS
        grep ^aaDeletions transposed.tsv | cut -f 2 | grep -v aaDeletions > NEXTCLADE_AADELS
    }
    runtime {
        docker: docker
        memory: "3 GB"
        cpu: 2
        disks: "local-disk 50 HDD"
        dx_instance_type: "mem1_ssd1_v2_x2"
    }
    output {
        String nextclade_version = read_string("VERSION")
        File   nextclade_json    = "~{basename}.nextclade.json"
        File   auspice_json      = "~{basename}.nextclade.auspice.json"
        File   nextclade_tsv     = "~{basename}.nextclade.tsv"
        String nextclade_clade   = read_string("NEXTCLADE_CLADE")
        String aa_subs_csv       = read_string("NEXTCLADE_AASUBS")
        String aa_dels_csv       = read_string("NEXTCLADE_AADELS")
    }
}
73-
# Runs Nextclade once over the concatenation of many genome FASTAs and returns
# per-sequence clade / amino-acid substitution / amino-acid deletion maps keyed
# by the TSV's seqName column, plus the raw Nextclade outputs and basic runtime
# metrics.
#
# Inputs:
#   genome_fastas                non-empty list of FASTA files to classify
#   root_sequence                optional reference sequence (FASTA)
#   auspice_reference_tree_json  optional reference tree (Auspice JSON)
#   qc_config_json               optional Nextclade QC configuration
#   gene_annotations_json        optional gene map
#   pcr_primers_csv              optional PCR primer table
#   basename                     prefix for all output file names
#   docker                       container image; its "name:tag" is also used to
#                                build the URL the default reference data is
#                                downloaded from
# Any optional input left unset falls back to the SARS-CoV-2 file downloaded
# inside the command.
task nextclade_many_samples {
    meta {
        description: "Nextclade classification of many samples. Leaving optional inputs unspecified will use SARS-CoV-2 defaults."
    }
    input {
        Array[File]+ genome_fastas
        File?        root_sequence
        File?        auspice_reference_tree_json
        File?        qc_config_json
        File?        gene_annotations_json
        File?        pcr_primers_csv
        String       basename
        String       docker = "nextstrain/nextclade:1.2.3"
    }
    command <<<
        set -e
        apt-get update
        apt-get -y install python3

        # Turn "org/repo:tag" into "org/repo/tag" so it can be used as a path on
        # raw.githubusercontent.com. Shell assignments must not have spaces
        # around "=".
        URI=$(echo "~{docker}" | sed 's|:|/|g')
        NEXTCLADE_VERSION="$(nextclade --version)"
        echo $NEXTCLADE_VERSION > VERSION

        # grab reference data for SARS-CoV-2
        curl "https://raw.githubusercontent.com/$URI/data/sars-cov-2/reference.fasta" > reference.fasta
        curl "https://raw.githubusercontent.com/$URI/data/sars-cov-2/genemap.gff" > genemap.gff
        curl "https://raw.githubusercontent.com/$URI/data/sars-cov-2/tree.json" > tree.json
        curl "https://raw.githubusercontent.com/$URI/data/sars-cov-2/qc.json" > qc.json
        curl "https://raw.githubusercontent.com/$URI/data/sars-cov-2/primers.csv" > primers.csv

        # Classify all samples in a single Nextclade invocation.
        cat ~{sep=" " genome_fastas} > genomes.fasta
        nextclade \
            --input-fasta genomes.fasta \
            --input-root-seq "~{default='reference.fasta' root_sequence}" \
            --input-tree "~{default='tree.json' auspice_reference_tree_json}" \
            --input-qc-config "~{default='qc.json' qc_config_json}" \
            --input-gene-map "~{default='genemap.gff' gene_annotations_json}" \
            --input-pcr-primers "~{default='primers.csv' pcr_primers_csv}" \
            --output-json "~{basename}".nextclade.json \
            --output-tsv "~{basename}".nextclade.tsv \
            --output-tree "~{basename}".nextclade.auspice.json

        cp genomes.aligned.fasta "~{basename}".nextalign.msa.fasta

        python3 << CODE
        # Split the Nextclade TSV into per-field two-column tables and JSON maps
        # keyed by seqName.
        import codecs, csv, json
        out_maps = {'clade':{}, 'aaSubstitutions':{}, 'aaDeletions':{}}
        with codecs.open('~{basename}.nextclade.tsv', 'r', encoding='utf-8') as inf:
            with codecs.open('NEXTCLADE_CLADE', 'w', encoding='utf-8') as outf_clade:
                with codecs.open('NEXTCLADE_AASUBS', 'w', encoding='utf-8') as outf_aasubs:
                    with codecs.open('NEXTCLADE_AADELS', 'w', encoding='utf-8') as outf_aadels:
                        for row in csv.DictReader(inf, delimiter='\t'):
                            outf_clade.write('\t'.join([row['seqName'], row['clade']])+'\n')
                            outf_aasubs.write('\t'.join([row['seqName'], row['aaSubstitutions']])+'\n')
                            outf_aadels.write('\t'.join([row['seqName'], row['aaDeletions']])+'\n')
                            for k in ('clade','aaSubstitutions','aaDeletions'):
                                out_maps[k][row['seqName']] = row[k]
        with codecs.open('NEXTCLADE_CLADE.json', 'w', encoding='utf-8') as outf:
            json.dump(out_maps['clade'], outf)
        with codecs.open('NEXTCLADE_AASUBS.json', 'w', encoding='utf-8') as outf:
            json.dump(out_maps['aaSubstitutions'], outf)
        with codecs.open('NEXTCLADE_AADELS.json', 'w', encoding='utf-8') as outf:
            json.dump(out_maps['aaDeletions'], outf)
        CODE

        # gather runtime metrics
        cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC
        cat /proc/loadavg > CPU_LOAD
        # The cgroup-v1 file is absent on some hosts; fall back to the cgroup-v2
        # peak counter and finally to 0 so that a missing metric cannot abort an
        # otherwise successful run under "set -e".
        { cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes 2>/dev/null \
            || cat /sys/fs/cgroup/memory.peak 2>/dev/null \
            || echo 0; } > MEM_BYTES
    >>>
    runtime {
        docker: docker
        memory: "14 GB"
        cpu: 16
        disks: "local-disk 100 HDD"
        dx_instance_type: "mem1_ssd1_v2_x16"
    }
    output {
        #Map[String,String] nextclade_clade = read_map("NEXTCLADE_CLADE")
        #Map[String,String] aa_subs_csv     = read_map("NEXTCLADE_AASUBS")
        #Map[String,String] aa_dels_csv     = read_map("NEXTCLADE_AADELS")
        Map[String,String] nextclade_clade   = read_json("NEXTCLADE_CLADE.json")
        Map[String,String] aa_subs_csv       = read_json("NEXTCLADE_AASUBS.json")
        Map[String,String] aa_dels_csv       = read_json("NEXTCLADE_AADELS.json")
        String             nextclade_version = read_string("VERSION")
        File               nextalign_msa     = "~{basename}.nextalign.msa.fasta"
        File               nextclade_json    = "~{basename}.nextclade.json"
        File               auspice_json      = "~{basename}.nextclade.auspice.json"
        File               nextclade_tsv     = "~{basename}.nextclade.tsv"
        Int                max_ram_gb        = ceil(read_float("MEM_BYTES")/1000000000)
        Int                runtime_sec       = ceil(read_float("UPTIME_SEC"))
        String             cpu_load          = read_string("CPU_LOAD")
    }
}
169-
1703task pangolin_one_sample {
1714 meta {
1725 description : "Pangolin classification of one SARS-CoV-2 sample."
@@ -176,7 +9,7 @@ task pangolin_one_sample {
1769 Int ? min_length
17710 Float ? max_ambig
17811 Boolean inference_usher =true
179- String docker = "quay.io/staphb/pangolin:3.1.11 -pangolearn-2021-09-17 "
12+ String docker = "quay.io/staphb/pangolin:3.1.14 -pangolearn-2021-09-28 "
18013 }
18114 String basename = basename (genome_fasta , ".fasta" )
18215 command <<<
@@ -248,7 +81,7 @@ task pangolin_many_samples {
24881 Float ? max_ambig
24982 Boolean inference_usher =true
25083 String basename
251- String docker = "quay.io/staphb/pangolin:3.1.11 -pangolearn-2021-09-17 "
84+ String docker = "quay.io/staphb/pangolin:3.1.14 -pangolearn-2021-09-28 "
25285 }
25386 command <<<
25487 date | tee DATE
0 commit comments