Skip to content

Commit ddaf4d3

Browse files
authored
Merge pull request #655 from broadinstitute/dp-downsample
Add rasusa-based coverage downsampling and tune preemptible settings
2 parents 733f16a + 916820a commit ddaf4d3

14 files changed

+94
-87
lines changed

pipes/WDL/tasks/tasks_assembly.wdl

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ task assemble {
1616

1717
Int? machine_mem_gb
1818
Int? cpu
19-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-assemble"
19+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-assemble"
2020
}
2121
parameter_meta{
2222
reads_unmapped_bam: {
@@ -124,7 +124,7 @@ task select_references {
124124
Int? skani_c
125125
Int? skani_n
126126

127-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-assemble"
127+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-assemble"
128128
Int machine_mem_gb = 4
129129
Int cpu = 2
130130
Int disk_size = 100
@@ -223,7 +223,7 @@ task scaffold {
223223
Float? scaffold_min_pct_contig_aligned
224224

225225
Int? machine_mem_gb
226-
String docker="quay.io/broadinstitute/viral-ngs:3.0.10-assemble"
226+
String docker="quay.io/broadinstitute/viral-ngs:3.0.11-assemble"
227227

228228
# do this in multiple steps in case the input doesn't actually have "assembly1-x" in the name
229229
String sample_name = basename(basename(contigs_fasta, ".fasta"), ".assembly1-spades")
@@ -475,7 +475,7 @@ task skani_triangle {
475475
Int compression_factor = 10
476476
Int min_aligned_frac = 15
477477

478-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-assemble"
478+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-assemble"
479479
Int machine_mem_gb = 8
480480
Int cpu = 4
481481
Int disk_size = 100
@@ -715,7 +715,7 @@ task align_reads {
715715
716716
Int? cpu
717717
Int? machine_mem_gb
718-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-core"
718+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-core"
719719

720720
String sample_name = basename(basename(basename(reads_unmapped_bam, ".bam"), ".taxfilt"), ".clean")
721721
}
@@ -876,9 +876,10 @@ task refine_assembly_with_aligned_reads {
876876
Boolean mark_duplicates = false
877877
Float major_cutoff = 0.5
878878
Int min_coverage = 3
879+
Int? max_coverage = 4000
879880

880881
Int machine_mem_gb = 8
881-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-assemble"
882+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-assemble"
882883
}
883884

884885
Int disk_size = 375
@@ -903,7 +904,11 @@ task refine_assembly_with_aligned_reads {
903904
}
904905
min_coverage: {
905906
description: "Minimum read coverage required to call a position unambiguous.",
906-
category: "advanaced"
907+
category: "advanced"
908+
}
909+
max_coverage: {
910+
description: "If specified (defaults to 4000), 'rasusa aln' will be used to downsample alignments at any genomic position that exceeds this level of coverage prior to variant calling. Recommended for any highly 'spiky' coverage samples (e.g. tiled amplicon sequencing).",
911+
category: "advanced"
907912
}
908913
}
909914

@@ -935,6 +940,7 @@ task refine_assembly_with_aligned_reads {
935940
--outVcf "~{out_basename}.sites.vcf.gz" \
936941
--min_coverage ~{min_coverage} \
937942
--major_cutoff ~{major_cutoff} \
943+
~{'--max_coverage ' + max_coverage} \
938944
--JVMmemory "$mem_in_mb"m \
939945
--loglevel=DEBUG
940946
@@ -1014,7 +1020,8 @@ task run_discordance {
10141020
String out_basename = "run"
10151021
Int min_coverage = 4
10161022

1017-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-core"
1023+
Int machine_mem_gb = 4
1024+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-core"
10181025
}
10191026
parameter_meta {
10201027
reads_aligned_bam: {
@@ -1114,7 +1121,7 @@ task run_discordance {
11141121

11151122
runtime {
11161123
docker: docker
1117-
memory: "3 GB"
1124+
memory: "~{machine_mem_gb} GB"
11181125
cpu: 2
11191126
disks: "local-disk ~{disk_size} HDD"
11201127
disk: "~{disk_size} GB" # TES
@@ -1260,7 +1267,7 @@ task wgsim {
12601267
Int? random_seed
12611268

12621269
Int machine_mem_gb = 7
1263-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-assemble"
1270+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-assemble"
12641271
}
12651272

12661273
parameter_meta {

pipes/WDL/tasks/tasks_demux.wdl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ task merge_tarballs {
66
String out_filename
77

88
Int? machine_mem_gb
9-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-core"
9+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-core"
1010
}
1111

1212
Int disk_size = 2625
@@ -179,7 +179,7 @@ task illumina_demux {
179179
Int? machine_mem_gb
180180
# Note: GCP local SSDs can only be attached in fixed counts (2, 4, 8, 16, or 24 × 375GB), so use 3000 (8 SSDs) instead of 2625 (7 SSDs)
181181
Int disk_size = 3000
182-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-core"
182+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-core"
183183
}
184184

185185
parameter_meta {
@@ -817,7 +817,7 @@ task get_illumina_run_metadata {
817817
String? sequencing_center
818818

819819
Int? machine_mem_gb
820-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-core"
820+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-core"
821821
}
822822

823823
parameter_meta {
@@ -920,7 +920,7 @@ task demux_fastqs {
920920
Int? machine_mem_gb
921921
Int max_cpu = 32 # Maximum CPU cap for autoscaling (use 16 for 2-barcode, 64 for 3-barcode)
922922
Int disk_size = 750
923-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-core"
923+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-core"
924924
}
925925

926926
# Calculate total input size for autoscaling
@@ -1048,7 +1048,7 @@ task merge_demux_metrics {
10481048
input {
10491049
Array[File]+ metrics_files
10501050
String output_filename = "merged_demux_metrics.txt"
1051-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-core"
1051+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-core"
10521052
}
10531053

10541054
parameter_meta {

pipes/WDL/tasks/tasks_interhost.wdl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ task multi_align_mafft_ref {
160160
Float? mafft_gapOpeningPenalty
161161

162162
Int? machine_mem_gb
163-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-phylo"
163+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-phylo"
164164
}
165165

166166
String fasta_basename = basename(reference_fasta, '.fasta')
@@ -206,7 +206,7 @@ task multi_align_mafft {
206206
Float? mafft_gapOpeningPenalty
207207

208208
Int? machine_mem_gb
209-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-phylo"
209+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-phylo"
210210
}
211211

212212
Int disk_size = 200
@@ -348,7 +348,7 @@ task index_ref {
348348
File? novocraft_license
349349

350350
Int? machine_mem_gb
351-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-core"
351+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-core"
352352
}
353353

354354
Int disk_size = 100
@@ -470,7 +470,7 @@ task merge_vcfs_gatk {
470470
File ref_fasta
471471

472472
Int? machine_mem_gb
473-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-phylo"
473+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-phylo"
474474

475475
String output_prefix = "merged"
476476
}

pipes/WDL/tasks/tasks_intrahost.wdl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ task lofreq {
138138
Int cpu = 4
139139

140140
String out_basename = basename(aligned_bam, '.bam')
141-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-phylo"
141+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-phylo"
142142
}
143143
Int disk_size = ceil(5 * size(aligned_bam, "GB") + 50)
144144
command <<<
@@ -215,7 +215,7 @@ task isnvs_per_sample {
215215
Boolean removeDoublyMappedReads = true
216216

217217
Int? machine_mem_gb
218-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-phylo"
218+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-phylo"
219219

220220
String sample_name = basename(basename(basename(mapped_bam, ".bam"), ".all"), ".mapped")
221221
}
@@ -257,7 +257,7 @@ task isnvs_vcf {
257257
Boolean naiveFilter = false
258258

259259
Int? machine_mem_gb
260-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-phylo"
260+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-phylo"
261261
}
262262

263263
parameter_meta {
@@ -330,7 +330,7 @@ task annotate_vcf_snpeff {
330330
String? emailAddress
331331

332332
Int? machine_mem_gb
333-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-phylo"
333+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-phylo"
334334

335335
String output_basename = basename(basename(in_vcf, ".gz"), ".vcf")
336336
}

pipes/WDL/tasks/tasks_megablast.wdl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ task trim_rmdup_subsamp {
1515
Int cpu = 16
1616
Int disk_size_gb = 100
1717

18-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-core"
18+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-core"
1919
}
2020

2121
parameter_meta {
@@ -75,7 +75,7 @@ task lca_megablast {
7575
Int cpu = 16
7676
Int disk_size_gb = 300
7777

78-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-classify"
78+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-classify"
7979
}
8080
parameter_meta {
8181
trimmed_fasta: {

pipes/WDL/tasks/tasks_metagenomics.wdl

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ task kraken2 {
216216
Int? min_base_qual
217217

218218
Int machine_mem_gb = 90
219-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-classify"
219+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-classify"
220220
}
221221

222222
parameter_meta {
@@ -348,7 +348,7 @@ task report_primary_kraken_taxa {
348348
File kraken_summary_report
349349
String focal_taxon = "Viruses"
350350

351-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-classify"
351+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-classify"
352352
}
353353
String out_basename = basename(kraken_summary_report, '.txt')
354354
Int disk_size = 50
@@ -397,7 +397,7 @@ task filter_refs_to_found_taxa {
397397
File taxdump_tgz
398398
Int min_read_count = 100
399399

400-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-classify"
400+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-classify"
401401
}
402402
String ref_basename = basename(taxid_to_ref_accessions_tsv, '.tsv')
403403
String hits_basename = basename(focal_report_tsv, '.tsv')
@@ -447,7 +447,7 @@ task build_kraken2_db {
447447
Int? zstd_compression_level
448448

449449
Int machine_mem_gb = 100
450-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-classify"
450+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-classify"
451451
}
452452

453453
Int disk_size = 750
@@ -588,7 +588,7 @@ task blastx {
588588
File krona_taxonomy_db_tgz
589589

590590
Int machine_mem_gb = 8
591-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-classify"
591+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-classify"
592592
}
593593

594594
parameter_meta {
@@ -677,7 +677,7 @@ task krona {
677677
Int? magnitude_column
678678

679679
Int machine_mem_gb = 3
680-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-classify"
680+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-classify"
681681
}
682682

683683
Int disk_size = 50
@@ -782,7 +782,7 @@ task filter_bam_to_taxa {
782782
String out_filename_suffix = "filtered"
783783

784784
Int machine_mem_gb = 8
785-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-classify"
785+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-classify"
786786
}
787787

788788
String out_basename = basename(classified_bam, ".bam") + "." + out_filename_suffix
@@ -874,7 +874,7 @@ task kaiju {
874874
File krona_taxonomy_db_tgz # taxonomy/taxonomy.tab
875875
876876
Int machine_mem_gb = 100
877-
String docker = "quay.io/broadinstitute/viral-ngs:3.0.10-classify"
877+
String docker = "quay.io/broadinstitute/viral-ngs:3.0.11-classify"
878878
}
879879

880880
String input_basename = basename(reads_unmapped_bam, ".bam")

0 commit comments

Comments
 (0)