@@ -15,7 +15,8 @@ task assemble {
1515 String sample_name = basename (basename (reads_unmapped_bam , ".bam" ), ".taxfilt" )
1616
1717 Int ? machine_mem_gb
18- String docker = "quay.io/broadinstitute/viral-assemble:2.5.18.0"
18+ Int ? cpu
19+ String docker = "quay.io/broadinstitute/viral-assemble:2.5.21.0"
1920 }
2021 parameter_meta {
2122 reads_unmapped_bam : {
@@ -101,9 +102,9 @@ task assemble {
101102
102103 runtime {
103104 docker : docker
104- memory : select_first ([machine_mem_gb , 63 ]) + " GB"
105- cpu : 4
106- disks : "local-disk " + disk_size + " HDD "
105+ memory : select_first ([machine_mem_gb , 32 ]) + " GB"
106+ cpu : select_first ([ cpu , 8 ])
107+ disks : "local-disk " + disk_size + " SSD "
107108 disk : disk_size + " GB" # TES
108109 dx_instance_type : "mem1_ssd1_v2_x8"
109110 maxRetries : 2
@@ -124,7 +125,7 @@ task select_references {
124125 Int ? skani_c
125126 Int ? skani_n
126127
127- String docker = "quay.io/broadinstitute/viral-assemble:2.5.18 .0"
128+ String docker = "quay.io/broadinstitute/viral-assemble:2.5.21 .0"
128129 Int machine_mem_gb = 4
129130 Int cpu = 2
130131 Int disk_size = 100
@@ -193,7 +194,7 @@ task select_references {
193194 docker : docker
194195 memory : machine_mem_gb + " GB"
195196 cpu : cpu
196- disks : "local-disk " + disk_size + " HDD "
197+ disks : "local-disk " + disk_size + " SSD "
197198 disk : disk_size + " GB" # TES
198199 dx_instance_type : "mem1_ssd1_v2_x2"
199200 preemptible : 2
@@ -204,7 +205,7 @@ task select_references {
204205task scaffold {
205206 input {
206207 File contigs_fasta
207- File reads_bam
208+ File ? reads_bam
208209 Array [File ]+ reference_genome_fasta
209210
210211 String aligner ="muscle"
@@ -224,16 +225,22 @@ task scaffold {
224225 Float ? scaffold_min_pct_contig_aligned
225226
226227 Int ? machine_mem_gb
227- String docker ="quay.io/broadinstitute/viral-assemble:2.5.18 .0"
228+ String docker ="quay.io/broadinstitute/viral-assemble:2.5.21 .0"
228229
229230 # do this in multiple steps in case the input doesn't actually have "assembly1-x" in the name
230231 String sample_name = basename (basename (contigs_fasta , ".fasta" ), ".assembly1-spades" )
231232 }
233+
234+ # Determine whether to run Gap2Seq based on reads_bam size
235+ # Gap2Seq can take 100+ min for large BAMs (>1GB), providing diminishing returns
236+ Float reads_bam_size_gb = if defined (reads_bam ) then size (select_first ([reads_bam ]), "GB" ) else 0.0
237+ Boolean run_gap2seq = defined (reads_bam ) && reads_bam_size_gb < 1.0
238+
232239 parameter_meta {
233240 reads_bam : {
234- description : "Reads in BAM format." ,
241+ description : "Reads in BAM format. If provided, Gap2Seq will attempt to fill gaps using reads. Skipping this for large BAMs (>1GB) can save significant runtime. " ,
235242 patterns : ["*.bam" ],
236- category : "required "
243+ category : "optional "
237244 }
238245
239246 contigs_fasta : {
@@ -367,13 +374,19 @@ task scaffold {
367374 fi
368375 grep '^>' "~{sample_name}" .scaffolding_chosen_ref.fasta | cut -c 2 - | cut -f 1 -d ' ' > "~{sample_name}" .scaffolding_chosen_refs.txt
369376
370- assembly.py gapfill_gap2seq \
371- "~{sample_name}" .intermediate_scaffold.fasta \
372- "~{reads_bam}" \
373- "~{sample_name}" .intermediate_gapfill.fasta \
374- --memLimitGb $mem_in_gb \
375- --maskErrors \
376- --loglevel =DEBUG
377+ # Run Gap2Seq only if reads_bam is provided and smaller than 1GB
378+ if ~{true ='true' false ='false' run_gap2seq }; then
379+ assembly.py gapfill_gap2seq \
380+ "~{sample_name}" .intermediate_scaffold.fasta \
381+ "~{reads_bam}" \
382+ "~{sample_name}" .intermediate_gapfill.fasta \
383+ --memLimitGb $mem_in_gb \
384+ --maskErrors \
385+ --loglevel =DEBUG
386+ else
387+ echo "Skipping Gap2Seq: reads_bam not provided or >= 1GB (~{reads_bam_size_gb} GB)" >&2
388+ cp "~{sample_name}" .intermediate_scaffold.fasta "~{sample_name}" .intermediate_gapfill.fasta
389+ fi
377390
378391 set +e +o pipefail
379392 grep -v '^>' "~{sample_name}" .intermediate_gapfill.fasta | tr -d '\n' | wc -c | tee assembly_preimpute_length
@@ -435,9 +448,9 @@ task scaffold {
435448
436449 runtime {
437450 docker : docker
438- memory : select_first ([machine_mem_gb , 63 ]) + " GB"
451+ memory : select_first ([machine_mem_gb , 20 ]) + " GB"
439452 cpu : 4
440- disks : "local-disk " + disk_size + " HDD "
453+ disks : "local-disk " + disk_size + " SSD "
441454 disk : disk_size + " GB" # TES
442455 dx_instance_type : "mem1_ssd1_v2_x8"
443456 maxRetries : 2
@@ -457,7 +470,7 @@ task skani_triangle {
457470 Int compression_factor = 10
458471 Int min_aligned_frac = 15
459472
460- String docker = "quay.io/broadinstitute/viral-assemble:2.5.18 .0"
473+ String docker = "quay.io/broadinstitute/viral-assemble:2.5.21 .0"
461474 Int machine_mem_gb = 8
462475 Int cpu = 4
463476 Int disk_size = 100
@@ -696,7 +709,7 @@ task align_reads {
696709
697710 Int ? cpu
698711 Int ? machine_mem_gb
699- String docker = "quay.io/broadinstitute/viral-core:2.5.20 "
712+ String docker = "quay.io/broadinstitute/viral-core:2.5.21 "
700713
701714 String sample_name = basename (basename (basename (reads_unmapped_bam , ".bam" ), ".taxfilt" ), ".clean" )
702715 }
@@ -707,8 +720,8 @@ task align_reads {
707720 # Linear scaling: 8 + (input_GB / 15) * 56, capped at 64, rounded to nearest multiple of 4
708721 Float cpu_unclamped = 8.0 + (size (reads_unmapped_bam , "GB" ) / 15.0 ) * 56.0
709722 Int cpu_actual = select_first ([cpu , floor (((if cpu_unclamped > 64.0 then 64.0 else cpu_unclamped ) + 2.0 ) / 4.0 ) * 4 ])
710- # Memory scales with CPU at 2x ratio (default), or use override
711- Int machine_mem_gb_actual = select_first ([machine_mem_gb , cpu_actual * 2 ])
723+ # Memory scales with CPU at 3x ratio (default), or use override
724+ Int machine_mem_gb_actual = select_first ([machine_mem_gb , cpu_actual * 3 ])
712725
713726 parameter_meta {
714727 reference_fasta : {
@@ -811,7 +824,7 @@ task align_reads {
811824 docker : docker
812825 memory : machine_mem_gb_actual + " GB"
813826 cpu : cpu_actual
814- disks : "local-disk " + disk_size + " SSD "
827+ disks : "local-disk " + disk_size + " LOCAL "
815828 disk : disk_size + " GB" # TES
816829 dx_instance_type : "mem1_ssd1_v2_x8"
817830 preemptible : 1
@@ -834,8 +847,8 @@ task refine_assembly_with_aligned_reads {
834847 Float major_cutoff = 0.5
835848 Int min_coverage = 3
836849
837- Int machine_mem_gb = 15
838- String docker = "quay.io/broadinstitute/viral-assemble:2.5.18 .0"
850+ Int machine_mem_gb = 8
851+ String docker = "quay.io/broadinstitute/viral-assemble:2.5.21 .0"
839852 }
840853
841854 Int disk_size = 375
@@ -972,7 +985,7 @@ task run_discordance {
972985 String out_basename = "run"
973986 Int min_coverage = 4
974987
975- String docker = "quay.io/broadinstitute/viral-core:2.5.20 "
988+ String docker = "quay.io/broadinstitute/viral-core:2.5.21 "
976989 }
977990 parameter_meta {
978991 reads_aligned_bam : {
@@ -1221,7 +1234,7 @@ task wgsim {
12211234 Int ? random_seed
12221235
12231236 Int machine_mem_gb = 7
1224- String docker = "quay.io/broadinstitute/viral-assemble:2.5.18 .0"
1237+ String docker = "quay.io/broadinstitute/viral-assemble:2.5.21 .0"
12251238 }
12261239
12271240 parameter_meta {
0 commit comments