Skip to content

Commit b98908f

Browse files
author
Zachary Foster
committed
refine how API limits are handled
1 parent 2081c5d commit b98908f

4 files changed

Lines changed: 91 additions & 21 deletions

File tree

conf/modules.config

Lines changed: 31 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,8 @@ process {
102102
enabled: params.data_dir == "false"
103103
]
104104
ext.args = { "--type ${params.bakta_db_type}" }
105-
//storeDir = { params.data_dir == "false" ? null : "${params.data_dir}/bakta_db_${workflow.containerEngine}" }
106105
storeDir = { params.data_dir == "false" ? null : "${params.data_dir}/bakta_db" }
106+
beforeScript = { task.attempt == 1 ? null : "sleep ${Math.pow(5, task.attempt)}" }
107107
}
108108

109109
withName: BBMAP_SENDSKETCH {
@@ -117,8 +117,11 @@ process {
117117
memory = { 1.GB * task.attempt }
118118
time = { 1.h * task.attempt }
119119
maxRetries = 10
120+
121+
// Settings to avoid API rate limits and not put too much stress on servers
120122
maxForks = 3
121-
errorStrategy = { sleep(Math.pow(4, task.attempt) * 6000 as long); return task.attempt > 2 ? 'finish' : 'retry' }
123+
errorStrategy = { task.attempt >= 3 ? 'finish' : 'retry' }
124+
beforeScript = { task.attempt == 1 ? 'sleep "0.$(($RANDOM % 100))"' : "sleep ${Math.pow(5, task.attempt)}" }
122125
}
123126

124127
withName: BUSCO_BUSCO {
@@ -141,6 +144,7 @@ process {
141144
enabled: params.data_dir == "false"
142145
]
143146
storeDir = { params.data_dir == "false" ? null : "${params.data_dir}/busco_db" }
147+
beforeScript = { task.attempt == 1 ? null : "sleep ${Math.pow(5, task.attempt)}" }
144148
}
145149

146150
withName: BWA_INDEX {
@@ -194,13 +198,15 @@ process {
194198
pattern: "*.fasta.gz"
195199
]
196200
]
197-
cpus = { 1 }
201+
cpus = { 1 }
198202
memory = { 500.MB * task.attempt }
199-
time = { 24.h * task.attempt }
203+
time = { 24.h * task.attempt }
200204
storeDir = { params.data_dir == "false" ? null : "${params.data_dir}/assemblies" }
201-
maxForks = 3
205+
206+
// Settings to avoid API rate limits and not put too much stress on servers
207+
maxForks = 1 // NCBI does not seem to allow concurrent downloads, although I cannot find any documentation about this
202208
errorStrategy = { task.attempt >= 3 ? 'ignore' : 'retry' }
203-
beforeScript = { task.attempt == 1 ? 'sleep "0.$(($RANDOM % 100))"' : "sleep ${Math.pow(10, task.attempt - 1)}" }
209+
beforeScript = { task.attempt == 1 ? 'sleep "0.$(($RANDOM % 100))"' : "sleep ${Math.pow(5, task.attempt)}" }
204210
}
205211

206212
withName: EXTRACT_FEATURE_SEQUENCES {
@@ -245,18 +251,13 @@ process {
245251
cpus = { 1 * task.attempt }
246252
memory = { 1.GB * task.attempt }
247253
time = { 12.h * task.attempt }
248-
errorStrategy = {
249-
if (task.attempt > 1) {
250-
return 'ignore'
251-
} else {
252-
sleep(Math.pow(5, task.attempt - 1) * 6000 as long)
253-
return 'retry'
254-
}
255-
}
256-
maxForks = 3
257254
storeDir = { params.data_dir == "false" ? null : "${params.data_dir}/assembly_metadata" }
258255
ext.args = { "--as-json-lines" }
259-
beforeScript = 'sleep "0.$(($RANDOM % 100))"'
256+
257+
// Settings to avoid API rate limits and not put too much stress on servers
258+
maxForks = 3
259+
errorStrategy = { task.attempt >= 3 ? 'ignore' : 'retry' }
260+
beforeScript = { task.attempt == 1 ? 'sleep "0.$(($RANDOM % 100))"' : "sleep ${Math.pow(5, task.attempt)}" }
260261
}
261262

262263
withName: 'FLYE.*' {
@@ -267,7 +268,7 @@ process {
267268
]
268269
cpus = { meta.domain == 'Bacteria' ? 4 * task.attempt : 8 * task.attempt }
269270
memory = { meta.domain == 'Bacteria' ? 15.GB * Math.pow(2, task.attempt - 1) : 30.GB * Math.pow(4, task.attempt - 1) }
270-
time = { 24.h * task.attempt }
271+
time = { 24.h * task.attempt }
271272
errorStrategy = { return task.attempt > 3 ? 'ignore' : 'retry' }
272273
maxRetries = 3
273274
}
@@ -313,8 +314,11 @@ process {
313314
cpus = { 1 }
314315
memory = { 1.GB * task.attempt }
315316
time = { 1.h * task.attempt }
317+
318+
// Settings to avoid API rate limits and not put too much stress on servers
316319
maxForks = 3
317-
beforeScript = 'sleep "0.$(($RANDOM % 100))"'
320+
errorStrategy = { task.attempt >= 3 ? 'ignore' : 'retry' }
321+
beforeScript = { task.attempt == 1 ? 'sleep "0.$(($RANDOM % 100))"' : "sleep ${Math.pow(5, task.attempt)}" }
318322
}
319323

320324
withName: IQTREE_BUSCO {
@@ -500,6 +504,10 @@ process {
500504
cpus = { 1 }
501505
memory = { 1.GB * task.attempt }
502506
time = { 1.h * task.attempt }
507+
508+
// Settings to avoid API rate limits and not put too much stress on servers
509+
maxForks = 3
510+
beforeScript = { task.attempt == 1 ? 'sleep "0.$(($RANDOM % 100))"' : "sleep ${Math.pow(5, task.attempt)}" }
503511
}
504512

505513
withName: SAMTOOLS_FAIDX {
@@ -585,10 +593,12 @@ process {
585593
time = { 48.h * task.attempt }
586594
ext.args = { params.temp_dir ? "--temp ${params.temp_dir}" : "" }
587595
storeDir = { params.data_dir == "false" ? null : "${params.data_dir}/reads" }
588-
errorStrategy = { return task.attempt > 3 ? 'ignore' : 'retry' }
589-
maxRetries = 3
596+
597+
// Settings to avoid API rate limits and not put too much stress on servers
590598
maxForks = 3
591-
beforeScript = 'sleep "0.$(($RANDOM % 100))"'
599+
maxRetries = 3
600+
errorStrategy = { task.attempt >= 4 ? 'finish' : 'retry' }
601+
beforeScript = { task.attempt == 1 ? 'sleep "0.$(($RANDOM % 100))"' : "sleep ${Math.pow(5, task.attempt)}" }
592602
}
593603

594604
withName: SUBSET_BUSCO_GENES {

modules/local/custom/download_assemblies/main.nf

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
process DOWNLOAD_ASSEMBLIES {
22
tag "${ref_meta.id}"
33
label 'process_single'
4+
secret 'NCBI_API_KEY'
45

56
conda "conda-forge::ncbi-datasets-cli=15.11.0 bioconda::samtools=1.18 conda-forge::unzip=6.0"
67
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
@@ -26,6 +27,8 @@ process DOWNLOAD_ASSEMBLIES {
2627
prefix = task.ext.prefix ?: "${ref_meta.id}"
2728
def args = task.ext.args ?: ''
2829
"""
30+
echo \$NCBI_API_KEY
31+
2932
# Download assemblies as zip archives
3033
datasets download genome accession $id --include gff3,genome --filename ${prefix}.zip
3134

modules/nf-core/sratools/fasterqdump/sratools-fasterqdump.diff

Lines changed: 52 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

scripts/watch_nf_docker_logs.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#!/usr/bin/env bash
2+
3+
# Watch the .command.log files in active docker containers
4+
5+
# For every running container (IDs taken from the first column of `docker stats`),
# print a separator line with the container ID followed by the last 7 lines of its
# .command.log; `watch` re-runs the whole pipeline periodically.
# The inner double quotes are escaped so the complete snippet, separator included,
# reaches `bash -c` as one properly quoted string.
watch 'docker stats --no-stream | tail -n +2 | cut -d " " -f 1 | xargs -I {} bash -c "echo -e \"---------------{}---------------\"; docker exec {} tail -n 7 .command.log"'

0 commit comments

Comments
 (0)