Sage-Bionetworks-Workflows · adamjtaylor · Sep 16, 2025 · Copilot · Jan 15, 2026 · Copilot
diff --git a/nextflow.config b/nextflow.config
@@ -3,28 +3,76 @@
 process.container = 'ghcr.io/sage-bionetworks-workflows/nf-artist:latest'
 docker.enabled = true
 
+// Global retry strategy: container-level failures get one more retry than other errors
+process {
+    errorStrategy = {
+        // 125 = docker run failure, 137 = SIGKILL (typically OOM kill), 139 = SIGSEGV
+        if (task.exitStatus in [125, 137, 139]) {
+            return task.attempt <= 3 ? 'retry' : 'ignore'
+        }
+        // Handle general failures
+        else if (task.exitStatus != 0) {
+            return task.attempt <= 2 ? 'retry' : 'ignore'
+        }
+        return 'ignore'
+    }
+    maxRetries = 3
+
+    // Default resource scaling
+    cpus = { Math.min(2 * task.attempt, 8) }
+    memory = { 4.GB * Math.pow(2, task.attempt - 1) } // Doubles per attempt: 4GB, 8GB, 16GB, 32GB
+}
+
 profiles {
     test { includeConfig 'conf/test.config'}
     sage { includeConfig 'conf/sage.config'}
     tower {
         process {
             withLabel: process_low {
-                cpus = {1 * task.attempt}
-                memory = {2.GB * task.attempt}
+                cpus = { 1 * task.attempt }
+                memory = { 2.GB * Math.pow(2, task.attempt - 1) } // 2GB, 4GB, 8GB, then 16GB on a 4th attempt
                 maxRetries = 3
-                errorStrategy = {task.attempt <= 2 ? 'retry' : 'ignore' }
+                errorStrategy = { 
+                    if (task.exitStatus in [125, 137, 139]) {
+                        return task.attempt <= 3 ? 'retry' : 'ignore'
+                    }
+                    return task.attempt <= 2 ? 'retry' : 'ignore'
+                }
             }
             withLabel: process_medium {
-                cpus = {4 * task.attempt}
-                memory = {8.GB * task.attempt}
+                cpus = { Math.min(4 * task.attempt, 8) }
+                memory = { 8.GB * Math.pow(2, task.attempt - 1) } // 8GB, 16GB, 32GB, then 64GB on a 4th attempt
                 maxRetries = 3
-                errorStrategy = {task.attempt <= 3 ? 'retry' : 'ignore' }
+                errorStrategy = {
+                    if (task.exitStatus in [125, 137, 139]) {
+                        return task.attempt <= 3 ? 'retry' : 'ignore'
+                    }
+                    return task.attempt <= 2 ? 'retry' : 'ignore'
+                }
             }
             withLabel: process_high {
-                cpus = {8 * task.attempt}
-                memory = {16.GB * task.attempt}
-                maxRetries = 3
-                errorStrategy = {task.attempt <= 3 ? 'retry' : 'ignore' }
+                cpus = { Math.min(8 * task.attempt, 16) }
+                memory = { 16.GB * Math.pow(2, task.attempt - 1) } // 16GB, 32GB, 64GB, 128GB, 256GB
+                maxRetries = 4 // One extra retry for process_high tasks
+                errorStrategy = {
+                    if (task.exitStatus in [125, 137, 139]) {
+                        return task.attempt <= 4 ? 'retry' : 'ignore'
+                    }
+                    return task.attempt <= 3 ? 'retry' : 'ignore'
+                }
+            }
+
+            // Special handling for very memory-intensive processes
+            withLabel: process_high_memory {
+                cpus = { Math.min(4, 2 * task.attempt) }
+                memory = { 32.GB * Math.pow(2, task.attempt - 1) } // 32GB, 64GB, 128GB, then 256GB and 512GB on OOM-type retries
+                maxRetries = 4
+                errorStrategy = { 
+                    if (task.exitStatus in [125, 137, 139]) {
+                        return task.attempt <= 4 ? 'retry' : 'ignore'
+                    }
+                    return task.attempt <= 2 ? 'retry' : 'ignore'
+                }
             }
         }
     }