Skip to content

Commit b259737

Browse files
authored
feat: add batch size guardrails to prevent GPU memory exhaustion (#135)
* feat: add batch size guardrails to prevent GPU memory exhaustion Implements two-stage batch size validation: 1. Hard maximum (512): Validates at CLI parse time to reject absurdly large values immediately, preventing GPU memory exhaustion and timeouts. 2. Per-file adjustment: At runtime, automatically adjusts batch size down to the estimated segment count for short audio files, preventing unnecessary memory allocation and padding. Changes: - Add MAX_BATCH_SIZE constant (512) in src/constants.rs - Update parse_batch_size() validator to enforce maximum - Add per-file batch size adjustment in process_file() - Adjustment happens before create_batch_context() to save GPU memory - Log adjustments at DEBUG level to avoid spam with large file sets - Add comprehensive tests for new validation behavior Fixes issue where users could specify batch sizes like 2560 that caused GPU hangs and process termination. * fix: address code review findings Fixes identified by code review (Claude + Gemini): 1. Critical: Prevent effective_batch_size from becoming 0 - Empty or corrupt files with duration_hint=0 could set batch_size to 0 - This would trigger process_batch for every chunk (batch size 1) - Now keeps original batch_size when estimated_segments is 0 2. Low: Add trim() for whitespace handling in parse_batch_size - Config files/env vars may include leading/trailing whitespace - Now accepts inputs like " 32 " or " 64 " - Added test case to verify whitespace handling All 230 tests pass. * fix: address Claude code review feedback Fixed items 1-3 from Claude's review: 1. Channel capacity now uses effective_batch_size instead of batch_size - Ensures memory optimization is consistent for short files - Channel buffer size now matches adjusted batch allocation 2. Added comment clarifying cast_possible_truncation scope - Documents the truncation happens in closure (u64 -> usize) - Notes it's safe in practice (would need 408 years of audio) 3. 
Tests now use MAX_BATCH_SIZE constant in assertions - If MAX_BATCH_SIZE changes, tests will catch mismatches - More robust than hardcoded "512" strings Items 4-6 deferred: - Item 4: .trim() consistency across validators (needs architectural decision) - Item 5: Extract adjustment logic (quality improvement, not blocking) - Item 6: Pre-existing code, not introduced by this PR
1 parent e9b1697 commit b259737

File tree

3 files changed

+115
-35
lines changed

3 files changed

+115
-35
lines changed

src/cli/validators.rs

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
//!
33
//! Shared validation functions for CLI argument parsing.
44
5+
use crate::constants::MAX_BATCH_SIZE;
6+
57
/// Parse and validate confidence value (0.0-1.0).
68
pub fn parse_confidence(s: &str) -> Result<f32, String> {
79
let value: f32 = s
@@ -49,16 +51,25 @@ pub fn parse_longitude(s: &str) -> Result<f64, String> {
4951
parse_bounded_float(s, -180.0, 180.0, "longitude")
5052
}
5153

52-
/// Parse and validate batch size (must be at least 1).
54+
/// Parse and validate batch size (must be between 1 and `MAX_BATCH_SIZE`).
5355
pub fn parse_batch_size(s: &str) -> Result<usize, String> {
5456
let value: usize = s
57+
.trim()
5558
.parse()
5659
.map_err(|_| format!("'{s}' is not a valid number"))?;
5760

5861
if value < 1 {
5962
return Err(format!("batch_size must be at least 1, got {value}"));
6063
}
6164

65+
if value > MAX_BATCH_SIZE {
66+
return Err(format!(
67+
"batch_size must be between 1 and {MAX_BATCH_SIZE}, got {value}\n\n\
68+
This limit prevents GPU memory exhaustion.\n\
69+
If processing fails with batch_size={MAX_BATCH_SIZE}, try reducing it further or use --cpu."
70+
));
71+
}
72+
6273
Ok(value)
6374
}
6475

@@ -124,4 +135,40 @@ mod tests {
124135
assert!(parse_batch_size("-1").is_err());
125136
assert!(parse_batch_size("abc").is_err());
126137
}
138+
139+
#[test]
fn test_parse_batch_size_at_maximum() {
    // The maximum value itself must be accepted.
    assert_eq!(parse_batch_size("512").ok(), Some(MAX_BATCH_SIZE));
}

#[test]
fn test_parse_batch_size_above_maximum() {
    // One past the cap is rejected with the range message and the rationale.
    let err = parse_batch_size("513").expect_err("513 must exceed the maximum");
    assert!(err.contains(&format!(
        "batch_size must be between 1 and {MAX_BATCH_SIZE}"
    )));
    assert!(err.contains("GPU memory exhaustion"));
}

#[test]
fn test_parse_batch_size_way_above_maximum() {
    // A value far beyond the cap (the reported real-world failure) is rejected.
    let err = parse_batch_size("2560").expect_err("2560 must exceed the maximum");
    assert!(err.contains(&format!(
        "batch_size must be between 1 and {MAX_BATCH_SIZE}"
    )));
    assert!(err.contains("GPU memory exhaustion"));
}

#[test]
fn test_parse_batch_size_with_whitespace() {
    // Leading/trailing whitespace (common in config files) is tolerated.
    for (input, expected) in [(" 32", 32), ("32 ", 32), (" 32 ", 32), (" 64 ", 64)] {
        assert_eq!(parse_batch_size(input).ok(), Some(expected));
    }
}
127174
}

src/constants.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,17 @@ pub const DEFAULT_OVERLAP: f32 = 0.0;
2020
/// See `determine_default_batch_size()` in `lib.rs` for dynamic batch size selection.
2121
pub const DEFAULT_BATCH_SIZE: usize = 8;
2222

23+
/// Hard upper bound on the user-supplied batch size.
///
/// Rejecting oversized batch sizes at CLI parse time guards against GPU
/// memory exhaustion and system hangs caused by absurdly large values. The
/// cap is conservative enough to work on most consumer GPUs while still
/// allowing efficient processing of large files.
///
/// Independently of this cap, a batch size larger than a file's estimated
/// segment count is reduced at runtime to avoid unnecessary memory
/// allocation and padding.
pub const MAX_BATCH_SIZE: usize = 512;
33+
2334
/// Batch size defaults by execution provider and model type.
2435
pub mod batch_size {
2536
/// CPU batch size for all models.

src/pipeline/processor.rs

Lines changed: 56 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -435,14 +435,63 @@ pub fn process_file(
435435
None
436436
};
437437

438-
// Create batch context for GPU memory efficiency (if batch_size > 1)
438+
// Calculate segment parameters (needed for batch size adjustment and progress bar)
439+
#[allow(
440+
clippy::cast_possible_truncation,
441+
clippy::cast_sign_loss,
442+
clippy::cast_precision_loss
443+
)]
444+
let segment_samples = (segment_duration * target_rate as f32) as usize;
445+
#[allow(
446+
clippy::cast_possible_truncation,
447+
clippy::cast_sign_loss,
448+
clippy::cast_precision_loss
449+
)]
450+
let overlap_samples = (overlap * target_rate as f32) as usize;
451+
452+
// Estimate segment count for batch size adjustment and progress bar
453+
let estimated_segments = estimate_segment_count(duration_hint, segment_duration, overlap);
454+
455+
// Adjust batch size if it exceeds the estimated segment count
456+
// This prevents unnecessary memory allocation and padding for short files
457+
// Cast is safe in practice: would need ~408 years of audio to overflow on 32-bit
458+
#[allow(clippy::cast_possible_truncation)]
459+
let effective_batch_size = estimated_segments.map_or(batch_size, |est_segments| {
460+
let est_segments_usize = est_segments as usize;
461+
// Handle empty or corrupt files - never set batch size to 0
462+
if est_segments_usize == 0 {
463+
batch_size
464+
} else if batch_size > est_segments_usize {
465+
debug!(
466+
"Batch size {} exceeds segment count ({} segments), using {} for this file",
467+
batch_size, est_segments_usize, est_segments_usize
468+
);
469+
est_segments_usize
470+
} else {
471+
batch_size
472+
}
473+
});
474+
475+
// Log audio info
476+
if let Some(duration) = duration_hint {
477+
info!(
478+
"Processing ~{} of audio ({:.1}s)",
479+
progress::format_duration(duration),
480+
duration
481+
);
482+
} else {
483+
info!("Processing audio (duration unknown)");
484+
}
485+
486+
// Create batch context for GPU memory efficiency (if effective_batch_size > 1)
439487
// Context is created once and reused for all batches in this file
440-
let mut batch_context = if batch_size > 1 {
441-
match classifier.create_batch_context(batch_size) {
488+
// IMPORTANT: This uses effective_batch_size to avoid over-allocating memory
489+
let mut batch_context = if effective_batch_size > 1 {
490+
match classifier.create_batch_context(effective_batch_size) {
442491
Ok(ctx) => {
443492
debug!(
444493
"Created BatchInferenceContext for up to {} segments ({} bytes input buffer)",
445-
batch_size,
494+
effective_batch_size,
446495
ctx.input_buffer_bytes()
447496
);
448497
Some(ctx)
@@ -460,34 +509,6 @@ pub fn process_file(
460509
None
461510
};
462511

463-
// Log audio info
464-
if let Some(duration) = duration_hint {
465-
info!(
466-
"Processing ~{} of audio ({:.1}s)",
467-
progress::format_duration(duration),
468-
duration
469-
);
470-
} else {
471-
info!("Processing audio (duration unknown)");
472-
}
473-
474-
// Calculate segment parameters
475-
#[allow(
476-
clippy::cast_possible_truncation,
477-
clippy::cast_sign_loss,
478-
clippy::cast_precision_loss
479-
)]
480-
let segment_samples = (segment_duration * target_rate as f32) as usize;
481-
#[allow(
482-
clippy::cast_possible_truncation,
483-
clippy::cast_sign_loss,
484-
clippy::cast_precision_loss
485-
)]
486-
let overlap_samples = (overlap * target_rate as f32) as usize;
487-
488-
// Estimate segment count for progress bar
489-
let estimated_segments = estimate_segment_count(duration_hint, segment_duration, overlap);
490-
491512
// Create progress bar
492513
let file_name = input_path
493514
.file_name()
@@ -522,7 +543,8 @@ pub fn process_file(
522543
let progress_guard = progress::ProgressGuard::new(segment_progress, "Inference complete");
523544

524545
// Create channel with capacity for 2 batches (backpressure)
525-
let channel_capacity = batch_size.saturating_mul(2).max(4);
546+
// Use effective_batch_size to match adjusted memory allocation
547+
let channel_capacity = effective_batch_size.saturating_mul(2).max(4);
526548
let (tx, rx) = sync_channel::<ChunkResult>(channel_capacity);
527549

528550
// Spawn decode thread
@@ -546,7 +568,7 @@ pub fn process_file(
546568
classifier,
547569
input_path,
548570
min_confidence,
549-
batch_size,
571+
effective_batch_size,
550572
progress_guard.get(),
551573
&mut batch_context,
552574
reporter,

0 commit comments

Comments
 (0)