Skip to content

Commit da94d6d

Browse files
committed
feat: add GIXmake size limit detection and adaptive retry (fix-gixmake-fails)
- Add IndexCreationError type with size limit detection in fastga_integration.rs
- Extend run_batch_alignment_with_budget() with adaptive retry for GIXmake failures
- Apply halving backoff when index creation fails due to size limits (≥48MB)
- Add helpful error messages in other batch functions suggesting manual fixes
- Document GIXmake practical limits: ≤40MB safe, ≥48MB often fail
- Support up to 5 automatic restarts with progressively smaller batches

Fixes silent GIXmake failures that occurred when batch sizes exceeded ~48MB of sequence data, typically affecting batches with ≥4 yeast-sized genomes.
1 parent fcda494 commit da94d6d

File tree

2 files changed

+208
-17
lines changed

2 files changed

+208
-17
lines changed

src/batch_align.rs

Lines changed: 140 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,44 @@
33
//! When aligning many genomes, the aligner's resource usage can exceed available
44
//! disk or memory. This module partitions genomes into batches based on total
55
//! sequence data size, runs the aligner on each batch pair, and aggregates results.
6+
//!
7+
//! ## GIXmake Index Size Limits
8+
//!
9+
//! FastGA's GIXmake indexer has practical size limits for k-mer index creation:
10+
//! - **Safe range**: Batches with ≤40MB of sequence data typically succeed
11+
//! - **Failure threshold**: Batches with ≥48MB often fail silently during index creation
12+
//! - **Recommendation**: For yeast-sized genomes (~12MB each), keep batches to ≤3 genomes
13+
//!
14+
//! When GIXmake failures occur, `run_batch_alignment_with_budget()` automatically
15+
//! reduces batch size and retries. Other batch functions provide error messages
16+
//! suggesting manual --batch-bytes adjustment or switching to wfmash (no size limit).
617
718
use anyhow::{Context, Result};
819
use std::collections::{HashMap, HashSet};
920
use std::fs::File;
1021
use std::io::{BufRead, BufReader, Write};
1122
use std::path::{Path, PathBuf};
1223

24+
/// Reasons why a batch alignment attempt might need to restart.
///
/// The `Display` form is a short lowercase phrase suitable for embedding in
/// log lines and error messages.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RestartReason {
    /// Disk budget exceeded during alignment.
    BudgetExceeded,
    /// GIXmake index creation failed due to size limits.
    IndexSizeLimitExceeded {
        /// Size of the batch whose index creation failed, in MB.
        batch_size_mb: u64,
    },
}

impl std::fmt::Display for RestartReason {
    /// Writes a human-readable description of the restart reason.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::BudgetExceeded => f.write_str("disk budget exceeded"),
            Self::IndexSizeLimitExceeded { batch_size_mb } => {
                write!(f, "GIXmake index size limit exceeded ({batch_size_mb}MB)")
            }
        }
    }
}
43+
1344
/// Trait for aligner-specific batch operations.
1445
///
1546
/// The batch loop calls these methods in order:
@@ -758,6 +789,7 @@ pub fn run_batch_alignment_with_budget(
758789
let largest_genome = genome_bp.iter().copied().max().unwrap_or(0);
759790
let mut max_batch_bp = initial_batch_bp;
760791
let mut restarts = 0u32;
792+
let mut restart_reason = RestartReason::BudgetExceeded; // Default, updated on GIXmake failure
761793

762794
let temp_base = if let Some(dir) = tempdir {
763795
PathBuf::from(dir)
@@ -828,7 +860,36 @@ pub fn run_batch_alignment_with_budget(
828860

829861
// Phase 2: For each target batch, prepare, align all queries, cleanup
830862
for i in 0..num_batches {
831-
aligner.prepare_target(i, config.quiet)?;
863+
// Try to prepare target batch - this may fail due to GIXmake size limits
864+
match aligner.prepare_target(i, config.quiet) {
865+
Ok(()) => {
866+
// Success, continue with batch processing
867+
},
868+
Err(e) => {
869+
// Check if this is a GIXmake size limit error that should trigger restart
870+
if let Some(error_chain) = e.chain().find_map(|err| {
871+
err.downcast_ref::<crate::fastga_integration::IndexCreationError>()
872+
}) {
873+
if let crate::fastga_integration::IndexCreationError::SizeLimitExceeded { batch_size_mb, .. } = error_chain {
874+
eprintln!(
875+
"[gixmake] Index creation failed for batch {} ({}MB) - batch too large for GIXmake",
876+
i + 1, batch_size_mb
877+
);
878+
879+
// Set restart reason and clean up
880+
restart_reason = RestartReason::IndexSizeLimitExceeded {
881+
batch_size_mb: *batch_size_mb
882+
};
883+
aligner.cleanup_all()?;
884+
budget_exceeded = true; // Reuse existing restart mechanism
885+
break;
886+
}
887+
}
888+
889+
// Not a GIXmake size limit error - propagate the original error
890+
return Err(e);
891+
}
892+
}
832893

833894
// Budget check after prepare_target — the index is the dominant cost
834895
let (exceeded, current, _) =
@@ -929,12 +990,24 @@ pub fn run_batch_alignment_with_budget(
929990
// Budget was exceeded — apply halving backoff
930991
restarts += 1;
931992
if restarts as usize > MAX_RESTARTS {
932-
anyhow::bail!(
933-
"Exceeded max restarts ({}) while trying to fit within disk budget {}. \
934-
Use --zstd to halve index size or increase --max-disk.",
935-
MAX_RESTARTS,
936-
format_bytes(disk_budget),
937-
);
993+
match restart_reason {
994+
RestartReason::BudgetExceeded => {
995+
anyhow::bail!(
996+
"Exceeded max restarts ({}) while trying to fit within disk budget {}. \
997+
Use --zstd to halve index size or increase --max-disk.",
998+
MAX_RESTARTS,
999+
format_bytes(disk_budget),
1000+
);
1001+
}
1002+
RestartReason::IndexSizeLimitExceeded { batch_size_mb } => {
1003+
anyhow::bail!(
1004+
"Exceeded max restarts ({}) due to GIXmake index size limits (last failed batch: {}MB). \
1005+
Try reducing --batch-bytes further or use a different aligner (wfmash) that doesn't have this limit.",
1006+
MAX_RESTARTS,
1007+
batch_size_mb
1008+
);
1009+
}
1010+
}
9381011
}
9391012

9401013
let old_batch_bp = max_batch_bp;
@@ -952,13 +1025,30 @@ pub fn run_batch_alignment_with_budget(
9521025
}
9531026
}
9541027

955-
eprintln!(
956-
"[budget] Restart {}/{}: reducing batch from {} to {}",
957-
restarts,
958-
MAX_RESTARTS,
959-
format_bytes(old_batch_bp),
960-
format_bytes(max_batch_bp),
961-
);
1028+
match restart_reason {
1029+
RestartReason::BudgetExceeded => {
1030+
eprintln!(
1031+
"[budget] Restart {}/{}: reducing batch from {} to {} (disk budget exceeded)",
1032+
restarts,
1033+
MAX_RESTARTS,
1034+
format_bytes(old_batch_bp),
1035+
format_bytes(max_batch_bp),
1036+
);
1037+
}
1038+
RestartReason::IndexSizeLimitExceeded { batch_size_mb } => {
1039+
eprintln!(
1040+
"[gixmake] Restart {}/{}: reducing batch from {} to {} (GIXmake index size limit, {}MB batch failed)",
1041+
restarts,
1042+
MAX_RESTARTS,
1043+
format_bytes(old_batch_bp),
1044+
format_bytes(max_batch_bp),
1045+
batch_size_mb
1046+
);
1047+
}
1048+
}
1049+
1050+
// Reset restart reason for next iteration
1051+
restart_reason = RestartReason::BudgetExceeded;
9621052
}
9631053
}
9641054

@@ -1041,7 +1131,24 @@ pub fn run_batch_alignment_generic(
10411131

10421132
// Phase 2: For each target batch, prepare, align all queries, cleanup
10431133
for i in 0..num_batches {
1044-
aligner.prepare_target(i, config.quiet)?;
1134+
// Try to prepare target batch - provide helpful error for GIXmake size limit failures
1135+
if let Err(e) = aligner.prepare_target(i, config.quiet) {
1136+
// Check if this is a GIXmake size limit error
1137+
if let Some(error_chain) = e.chain().find_map(|err| {
1138+
err.downcast_ref::<crate::fastga_integration::IndexCreationError>()
1139+
}) {
1140+
if let crate::fastga_integration::IndexCreationError::SizeLimitExceeded { batch_size_mb, suggested_limit, .. } = error_chain {
1141+
return Err(anyhow::anyhow!(
1142+
"GIXmake index creation failed for batch {} ({}MB). \
1143+
This batch size exceeds FastGA's index limit. \
1144+
Try --batch-bytes {}M or use run_batch_alignment_with_budget() for automatic retry.",
1145+
i + 1, batch_size_mb, suggested_limit
1146+
));
1147+
}
1148+
}
1149+
// Not a GIXmake error - propagate original error
1150+
return Err(e);
1151+
}
10451152

10461153
for j in 0..num_batches {
10471154
if !config.quiet {
@@ -1172,7 +1279,24 @@ pub fn run_batch_alignment_by_count(
11721279
let num_batches = batch_files.len();
11731280

11741281
for i in 0..num_batches {
1175-
aligner.prepare_target(i, config.quiet)?;
1282+
// Try to prepare target batch - provide helpful error for GIXmake size limit failures
1283+
if let Err(e) = aligner.prepare_target(i, config.quiet) {
1284+
// Check if this is a GIXmake size limit error
1285+
if let Some(error_chain) = e.chain().find_map(|err| {
1286+
err.downcast_ref::<crate::fastga_integration::IndexCreationError>()
1287+
}) {
1288+
if let crate::fastga_integration::IndexCreationError::SizeLimitExceeded { batch_size_mb, suggested_limit, .. } = error_chain {
1289+
return Err(anyhow::anyhow!(
1290+
"GIXmake index creation failed for batch {} ({}MB). \
1291+
This batch size exceeds FastGA's index limit. \
1292+
Try --batch-bytes {}M or use the disk budget mode for automatic retry.",
1293+
i + 1, batch_size_mb, suggested_limit
1294+
));
1295+
}
1296+
}
1297+
// Not a GIXmake error - propagate original error
1298+
return Err(e);
1299+
}
11761300

11771301
for j in 0..num_batches {
11781302
if !config.quiet {

src/fastga_integration.rs

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,36 @@ use std::sync::atomic::{AtomicBool, Ordering};
1111
use std::sync::Arc;
1212
use tempfile::NamedTempFile;
1313

14+
/// Error types specific to GIXmake index creation.
///
/// Implements [`std::error::Error`], so it can be wrapped in a dynamic error
/// and recovered later with `downcast_ref`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IndexCreationError {
    /// Index size likely exceeds GIXmake's internal limits (≥48MB sequence data).
    SizeLimitExceeded {
        /// Estimated size of the failing batch, in MB.
        batch_size_mb: u64,
        /// Batch size (in MB) suggested to the user for a retry.
        suggested_limit: u64,
        /// Text of the error reported by the failed index creation.
        underlying_error: String,
    },
    /// Other index creation failure; carries the original error text.
    Other(String),
}

impl std::fmt::Display for IndexCreationError {
    /// Writes a user-facing message; the size-limit variant includes a
    /// `--batch-bytes` suggestion and the original error text.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::SizeLimitExceeded {
                batch_size_mb,
                suggested_limit,
                underlying_error,
            } => write!(
                f,
                "GIXmake index creation failed: batch size {}MB likely exceeds FastGA's index size limit. \
                 Try --batch-bytes {}M or smaller. Original error: {}",
                batch_size_mb, suggested_limit, underlying_error
            ),
            Self::Other(msg) => write!(f, "Index creation failed: {}", msg),
        }
    }
}

impl std::error::Error for IndexCreationError {}
43+
1444
/// Get the preferred temp directory for FastGA operations.
1545
/// Priority: explicit override > TMPDIR env var > current directory.
1646
///
@@ -292,11 +322,48 @@ impl FastGAIntegration {
292322
gdb_base,
293323
self.config.adaptive_seed_cutoff.unwrap_or(10) as i32,
294324
)
295-
.map_err(|e| anyhow::anyhow!("Failed to create index: {e}"))?;
325+
.map_err(|e| {
326+
// Try to determine batch size from GDB file for better error messages
327+
let batch_size_mb = Self::estimate_batch_size_mb(gdb_base).unwrap_or(0);
328+
329+
// Check if this looks like a GIXmake size limit failure
330+
if Self::is_likely_size_limit_error(&e.to_string(), batch_size_mb) {
331+
anyhow::anyhow!(IndexCreationError::SizeLimitExceeded {
332+
batch_size_mb,
333+
suggested_limit: std::cmp::max(32, batch_size_mb * 3 / 4), // Suggest 25% reduction
334+
underlying_error: e.to_string()
335+
})
336+
} else {
337+
anyhow::anyhow!(IndexCreationError::Other(e.to_string()))
338+
}
339+
})?;
296340

297341
Ok(())
298342
}
299343

344+
/// Estimate the batch size in MB from the GDB file size
345+
fn estimate_batch_size_mb(gdb_base: &str) -> Result<u64> {
346+
let gdb_path = format!("{}.1gdb", gdb_base);
347+
let metadata = std::fs::metadata(&gdb_path)?;
348+
// GDB file size is roughly proportional to sequence data size
349+
// Use a conservative multiplier to estimate original sequence size
350+
Ok((metadata.len() / (1024 * 1024)) * 3) // Rough approximation
351+
}
352+
353+
/// Check if an error message indicates a likely GIXmake size limit failure.
///
/// Batches estimated below 40MB are never classified as size-limit failures.
/// At or above 40MB the error is classified as one when the message contains
/// a known GIXmake/FastGA failure marker, or when the message is blank
/// (empty or whitespace-only) and the batch is at least 48MB, which is
/// treated as a silent failure.
///
/// * `error_msg` - text of the error returned by index creation.
/// * `batch_size_mb` - estimated batch size in MB.
fn is_likely_size_limit_error(error_msg: &str, batch_size_mb: u64) -> bool {
    // Below this size the failure is assumed to have another cause.
    const MIN_SUSPECT_MB: u64 = 40;
    // From this size on, a failure with no message is treated as a size-limit failure.
    const SILENT_FAILURE_MB: u64 = 48;
    // Typical patterns in GIXmake/FastGA failure messages.
    const FAILURE_MARKERS: [&str; 3] = ["GIXmake", "code None", "failed with code"];

    if batch_size_mb < MIN_SUSPECT_MB {
        return false;
    }

    let has_marker = FAILURE_MARKERS
        .iter()
        .any(|marker| error_msg.contains(*marker));
    // A message consisting only of whitespace carries no information either,
    // so it counts as silent just like an empty one.
    let is_silent = error_msg.trim().is_empty() && batch_size_mb >= SILENT_FAILURE_MB;

    has_marker || is_silent
}
366+
300367
/// Compress ktab index files using zstd seekable format
301368
/// This reduces disk usage by ~2x and can improve I/O performance
302369
/// Level: 1-19 (higher = smaller files but slower compression)

0 commit comments

Comments
 (0)