//! When aligning many genomes, the aligner's resource usage can exceed available
//! disk or memory. This module partitions genomes into batches based on total
//! sequence data size, runs the aligner on each batch pair, and aggregates results.
//!
//! ## GIXmake Index Size Limits
//!
//! FastGA's GIXmake indexer has practical size limits for k-mer index creation:
//! - **Safe range**: Batches with ≤40MB of sequence data typically succeed
//! - **Failure threshold**: Batches with ≥48MB often fail silently during index creation
//! - **Recommendation**: For yeast-sized genomes (~12MB each), keep batches to ≤3 genomes
//!
//! When GIXmake failures occur, `run_batch_alignment_with_budget()` automatically
//! reduces batch size and retries. The other batch functions return error messages
//! suggesting a manual `--batch-bytes` adjustment or switching to wfmash (no size limit).
617
718use anyhow:: { Context , Result } ;
819use std:: collections:: { HashMap , HashSet } ;
920use std:: fs:: File ;
1021use std:: io:: { BufRead , BufReader , Write } ;
1122use std:: path:: { Path , PathBuf } ;
1223
/// Reasons why a batch alignment attempt might need to restart.
///
/// The value is small and carries only a `u64`, so it is `Copy`: it can be
/// matched by value several times without moving it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RestartReason {
    /// Disk budget exceeded during alignment.
    BudgetExceeded,
    /// GIXmake index creation failed due to size limits.
    IndexSizeLimitExceeded {
        /// Size of the batch whose index creation failed, in megabytes.
        batch_size_mb: u64,
    },
}
32+
33+ impl std:: fmt:: Display for RestartReason {
34+ fn fmt ( & self , f : & mut std:: fmt:: Formatter < ' _ > ) -> std:: fmt:: Result {
35+ match self {
36+ RestartReason :: BudgetExceeded => write ! ( f, "disk budget exceeded" ) ,
37+ RestartReason :: IndexSizeLimitExceeded { batch_size_mb } => {
38+ write ! ( f, "GIXmake index size limit exceeded ({}MB)" , batch_size_mb)
39+ }
40+ }
41+ }
42+ }
43+
1344/// Trait for aligner-specific batch operations.
1445///
1546/// The batch loop calls these methods in order:
@@ -758,6 +789,7 @@ pub fn run_batch_alignment_with_budget(
758789 let largest_genome = genome_bp. iter ( ) . copied ( ) . max ( ) . unwrap_or ( 0 ) ;
759790 let mut max_batch_bp = initial_batch_bp;
760791 let mut restarts = 0u32 ;
792+ let mut restart_reason = RestartReason :: BudgetExceeded ; // Default, updated on GIXmake failure
761793
762794 let temp_base = if let Some ( dir) = tempdir {
763795 PathBuf :: from ( dir)
@@ -828,7 +860,36 @@ pub fn run_batch_alignment_with_budget(
828860
829861 // Phase 2: For each target batch, prepare, align all queries, cleanup
830862 for i in 0 ..num_batches {
831- aligner. prepare_target ( i, config. quiet ) ?;
863+ // Try to prepare target batch - this may fail due to GIXmake size limits
864+ match aligner. prepare_target ( i, config. quiet ) {
865+ Ok ( ( ) ) => {
866+ // Success, continue with batch processing
867+ } ,
868+ Err ( e) => {
869+ // Check if this is a GIXmake size limit error that should trigger restart
870+ if let Some ( error_chain) = e. chain ( ) . find_map ( |err| {
871+ err. downcast_ref :: < crate :: fastga_integration:: IndexCreationError > ( )
872+ } ) {
873+ if let crate :: fastga_integration:: IndexCreationError :: SizeLimitExceeded { batch_size_mb, .. } = error_chain {
874+ eprintln ! (
875+ "[gixmake] Index creation failed for batch {} ({}MB) - batch too large for GIXmake" ,
876+ i + 1 , batch_size_mb
877+ ) ;
878+
879+ // Set restart reason and clean up
880+ restart_reason = RestartReason :: IndexSizeLimitExceeded {
881+ batch_size_mb : * batch_size_mb
882+ } ;
883+ aligner. cleanup_all ( ) ?;
884+ budget_exceeded = true ; // Reuse existing restart mechanism
885+ break ;
886+ }
887+ }
888+
889+ // Not a GIXmake size limit error - propagate the original error
890+ return Err ( e) ;
891+ }
892+ }
832893
833894 // Budget check after prepare_target — the index is the dominant cost
834895 let ( exceeded, current, _) =
@@ -929,12 +990,24 @@ pub fn run_batch_alignment_with_budget(
929990 // Budget was exceeded — apply halving backoff
930991 restarts += 1 ;
931992 if restarts as usize > MAX_RESTARTS {
932- anyhow:: bail!(
933- "Exceeded max restarts ({}) while trying to fit within disk budget {}. \
934- Use --zstd to halve index size or increase --max-disk.",
935- MAX_RESTARTS ,
936- format_bytes( disk_budget) ,
937- ) ;
993+ match restart_reason {
994+ RestartReason :: BudgetExceeded => {
995+ anyhow:: bail!(
996+ "Exceeded max restarts ({}) while trying to fit within disk budget {}. \
997+ Use --zstd to halve index size or increase --max-disk.",
998+ MAX_RESTARTS ,
999+ format_bytes( disk_budget) ,
1000+ ) ;
1001+ }
1002+ RestartReason :: IndexSizeLimitExceeded { batch_size_mb } => {
1003+ anyhow:: bail!(
1004+ "Exceeded max restarts ({}) due to GIXmake index size limits (last failed batch: {}MB). \
1005+ Try reducing --batch-bytes further or use a different aligner (wfmash) that doesn't have this limit.",
1006+ MAX_RESTARTS ,
1007+ batch_size_mb
1008+ ) ;
1009+ }
1010+ }
9381011 }
9391012
9401013 let old_batch_bp = max_batch_bp;
@@ -952,13 +1025,30 @@ pub fn run_batch_alignment_with_budget(
9521025 }
9531026 }
9541027
955- eprintln ! (
956- "[budget] Restart {}/{}: reducing batch from {} to {}" ,
957- restarts,
958- MAX_RESTARTS ,
959- format_bytes( old_batch_bp) ,
960- format_bytes( max_batch_bp) ,
961- ) ;
1028+ match restart_reason {
1029+ RestartReason :: BudgetExceeded => {
1030+ eprintln ! (
1031+ "[budget] Restart {}/{}: reducing batch from {} to {} (disk budget exceeded)" ,
1032+ restarts,
1033+ MAX_RESTARTS ,
1034+ format_bytes( old_batch_bp) ,
1035+ format_bytes( max_batch_bp) ,
1036+ ) ;
1037+ }
1038+ RestartReason :: IndexSizeLimitExceeded { batch_size_mb } => {
1039+ eprintln ! (
1040+ "[gixmake] Restart {}/{}: reducing batch from {} to {} (GIXmake index size limit, {}MB batch failed)" ,
1041+ restarts,
1042+ MAX_RESTARTS ,
1043+ format_bytes( old_batch_bp) ,
1044+ format_bytes( max_batch_bp) ,
1045+ batch_size_mb
1046+ ) ;
1047+ }
1048+ }
1049+
1050+ // Reset restart reason for next iteration
1051+ restart_reason = RestartReason :: BudgetExceeded ;
9621052 }
9631053}
9641054
@@ -1041,7 +1131,24 @@ pub fn run_batch_alignment_generic(
10411131
10421132 // Phase 2: For each target batch, prepare, align all queries, cleanup
10431133 for i in 0 ..num_batches {
1044- aligner. prepare_target ( i, config. quiet ) ?;
1134+ // Try to prepare target batch - provide helpful error for GIXmake size limit failures
1135+ if let Err ( e) = aligner. prepare_target ( i, config. quiet ) {
1136+ // Check if this is a GIXmake size limit error
1137+ if let Some ( error_chain) = e. chain ( ) . find_map ( |err| {
1138+ err. downcast_ref :: < crate :: fastga_integration:: IndexCreationError > ( )
1139+ } ) {
1140+ if let crate :: fastga_integration:: IndexCreationError :: SizeLimitExceeded { batch_size_mb, suggested_limit, .. } = error_chain {
1141+ return Err ( anyhow:: anyhow!(
1142+ "GIXmake index creation failed for batch {} ({}MB). \
1143+ This batch size exceeds FastGA's index limit. \
1144+ Try --batch-bytes {}M or use run_batch_alignment_with_budget() for automatic retry.",
1145+ i + 1 , batch_size_mb, suggested_limit
1146+ ) ) ;
1147+ }
1148+ }
1149+ // Not a GIXmake error - propagate original error
1150+ return Err ( e) ;
1151+ }
10451152
10461153 for j in 0 ..num_batches {
10471154 if !config. quiet {
@@ -1172,7 +1279,24 @@ pub fn run_batch_alignment_by_count(
11721279 let num_batches = batch_files. len ( ) ;
11731280
11741281 for i in 0 ..num_batches {
1175- aligner. prepare_target ( i, config. quiet ) ?;
1282+ // Try to prepare target batch - provide helpful error for GIXmake size limit failures
1283+ if let Err ( e) = aligner. prepare_target ( i, config. quiet ) {
1284+ // Check if this is a GIXmake size limit error
1285+ if let Some ( error_chain) = e. chain ( ) . find_map ( |err| {
1286+ err. downcast_ref :: < crate :: fastga_integration:: IndexCreationError > ( )
1287+ } ) {
1288+ if let crate :: fastga_integration:: IndexCreationError :: SizeLimitExceeded { batch_size_mb, suggested_limit, .. } = error_chain {
1289+ return Err ( anyhow:: anyhow!(
1290+ "GIXmake index creation failed for batch {} ({}MB). \
1291+ This batch size exceeds FastGA's index limit. \
1292+ Try --batch-bytes {}M or use the disk budget mode for automatic retry.",
1293+ i + 1 , batch_size_mb, suggested_limit
1294+ ) ) ;
1295+ }
1296+ }
1297+ // Not a GIXmake error - propagate original error
1298+ return Err ( e) ;
1299+ }
11761300
11771301 for j in 0 ..num_batches {
11781302 if !config. quiet {
0 commit comments