Skip to content

Commit d3a3559

Browse files
committed
Add smart aligner selection and sequence length warnings
- Warns when sequences are shorter than 100 bp (potential alignment issues)
- Suggests SweepGA for sequences ≥100 bp in verbose mode
- Improves user guidance for aligner selection
- Note: some test files still need the mechanical addition of the `frequency` field (in progress)
1 parent 84ffc97 commit d3a3559

File tree

7 files changed

+42
-11
lines changed

7 files changed

+42
-11
lines changed

src/seqrush.rs

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -350,20 +350,45 @@ impl SeqRush {
350350
// Read alignments from PAF file
351351
self.align_and_unite_from_paf(paf_path, args);
352352
} else {
353-
// Use the configured aligner backend based on CLI argument
354-
match args.aligner.as_str() {
353+
// Check sequence lengths and provide guidance
354+
let min_len = self.sequences.iter().map(|s| s.data.len()).min().unwrap_or(0);
355+
let max_len = self.sequences.iter().map(|s| s.data.len()).max().unwrap_or(0);
356+
357+
// Warn about very short sequences (potential alignment issues)
358+
if min_len < 100 && args.verbose {
359+
eprintln!("Warning: Found sequences shorter than 100 bp (min: {} bp, max: {} bp)",
360+
min_len, max_len);
361+
eprintln!(" Short sequences may produce suboptimal alignments.");
362+
}
363+
364+
// Auto-select aligner based on sequence length if using default
365+
let chosen_aligner = if args.aligner == "allwave" && min_len >= 100 {
366+
// Default is allwave, but for longer sequences, sweepga might be better
367+
#[cfg(feature = "use-sweepga")]
368+
{
369+
if args.verbose {
370+
eprintln!("Info: Sequences ≥100 bp detected. Consider --aligner sweepga for faster alignment.");
371+
}
372+
}
373+
"allwave"
374+
} else {
375+
args.aligner.as_str()
376+
};
377+
378+
// Use the configured aligner backend
379+
match chosen_aligner {
355380
#[cfg(feature = "use-allwave")]
356381
"allwave" => self.align_and_unite_with_allwave(args),
357382

358383
#[cfg(feature = "use-sweepga")]
359384
"sweepga" => self.align_and_unite_with_sweepga(args),
360385

361386
_ => {
362-
eprintln!("Error: Unknown aligner '{}'. Available aligners:", args.aligner);
387+
eprintln!("Error: Unknown aligner '{}'. Available aligners:", chosen_aligner);
363388
#[cfg(feature = "use-allwave")]
364-
eprintln!(" - allwave");
389+
eprintln!(" - allwave (works with any sequence length)");
365390
#[cfg(feature = "use-sweepga")]
366-
eprintln!(" - sweepga");
391+
eprintln!(" - sweepga (requires sequences ≥20 bp, recommended for ≥100 bp)");
367392
std::process::exit(1);
368393
}
369394
}

tests/bidirected_tests.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ fn test_simple_forward_sequence() {
7272
sgd_sort: false,
7373
groom: false,
7474
aligner: "allwave".to_string(),
75+
frequency: None,
7576
};
7677

7778
run_seqrush_bidirected_simple(args).unwrap();

tests/test_complex_structural_variations.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ fn test_inverted_repeats() {
111111
sgd_sort: false,
112112
groom: false,
113113
aligner: "allwave".to_string(),
114+
frequency: None,
114115
};
115116

116117
run_inversion_aware_seqrush(args).unwrap();

tests/test_edge_traversal.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ fn test_no_untraversed_edges_in_output() {
5757
sgd_sort: false,
5858
groom: false,
5959
aligner: "allwave".to_string(),
60+
frequency: None,
6061
};
6162

6263
run_seqrush(args).unwrap();

tests/test_mathematical_correctness.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ fn test_transitive_closure() {
5252
offset: 8,
5353
};
5454

55-
let mut seqrush = SeqRush::new(vec![seq1, seq2, seq3], 0);
55+
let seqrush = SeqRush::new(vec![seq1, seq2, seq3], 0);
5656

5757
// Unite seq1[0] with seq2[0]
5858
seqrush
@@ -90,7 +90,7 @@ fn test_single_component_per_position() {
9090
},
9191
];
9292

93-
let mut seqrush = SeqRush::new(sequences, 0);
93+
let seqrush = SeqRush::new(sequences, 0);
9494

9595
// Unite matching positions
9696
seqrush
@@ -132,7 +132,7 @@ fn test_reverse_complement_alignment() {
132132
offset: 4,
133133
};
134134

135-
let mut seqrush = SeqRush::new(vec![seq1, seq2], 0);
135+
let seqrush = SeqRush::new(vec![seq1, seq2], 0);
136136

137137
// seq1 aligns to seq2 via reverse complement
138138
// seq1[0..4] RC aligns to seq2[0..4]
@@ -186,7 +186,7 @@ fn test_identical_sequences_produce_minimal_components() {
186186
},
187187
];
188188

189-
let mut seqrush = SeqRush::new(sequences, 0);
189+
let seqrush = SeqRush::new(sequences, 0);
190190

191191
// Unite all matching positions
192192
seqrush
@@ -264,7 +264,7 @@ fn test_partial_alignment() {
264264
offset: 12,
265265
};
266266

267-
let mut seqrush = SeqRush::new(vec![seq1, seq2], 0);
267+
let seqrush = SeqRush::new(vec![seq1, seq2], 0);
268268

269269
// Only the GGGG region matches (positions 4-7 in seq1, 4-7 in seq2)
270270
seqrush

tests/test_rc_node_grouping.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ fn test_rc_sequences_share_nodes() {
4545
sgd_sort: false,
4646
groom: false,
4747
aligner: "allwave".to_string(),
48+
frequency: None,
4849
};
4950

5051
let output_path = args.output.clone();
@@ -136,6 +137,7 @@ fn test_node_sequence_consistency() {
136137
sgd_sort: false,
137138
groom: false,
138139
aligner: "allwave".to_string(),
140+
frequency: None,
139141
};
140142

141143
let output_path = args.output.clone();

tests/test_topological_sort.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ fn test_sorting_creates_sequential_node_ids() {
4747
sgd_sort: false,
4848
groom: false,
4949
aligner: "allwave".to_string(),
50+
frequency: None,
5051
};
5152

5253
run_seqrush(args_sorted).unwrap();
@@ -279,4 +280,4 @@ fn test_sorting_preserves_graph_structure() {
279280
fs::remove_file(test_fasta).ok();
280281
fs::remove_file(test_output_sorted).ok();
281282
fs::remove_file(test_output_unsorted).ok();
282-
}
283+
}

0 commit comments

Comments
 (0)