Skip to content

Commit cfdb9bb

Browse files
committed
Add extract_tree_pairs_separated() for iterative alignment with early stopping
Separates tree pairs (k-nearest + k-farthest) from random pairs to enable:
- Processing all tree pairs first (guarantees connectivity)
- Applying early stopping only to random pairs (saves work)
1 parent 18f08c0 commit cfdb9bb

File tree

1 file changed

+47
-0
lines changed

1 file changed

+47
-0
lines changed

src/knn_graph.rs

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,53 @@ pub fn extract_tree_pairs(
4949
all_pairs
5050
}
5151

52+
/// Extract tree pairs and random pairs separately (for iterative alignment with early stopping)
53+
/// Returns (tree_pairs, random_pairs) where tree_pairs should be processed first to guarantee connectivity
54+
pub fn extract_tree_pairs_separated(
55+
sequences: &[Sequence],
56+
k_nearest: usize,
57+
k_farthest: usize,
58+
random_fraction: f64,
59+
kmer_size: usize,
60+
) -> (Vec<(usize, usize)>, Vec<(usize, usize)>) {
61+
if sequences.len() < 2 {
62+
return (Vec::new(), Vec::new());
63+
}
64+
65+
// Compute distance matrix
66+
let distance_matrix =
67+
crate::mash::compute_distance_matrix_with_params(sequences, kmer_size, 1000);
68+
69+
// Build tree pairs (k-nearest + k-farthest)
70+
let mut tree_pairs = Vec::new();
71+
72+
if k_nearest > 0 {
73+
let nearest_pairs = build_knn_graph(&distance_matrix, k_nearest, false);
74+
tree_pairs.extend(nearest_pairs);
75+
}
76+
77+
if k_farthest > 0 {
78+
let farthest_pairs = build_knn_graph(&distance_matrix, k_farthest, true);
79+
tree_pairs.extend(farthest_pairs);
80+
}
81+
82+
// Remove duplicates from tree pairs
83+
tree_pairs.sort_unstable();
84+
tree_pairs.dedup();
85+
86+
// Generate random pairs
87+
let mut random_pairs = if random_fraction > 0.0 {
88+
generate_random_pairs(sequences.len(), random_fraction, sequences)
89+
} else {
90+
Vec::new()
91+
};
92+
93+
// Remove any random pairs that are already in tree pairs
94+
random_pairs.retain(|pair| !tree_pairs.binary_search(pair).is_ok());
95+
96+
(tree_pairs, random_pairs)
97+
}
98+
5299
/// Extract sequence pairs using k-nearest neighbor graph (backward compatibility)
53100
pub fn extract_knn_pairs(
54101
sequences: &[Sequence],

0 commit comments

Comments
 (0)