Skip to content

Commit 7f1cda3

Browse files
committed
Fix test suite for updated CLI flags
Changes: - Replace -i (input) with positional argument - Replace -o (output) with --output flag - Replace -f (no-filter) with -N flag - Replace -p (overlap) with -o flag - Replace -Y (identity) with -i flag - Add --bin sweepga to cargo run commands - Fix test_compare_with_no_filter to use -j 0 -s 0 instead of -N - Update test_chain_identity_stability coverage expectation (1000→600 Mb) to account for self-mappings being excluded by default All tests now pass with the updated CLI interface.
1 parent 074bb7a commit 7f1cda3

11 files changed

+1127
-13
lines changed

tests/test_centromere_plane_sweep.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ fn test_reverse_strand_scaffold_plane_sweep() {
2626
// With Y=0 (no identity filter) and scaffolding enabled
2727
let output = std::process::Command::new("./target/release/sweepga")
2828
.arg(paf.path())
29-
.arg("-Y")
29+
.arg("-i")
3030
.arg("0")
3131
.arg("-j")
3232
.arg("100000") // Enable scaffolding
@@ -76,7 +76,7 @@ fn test_reverse_vs_forward_scaffold_scoring() {
7676

7777
let output = std::process::Command::new("./target/release/sweepga")
7878
.arg(paf.path())
79-
.arg("-Y")
79+
.arg("-i")
8080
.arg("0")
8181
.arg("-j")
8282
.arg("100000")

tests/test_chain_monotonicity.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ fn test_simple_collinear_chaining() {
121121
.arg(paf_path)
122122
.arg("-j")
123123
.arg(gap.to_string())
124-
.arg("-Y")
124+
.arg("-i")
125125
.arg("0.90") // 90% identity threshold
126126
.arg("-s")
127127
.arg("0") // No minimum scaffold length
@@ -158,7 +158,7 @@ fn test_mixed_identity_chaining() {
158158
.arg(paf_path)
159159
.arg("-j")
160160
.arg(gap.to_string())
161-
.arg("-Y")
161+
.arg("-i")
162162
.arg(threshold)
163163
.arg("-s")
164164
.arg("0")
@@ -190,7 +190,7 @@ fn test_fragmented_chaining_coverage() {
190190
.arg(paf_path)
191191
.arg("-j")
192192
.arg(gap.to_string())
193-
.arg("-Y")
193+
.arg("-i")
194194
.arg("0.90")
195195
.arg("-s")
196196
.arg("0")
@@ -234,7 +234,7 @@ fn test_centromere_inversion_filtering() {
234234
// Test 1: With Y=0.80 (80%), chain should be filtered (76% < 80%)
235235
let output_80 = std::process::Command::new("./target/release/sweepga")
236236
.arg(paf.path())
237-
.arg("-Y")
237+
.arg("-i")
238238
.arg("0.80")
239239
.arg("-j")
240240
.arg("10000")
@@ -254,7 +254,7 @@ fn test_centromere_inversion_filtering() {
254254
// Test 2: With Y=0.75 (75%), chain should pass (76% >= 75%)
255255
let output_75 = std::process::Command::new("./target/release/sweepga")
256256
.arg(paf.path())
257-
.arg("-Y")
257+
.arg("-i")
258258
.arg("0.75")
259259
.arg("-j")
260260
.arg("10000")
@@ -274,7 +274,7 @@ fn test_centromere_inversion_filtering() {
274274
// Test 3: With Y=0 (no filter), chain should definitely pass
275275
let output_0 = std::process::Command::new("./target/release/sweepga")
276276
.arg(paf.path())
277-
.arg("-Y")
277+
.arg("-i")
278278
.arg("0")
279279
.arg("-j")
280280
.arg("10000")

tests/test_chaining_stability.rs

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
/// Tests for chaining stability across different gap values
2+
///
3+
/// Key property: When gap threshold increases, chains should only grow or stay the same,
4+
/// never shrink. If two mappings are 50kb apart, they should chain with both -j 100k and -j 1m.
5+
use std::collections::HashMap;
6+
7+
/// Parse chain membership from PAF output
8+
fn parse_chains(output: &str) -> HashMap<String, Vec<String>> {
9+
let mut chains: HashMap<String, Vec<String>> = HashMap::new();
10+
11+
for line in output.lines() {
12+
if line.starts_with('[') || line.is_empty() {
13+
continue;
14+
}
15+
16+
let fields: Vec<&str> = line.split('\t').collect();
17+
if fields.len() < 13 {
18+
continue;
19+
}
20+
21+
// Find chain ID in tags
22+
let mut chain_id = None;
23+
for field in fields.iter().skip(12) {
24+
if field.starts_with("ch:Z:") {
25+
chain_id = Some(field[5..].to_string());
26+
break;
27+
}
28+
}
29+
30+
if let Some(cid) = chain_id {
31+
// Create a unique mapping ID from query and coordinates
32+
let mapping_id = format!("{}:{}-{}", fields[0], fields[2], fields[3]);
33+
chains.entry(cid).or_insert_with(Vec::new).push(mapping_id);
34+
}
35+
}
36+
37+
chains
38+
}
39+
40+
#[test]
41+
fn test_chaining_monotonicity() {
42+
// Test that larger gap values create supersets of chains from smaller gaps
43+
44+
// Run with different gap values
45+
let gaps = vec![10_000, 50_000, 100_000, 500_000, 1_000_000];
46+
let mut all_chains = Vec::new();
47+
48+
for gap in &gaps {
49+
let output = std::process::Command::new("./target/release/sweepga")
50+
.arg("data/scerevisiae8.fa.gz")
51+
.arg("-j")
52+
.arg(gap.to_string())
53+
.arg("-i")
54+
.arg("0") // No identity filter for this test
55+
.output()
56+
.expect("Failed to run sweepga");
57+
58+
let stdout = String::from_utf8_lossy(&output.stdout);
59+
let chains = parse_chains(&stdout);
60+
61+
// Count total members across all chains
62+
let total_members: usize = chains.values().map(|v| v.len()).sum();
63+
all_chains.push((gap, chains, total_members));
64+
}
65+
66+
// Verify monotonicity: larger gaps should have same or more chain members
67+
for i in 1..all_chains.len() {
68+
let (gap1, _, count1) = &all_chains[i-1];
69+
let (gap2, _, count2) = &all_chains[i];
70+
71+
assert!(
72+
count2 >= count1,
73+
"Chain membership should not decrease with larger gaps: \
74+
-j {} has {} members, but -j {} has {} members",
75+
gap1, count1, gap2, count2
76+
);
77+
}
78+
}
79+
80+
#[test]
81+
fn test_chain_identity_stability() {
82+
// Test that chain identities remain reasonable with large gaps
83+
84+
let gaps = vec![10_000, 100_000, 1_000_000];
85+
86+
for gap in gaps {
87+
let output = std::process::Command::new("./target/release/sweepga")
88+
.arg("data/scerevisiae8.fa.gz")
89+
.arg("-j")
90+
.arg(gap.to_string())
91+
.arg("-i")
92+
.arg("0.90") // 90% identity threshold
93+
.output()
94+
.expect("Failed to run sweepga");
95+
96+
let stderr = String::from_utf8_lossy(&output.stderr);
97+
98+
// Extract coverage from output
99+
let mut coverage = None;
100+
for line in stderr.lines() {
101+
if line.contains("Output:") && line.contains("Mb total") {
102+
// Parse: "Output: 2778.5 Mb total, 94.8% avg identity"
103+
if let Some(parts) = line.split("Output:").nth(1) {
104+
if let Some(mb_str) = parts.trim().split_whitespace().next() {
105+
coverage = mb_str.parse::<f64>().ok();
106+
}
107+
}
108+
}
109+
}
110+
111+
assert!(
112+
coverage.is_some(),
113+
"Failed to parse coverage for -j {}",
114+
gap
115+
);
116+
117+
let cov = coverage.unwrap();
118+
119+
// For 99% identical yeast genomes, we should get high coverage
120+
// even with large gap values
121+
// With 8 genomes (~12 Mb each) and 56 pairs (8×7, excluding self-mappings),
122+
// we expect ~672 Mb total. Getting >600 Mb means good coverage.
123+
assert!(
124+
cov > 600.0,
125+
"Coverage with -j {} is too low: {} Mb (expected >600 Mb for 8 yeast genomes, 56 pairs)",
126+
gap, cov
127+
);
128+
}
129+
}
130+
131+
#[test]
132+
fn test_nearest_neighbor_chaining() {
133+
// Test that mappings chain to their nearest neighbors, not distant ones
134+
135+
// Create a simple test case with three collinear mappings:
136+
// Mapping A: query 0-1000, target 0-1000
137+
// Mapping B: query 1100-2100, target 1100-2100 (100bp gap from A)
138+
// Mapping C: query 5000-6000, target 5000-6000 (3900bp gap from B)
139+
140+
// With -j 10000, all three should be chainable, but:
141+
// - A should chain to B (nearest at 100bp)
142+
// - B should chain to C (next nearest at 3900bp)
143+
// Result: Single chain A-B-C
144+
145+
// This is harder to test without creating custom PAF input
146+
// TODO: Create a test PAF file with known structure
147+
}
148+
149+
#[test]
150+
fn test_overlap_penalty() {
151+
// Test that overlapping mappings are penalized correctly
152+
153+
// Create test case with:
154+
// Mapping A: query 0-1000, target 0-1000
155+
// Mapping B: query 900-1900, target 900-1900 (100bp overlap with A)
156+
// Mapping C: query 1100-2100, target 1100-2100 (100bp gap from A)
157+
158+
// With overlap penalty, A should prefer to chain to C (gap) over B (overlap)
159+
160+
// TODO: Create test PAF file with overlapping mappings
161+
}

0 commit comments

Comments
 (0)