Skip to content

Commit 4e31e0e

Browse files
pinin4fjordsclaude
andcommitted
fix(stats): route no-seeds-no-clusters reads to UnmappedReason::Other
The UnmappedReason enum had all four STAR-compatible variants (Other, TooShort, TooManyMismatches, TooManyLoci), but reads that failed before any transcript was generated were being recorded as TooShort. Log.final.out's "Reads unmapped: other" therefore always read 0 even when STAR would have classified the read in that bucket, inflating "too short" by the same count. Fix the routing at the no-seeds / no-clusters call site so reads without alignments are recorded as Other. Totals are unchanged; only the bucketing changes. Fixes #48 Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 70be24d commit 4e31e0e

2 files changed

Lines changed: 53 additions & 5 deletions

File tree

src/align/read_align.rs

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,7 @@ pub fn align_read(
289289
// Step 3: Stitch seeds within each cluster
290290
let scorer = AlignmentScorer::from_params(params);
291291
let mut transcripts = Vec::new();
292+
let mut any_transcript_generated = false;
292293
// Collect all raw (pre-dedup) transcripts for chimericDetectionOld (Tier 1).
293294
let mut all_raw_transcripts: Vec<crate::align::transcript::Transcript> = Vec::new();
294295

@@ -338,6 +339,9 @@ pub fn align_read(
338339
);
339340
}
340341
}
342+
if !cluster_transcripts.is_empty() {
343+
any_transcript_generated = true;
344+
}
341345
if params.chim_segment_min > 0 {
342346
all_raw_transcripts.extend(cluster_transcripts.iter().cloned());
343347
}
@@ -650,8 +654,11 @@ pub fn align_read(
650654
}
651655

652656
let unmapped_reason = if transcripts.is_empty() {
653-
// Transcripts were generated by DP but all filtered out
654-
Some(UnmappedReason::TooShort)
657+
if any_transcript_generated {
658+
Some(UnmappedReason::TooShort)
659+
} else {
660+
Some(UnmappedReason::Other)
661+
}
655662
} else {
656663
None
657664
};
@@ -814,6 +821,7 @@ pub fn align_paired_read(
814821
// as the search pool for chimericDetectionOld (Tier 1) on each mate independently.
815822
let mut all_m1_transcripts: Vec<Transcript> = Vec::new();
816823
let mut all_m2_transcripts: Vec<Transcript> = Vec::new();
824+
let mut any_transcript_generated = false;
817825

818826
// Stitch combined clusters, split WTs by mate_id, finalize each half
819827
for cluster in clusters.iter().take(params.align_windows_per_read_nmax) {
@@ -828,6 +836,10 @@ pub fn align_paired_read(
828836
debug_name,
829837
)?;
830838

839+
if !wts.is_empty() {
840+
any_transcript_generated = true;
841+
}
842+
831843
for wt in &wts {
832844
let split_result =
833845
split_combined_wt(wt, len1, len2, stitch_is_reverse, scorer.align_intron_min);
@@ -1233,7 +1245,14 @@ pub fn align_paired_read(
12331245
))
12341246
}
12351247
}
1236-
(None, None) => Ok((Vec::new(), pe_chimeric, 0, Some(UnmappedReason::TooShort))),
1248+
(None, None) => {
1249+
let reason = if any_transcript_generated {
1250+
UnmappedReason::TooShort
1251+
} else {
1252+
UnmappedReason::Other
1253+
};
1254+
Ok((Vec::new(), pe_chimeric, 0, Some(reason)))
1255+
}
12371256
}
12381257
}
12391258

@@ -1652,7 +1671,7 @@ mod tests {
16521671
let (paired_alns, _chimeric, n_for_mapq, unmapped_reason) = result.unwrap();
16531672
assert_eq!(paired_alns.len(), 0);
16541673
assert_eq!(n_for_mapq, 0);
1655-
assert!(unmapped_reason.is_some());
1674+
assert_eq!(unmapped_reason, Some(UnmappedReason::Other));
16561675
}
16571676

16581677
#[test]
@@ -2065,7 +2084,7 @@ mod tests {
20652084
align_paired_read(&mate1, &mate2, "test", &index, &params).unwrap();
20662085
assert!(results.is_empty(), "Both unmapped should return empty Vec");
20672086
assert_eq!(n_for_mapq, 0);
2068-
assert!(unmapped_reason.is_some(), "Should have unmapped reason");
2087+
assert_eq!(unmapped_reason, Some(UnmappedReason::Other));
20692088
}
20702089

20712090
#[test]

src/stats.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -827,6 +827,35 @@ mod tests {
827827
assert_eq!(stats.unmapped_mismatches.load(Ordering::Relaxed), 1);
828828
}
829829

830+
#[test]
831+
fn test_unmapped_other_routed_and_totals_conserved() {
832+
let stats = AlignmentStats::new();
833+
834+
let n_other = 7;
835+
let n_short = 3;
836+
let n_mismatches = 2;
837+
for _ in 0..n_other {
838+
stats.record_unmapped_reason(UnmappedReason::Other);
839+
}
840+
for _ in 0..n_short {
841+
stats.record_unmapped_reason(UnmappedReason::TooShort);
842+
}
843+
for _ in 0..n_mismatches {
844+
stats.record_unmapped_reason(UnmappedReason::TooManyMismatches);
845+
}
846+
stats.record_unmapped_reason(UnmappedReason::TooManyLoci);
847+
848+
let unmapped_other = stats.unmapped_other.load(Ordering::Relaxed);
849+
let unmapped_short = stats.unmapped_short.load(Ordering::Relaxed);
850+
let unmapped_mismatches = stats.unmapped_mismatches.load(Ordering::Relaxed);
851+
852+
assert!(unmapped_other > 0, "Other bucket must be populated");
853+
assert_eq!(unmapped_other, n_other);
854+
assert_eq!(unmapped_short, n_short);
855+
assert_eq!(unmapped_mismatches, n_mismatches);
856+
assert_eq!(unmapped_other + unmapped_short, (n_other + n_short));
857+
}
858+
830859
#[test]
831860
fn test_splice_motif_aggregation() {
832861
use crate::align::score::SpliceMotif;

0 commit comments

Comments
 (0)