@@ -5,12 +5,14 @@ use crate::align::stitch::{
55 PE_SPACER_BASE , cluster_seeds, finalize_transcript, split_combined_wt, stitch_seeds_core,
66 stitch_seeds_with_jdb_debug,
77} ;
8- use crate :: align:: transcript:: { Exon , Transcript } ;
8+ use crate :: align:: transcript:: { Exon , KindExt as _ , Transcript } ;
99use crate :: error:: Error ;
1010use crate :: index:: GenomeIndex ;
1111use crate :: params:: { IntronMotifFilter , IntronStrandFilter , Parameters } ;
1212use crate :: stats:: UnmappedReason ;
13+ use noodles:: sam:: alignment:: record:: cigar;
1314use rand:: { SeedableRng , rngs:: StdRng , seq:: SliceRandom } ;
15+ use std:: cmp:: Ordering ;
1416use std:: hash:: { DefaultHasher , Hash , Hasher } ;
1517
1618/// Derive a deterministic per-read RNG seed from `run_rng_seed` + the read name.
@@ -323,7 +325,6 @@ pub fn align_read(
323325 } else {
324326 "unknown"
325327 } ;
326- let cigar_str: String = t. cigar . iter ( ) . map ( |op| format ! ( "{}" , op) ) . collect ( ) ;
327328 eprintln ! (
328329 " transcript[{}]: chr={}:{}-{} ({}) score={} mm={} junctions={} cigar={}" ,
329330 ti,
@@ -334,7 +335,7 @@ pub fn align_read(
334335 t. score,
335336 t. n_mismatch,
336337 t. n_junction,
337- cigar_str
338+ t . cigar_string ( )
338339 ) ;
339340 }
340341 }
@@ -351,20 +352,9 @@ pub fn align_read(
351352
352353 // Deduplicate transcripts with identical genomic coordinates AND CIGAR.
353354 transcripts. sort_by ( |a, b| {
354- (
355- a. chr_idx ,
356- a. genome_start ,
357- a. genome_end ,
358- a. is_reverse ,
359- & a. cigar ,
360- )
361- . cmp ( & (
362- b. chr_idx ,
363- b. genome_start ,
364- b. genome_end ,
365- b. is_reverse ,
366- & b. cigar ,
367- ) )
355+ ( a. chr_idx , a. genome_start , a. genome_end , a. is_reverse )
356+ . cmp ( & ( b. chr_idx , b. genome_start , b. genome_end , b. is_reverse ) )
357+ . then_with ( || cmp_cigar ( & a. cigar , & b. cigar ) )
368358 . then_with ( || b. score . cmp ( & a. score ) )
369359 } ) ;
370360 transcripts. dedup_by ( |a, b| {
@@ -467,13 +457,13 @@ pub fn align_read(
467457
468458 // Absolute matched bases
469459 let n_matched = t. n_matched ( ) ;
470- if n_matched < params. out_filter_match_nmin {
460+ if n_matched < params. out_filter_match_nmin as usize {
471461 * filter_reasons. entry ( "match_min" ) . or_insert ( 0 ) += 1 ;
472462 return false ;
473463 }
474464
475465 // Relative matched bases: STAR casts to uint (u32)
476- if n_matched < ( params. out_filter_match_nmin_over_lread * lread_m1) as u32 {
466+ if ( n_matched as f64 ) < params. out_filter_match_nmin_over_lread * lread_m1 {
477467 * filter_reasons. entry ( "match_min_relative" ) . or_insert ( 0 ) += 1 ;
478468 return false ;
479469 }
@@ -519,7 +509,6 @@ pub fn align_read(
519509 match motif. implied_strand ( ) {
520510 Some ( '+' ) => has_plus = true ,
521511 Some ( '-' ) => has_minus = true ,
522- None => { }
523512 _ => { }
524513 }
525514 }
@@ -633,7 +622,6 @@ pub fn align_read(
633622 } else {
634623 "unknown"
635624 } ;
636- let cigar_str: String = t. cigar . iter ( ) . map ( |op| format ! ( "{}" , op) ) . collect ( ) ;
637625 eprintln ! (
638626 " FINAL[{}]: chr={}:{}-{} ({}) score={} mm={} junctions={} cigar={}" ,
639627 i,
@@ -644,7 +632,7 @@ pub fn align_read(
644632 t. score,
645633 t. n_mismatch,
646634 t. n_junction,
647- cigar_str
635+ t . cigar_string ( )
648636 ) ;
649637 }
650638 }
@@ -989,8 +977,8 @@ pub fn align_paired_read(
989977 }
990978 b. combined_wt_score
991979 . cmp ( & a. combined_wt_score )
992- . then_with ( || a. mate1_transcript . cigar . cmp ( & b. mate1_transcript . cigar ) )
993- . then_with ( || a. mate2_transcript . cigar . cmp ( & b. mate2_transcript . cigar ) )
980+ . then_with ( || cmp_cigar ( & a. mate1_transcript . cigar , & b. mate1_transcript . cigar ) )
981+ . then_with ( || cmp_cigar ( & a. mate2_transcript . cigar , & b. mate2_transcript . cigar ) )
994982 } ) ;
995983 joint_pairs. dedup_by ( |a, b| {
996984 a. mate1_transcript . chr_idx == b. mate1_transcript . chr_idx
@@ -1234,6 +1222,18 @@ pub fn align_paired_read(
12341222 }
12351223}
12361224
1225+ /// Comparator for deduplicating structs containing CIGAR ops via sort→dedup
1226+ fn cmp_cigar ( a : & [ cigar:: Op ] , b : & [ cigar:: Op ] ) -> Ordering {
1227+ a. len ( ) . cmp ( & b. len ( ) ) . then_with ( || {
1228+ ( a. iter ( ) . zip ( b. iter ( ) ) )
1229+ . find_map ( |( a, b) | {
1230+ let ord = ( a. char ( ) , a. len ( ) ) . cmp ( & ( b. char ( ) , b. len ( ) ) ) ;
1231+ ( ord != Ordering :: Equal ) . then_some ( ord)
1232+ } )
1233+ . unwrap_or ( Ordering :: Equal )
1234+ } )
1235+ }
1236+
12371237/// Attempt to pair two per-mate transcripts into a PairedAlignment.
12381238///
12391239/// Returns `None` if the mates are incompatible (same strand, different chr, too far, etc.).
@@ -1416,18 +1416,18 @@ fn extract_junctions_from_cigar(t: &Transcript) -> Vec<(u64, u64)> {
14161416 let mut junctions = Vec :: new ( ) ;
14171417 let mut genome_pos = t. genome_start ;
14181418 for op in & t. cigar {
1419- use crate :: align :: transcript :: CigarOp ;
1420- match op {
1421- CigarOp :: Match ( n ) | CigarOp :: Equal ( n ) | CigarOp :: Diff ( n ) | CigarOp :: Del ( n ) => {
1422- genome_pos += * n as u64 ;
1419+ use noodles :: sam :: alignment :: record :: cigar :: op :: Kind ;
1420+ match op. kind ( ) {
1421+ Kind :: Match | Kind :: SequenceMatch | Kind :: SequenceMismatch | Kind :: Deletion => {
1422+ genome_pos += op . len ( ) as u64 ;
14231423 }
1424- CigarOp :: RefSkip ( n ) => {
1424+ Kind :: Skip => {
14251425 let donor = genome_pos;
1426- let acceptor = genome_pos + * n as u64 ;
1426+ let acceptor = genome_pos + op . len ( ) as u64 ;
14271427 junctions. push ( ( donor, acceptor) ) ;
14281428 genome_pos = acceptor;
14291429 }
1430- CigarOp :: Ins ( _ ) | CigarOp :: SoftClip ( _ ) | CigarOp :: HardClip ( _ ) => { }
1430+ Kind :: Insertion | Kind :: SoftClip | Kind :: HardClip | Kind :: Pad => { }
14311431 }
14321432 }
14331433 junctions
@@ -1479,6 +1479,7 @@ mod tests {
14791479 use crate :: index:: sa_index:: SaIndex ;
14801480 use crate :: index:: suffix_array:: SuffixArray ;
14811481 use clap:: Parser ;
1482+ use noodles:: sam:: alignment:: record:: cigar;
14821483
14831484 fn make_test_params ( ) -> Parameters {
14841485 // Parse empty args to get default parameters
@@ -1537,8 +1538,7 @@ mod tests {
15371538
15381539 #[ test]
15391540 fn combined_transcript_for_projection_rewrites_mate2_ifrag ( ) {
1540- use crate :: align:: transcript:: CigarOp ;
1541-
1541+ use cigar:: op:: { Kind , Op } ;
15421542 let make_tr = |gs : u64 , ge : u64 , rs : usize , re : usize | Transcript {
15431543 chr_idx : 0 ,
15441544 genome_start : gs,
@@ -1551,7 +1551,7 @@ mod tests {
15511551 read_end: re,
15521552 i_frag: 0 ,
15531553 } ] ,
1554- cigar : vec ! [ CigarOp :: Match ( ( ge - gs) as u32 ) ] ,
1554+ cigar : vec ! [ Op :: new ( Kind :: Match , ( ge - gs) as usize ) ] ,
15551555 score : 100 ,
15561556 n_mismatch : 0 ,
15571557 n_gap : 0 ,
@@ -1654,7 +1654,8 @@ mod tests {
16541654
16551655 #[ test]
16561656 fn test_check_proper_pair_distance ( ) {
1657- use crate :: align:: transcript:: { CigarOp , Exon } ;
1657+ use crate :: align:: transcript:: Exon ;
1658+ use cigar:: op:: { Kind , Op } ;
16581659
16591660 let params = make_test_params ( ) ;
16601661
@@ -1671,7 +1672,7 @@ mod tests {
16711672 read_end: 100 ,
16721673 i_frag: 0 ,
16731674 } ] ,
1674- cigar : vec ! [ CigarOp :: Match ( 100 ) ] ,
1675+ cigar : vec ! [ Op :: new ( Kind :: Match , 100 ) ] ,
16751676 score : 100 ,
16761677 n_mismatch : 0 ,
16771678 n_gap : 0 ,
@@ -1693,7 +1694,7 @@ mod tests {
16931694 read_end: 100 ,
16941695 i_frag: 0 ,
16951696 } ] ,
1696- cigar : vec ! [ CigarOp :: Match ( 100 ) ] ,
1697+ cigar : vec ! [ Op :: new ( Kind :: Match , 100 ) ] ,
16971698 score : 100 ,
16981699 n_mismatch : 0 ,
16991700 n_gap : 0 ,
@@ -1709,7 +1710,8 @@ mod tests {
17091710
17101711 #[ test]
17111712 fn test_check_proper_pair_too_far ( ) {
1712- use crate :: align:: transcript:: { CigarOp , Exon } ;
1713+ use crate :: align:: transcript:: Exon ;
1714+ use cigar:: op:: { Kind , Op } ;
17131715
17141716 let mut params = make_test_params ( ) ;
17151717 params. align_mates_gap_max = 100 ;
@@ -1726,7 +1728,7 @@ mod tests {
17261728 read_end: 100 ,
17271729 i_frag: 0 ,
17281730 } ] ,
1729- cigar : vec ! [ CigarOp :: Match ( 100 ) ] ,
1731+ cigar : vec ! [ Op :: new ( Kind :: Match , 100 ) ] ,
17301732 score : 100 ,
17311733 n_mismatch : 0 ,
17321734 n_gap : 0 ,
@@ -1748,7 +1750,7 @@ mod tests {
17481750 read_end: 100 ,
17491751 i_frag: 0 ,
17501752 } ] ,
1751- cigar : vec ! [ CigarOp :: Match ( 100 ) ] ,
1753+ cigar : vec ! [ Op :: new ( Kind :: Match , 100 ) ] ,
17521754 score : 100 ,
17531755 n_mismatch : 0 ,
17541756 n_gap : 0 ,
@@ -1764,7 +1766,8 @@ mod tests {
17641766
17651767 #[ test]
17661768 fn test_calculate_insert_size_positive ( ) {
1767- use crate :: align:: transcript:: { CigarOp , Exon } ;
1769+ use crate :: align:: transcript:: Exon ;
1770+ use cigar:: op:: { Kind , Op } ;
17681771
17691772 // Mate1 is leftmost
17701773 let t1 = Transcript {
@@ -1779,7 +1782,7 @@ mod tests {
17791782 read_end: 100 ,
17801783 i_frag: 0 ,
17811784 } ] ,
1782- cigar : vec ! [ CigarOp :: Match ( 100 ) ] ,
1785+ cigar : vec ! [ Op :: new ( Kind :: Match , 100 ) ] ,
17831786 score : 100 ,
17841787 n_mismatch : 0 ,
17851788 n_gap : 0 ,
@@ -1801,7 +1804,7 @@ mod tests {
18011804 read_end: 100 ,
18021805 i_frag: 0 ,
18031806 } ] ,
1804- cigar : vec ! [ CigarOp :: Match ( 100 ) ] ,
1807+ cigar : vec ! [ Op :: new ( Kind :: Match , 100 ) ] ,
18051808 score : 100 ,
18061809 n_mismatch : 0 ,
18071810 n_gap : 0 ,
@@ -1817,8 +1820,9 @@ mod tests {
18171820
18181821 #[ test]
18191822 fn test_strand_consistency_filter ( ) {
1820- use crate :: align:: transcript:: { CigarOp , Exon , Transcript } ;
1823+ use crate :: align:: transcript:: { Exon , Transcript } ;
18211824 use crate :: params:: IntronStrandFilter ;
1825+ use cigar:: op:: { Kind , Op } ;
18221826
18231827 // Create a transcript with conflicting strand motifs (mixed + and - within one transcript)
18241828 let t_inconsistent = Transcript {
@@ -1833,7 +1837,7 @@ mod tests {
18331837 read_end: 100 ,
18341838 i_frag: 0 ,
18351839 } ] ,
1836- cigar : vec ! [ CigarOp :: Match ( 100 ) ] ,
1840+ cigar : vec ! [ Op :: new ( Kind :: Match , 100 ) ] ,
18371841 score : 100 ,
18381842 n_mismatch : 0 ,
18391843 n_gap : 0 ,
@@ -1856,7 +1860,7 @@ mod tests {
18561860 read_end: 100 ,
18571861 i_frag: 0 ,
18581862 } ] ,
1859- cigar : vec ! [ CigarOp :: Match ( 100 ) ] ,
1863+ cigar : vec ! [ Op :: new ( Kind :: Match , 100 ) ] ,
18601864 score : 100 ,
18611865 n_mismatch : 0 ,
18621866 n_gap : 0 ,
@@ -1917,7 +1921,8 @@ mod tests {
19171921
19181922 #[ test]
19191923 fn test_calculate_insert_size_negative ( ) {
1920- use crate :: align:: transcript:: { CigarOp , Exon } ;
1924+ use crate :: align:: transcript:: Exon ;
1925+ use cigar:: op:: { Kind , Op } ;
19211926
19221927 // RF pair: mate1 reverse (right side), mate2 forward (left side).
19231928 // STAR reverse cluster: tlen = mate1.genome_end - mate2.genome_start = 1300 - 1000 = 300.
@@ -1934,7 +1939,7 @@ mod tests {
19341939 read_end: 100 ,
19351940 i_frag: 0 ,
19361941 } ] ,
1937- cigar : vec ! [ CigarOp :: Match ( 100 ) ] ,
1942+ cigar : vec ! [ Op :: new ( Kind :: Match , 100 ) ] ,
19381943 score : 100 ,
19391944 n_mismatch : 0 ,
19401945 n_gap : 0 ,
@@ -1956,7 +1961,7 @@ mod tests {
19561961 read_end: 100 ,
19571962 i_frag: 0 ,
19581963 } ] ,
1959- cigar : vec ! [ CigarOp :: Match ( 100 ) ] ,
1964+ cigar : vec ! [ Op :: new ( Kind :: Match , 100 ) ] ,
19601965 score : 100 ,
19611966 n_mismatch : 0 ,
19621967 n_gap : 0 ,
@@ -1973,7 +1978,8 @@ mod tests {
19731978 #[ test]
19741979 fn test_noncanonical_unannotated_filter ( ) {
19751980 use crate :: align:: score:: SpliceMotif ;
1976- use crate :: align:: transcript:: { CigarOp , Exon , Transcript } ;
1981+ use crate :: align:: transcript:: { Exon , Transcript } ;
1982+ use cigar:: op:: { Kind , Op } ;
19771983
19781984 // Helper: check if a transcript would be filtered by RemoveNoncanonicalUnannotated
19791985 // (mirrors the logic in the retain closure)
@@ -1996,7 +2002,7 @@ mod tests {
19962002 read_end: 100 ,
19972003 i_frag: 0 ,
19982004 } ] ,
1999- cigar : vec ! [ CigarOp :: Match ( 100 ) ] ,
2005+ cigar : vec ! [ Op :: new ( Kind :: Match , 100 ) ] ,
20002006 score : 100 ,
20012007 n_mismatch : 0 ,
20022008 n_gap : 0 ,
@@ -2067,7 +2073,8 @@ mod tests {
20672073
20682074 #[ test]
20692075 fn test_paired_alignment_result_enum_variants ( ) {
2070- use crate :: align:: transcript:: { CigarOp , Exon } ;
2076+ use crate :: align:: transcript:: Exon ;
2077+ use cigar:: op:: { Kind , Op } ;
20712078
20722079 let transcript = Transcript {
20732080 chr_idx : 0 ,
@@ -2081,7 +2088,7 @@ mod tests {
20812088 read_end: 100 ,
20822089 i_frag: 0 ,
20832090 } ] ,
2084- cigar : vec ! [ CigarOp :: Match ( 100 ) ] ,
2091+ cigar : vec ! [ Op :: new ( Kind :: Match , 100 ) ] ,
20852092 score : 100 ,
20862093 n_mismatch : 0 ,
20872094 n_gap : 0 ,
0 commit comments