@@ -502,64 +502,15 @@ impl AlignmentScorer {
502502 )
503503 }
504504
505- /// Detect splice junction motif
506- ///
507- /// # Arguments
508- /// - `donor_pos`: Position of the donor site (first base after exon)
509- /// - `intron_len`: Length of the intron
510- /// - `genome`: Genome reference
511- ///
512- /// # Returns
513- /// The detected splice motif
505+ /// Detect splice junction motif (thin wrapper over the free function
506+ /// so `AlignmentScorer` callers keep working).
514507 pub fn detect_splice_motif (
515508 & self ,
516509 donor_pos : u64 ,
517510 intron_len : u32 ,
518511 genome : & Genome ,
519512 ) -> SpliceMotif {
520- // Read 2bp donor and 2bp acceptor from the FORWARD genome
521- // Donor: donor_pos, donor_pos+1
522- // Acceptor: donor_pos+intron_len-2, donor_pos+intron_len-1
523- // Always read forward strand — motif pattern determines the strand
524- let d1 = genome. get_base ( donor_pos) ;
525- let d2 = genome. get_base ( donor_pos + 1 ) ;
526- let a1 = genome. get_base ( donor_pos + intron_len as u64 - 2 ) ;
527- let a2 = genome. get_base ( donor_pos + intron_len as u64 - 1 ) ;
528-
529- // Check if all bases are valid
530- // A=0, C=1, G=2, T=3
531- match ( d1, d2, a1, a2) {
532- ( Some ( d1) , Some ( d2) , Some ( a1) , Some ( a2) ) => {
533- // Forward-strand motifs
534- // GT-AG: (2,3,0,2)
535- if d1 == 2 && d2 == 3 && a1 == 0 && a2 == 2 {
536- return SpliceMotif :: GtAg ;
537- }
538- // GC-AG: (2,1,0,2)
539- if d1 == 2 && d2 == 1 && a1 == 0 && a2 == 2 {
540- return SpliceMotif :: GcAg ;
541- }
542- // AT-AC: (0,3,0,1)
543- if d1 == 0 && d2 == 3 && a1 == 0 && a2 == 1 {
544- return SpliceMotif :: AtAc ;
545- }
546- // Reverse-strand motifs (reverse complement on forward genome)
547- // CT-AC: (1,3,0,1) — reverse complement of GT-AG
548- if d1 == 1 && d2 == 3 && a1 == 0 && a2 == 1 {
549- return SpliceMotif :: CtAc ;
550- }
551- // CT-GC: (1,3,2,1) — reverse complement of GC-AG
552- if d1 == 1 && d2 == 3 && a1 == 2 && a2 == 1 {
553- return SpliceMotif :: CtGc ;
554- }
555- // GT-AT: (2,3,0,3) — reverse complement of AT-AC
556- if d1 == 2 && d2 == 3 && a1 == 0 && a2 == 3 {
557- return SpliceMotif :: GtAt ;
558- }
559- SpliceMotif :: NonCanonical
560- }
561- _ => SpliceMotif :: NonCanonical ,
562- }
513+ detect_splice_motif ( donor_pos, intron_len, genome)
563514 }
564515
565516 /// Score a splice junction based on motif
@@ -573,6 +524,31 @@ impl AlignmentScorer {
573524 }
574525}
575526
527+ /// Detect splice junction motif from forward-strand bases at the intron
528+ /// boundaries. Stateless — exposed as a free function so both alignment
529+ /// scoring and `genomeGenerate` splice-junction insertion can share one
530+ /// truth table.
531+ ///
532+ /// `donor_pos` is the 0-based position of the intron's first base on the
533+ /// forward strand; `intron_len` is the intron length in bases.
534+ pub fn detect_splice_motif ( donor_pos : u64 , intron_len : u32 , genome : & Genome ) -> SpliceMotif {
535+ let d1 = genome. get_base ( donor_pos) ;
536+ let d2 = genome. get_base ( donor_pos + 1 ) ;
537+ let a1 = genome. get_base ( donor_pos + intron_len as u64 - 2 ) ;
538+ let a2 = genome. get_base ( donor_pos + intron_len as u64 - 1 ) ;
539+
540+ // Base encoding: A=0, C=1, G=2, T=3.
541+ match ( d1, d2, a1, a2) {
542+ ( Some ( 2 ) , Some ( 3 ) , Some ( 0 ) , Some ( 2 ) ) => SpliceMotif :: GtAg ,
543+ ( Some ( 2 ) , Some ( 1 ) , Some ( 0 ) , Some ( 2 ) ) => SpliceMotif :: GcAg ,
544+ ( Some ( 0 ) , Some ( 3 ) , Some ( 0 ) , Some ( 1 ) ) => SpliceMotif :: AtAc ,
545+ ( Some ( 1 ) , Some ( 3 ) , Some ( 0 ) , Some ( 1 ) ) => SpliceMotif :: CtAc ,
546+ ( Some ( 1 ) , Some ( 3 ) , Some ( 2 ) , Some ( 1 ) ) => SpliceMotif :: CtGc ,
547+ ( Some ( 2 ) , Some ( 3 ) , Some ( 0 ) , Some ( 3 ) ) => SpliceMotif :: GtAt ,
548+ _ => SpliceMotif :: NonCanonical ,
549+ }
550+ }
551+
576552/// Splice junction motif types
577553#[ derive( Debug , Clone , Copy , PartialEq , Eq , Hash ) ]
578554pub enum SpliceMotif {
0 commit comments