@@ -547,6 +547,118 @@ impl SamWriter {
547547 Ok ( records)
548548 }
549549
550+ /// Build transcriptome-space SAM records for `--quantMode TranscriptomeSAM`.
551+ ///
552+ /// Each projected `Transcript` is converted to a record where:
553+ /// * `chr_idx` is the transcript index (matches the transcriptome
554+ /// header's @SQ order),
555+ /// * `genome_start` is the 0-based transcript-space position (→ POS =
556+ /// t-space_pos + 1),
557+ /// * splice-aware tags (`jM`, `jI`, `XS`) are not emitted (splices
558+ /// collapse in t-space and have no meaning there),
559+ /// * standard tags (`NH`, `HI`, `AS`, `NM`/`nM`, `MD`) are emitted per
560+ /// the `--outSAMattributes` set.
561+ ///
562+ /// `primary_hit_idx` (0-based) is the projected alignment selected as
563+ /// primary (randomly among ties per STAR's `rngUniformReal0to1`). All
564+ /// other records get the SECONDARY flag (0x100).
565+ #[ allow( clippy:: too_many_arguments) ]
566+ pub fn build_transcriptome_records (
567+ read_name : & str ,
568+ read_seq : & [ u8 ] ,
569+ read_qual : & [ u8 ] ,
570+ projected : & [ Transcript ] ,
571+ mapq : u8 ,
572+ params : & Parameters ,
573+ primary_hit_idx : usize ,
574+ ) -> Result < Vec < RecordBuf > , Error > {
575+ if projected. is_empty ( ) {
576+ return Ok ( Vec :: new ( ) ) ;
577+ }
578+ let mut attrs = params. sam_attribute_set ( ) ;
579+ // Splice tags are meaningless in t-space.
580+ attrs. remove ( "jM" ) ;
581+ attrs. remove ( "jI" ) ;
582+ attrs. remove ( "XS" ) ;
583+ // MD-tag would require the transcript's t-space reference which we do
584+ // not precompute; drop it to keep this writer simple. STAR also does
585+ // not emit MD for transcriptome SAM.
586+ attrs. remove ( "MD" ) ;
587+
588+ let n_alignments = projected. len ( ) ;
589+ let mut records = Vec :: with_capacity ( n_alignments) ;
590+
591+ for ( hit_idx, t) in projected. iter ( ) . enumerate ( ) {
592+ let mut record = RecordBuf :: default ( ) ;
593+ record. name_mut ( ) . replace ( read_name. into ( ) ) ;
594+
595+ // FLAGS: SECONDARY if not the primary; REVERSE if is_reverse.
596+ let mut flags = sam:: alignment:: record:: Flags :: empty ( ) ;
597+ if t. is_reverse {
598+ flags |= sam:: alignment:: record:: Flags :: REVERSE_COMPLEMENTED ;
599+ }
600+ if hit_idx != primary_hit_idx {
601+ flags |= sam:: alignment:: record:: Flags :: SECONDARY ;
602+ }
603+ * record. flags_mut ( ) = flags;
604+
605+ // RNAME = transcript index (maps to transcriptome header).
606+ * record. reference_sequence_id_mut ( ) = Some ( t. chr_idx ) ;
607+
608+ // POS = t-space position + 1 (1-based).
609+ let pos = ( t. genome_start + 1 ) as usize ;
610+ * record. alignment_start_mut ( ) = Some ( pos. try_into ( ) . map_err ( |e| {
611+ Error :: Alignment ( format ! ( "invalid t-space position {}: {}" , pos, e) )
612+ } ) ?) ;
613+
614+ // MAPQ
615+ * record. mapping_quality_mut ( ) = MappingQuality :: new ( mapq) ;
616+
617+ // CIGAR (already has N ops stripped by align_to_transcripts)
618+ * record. cigar_mut ( ) = convert_cigar ( & t. cigar ) ?;
619+
620+ // SEQ / QUAL — STAR writes the original-orientation sequence when
621+ // FLAG 0x10 is unset (forward alignment in t-space) and RC'd seq
622+ // when 0x10 is set. We follow SAM spec: SEQ matches the CIGAR's
623+ // read orientation, so we mirror `transcript_to_record`.
624+ if t. is_reverse {
625+ let seq_bytes: Vec < u8 > = read_seq
626+ . iter ( )
627+ . rev ( )
628+ . map ( |& b| decode_base ( complement_base ( b) ) )
629+ . collect ( ) ;
630+ * record. sequence_mut ( ) = Sequence :: from ( seq_bytes) ;
631+ let mut qual = read_qual. to_vec ( ) ;
632+ qual. reverse ( ) ;
633+ * record. quality_scores_mut ( ) = QualityScores :: from ( qual) ;
634+ } else {
635+ let seq_bytes: Vec < u8 > = read_seq. iter ( ) . map ( |& b| decode_base ( b) ) . collect ( ) ;
636+ * record. sequence_mut ( ) = Sequence :: from ( seq_bytes) ;
637+ * record. quality_scores_mut ( ) = QualityScores :: from ( read_qual. to_vec ( ) ) ;
638+ }
639+
640+ // Optional tags
641+ let data = record. data_mut ( ) ;
642+ if attrs. contains ( "NH" ) {
643+ data. insert ( Tag :: ALIGNMENT_HIT_COUNT , Value :: from ( n_alignments as i32 ) ) ;
644+ }
645+ if attrs. contains ( "HI" ) {
646+ // HI is 1-based; primary = 1, secondaries > 1 in emission order.
647+ data. insert ( Tag :: HIT_INDEX , Value :: from ( ( hit_idx + 1 ) as i32 ) ) ;
648+ }
649+ if attrs. contains ( "AS" ) {
650+ data. insert ( Tag :: ALIGNMENT_SCORE , Value :: from ( t. score ) ) ;
651+ }
652+ if attrs. contains ( "NM" ) || attrs. contains ( "nM" ) {
653+ data. insert ( Tag :: new ( b'n' , b'M' ) , Value :: from ( t. n_mismatch as i32 ) ) ;
654+ }
655+
656+ records. push ( record) ;
657+ }
658+
659+ Ok ( records)
660+ }
661+
550662 /// Build unmapped paired records (both mates unmapped)
551663 pub fn build_paired_unmapped_records (
552664 read_name : & str ,
@@ -600,21 +712,33 @@ impl SamWriter {
600712
601713/// Build paired SAM header from genome
602714pub fn build_sam_header ( genome : & Genome , params : & Parameters ) -> Result < sam:: Header , Error > {
715+ build_sam_header_from_refs (
716+ ( 0 ..genome. n_chr_real )
717+ . map ( |i| ( genome. chr_name [ i] . as_str ( ) , genome. chr_length [ i] as usize ) ) ,
718+ params,
719+ )
720+ }
721+
722+ /// Build a SAM header from an iterator of (name, length) reference pairs.
723+ ///
724+ /// Used both for the genome header (chromosomes) and the transcriptome header
725+ /// (one @SQ per transcript, length = transcript-space length).
726+ pub fn build_sam_header_from_refs < ' a , I > ( refs : I , params : & Parameters ) -> Result < sam:: Header , Error >
727+ where
728+ I : IntoIterator < Item = ( & ' a str , usize ) > ,
729+ {
603730 let mut builder = sam:: Header :: builder ( ) ;
604731
605732 // @HD line (default version and unsorted)
606733 builder = builder. set_header ( Default :: default ( ) ) ;
607734
608- // @SQ lines for each chromosome
609- for i in 0 ..genome. n_chr_real {
610- let name = & genome. chr_name [ i] ;
611- let length = genome. chr_length [ i] as usize ;
612-
735+ // @SQ lines for each reference
736+ for ( name, length) in refs {
613737 let length_nz = NonZeroUsize :: new ( length)
614- . ok_or_else ( || Error :: Index ( format ! ( "chromosome {} has zero length" , name) ) ) ?;
738+ . ok_or_else ( || Error :: Index ( format ! ( "reference {} has zero length" , name) ) ) ?;
615739
616740 builder = builder. add_reference_sequence (
617- name. as_str ( ) ,
741+ name,
618742 Map :: < sam:: header:: record:: value:: map:: ReferenceSequence > :: new ( length_nz) ,
619743 ) ;
620744 }
0 commit comments