@@ -43,33 +43,54 @@ import java.io.Closeable
43
43
import scala .math .{max , min }
44
44
45
45
46
-
47
- case class Supplementary ( refName : String , start : Int , positiveStrand : Boolean , cigar : Cigar , mapq : Int , nm : Int ) {
46
+ /** Stores information about a single alignment, using the fields of the SAM SA tag.
+   *
+   * @param refIndex       index of the reference sequence in the header's sequence dictionary; the companion
+   *                       object uses `Int.MaxValue` when the read (or mate) is unmapped
+   * @param start          alignment start; the companion object uses `Int.MaxValue` when unmapped
+   * @param positiveStrand true if the alignment is on the positive strand
+   * @param cigar          the alignment's CIGAR, or `None` when there is no alignment
+   * @param mapq           the mapping quality
+   * @param nm             the value reported in the NM field of the SA tag
+   */
47
+ private [bam] case class AlignmentInfo ( refIndex : Int , start : Int , positiveStrand : Boolean , cigar : Option [ Cigar ] , mapq : Int , nm : Int ) {
48
48
def negativeStrand : Boolean = ! positiveStrand // convenience inverse of positiveStrand
49
- def refIndex (header : SAMFileHeader ): Int = header.getSequence(refName).getSequenceIndex
50
-
51
- def end : Int = start + cigar.lengthOnTarget - 1
52
- def unclippedStart : Int = {
53
- SAMUtils .getUnclippedStart(start, cigar.toHtsjdkCigar )
49
+ def refName (header : SAMFileHeader ): String = Option(header.getSequence(refIndex)).map(_.getSequenceName).getOrElse("*") // "*" when refIndex is not in the header (e.g. the unmapped sentinel) instead of a NullPointerException
50
+ def mapped : Boolean = cigar.isDefined // an alignment is considered mapped iff a CIGAR is present
51
+ def unmapped : Boolean = ! mapped
52
+ private def _cigar : Cigar = cigar.getOrElse { // CIGAR accessor for the coordinate methods below; fails loudly when absent
53
+ throw new IllegalStateException ( s " Cannot get cigar for AlignmentInfo: ${ this } " )
54
54
}
55
-
56
- def unclippedEnd : Int = {
57
- SAMUtils .getUnclippedEnd(end, cigar.toHtsjdkCigar)
55
+ def end : Int = start + _cigar.lengthOnTarget - 1 // throws IllegalStateException when cigar is None
56
+ def unclippedStart : Int = _cigar.unclippedStart(start) // throws IllegalStateException when cigar is None
57
+ def unclippedEnd : Int = _cigar.unclippedEnd(end) // throws IllegalStateException when cigar is None
58
+ /** Returns a formatted alignment as per the SA tag: `(rname ,pos ,strand ,CIGAR ,mapQ ,NM ;)+`. The reference name and the CIGAR are each written as `*` when unavailable. */
59
+ def toSA (header : SAMFileHeader ): String = {
60
+ val strand = if (positiveStrand) '+' else '-'
61
+ val refName = this.refName(header) // reuse the null-safe lookup rather than dereferencing the header entry directly
62
+ val cigar = this .cigar.getOrElse(" *" )
63
+ f " ${refName}, ${start}, ${strand}, ${cigar}, ${mapq}, ${nm}"
58
64
}
59
65
}
60
66
61
- object Supplementary {
62
- /** Returns a formatted alignment as per the SA tag: `(rname ,pos ,strand ,CIGAR ,mapQ ,NM ;)+` */
63
- def toString (rec : SamRecord ): String = {
64
- val strand = if (rec.positiveStrand) '+' else '-'
65
- f " ${rec.refName}, ${rec.start}, ${strand}, ${rec.cigar}, ${rec.mapq}, ${rec.getOrElse(SAMTag .NM .name(),0 )}"
67
+ private [bam] object AlignmentInfo {
68
+ /** Builds an [[AlignmentInfo]] from a record. When `mate` is true the mate's alignment is described using the
+   * record's mate fields (and mate CIGAR); otherwise the record's own alignment is described. Unmapped alignments
+   * get `Int.MaxValue` for the reference index and start, and no CIGAR.
+   *
+   * @throws IllegalStateException if `mate` is true, the mate is mapped, and the record has no mate CIGAR
+   */
+ def apply (rec : SamRecord , mate : Boolean = false ): AlignmentInfo = {
69
+ if (mate) {
70
+ val mateRefIndex = if (rec.unpaired || rec.mateUnmapped) Int .MaxValue else rec.mateRefIndex
71
+ val mateCigar = if (rec.unpaired || rec.mateUnmapped) None else Some (rec.mateCigar.getOrElse {
72
+ throw new IllegalStateException (s " Mate CIGAR (Tag 'MC') not found for $rec, consider using SetMateInformation. " )
73
+ })
74
+ // NB: a mapped mate without a mate CIGAR has already failed above, so mateCigar is only None for unpaired reads or unmapped mates
75
+ val mateStart = if (rec.unpaired || rec.mateUnmapped) Int .MaxValue else rec.mateStart
76
+ val mateStrand = if (rec.unpaired || rec.mateUnmapped) true else rec.matePositiveStrand
77
+ AlignmentInfo (mateRefIndex, mateStart, mateStrand, mateCigar, rec.mapq, 0 ) // NOTE(review): passes this record's mapq rather than the mate's (e.g. the MQ tag) - confirm this is intended
78
+ } else {
79
+ val refIndex = if (rec.unmapped) Int .MaxValue else rec.refIndex
80
+ val positiveStrand = rec.positiveStrand
81
+ val start = if (rec.unmapped) Int .MaxValue else rec.start
82
+ AlignmentInfo (refIndex, start, positiveStrand, if (rec.unmapped) None else Some (rec.cigar), rec.mapq, rec.getOrElse(SAMTag .NM .name(), 0 )) // no CIGAR for unmapped records, so that `mapped` agrees with the sentinel refIndex/start (as in the mate branch)
83
+ }
66
84
}
67
85
68
-
69
- def apply (sa : String ): Supplementary = {
70
- val parts = sa.split(" ," )
71
- Supplementary (parts(0 ), parts(1 ).toInt, parts(2 ) == " +" , Cigar (parts(3 )), parts(4 ).toInt, parts(5 ).toInt)
86
+ /** Parses a single SA-tag formatted alignment (`rname,pos,strand,CIGAR,mapQ,NM`), resolving the reference name to
+   * its index via the header. A CIGAR field of `*` is parsed as no CIGAR.
+   *
+   * @throws IllegalArgumentException if the value does not have exactly six comma-separated fields
+   */
+ def apply (sa : String , header : SAMFileHeader ): AlignmentInfo = {
87
+ val parts = sa.split(" ," )
88
+ require(parts.length == 6 , f " Could not parse SA tag: ${sa}" )
89
+ val refIndex = header.getSequenceIndex(parts(0 )) // NOTE(review): presumably a negative index when the name is not in the header - confirm and decide whether to reject
90
+ AlignmentInfo (refIndex, parts(1 ).toInt, parts(2 ) == " +" , if (parts(3) == "*") None else Some (Cigar (parts(3 ))), parts(4 ).toInt, parts(5 ).toInt) // "*" is what toSA writes when there is no CIGAR, so map it back to None
72
91
}
92
+ /** Returns the record's own alignment formatted as per the SA tag: `(rname ,pos ,strand ,CIGAR ,mapQ ,NM ;)+` */
93
+ def toSA (rec : SamRecord ): String = AlignmentInfo (rec).toSA(rec.header)
73
94
}
74
95
75
96
/**
@@ -136,7 +157,7 @@ case class Template(r1: Option[SamRecord],
136
157
Template (x1, x2)
137
158
}
138
159
139
- /** Fixes mate information and sets mate cigar on all primary and supplementary (but not secondary) records. */
160
+ /** Fixes mate information and sets mate cigar on all primary, secondary, and supplementary records. */
140
161
def fixMateInfo (): Unit = {
141
162
// Set all mate info on BOTH secondary and supplementary records, not just supplementary records. We also need to
142
163
// add the "pa" and "pm" tags with information about the primary alignments. Finally, we need the MQ tag!
@@ -145,14 +166,14 @@ case class Template(r1: Option[SamRecord],
145
166
for (primary <- r1; nonPrimary <- r2NonPrimary) {
146
167
SamPairUtil .setMateInformationOnSupplementalAlignment(nonPrimary.asSam, primary.asSam, true )
147
168
nonPrimary(SAMTag .MQ .name()) = primary.mapq
148
- nonPrimary(" mp " ) = Supplementary .toString (primary)
149
- r2.foreach(r => nonPrimary(" rp " ) = Supplementary .toString (r))
169
+ nonPrimary(Template . MatePrimarySamTag ) = AlignmentInfo .toSA (primary)
170
+ r2.foreach(r => nonPrimary(Template . ReadPrimarySamTag ) = AlignmentInfo .toSA (r))
150
171
}
151
172
for (primary <- r2; nonPrimary <- r1NonPrimary) {
152
173
SamPairUtil .setMateInformationOnSupplementalAlignment(nonPrimary.asSam, primary.asSam, true )
153
174
nonPrimary(SAMTag .MQ .name()) = primary.mapq
154
- nonPrimary(" mp " ) = Supplementary .toString (primary)
155
- r1.foreach(r => nonPrimary(" rp " ) = Supplementary .toString (r))
175
+ nonPrimary(Template . MatePrimarySamTag ) = AlignmentInfo .toSA (primary)
176
+ r1.foreach(r => nonPrimary(Template . ReadPrimarySamTag ) = AlignmentInfo .toSA (r))
156
177
}
157
178
for (first <- r1; second <- r2) {
158
179
SamPairUtil .setMateInfo(first.asSam, second.asSam, true )
@@ -164,6 +185,10 @@ case class Template(r1: Option[SamRecord],
164
185
}
165
186
166
187
object Template {
188
+ /** The local SAM tag to store the alignment information of the primary alignment (in the same format as the SA tag) */
189
+ val ReadPrimarySamTag : String = " rp"
190
+ /** The local SAM tag to store the alignment information of the mate's primary alignment (in the same format as the SA tag) */
191
+ val MatePrimarySamTag : String = " mp"
167
192
/**
168
193
* Generates a Template for the next template in the buffered iterator. Assumes that the
169
194
* iterator is queryname sorted or grouped.
0 commit comments