@@ -43,33 +43,52 @@ import java.io.Closeable
43
43
import scala .math .{max , min }
44
44
45
45
46
-
47
- case class Supplementary ( refName : String , start : Int , positiveStrand : Boolean , cigar : Cigar , mapq : Int , nm : Int ) {
46
/** Class to store information about an alignment, as described in the SAM SA tag.
  *
  * @param refIndex       the index of the reference sequence in the header's sequence dictionary
  * @param start          the start position of the alignment; [[end]] is derived from it as an inclusive coordinate
  * @param positiveStrand true if the alignment is on the positive strand, false if on the negative strand
  * @param cigar          the cigar of the alignment, or `None` when no cigar is available
  * @param mapq           the mapping quality
  * @param nm             the value of the NM tag
  */
private[bam] case class AlignmentInfo(refIndex: Int, start: Int, positiveStrand: Boolean, cigar: Option[Cigar], mapq: Int, nm: Int) {
  /** True if the alignment is on the negative strand. */
  def negativeStrand: Boolean = !positiveStrand

  /** Returns the name of the reference sequence with index `refIndex` in the given header.
    *
    * @throws IllegalStateException if `refIndex` does not resolve to a sequence in the header
    */
  def refName(header: SAMFileHeader): String = {
    // The header lookup yields null for an index that is not in the sequence dictionary; fail with a
    // descriptive message (mirroring `_cigar`) instead of a bare NullPointerException.
    Option(header.getSequence(refIndex)).map(_.getSequenceName).getOrElse {
      throw new IllegalStateException(s"Cannot get reference name for AlignmentInfo: ${this}")
    }
  }

  /** The cigar, or an exception if this alignment has no cigar. */
  private def _cigar: Cigar = cigar.getOrElse {
    throw new IllegalStateException(s"Cannot get cigar for AlignmentInfo: ${this}")
  }

  /** The end position of the alignment, computed from the start and the cigar's length on the target. */
  def end: Int = start + _cigar.lengthOnTarget - 1

  /** The start position adjusted for clipping, as computed by the cigar. */
  def unclippedStart: Int = _cigar.unclippedStart(start)

  /** The end position adjusted for clipping, as computed by the cigar. */
  def unclippedEnd: Int = _cigar.unclippedEnd(end)

  /** Returns a formatted alignment as per the SA tag: `(rname ,pos ,strand ,CIGAR ,mapQ ,NM ;)+`
    *
    * A missing cigar is written as `*`.
    *
    * @throws IllegalStateException if `refIndex` does not resolve to a sequence in the header
    */
  def toSA(header: SAMFileHeader): String = {
    val strand      = if (positiveStrand) '+' else '-'
    // Convert to a String explicitly so the value is not widened to `Any` by mixing Cigar and String.
    val cigarString = cigar.fold("*")(_.toString)
    s"${refName(header)},${start},${strand},${cigarString},${mapq},${nm}"
  }
}
60
64
61
- object Supplementary {
62
- /** Returns a formatted alignment as per the SA tag: `(rname ,pos ,strand ,CIGAR ,mapQ ,NM ;)+` */
63
- def toString (rec : SamRecord ): String = {
64
- val strand = if (rec.positiveStrand) '+' else '-'
65
- f " ${rec.refName}, ${rec.start}, ${strand}, ${rec.cigar}, ${rec.mapq}, ${rec.getOrElse(SAMTag .NM .name(),0 )}"
65
/** Factory methods for building [[AlignmentInfo]] instances from records and from SA-formatted strings. */
private[bam] object AlignmentInfo {
  /** Builds the alignment information for a record, or for its mate.
    *
    * For the record itself (`mate = false`): an unmapped record gets `Int.MaxValue` for both the reference
    * index and the start; otherwise the start is the unclipped end for negative strand records and the
    * unclipped start for positive strand records.  The NM tag defaults to 0 when absent.
    *
    * For the mate (`mate = true`): an unpaired record, or one whose mate is unmapped, gets `Int.MaxValue`
    * for both the reference index and the start, the positive strand, and no cigar.  Otherwise the mate
    * fields of the record are used, with the start chosen by mate strand as above; `nm` is always 0.
    *
    * @param rec  the record to read the alignment information from
    * @param mate true to describe the record's mate, false to describe the record itself
    * @throws IllegalStateException if `mate` is true, the mate is mapped, and the mate cigar is missing
    */
  def apply(rec: SamRecord, mate: Boolean = false): AlignmentInfo = {
    if (mate) fromMate(rec) else fromRecord(rec)
  }

  /** Builds the alignment information for the mate of the given record. */
  private def fromMate(rec: SamRecord): AlignmentInfo = {
    // NOTE(review): the mapq passed below is the record's own mapq, not a mate mapq; confirm this is intended.
    if (rec.unpaired || rec.mateUnmapped) {
      AlignmentInfo(Int.MaxValue, Int.MaxValue, true, None, rec.mapq, 0)
    } else {
      def missingMateCigar: Nothing = {
        throw new IllegalStateException(s"Mate CIGAR (Tag 'MC') not found for $rec, consider using SetMateInformation.")
      }
      val mateRefIndex = rec.mateRefIndex
      val mateCigar    = rec.mateCigar.getOrElse(missingMateCigar)
      // The mate cigar has already been checked above, so the unclipped coordinates are expected to be defined.
      val mateFivePrime = if (rec.mateNegativeStrand) rec.mateUnclippedEnd else rec.mateUnclippedStart
      val mateStart     = mateFivePrime.getOrElse(missingMateCigar)
      AlignmentInfo(mateRefIndex, mateStart, rec.matePositiveStrand, Some(mateCigar), rec.mapq, 0)
    }
  }

  /** Builds the alignment information for the given record itself. */
  private def fromRecord(rec: SamRecord): AlignmentInfo = {
    val refIndex       = if (rec.unmapped) Int.MaxValue else rec.refIndex
    val positiveStrand = rec.positiveStrand
    val start          = {
      if (rec.unmapped) Int.MaxValue
      else if (rec.negativeStrand) rec.unclippedEnd
      else rec.unclippedStart
    }
    AlignmentInfo(refIndex, start, positiveStrand, Some(rec.cigar), rec.mapq, rec.getOrElse(SAMTag.NM.name(), 0))
  }

  /** Parses a single alignment formatted as `rname,pos,strand,CIGAR,mapQ,NM`.
    *
    * @param sa     the formatted alignment; it must have exactly six comma-separated fields
    * @param header the header used to resolve the reference name to its index
    * @throws IllegalArgumentException if the string does not have six fields, or the reference name is not
    *                                  found in the header
    */
  def apply(sa: String, header: SAMFileHeader): AlignmentInfo = {
    val parts = sa.split(",")
    require(parts.length == 6, s"Could not parse SA tag: ${sa}")
    val refIndex = header.getSequenceIndex(parts(0))
    // An unknown reference name yields a negative index; reject it here rather than storing an invalid index.
    require(refIndex >= 0, s"Could not find reference '${parts(0)}' in the header for SA tag: ${sa}")
    // NOTE(review): a cigar field of `*` (as written by `toSA` for a missing cigar) is passed to `Cigar(...)`
    // as-is rather than being mapped back to `None`; confirm that is the intended round-trip.
    AlignmentInfo(refIndex, parts(1).toInt, parts(2) == "+", Some(Cigar(parts(3))), parts(4).toInt, parts(5).toInt)
  }

  /** Returns a formatted alignment as per the SA tag: `(rname ,pos ,strand ,CIGAR ,mapQ ,NM ;)+` */
  def toSA(rec: SamRecord): String = AlignmentInfo(rec).toSA(rec.header)
}
74
93
75
94
/**
@@ -136,7 +155,7 @@ case class Template(r1: Option[SamRecord],
136
155
Template (x1, x2)
137
156
}
138
157
139
- /** Fixes mate information and sets mate cigar on all primary and supplementary (but not secondary) records. */
158
+ /** Fixes mate information and sets mate cigar on all primary, secondary, and supplementary records. */
140
159
def fixMateInfo (): Unit = {
141
160
// Set all mate info on BOTH secondary and supplementary records, not just supplementary records. We also need to
142
161
// add the "pa" and "pm" tags with information about the primary alignments. Finally, we need the MQ tag!
@@ -145,14 +164,14 @@ case class Template(r1: Option[SamRecord],
145
164
for (primary <- r1; nonPrimary <- r2NonPrimary) {
146
165
SamPairUtil .setMateInformationOnSupplementalAlignment(nonPrimary.asSam, primary.asSam, true )
147
166
nonPrimary(SAMTag .MQ .name()) = primary.mapq
148
- nonPrimary(" mp " ) = Supplementary .toString (primary)
149
- r2.foreach(r => nonPrimary(" rp " ) = Supplementary .toString (r))
167
+ nonPrimary(Template . MatePrimarySamTag ) = AlignmentInfo .toSA (primary)
168
+ r2.foreach(r => nonPrimary(Template . ReadPrimarySamTag ) = AlignmentInfo .toSA (r))
150
169
}
151
170
for (primary <- r2; nonPrimary <- r1NonPrimary) {
152
171
SamPairUtil .setMateInformationOnSupplementalAlignment(nonPrimary.asSam, primary.asSam, true )
153
172
nonPrimary(SAMTag .MQ .name()) = primary.mapq
154
- nonPrimary(" mp " ) = Supplementary .toString (primary)
155
- r1.foreach(r => nonPrimary(" rp " ) = Supplementary .toString (r))
173
+ nonPrimary(Template . MatePrimarySamTag ) = AlignmentInfo .toSA (primary)
174
+ r1.foreach(r => nonPrimary(Template . ReadPrimarySamTag ) = AlignmentInfo .toSA (r))
156
175
}
157
176
for (first <- r1; second <- r2) {
158
177
SamPairUtil .setMateInfo(first.asSam, second.asSam, true )
@@ -164,6 +183,10 @@ case class Template(r1: Option[SamRecord],
164
183
}
165
184
166
185
object Template {
186
+ /** The local SAM tag to store the alignment information of the primary alignment (in the same format as the SA tag) */
187
+ val ReadPrimarySamTag : String = " rp"
188
+ /** The local SAM tag to store the alignment information of the mate's primary alignment (in the same format as the SA tag) */
189
+ val MatePrimarySamTag : String = " mp"
167
190
/**
168
191
* Generates a Template for the next template in the buffered iterator. Assumes that the
169
192
* iterator is queryname sorted or grouped.
0 commit comments