@@ -13,6 +13,7 @@ import com.fulcrumgenomics.vcf.DownsampleVcf.{downsampleAndRegenotype, winnowVar
13
13
14
14
import scala .math .log10
15
15
import scala .util .Random
16
+ import scala .tools .nsc .doc .html .HtmlTags
16
17
17
18
object DownsampleVcf extends LazyLogging {
18
19
/** Removes variants that are within a specified distance from a previous variant.
@@ -103,6 +104,16 @@ object DownsampleVcf extends LazyLogging {
103
104
gt.copy(attrs= Map (" PL" -> pls, " AD" -> newAds, " DP" -> newAds.sum), calls= calls)
104
105
}
105
106
107
/** Converts a sequence of log-likelihoods to phred-scaled likelihoods.
  *
  * Each value is 1) multiplied by -10, 2) shifted by subtracting the smallest scaled value so
  * that the minimum of the result is 0, and 3) rounded to the nearest integer.
  *
  * NOTE(review): phred scaling presumably expects log10-scaled inputs - confirm against callers.
  *
  * @param logLikelihoods the log-likelihoods to convert; may be empty
  * @return the phred-scaled likelihoods in the same order as the input, or an empty sequence
  *         when the input is empty
  */
def logToPhredLikelihoods(logLikelihoods: IndexedSeq[Double]): IndexedSeq[Int] = {
  // `min` throws on an empty collection, so an empty input is handled explicitly.
  if (logLikelihoods.isEmpty) IndexedSeq.empty[Int]
  else {
    val rawPL = logLikelihoods.map(gl => gl * -10)
    val minPL = rawPL.min
    rawPL.map(pl => (pl - minPL).round.toInt)
  }
}
116
+
106
117
object Likelihoods {
107
118
/** Computes the likelihoods for each possible biallelic genotype.
108
119
* @param alleleDepthA the reference allele depth
@@ -122,13 +133,14 @@ object DownsampleVcf extends LazyLogging {
122
133
Likelihoods (2 , IndexedSeq (rawGlAA, rawGlAB, rawGlBB))
123
134
}
124
135
125
- /** Computes the likelihoods for each possible multiallelic genotype.
136
+ /** Computes the likelihoods for each possible genotype given a sequence of read depths for any
137
+ * number of alleles.
126
138
* @param alleleDepths the sequence of allele depths in the order specified in the VCF
127
139
* @param epsilon the error rate for genotyping
128
- * @return a new `Likelihood` that has the likelihoods of all possible genotypes in the order
129
- * specified in VFC spec for the GL/PL tags.
140
+ * @return a new `Likelihoods` that has the log likelihoods of all possible genotypes in the
141
+ * order specified in the VCF spec for the GL/PL tags.
130
142
*/
131
- def multiallelic (alleleDepths : IndexedSeq [Int ], epsilon : Double = 0.01 ): Likelihoods = {
143
+ def generalized (alleleDepths : IndexedSeq [Int ], epsilon : Double = 0.01 ): Likelihoods = {
132
144
val numAlleles = alleleDepths.length
133
145
// probabilities associated with each possible genotype for a pair of alleles
134
146
val probs : Array [Double ] = Array (
@@ -151,8 +163,7 @@ object DownsampleVcf extends LazyLogging {
151
163
152
164
/** Builds the genotype `Likelihoods` for a sequence of allele depths.
  *
  * At least two allele depths are required. In this diff the previous dispatch between
  * `multiallelic` and `biallelic` (the `-` lines) is replaced by a single call to
  * `generalized` (the `+` line), which is used for any number of alleles.
  *
  * @param alleleDepths the allele depths; must contain at least two entries
  * @param epsilon the error rate for genotyping, passed through unchanged (defaults to 0.01)
  * @return the `Likelihoods` returned by `generalized`
  * @throws IllegalArgumentException if fewer than two allele depths are given
  */
def apply (alleleDepths : IndexedSeq [Int ], epsilon : Double = 0.01 ): Likelihoods = {
153
165
require(alleleDepths.length >= 2 , " at least two alleles are required to calculate genotype likelihoods" )
154
- if (alleleDepths.length > 2 ) multiallelic(alleleDepths, epsilon)
155
- else biallelic(alleleDepths(0 ), alleleDepths(1 ), epsilon)
166
+ generalized(alleleDepths, epsilon)
156
167
}
157
168
}
158
169
@@ -166,11 +177,7 @@ object DownsampleVcf extends LazyLogging {
166
177
* @return a list of phred-scaled likelihoods for AA, AB, BB.
167
178
*/
168
179
def pls : IndexedSeq [Int ] = {
169
- // subtract the min value so the smallest GL is 0, then multiply by -10 and convert to
170
- // Int to make it PHRED-scale
171
- val rawPL = genotypeLikelihoods.map(gl => gl * - 10 )
172
- val minPL = rawPL.min
173
- rawPL.map(pl => (pl - minPL).round.toInt)
180
// The inline conversion removed above now lives in the shared `logToPhredLikelihoods` helper:
// scale by -10, shift so the smallest value is 0, then round to Int.
+ logToPhredLikelihoods(genotypeLikelihoods)
174
181
}
175
182
176
183
def mostLikelyGenotype : Option [(Int , Int )] = {
0 commit comments