Skip to content

Commit 81e39d1

Browse files
committed
add more tests
1 parent 8f7a4eb commit 81e39d1

File tree

2 files changed

+35
-11
lines changed

2 files changed

+35
-11
lines changed

src/main/scala/com/fulcrumgenomics/vcf/DownsampleVcf.scala

+18-11
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import com.fulcrumgenomics.vcf.DownsampleVcf.{downsampleAndRegenotype, winnowVar
1313

1414
import scala.math.log10
1515
import scala.util.Random
16+
import scala.tools.nsc.doc.html.HtmlTags
1617

1718
object DownsampleVcf extends LazyLogging {
1819
/** Removes variants that are within a specified distance from a previous variant.
@@ -103,6 +104,16 @@ object DownsampleVcf extends LazyLogging {
103104
gt.copy(attrs=Map("PL" -> pls, "AD" -> newAds, "DP" -> newAds.sum), calls=calls)
104105
}
105106

107+
/**Converts a sequence of log-likelihoods to phred-scale by 1) multiplying each by -10, 2)
108+
* subtracting from each the min value so the smallest value is 0, and 3) rounding to the
109+
* nearest integer.
110+
*/
111+
def logToPhredLikelihoods(logLikelihoods: IndexedSeq[Double]): IndexedSeq[Int] = {
112+
val rawPL = logLikelihoods.map(gl => gl * -10)
113+
val minPL = rawPL.min
114+
rawPL.map(pl => (pl - minPL).round.toInt)
115+
}
116+
106117
object Likelihoods {
107118
/** Computes the likelihoods for each possible biallelic genotype.
108119
* @param alleleDepthA the reference allele depth
@@ -122,13 +133,14 @@ object DownsampleVcf extends LazyLogging {
122133
Likelihoods(2, IndexedSeq(rawGlAA, rawGlAB, rawGlBB))
123134
}
124135

125-
/** Computes the likelihoods for each possible multiallelic genotype.
136+
/** Computes the likelihoods for each possible genotype given a sequence of read depths for any
137+
* number of alleles.
126138
* @param alleleDepths the sequence of allele depths in the order specified in the VCF
127139
* @param epsilon the error rate for genotyping
128-
* @return a new `Likelihood` that has the likelihoods of all possible genotypes in the order
129-
* specified in VFC spec for the GL/PL tags.
140+
* @return a new `Likelihood` that has the log likelihoods of all possible genotypes in the
141+
* order specified in VFC spec for the GL/PL tags.
130142
*/
131-
def multiallelic(alleleDepths: IndexedSeq[Int], epsilon: Double = 0.01): Likelihoods = {
143+
def generalized(alleleDepths: IndexedSeq[Int], epsilon: Double = 0.01): Likelihoods = {
132144
val numAlleles = alleleDepths.length
133145
// probabilities associated with each possible genotype for a pair of alleles
134146
val probs: Array[Double] = Array(
@@ -151,8 +163,7 @@ object DownsampleVcf extends LazyLogging {
151163

152164
def apply(alleleDepths: IndexedSeq[Int], epsilon: Double = 0.01): Likelihoods = {
153165
require(alleleDepths.length >= 2, "at least two alleles are required to calculate genotype likelihoods")
154-
if (alleleDepths.length > 2) multiallelic(alleleDepths, epsilon)
155-
else biallelic(alleleDepths(0), alleleDepths(1), epsilon)
166+
generalized(alleleDepths, epsilon)
156167
}
157168
}
158169

@@ -166,11 +177,7 @@ object DownsampleVcf extends LazyLogging {
166177
* @return a list of phred-scaled likelihooodS for AA, AB, BB.
167178
*/
168179
def pls: IndexedSeq[Int] = {
169-
// subtract the min value so the smallest GL is 0, then multiply by -10 and convert to
170-
// Int to make it PHRED-scale
171-
val rawPL = genotypeLikelihoods.map(gl => gl * -10)
172-
val minPL = rawPL.min
173-
rawPL.map(pl => (pl - minPL).round.toInt)
180+
logToPhredLikelihoods(genotypeLikelihoods)
174181
}
175182

176183
def mostLikelyGenotype: Option[(Int, Int)] = {

src/test/scala/com/fulcrumgenomics/vcf/DownsampleVcfTest.scala

+17
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,23 @@ class DownsampleVcfTest extends UnitSpec {
202202
likelihood.pls should contain theSameElementsInOrderAs expected
203203
}
204204

205+
it should "return correct results for basic cases" in {
206+
val e = 0.01
207+
val cases: IndexedSeq[(IndexedSeq[Int], IndexedSeq[Double])] = IndexedSeq(
208+
(IndexedSeq(1, 0), IndexedSeq(1 - e, 0.5, e)),
209+
(IndexedSeq(0, 1), IndexedSeq(e, 0.5, 1 - e)),
210+
(IndexedSeq(1, 1), IndexedSeq((1 - e) * e, 0.25, (1 - e) * e)),
211+
(IndexedSeq(2, 0), IndexedSeq(math.pow((1 - e), 2), 0.25, math.pow(e, 2))),
212+
(IndexedSeq(0, 0, 1), IndexedSeq(e, e, e, 0.5, 0.5, 1 - e)),
213+
)
214+
cases.foreach { case (input, output) =>
215+
val likelihood = Likelihoods(input, e)
216+
val logOutput = output.map(p => math.log10(p))
217+
likelihood.pls.length shouldBe logOutput.length
218+
likelihood.pls should contain theSameElementsInOrderAs DownsampleVcf.logToPhredLikelihoods(logOutput)
219+
}
220+
}
221+
205222
it should "return a likelihood of 0 for AA if there are only ref alleles observed" in {
206223
val likelihood = Likelihoods(IndexedSeq(10, 0))
207224
val expected = IndexedSeq[Int](0, 30, 200)

0 commit comments

Comments
 (0)