@@ -6,7 +6,7 @@ import com.fulcrumgenomics.util.Metric
6
6
import com .fulcrumgenomics .vcf .api .Allele .SimpleAllele
7
7
import com .fulcrumgenomics .vcf .api .{Allele , AlleleSet , Genotype , Variant }
8
8
import com .fulcrumgenomics .testing .UnitSpec
9
- import com .fulcrumgenomics .vcf .DownsampleVcf .{Likelihoods , computePls , downsampleAndRegenotype }
9
+ import com .fulcrumgenomics .vcf .DownsampleVcf .{Likelihoods , downsampleAndRegenotype }
10
10
11
11
import scala .util .Random
12
12
@@ -187,7 +187,7 @@ class DownsampleVcfTest extends UnitSpec {
187
// The PLs derived from allele depths of (0, 100) must be informative rather than all zero.
" DownsampleVcf.computePls" should " return new PLs that are not always 0,0,0" in {
  val alleleDepths = IndexedSeq[Int](0, 100)
  val recomputedPls = Likelihoods(alleleDepths).pls
  recomputedPls should contain theSameElementsInOrderAs IndexedSeq(1996, 301, 0)
}
193
193
@@ -196,51 +196,89 @@ class DownsampleVcfTest extends UnitSpec {
196
196
*/
197
197
198
// With no reads for either allele every genotype gets a PL of 0.
" DownsampleVcf.Likelihoods" should " return ref if all allele depths are zero" in {
  val flatPls = IndexedSeq[Int](0, 0, 0)
  val likelihood = Likelihoods(IndexedSeq(0, 0))
  likelihood.pls.length shouldBe flatPls.length
  likelihood.pls should contain theSameElementsInOrderAs flatPls
}
204
204
205
// Each case pairs allele depths with the hand-computed per-genotype probabilities for error rate `e`;
// the probabilities are converted to PLs through log10 and compared with what Likelihoods produces.
it should " return correct results for basic cases" in {
  val e = 0.01
  val cases: IndexedSeq[(IndexedSeq[Int], IndexedSeq[Double])] = IndexedSeq(
    IndexedSeq(1, 0)    -> IndexedSeq(1 - e, 0.5, e),
    IndexedSeq(0, 1)    -> IndexedSeq(e, 0.5, 1 - e),
    IndexedSeq(1, 1)    -> IndexedSeq((1 - e) * e, 0.25, (1 - e) * e),
    IndexedSeq(2, 0)    -> IndexedSeq(math.pow((1 - e), 2), 0.25, math.pow(e, 2)),
    IndexedSeq(0, 0, 1) -> IndexedSeq(e, e, e, 0.5, 0.5, 1 - e)
  )
  for ((depths, probabilities) <- cases) {
    val likelihood = Likelihoods(depths, e)
    val logProbabilities = probabilities.map(p => math.log10(p))
    likelihood.pls.length shouldBe logProbabilities.length
    likelihood.pls should contain theSameElementsInOrderAs DownsampleVcf.Likelihoods.logToPhredLikelihoods(logProbabilities)
  }
}
221
+
222
// Checks that the biallelic and the generalized likelihood calculations produce identical PLs
// for two-allele inputs. Only the allele depths are needed here: the test compares the two
// implementations against each other, not against hand-computed probabilities.
it should " return the same results for biallelic and generalized algorithm" in {
  val e = 0.01
  val cases: IndexedSeq[IndexedSeq[Int]] = IndexedSeq(
    IndexedSeq(1, 0),
    IndexedSeq(0, 1),
    IndexedSeq(1, 1),
    IndexedSeq(2, 0)
  )
  cases.foreach { input =>
    val biallelic = Likelihoods(2, DownsampleVcf.Likelihoods.biallelic(input(0), input(1), e))
    val generalized = Likelihoods(2, DownsampleVcf.Likelihoods.generalized(input, e))
    biallelic.pls should contain theSameElementsInOrderAs generalized.pls
  }
}
236
+
205
237
// Ten ref reads and no alt reads: the first (AA) PL is the zero entry.
it should " return a likelihood of 0 for AA if there are only ref alleles observed" in {
  val refOnlyPls = Likelihoods(IndexedSeq(10, 0)).pls
  refOnlyPls should contain theSameElementsInOrderAs IndexedSeq[Int](0, 30, 200)
}
210
242
211
243
// Ten alt reads and no ref reads: the last (BB) PL is the zero entry.
it should " return a likelihood of 0 for BB if there are only alt alleles observed" in {
  val altOnlyPls = Likelihoods(IndexedSeq(0, 10)).pls
  altOnlyPls should contain theSameElementsInOrderAs IndexedSeq[Int](200, 30, 0)
}
216
248
217
249
// Five reads for each allele: the middle (AB) PL is the zero entry and the PLs are symmetric.
it should " return a likelihood of 0 for AB if there are an equal number of ref and alt alleles" in {
  val balancedPls = Likelihoods(IndexedSeq(5, 5)).pls
  balancedPls should contain theSameElementsInOrderAs IndexedSeq[Int](70, 0, 70)
}
222
254
223
255
// Depths of (15, 2): the AA entry (index 0) must be the zero PL.
it should " return a likelihood of 0 for AA if the AD A >> AD B" in {
  val pls = Likelihoods(IndexedSeq(15, 2)).pls
  assert(pls(0) == 0)
}
227
259
228
260
// Depths of (15, 17): the AB entry (index 1) must be the zero PL.
it should " return a likelihood of 0 for AB if AD.A and AD.B are similar but not equal" in {
  val pls = Likelihoods(IndexedSeq(15, 17)).pls
  assert(pls(1) == 0)
}
232
264
233
265
// Depths of (3, 30): the BB entry (index 2) must be the zero PL.
it should " return a likelihood of 0 for BB if AD.B >> AD.A but neither are 0" in {
  val pls = Likelihoods(IndexedSeq(3, 30)).pls
  assert(pls(2) == 0)
}
237
269
238
270
// Table of (allele depths -> expected PLs) for inputs with at most three reads in total.
it should " return correct values when there are very few reads" in {
  val expectedPlsByDepths = IndexedSeq(
    IndexedSeq(0, 0) -> IndexedSeq(0, 0, 0),
    IndexedSeq(1, 0) -> IndexedSeq(0, 3, 20),
    IndexedSeq(1, 1) -> IndexedSeq(14, 0, 14),
    IndexedSeq(0, 2) -> IndexedSeq(40, 6, 0),
    IndexedSeq(1, 2) -> IndexedSeq(31, 0, 11)
  )
  expectedPlsByDepths.foreach { case (depths, expectedPls) =>
    Likelihoods(depths).pls should contain theSameElementsInOrderAs expectedPls
  }
}
277
+
278
// Table of (allele depths -> expected PLs) for three alleles, which yield six PL values each.
it should " return correct values for multi-allelic variants" in {
  val expectedPlsByDepths = IndexedSeq(
    IndexedSeq(0, 0, 0)   -> IndexedSeq(0, 0, 0, 0, 0, 0),
    IndexedSeq(10, 0, 0)  -> IndexedSeq(0, 30, 200, 30, 200, 200),
    IndexedSeq(10, 10, 0) -> IndexedSeq(139, 0, 139, 169, 169, 339)
  )
  expectedPlsByDepths.foreach { case (depths, expectedPls) =>
    Likelihoods(depths).pls should contain theSameElementsInOrderAs expectedPls
  }
}
245
283
246
284
@@ -251,10 +289,10 @@ class DownsampleVcfTest extends UnitSpec {
251
289
Genotype (alleles= AlleleSet (ref= SimpleAllele (ref), alts= IndexedSeq (Allele (alt))),
252
290
sample= sample,
253
291
calls= IndexedSeq [Allele ](Allele (ref), Allele (alt)),
254
- attrs= Map (" AD" -> ads, " PL" -> Likelihoods (alleleDepthA = ads(0 ), alleleDepthB = ads(1 ))))
292
+ attrs= Map (" AD" -> ads, " PL" -> Likelihoods (ads))
293
+ )
255
294
}
256
295
257
-
258
296
" DownsampleVcf.downsampleAndRegneotype(Genotype)" should " return no call if all allele depths are zero" in {
259
297
val geno = makeGt(ref= " A" , alt= " T" , ads= IndexedSeq (0 ,0 ))
260
298
val newGeno = downsampleAndRegenotype(gt= geno, proportion= 0.01 , random = new Random (42 ), epsilon = 0.01 )
@@ -298,6 +336,30 @@ class DownsampleVcfTest extends UnitSpec {
298
336
newGeno.calls should contain theSameElementsInOrderAs expected
299
337
}
300
338
339
+ /*
340
+ testing DownsampleVcf.downsampleAndRegenotype on tri-allelic Genotypes
341
+ */
342
/**
  * Builds a genotype with one ref and two alt alleles for use in tests.
  *
  * The calls are whatever `Likelihoods(ads).mostLikelyCall` selects from the allele set, and the
  * allele depths and PLs are stored as genotype attributes.
  *
  * @param ref    the reference allele
  * @param alt1   the first alternate allele
  * @param alt2   the second alternate allele
  * @param ads    the allele depths, passed to `Likelihoods`
  * @param sample the sample name
  */
private def makeTriallelicGt(ref: String, alt1: String, alt2: String, ads: IndexedSeq[Int], sample: String = " test"): Genotype = {
  val plModel   = Likelihoods(ads)
  val alleleSet = AlleleSet(ref=SimpleAllele(ref), alts=IndexedSeq(Allele(alt1), Allele(alt2)))
  val bestCall  = plModel.mostLikelyCall(alleleSet.toSeq)
  val attributes = Map(" AD" -> ads, " PL" -> plModel.pls)
  Genotype(alleleSet, sample=sample, calls=bestCall, attrs=attributes)
}
348
+
349
// Depth is split between ref and alt1 only, so after downsampling the call should be ref/alt1.
it should " return ref,alt1 for a tri-allelic genotype if those alleles have the highest depth" in {
  val original    = makeTriallelicGt(ref=" A", alt1=" T", alt2=" G", ads=IndexedSeq(100, 100, 0))
  val downsampled = downsampleAndRegenotype(gt=original, proportion=0.1, random=new Random(42), epsilon=0.01)
  downsampled.calls should contain theSameElementsInOrderAs IndexedSeq(Allele(" A"), Allele(" T"))
}
355
+
356
// Depth is split between alt1 and alt2 only, so after downsampling the call should be alt1/alt2.
it should " return alt1,alt2 for a tri-allelic genotype if those alleles have the highest depth" in {
  val original    = makeTriallelicGt(ref=" A", alt1=" T", alt2=" G", ads=IndexedSeq(0, 100, 100))
  val downsampled = downsampleAndRegenotype(gt=original, proportion=0.1, random=new Random(42), epsilon=0.01)
  downsampled.calls should contain theSameElementsInOrderAs IndexedSeq(Allele(" T"), Allele(" G"))
}
362
+
301
363
/*
302
364
testing DownsampleVcf.downsampleAndRegenotype on Variant
303
365
*/
@@ -306,7 +368,7 @@ class DownsampleVcfTest extends UnitSpec {
306
368
Variant (chrom= " 1" ,
307
369
pos= 10 ,
308
370
alleles= AlleleSet (ref= Allele (ref), alts= Allele (alt)),
309
- genotypes= Map (sample -> makeGt(ref= ref, alt= alt, ads= ads, sample = sample))
371
+ genotypes= Map (sample -> makeGt(ref= ref, alt= alt, ads= ads, sample= sample))
310
372
)
311
373
}
312
374
@@ -345,6 +407,32 @@ class DownsampleVcfTest extends UnitSpec {
345
407
newVariant.genotypes(" test" ).calls should contain theSameElementsInOrderAs expected
346
408
}
347
409
410
+ /*
411
+ testing DownsampleVcf.downsampleAndRegenotype on tri-allelic Variants
412
+ */
413
/**
  * Builds a single-sample variant with one ref and two alt alleles for use in tests.
  *
  * The variant's only genotype is built by `makeTriallelicGt` from the same alleles and depths.
  *
  * @param ref    the reference allele
  * @param alt1   the first alternate allele
  * @param alt2   the second alternate allele
  * @param ads    the allele depths for the sample's genotype
  * @param sample the sample name used as the key in the genotype map
  */
private def makeTriallelicVariant(ref: String, alt1: String, alt2: String, ads: IndexedSeq[Int], sample: String = " test"): Variant = {
  // The likelihoods are computed inside makeTriallelicGt; no separate copy is needed here.
  val alleles = AlleleSet(ref=SimpleAllele(ref), alts=IndexedSeq(Allele(alt1), Allele(alt2)))
  Variant(chrom=" 1",
    pos=10,
    alleles=alleles,
    genotypes=Map(sample -> makeTriallelicGt(ref=ref, alt1=alt1, alt2=alt2, ads=ads, sample=sample)))
}
421
+
422
// Depth is split between ref and alt1 only, so the regenotyped sample should be called ref/alt1.
it should " return ref,alt1 for a tri-allelic variant if those alleles have the highest depth" in {
  val original    = makeTriallelicVariant(ref=" A", alt1=" T", alt2=" G", ads=IndexedSeq(100, 100, 0))
  val downsampled = downsampleAndRegenotype(variant=original, proportions=Map(" test" -> 0.1), random=new Random(42), epsilon=0.01)
  val calls       = downsampled.genotypes(" test").calls
  calls should contain theSameElementsInOrderAs IndexedSeq(Allele(" A"), Allele(" T"))
}
428
+
429
// Depth is split between alt1 and alt2 only, so the regenotyped sample should be called alt1/alt2.
it should " return alt1,alt2 for a tri-allelic variant if those alleles have the highest depth" in {
  val original    = makeTriallelicVariant(ref=" A", alt1=" T", alt2=" G", ads=IndexedSeq(0, 100, 100))
  val downsampled = downsampleAndRegenotype(variant=original, proportions=Map(" test" -> 0.1), random=new Random(42), epsilon=0.01)
  val calls       = downsampled.genotypes(" test").calls
  calls should contain theSameElementsInOrderAs IndexedSeq(Allele(" T"), Allele(" G"))
}
435
+
348
436
private val sample = " test1"
349
437
private val builder = VcfBuilder (samples= Seq (sample))
350
438
builder.add(chrom= " chr1" , pos= 100 , id= " 1" , alleles= Seq (" A" , " C" ), info= Map (),
0 commit comments