@@ -6,7 +6,7 @@ import com.fulcrumgenomics.util.Metric
6
6
import com .fulcrumgenomics .vcf .api .Allele .SimpleAllele
7
7
import com .fulcrumgenomics .vcf .api .{Allele , AlleleSet , Genotype , Variant }
8
8
import com .fulcrumgenomics .testing .UnitSpec
9
- import com .fulcrumgenomics .vcf .DownsampleVcf .{Likelihoods , computePls , downsampleAndRegenotype }
9
+ import com .fulcrumgenomics .vcf .DownsampleVcf .{Likelihoods , downsampleAndRegenotype }
10
10
11
11
import scala .util .Random
12
12
@@ -187,7 +187,7 @@ class DownsampleVcfTest extends UnitSpec {
187
187
" DownsampleVcf.computePls" should " return new PLs that are not always 0,0,0" in {
188
188
val ads = IndexedSeq [Int ](0 , 100 )
189
189
val expected = IndexedSeq (1996 , 301 , 0 )
190
- val newlikelihoods = computePls (ads)
190
+ val newlikelihoods = Likelihoods (ads).pls
191
191
newlikelihoods should contain theSameElementsInOrderAs expected
192
192
}
193
193
@@ -196,51 +196,57 @@ class DownsampleVcfTest extends UnitSpec {
196
196
*/
197
197
198
198
" DownsampleVcf.Likelihoods" should " return ref if all allele depths are zero" in {
199
- val likelihood = Likelihoods (alleleDepthA = 0 , alleleDepthB = 0 )
199
+ val likelihood = Likelihoods (IndexedSeq ( 0 , 0 ) )
200
200
val expected = IndexedSeq [Int ](0 , 0 , 0 )
201
201
likelihood.pls.length shouldBe expected.length
202
202
likelihood.pls should contain theSameElementsInOrderAs expected
203
203
}
204
204
205
205
it should " return a likelihood of 0 for AA if there are only ref alleles observed" in {
206
- val likelihood = Likelihoods (alleleDepthA = 10 , alleleDepthB = 0 )
206
+ val likelihood = Likelihoods (IndexedSeq ( 10 , 0 ) )
207
207
val expected = IndexedSeq [Int ](0 , 30 , 200 )
208
208
likelihood.pls should contain theSameElementsInOrderAs expected
209
209
}
210
210
211
211
it should " return a likelihood of 0 for BB if there are only alt alleles observed" in {
212
- val likelihood = Likelihoods (alleleDepthA = 0 , alleleDepthB = 10 )
212
+ val likelihood = Likelihoods (IndexedSeq ( 0 , 10 ) )
213
213
val expected = IndexedSeq [Int ](200 , 30 , 0 )
214
214
likelihood.pls should contain theSameElementsInOrderAs expected
215
215
}
216
216
217
217
it should " return a likelihood of 0 for AB if there are an equal number of ref and alt alleles" in {
218
- val likelihood = Likelihoods (alleleDepthA = 5 , alleleDepthB = 5 )
218
+ val likelihood = Likelihoods (IndexedSeq ( 5 , 5 ) )
219
219
val expected = IndexedSeq [Int ](70 , 0 , 70 )
220
220
likelihood.pls should contain theSameElementsInOrderAs expected
221
221
}
222
222
223
223
it should " return a likelihood of 0 for AA if the AD A >> AD B" in {
224
- val likelihood = Likelihoods (alleleDepthA = 15 , alleleDepthB = 2 )
224
+ val likelihood = Likelihoods (IndexedSeq ( 15 , 2 ) )
225
225
likelihood.pls(0 ) == 0
226
226
}
227
227
228
228
it should " return a likelihood of 0 for AB if AD.A and AD.B are similar but not equal" in {
229
- val likelihood = Likelihoods (alleleDepthA = 15 , alleleDepthB = 17 )
229
+ val likelihood = Likelihoods (IndexedSeq ( 15 , 17 ) )
230
230
likelihood.pls(1 ) == 0
231
231
}
232
232
233
233
it should " return a likelihood of 0 for BB if AD.B >> AD.A but neither are 0" in {
234
- val likelihood = Likelihoods (alleleDepthA = 3 , alleleDepthB = 30 )
234
+ val likelihood = Likelihoods (IndexedSeq ( 3 , 30 ) )
235
235
likelihood.pls(2 ) == 0
236
236
}
237
237
238
238
it should " return correct values when there are very few reads" in {
239
- Likelihoods (0 , 0 ).pls should contain theSameElementsInOrderAs IndexedSeq (0 , 0 , 0 )
240
- Likelihoods (1 , 0 ).pls should contain theSameElementsInOrderAs IndexedSeq (0 , 3 , 20 )
241
- Likelihoods (1 , 1 ).pls should contain theSameElementsInOrderAs IndexedSeq (14 , 0 , 14 )
242
- Likelihoods (0 , 2 ).pls should contain theSameElementsInOrderAs IndexedSeq (40 , 6 , 0 )
243
- Likelihoods (1 , 2 ).pls should contain theSameElementsInOrderAs IndexedSeq (31 , 0 , 11 )
239
+ Likelihoods (IndexedSeq (0 , 0 )).pls should contain theSameElementsInOrderAs IndexedSeq (0 , 0 , 0 )
240
+ Likelihoods (IndexedSeq (1 , 0 )).pls should contain theSameElementsInOrderAs IndexedSeq (0 , 3 , 20 )
241
+ Likelihoods (IndexedSeq (1 , 1 )).pls should contain theSameElementsInOrderAs IndexedSeq (14 , 0 , 14 )
242
+ Likelihoods (IndexedSeq (0 , 2 )).pls should contain theSameElementsInOrderAs IndexedSeq (40 , 6 , 0 )
243
+ Likelihoods (IndexedSeq (1 , 2 )).pls should contain theSameElementsInOrderAs IndexedSeq (31 , 0 , 11 )
244
+ }
245
+
246
+ it should " return correct values for multi-allelic variants" in {
247
+ Likelihoods (IndexedSeq (0 , 0 , 0 )).pls should contain theSameElementsInOrderAs IndexedSeq (0 , 0 , 0 , 0 , 0 , 0 )
248
+ Likelihoods (IndexedSeq (10 , 0 , 0 )).pls should contain theSameElementsInOrderAs IndexedSeq (0 , 30 , 200 , 30 , 200 , 200 )
249
+ Likelihoods (IndexedSeq (10 , 10 , 0 )).pls should contain theSameElementsInOrderAs IndexedSeq (139 , 0 , 139 , 169 , 169 , 339 )
244
250
}
245
251
246
252
@@ -251,10 +257,10 @@ class DownsampleVcfTest extends UnitSpec {
251
257
Genotype (alleles= AlleleSet (ref= SimpleAllele (ref), alts= IndexedSeq (Allele (alt))),
252
258
sample= sample,
253
259
calls= IndexedSeq [Allele ](Allele (ref), Allele (alt)),
254
- attrs= Map (" AD" -> ads, " PL" -> Likelihoods (alleleDepthA = ads(0 ), alleleDepthB = ads(1 ))))
260
+ attrs= Map (" AD" -> ads, " PL" -> Likelihoods (ads))
261
+ )
255
262
}
256
263
257
-
258
264
" DownsampleVcf.downsampleAndRegneotype(Genotype)" should " return no call if all allele depths are zero" in {
259
265
val geno = makeGt(ref= " A" , alt= " T" , ads= IndexedSeq (0 ,0 ))
260
266
val newGeno = downsampleAndRegenotype(gt= geno, proportion= 0.01 , random = new Random (42 ), epsilon = 0.01 )
@@ -298,6 +304,30 @@ class DownsampleVcfTest extends UnitSpec {
298
304
newGeno.calls should contain theSameElementsInOrderAs expected
299
305
}
300
306
307
+ /*
308
+ testing DownsampleVcf.downsampleAndRegenotype on downsampleAndRegenotypes
309
+ */
310
+ private def makeTriallelicGt (ref : String , alt1 : String , alt2 : String , ads : IndexedSeq [Int ], sample : String = " test" ): Genotype = {
311
+ val likelihoods = Likelihoods (ads)
312
+ val alleles = AlleleSet (ref= SimpleAllele (ref), alts= IndexedSeq (Allele (alt1), Allele (alt2)))
313
+ val calls = likelihoods.mostLikelyCall(alleles.toSeq)
314
+ Genotype (alleles, sample= sample, calls= calls, attrs= Map (" AD" -> ads, " PL" -> likelihoods.pls))
315
+ }
316
+
317
+ it should " return ref,alt1 for a tri-allelic genotype if those alleles have the highest depth" in {
318
+ val geno = makeTriallelicGt(ref= " A" , alt1= " T" , alt2= " G" , ads= IndexedSeq (100 , 100 , 0 ))
319
+ val newGeno = downsampleAndRegenotype(gt= geno, proportion= 0.1 , random = new Random (42 ), epsilon = 0.01 )
320
+ val expected = IndexedSeq (Allele (" A" ), Allele (" T" ))
321
+ newGeno.calls should contain theSameElementsInOrderAs expected
322
+ }
323
+
324
+ it should " return alt1,alt2 for a tri-allelic genotype if those alleles have the highest depth" in {
325
+ val geno = makeTriallelicGt(ref= " A" , alt1= " T" , alt2= " G" , ads= IndexedSeq (0 , 100 , 100 ))
326
+ val newGeno = downsampleAndRegenotype(gt= geno, proportion= 0.1 , random = new Random (42 ), epsilon = 0.01 )
327
+ val expected = IndexedSeq (Allele (" T" ), Allele (" G" ))
328
+ newGeno.calls should contain theSameElementsInOrderAs expected
329
+ }
330
+
301
331
/*
302
332
testing DownsampleVcf.downsampleAndRegenotype on Variant
303
333
*/
@@ -306,7 +336,7 @@ class DownsampleVcfTest extends UnitSpec {
306
336
Variant (chrom= " 1" ,
307
337
pos= 10 ,
308
338
alleles= AlleleSet (ref= Allele (ref), alts= Allele (alt)),
309
- genotypes= Map (sample -> makeGt(ref= ref, alt= alt, ads= ads, sample = sample))
339
+ genotypes= Map (sample -> makeGt(ref= ref, alt= alt, ads= ads, sample= sample))
310
340
)
311
341
}
312
342
@@ -345,6 +375,32 @@ class DownsampleVcfTest extends UnitSpec {
345
375
newVariant.genotypes(" test" ).calls should contain theSameElementsInOrderAs expected
346
376
}
347
377
378
+ /*
379
+ testing DownsampleVcf.downsampleAndRegenotype on downsampleAndRegenotypes
380
+ */
381
+ private def makeTriallelicVariant (ref : String , alt1 : String , alt2 : String , ads : IndexedSeq [Int ], sample : String = " test" ): Variant = {
382
+ val likelihoods = Likelihoods (ads)
383
+ val alleles = AlleleSet (ref= SimpleAllele (ref), alts= IndexedSeq (Allele (alt1), Allele (alt2)))
384
+ Variant (chrom= " 1" ,
385
+ pos= 10 ,
386
+ alleles= alleles,
387
+ genotypes= Map (sample -> makeTriallelicGt(ref= ref, alt1= alt1, alt2= alt2, ads= ads, sample= sample)))
388
+ }
389
+
390
+ it should " return ref,alt1 for a tri-allelic variant if those alleles have the highest depth" in {
391
+ val variant = makeTriallelicVariant(ref= " A" , alt1= " T" , alt2= " G" , ads= IndexedSeq (100 , 100 , 0 ))
392
+ val newVariant = downsampleAndRegenotype(variant= variant, proportions = Map (" test" -> 0.1 ), random = new Random (42 ), epsilon = 0.01 )
393
+ val expected = IndexedSeq (Allele (" A" ), Allele (" T" ))
394
+ newVariant.genotypes(" test" ).calls should contain theSameElementsInOrderAs expected
395
+ }
396
+
397
+ it should " return alt1,alt2 for a tri-allelic variant if those alleles have the highest depth" in {
398
+ val variant = makeTriallelicVariant(ref= " A" , alt1= " T" , alt2= " G" , ads= IndexedSeq (0 , 100 , 100 ))
399
+ val newVariant = downsampleAndRegenotype(variant= variant, proportions = Map (" test" -> 0.1 ), random = new Random (42 ), epsilon = 0.01 )
400
+ val expected = IndexedSeq (Allele (" T" ), Allele (" G" ))
401
+ newVariant.genotypes(" test" ).calls should contain theSameElementsInOrderAs expected
402
+ }
403
+
348
404
private val sample = " test1"
349
405
private val builder = VcfBuilder (samples= Seq (sample))
350
406
builder.add(chrom= " chr1" , pos= 100 , id= " 1" , alleles= Seq (" A" , " C" ), info= Map (),
0 commit comments